processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java - druid - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.druid.query.filter;

 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import com.fasterxml.jackson.annotation.JsonInclude;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
 import com.google.common.base.Predicate;
 import com.google.common.base.Supplier;
 import com.google.common.base.Suppliers;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Range;
 import com.google.common.collect.RangeSet;
 import com.google.common.collect.Sets;
 import com.google.common.collect.TreeRangeSet;
 import com.google.common.hash.Hasher;
 import com.google.common.hash.Hashing;
 import it.unimi.dsi.fastutil.ints.IntArrayList;
 import it.unimi.dsi.fastutil.ints.IntIterable;
 import it.unimi.dsi.fastutil.ints.IntIterator;
 import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
 import it.unimi.dsi.fastutil.longs.LongArrayList;
 import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
 import org.apache.druid.collections.bitmap.ImmutableBitmap;
 import org.apache.druid.common.config.NullHandling;
 import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.StringUtils;
 import org.apache.druid.java.util.common.guava.Comparators;
 import org.apache.druid.query.BitmapResultFactory;
 import org.apache.druid.query.cache.CacheKeyBuilder;
 import org.apache.druid.query.extraction.ExtractionFn;
 import org.apache.druid.query.filter.vector.VectorValueMatcher;
 import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory;
 import org.apache.druid.query.lookup.LookupExtractionFn;
 import org.apache.druid.query.lookup.LookupExtractor;
 import org.apache.druid.segment.ColumnInspector;
 import org.apache.druid.segment.ColumnProcessors;
 import org.apache.druid.segment.ColumnSelector;
 import org.apache.druid.segment.ColumnSelectorFactory;
 import org.apache.druid.segment.DimensionHandlerUtils;
 import org.apache.druid.segment.IntIteratorUtils;
 import org.apache.druid.segment.column.BitmapIndex;
 import org.apache.druid.segment.filter.Filters;
 import org.apache.druid.segment.vector.VectorColumnSelectorFactory;

 import javax.annotation.Nullable;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
 import java.util.SortedSet;

 public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
 {
   // Values can contain `null` object
   private final Set<String> values;
   private final String dimension;
   @Nullable
   private final ExtractionFn extractionFn;
   @Nullable
   private final FilterTuning filterTuning;
   private final DruidPredicateFactory predicateFactory;

   @JsonIgnore
   private final Supplier<byte[]> cacheKeySupplier;

   /**
    * Creates a new filter.
    *
    * @param dimension    column to search
    * @param values       set of values to match. This collection may be reused to avoid copying a big collection.
    *                     Therefore, callers should <b>not</b> modify the collection after it is passed to this
    *                     constructor.
    * @param extractionFn extraction function to apply to the column before checking against "values"
    * @param filterTuning optional tuning
    */
   @JsonCreator
   public InDimFilter(
       @JsonProperty("dimension") String dimension,
       // This 'values' collection instance can be reused if possible to avoid copying a big collection.
       // Callers should _not_ modify the collection after it is passed to this constructor.
       @JsonProperty("values") Set<String> values,
       @JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn,
       @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning
   )
   {
     this(
         dimension,
         values,
         extractionFn,
         filterTuning,
         null
     );
   }

   /**
    * Creates a new filter without an extraction function or any special filter tuning.
    *
    * @param dimension column to search
    * @param values    set of values to match. This collection may be reused to avoid copying a big collection.
    *                  Therefore, callers should <b>not</b> modify the collection after it is passed to this
    *                  constructor.
    */
   public InDimFilter(
       String dimension,
       Set<String> values
   )
   {
     this(
         dimension,
         values,
         null,
         null
     );
   }

   /**
    * This constructor should be called only in unit tests since accepting a Collection makes copying more likely.
    */
   @VisibleForTesting
   public InDimFilter(String dimension, Collection<String> values, @Nullable ExtractionFn extractionFn)
   {
     this(
         dimension,
         values instanceof Set ? (Set<String>) values : new HashSet<>(values),
         extractionFn,
         null,
         null
     );
   }

   /**
    * Internal constructor.
    */
   private InDimFilter(
       final String dimension,
       final Set<String> values,
       @Nullable final ExtractionFn extractionFn,
       @Nullable final FilterTuning filterTuning,
       @Nullable final DruidPredicateFactory predicateFactory
   )
   {
     Preconditions.checkNotNull(values, "values cannot be null");

     // The values set can be huge. Try to avoid copying the set if possible.
     // Note that we may still need to copy values to a list for caching. See getCacheKey().
     if (!NullHandling.sqlCompatible() && values.contains("")) {
       // In Non sql compatible mode, empty strings should be converted to nulls for the filter.
       // In sql compatible mode, empty strings and nulls should be treated differently
       this.values = Sets.newHashSetWithExpectedSize(values.size());
       for (String v : values) {
         this.values.add(NullHandling.emptyToNullIfNeeded(v));
       }
     } else {
       this.values = values;
     }

     this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null");
     this.extractionFn = extractionFn;
     this.filterTuning = filterTuning;

     if (predicateFactory != null) {
       this.predicateFactory = predicateFactory;
     } else {
       this.predicateFactory = new InFilterDruidPredicateFactory(extractionFn, this.values);
     }

     this.cacheKeySupplier = Suppliers.memoize(this::computeCacheKey);
   }

   @JsonProperty
   public String getDimension()
   {
     return dimension;
   }

   @JsonProperty
   public Set<String> getValues()
   {
     return values;
   }

   @Nullable
   @JsonInclude(JsonInclude.Include.NON_NULL)
   @JsonProperty
   public ExtractionFn getExtractionFn()
   {
     return extractionFn;
   }

   @Nullable
   @JsonInclude(JsonInclude.Include.NON_NULL)
   @JsonProperty
   public FilterTuning getFilterTuning()
   {
     return filterTuning;
   }

   @Override
   public byte[] getCacheKey()
   {
     return cacheKeySupplier.get();
   }

   @Override
   public DimFilter optimize()
   {
     InDimFilter inFilter = optimizeLookup();

     if (inFilter.values.isEmpty()) {
       return FalseDimFilter.instance();
     }
     if (inFilter.values.size() == 1) {
       return new SelectorDimFilter(
           inFilter.dimension,
           inFilter.values.iterator().next(),
           inFilter.getExtractionFn(),
           filterTuning
       );
     }
     return inFilter;
   }

   @Override
   public Filter toFilter()
   {
     return this;
   }

   @Nullable
   @Override
   public RangeSet<String> getDimensionRangeSet(String dimension)
   {
     if (!Objects.equals(getDimension(), dimension) || getExtractionFn() != null) {
       return null;
     }
     RangeSet<String> retSet = TreeRangeSet.create();
     for (String value : values) {
       String valueEquivalent = NullHandling.nullToEmptyIfNeeded(value);
       if (valueEquivalent == null) {
         // Case when SQL compatible null handling is enabled
         // Range.singleton(null) is invalid, so use the fact that
         // only null values are less than empty string.
         retSet.add(Range.lessThan(""));
       } else {
         retSet.add(Range.singleton(valueEquivalent));
       }
     }
     return retSet;
   }

   @Override
   public Set<String> getRequiredColumns()
   {
     return ImmutableSet.of(dimension);
   }

   @Override
   public <T> T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory<T> bitmapResultFactory)
   {
     if (extractionFn == null) {
       final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension);
       return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(values, bitmapIndex));
     } else {
       return Filters.matchPredicate(
           dimension,
           selector,
           bitmapResultFactory,
           predicateFactory.makeStringPredicate()
       );
     }
   }

   @Override
   public double estimateSelectivity(BitmapIndexSelector indexSelector)
   {
     if (extractionFn == null) {
       final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension);
       return Filters.estimateSelectivity(
           bitmapIndex,
           IntIteratorUtils.toIntList(getBitmapIndexIterable(values, bitmapIndex).iterator()),
           indexSelector.getNumRows()
       );
     } else {
       return Filters.estimateSelectivity(
           dimension,
           indexSelector,
           predicateFactory.makeStringPredicate()
       );
     }
   }

   @Override
   public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
   {
     return Filters.makeValueMatcher(factory, dimension, predicateFactory);
   }

   @Override
   public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory)
   {
     return ColumnProcessors.makeVectorProcessor(
         dimension,
         VectorValueMatcherColumnProcessorFactory.instance(),
         factory
     ).makeMatcher(predicateFactory);
   }

   @Override
   public boolean canVectorizeMatcher(ColumnInspector inspector)
   {
     return true;
   }

   @Override
   public boolean supportsRequiredColumnRewrite()
   {
     return true;
   }

   @Override
   public Filter rewriteRequiredColumns(Map<String, String> columnRewrites)
   {
     String rewriteDimensionTo = columnRewrites.get(dimension);
     if (rewriteDimensionTo == null) {
       throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension);
     }

     if (rewriteDimensionTo.equals(dimension)) {
       return this;
     } else {
       return new InDimFilter(
           rewriteDimensionTo,
           values,
           extractionFn,
           filterTuning,
           predicateFactory
       );
     }
   }

   @Override
   public boolean supportsBitmapIndex(BitmapIndexSelector selector)
   {
     return selector.getBitmapIndex(dimension) != null;
   }

   @Override
   public boolean shouldUseBitmapIndex(BitmapIndexSelector selector)
   {
     return Filters.shouldUseBitmapIndex(this, selector, filterTuning);
   }

   @Override
   public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector)
   {
     return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector);
   }

   @Override
   public String toString()
   {
     final DimFilterToStringBuilder builder = new DimFilterToStringBuilder();
     return builder.appendDimension(dimension, extractionFn)
                   .append(" IN (")
                   .append(Joiner.on(", ").join(Iterables.transform(values, StringUtils::nullToEmptyNonDruidDataString)))
                   .append(")")
                   .appendFilterTuning(filterTuning)
                   .build();
   }

   @Override
   public boolean equals(Object o)
   {
     if (this == o) {
       return true;
     }
     if (o == null || getClass() != o.getClass()) {
       return false;
     }
     InDimFilter that = (InDimFilter) o;
     return values.equals(that.values) &&
            dimension.equals(that.dimension) &&
            Objects.equals(extractionFn, that.extractionFn) &&
            Objects.equals(filterTuning, that.filterTuning);
   }

   @Override
   public int hashCode()
   {
     return Objects.hash(values, dimension, extractionFn, filterTuning);
   }

   private byte[] computeCacheKey()
   {
     final Collection<String> sortedValues;

     if (values instanceof SortedSet && isNaturalOrder(((SortedSet<String>) values).comparator())) {
       // Avoid copying "values" when it is already in the order we need for cache key computation.
       sortedValues = values;
     } else {
       final List<String> sortedValuesList = new ArrayList<>(values);
       sortedValuesList.sort(Comparators.naturalNullsFirst());
       sortedValues = sortedValuesList;
     }

     // Hash all values, in sorted order, as their length followed by their content.
     final Hasher hasher = Hashing.sha256().newHasher();
     for (String v : sortedValues) {
       if (v == null) {
         // Encode null as length -1, no content.
         hasher.putInt(-1);
       } else {
         hasher.putInt(v.length());
         hasher.putString(v, StandardCharsets.UTF_8);
       }
     }

     return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
         .appendString(dimension)
         .appendByte(DimFilterUtils.STRING_SEPARATOR)
         .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey())
         .appendByte(DimFilterUtils.STRING_SEPARATOR)
         .appendByteArray(hasher.hash().asBytes())
         .build();
   }

   private InDimFilter optimizeLookup()
   {
     if (extractionFn instanceof LookupExtractionFn
         && ((LookupExtractionFn) extractionFn).isOptimize()) {
       LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
       LookupExtractor lookup = exFn.getLookup();

       final Set<String> keys = new HashSet<>();
       for (String value : values) {

         // We cannot do an unapply()-based optimization if the selector value
         // and the replaceMissingValuesWith value are the same, since we have to match on
         // all values that are not present in the lookup.
         final String convertedValue = NullHandling.emptyToNullIfNeeded(value);
         if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
           return this;
         }
         keys.addAll(lookup.unapply(convertedValue));

         // If retainMissingValues is true and the selector value is not in the lookup map,
         // there may be row values that match the selector value but are not included
         // in the lookup map. Match on the selector value as well.
         // If the selector value is overwritten in the lookup map, don't add selector value to keys.
         if (exFn.isRetainMissingValue() && NullHandling.isNullOrEquivalent(lookup.apply(convertedValue))) {
           keys.add(convertedValue);
         }
       }

       if (keys.isEmpty()) {
         return this;
       } else {
         return new InDimFilter(dimension, keys, null, filterTuning);
       }
     }
     return this;
   }

   /**
    * Returns true if the comparator is null or the singleton {@link Comparators#naturalNullsFirst()}. Useful for
    * detecting if a sorted set is in natural order or not.
    *
    * May return false negatives (i.e. there are naturally-ordered comparators that will return false here).
    */
   private static <T> boolean isNaturalOrder(@Nullable final Comparator<T> comparator)
   {
     return comparator == null || Comparators.naturalNullsFirst().equals(comparator);
   }

   private static Iterable<ImmutableBitmap> getBitmapIterable(final Set<String> values, final BitmapIndex bitmapIndex)
   {
     return Filters.bitmapsFromIndexes(getBitmapIndexIterable(values, bitmapIndex), bitmapIndex);
   }

   private static IntIterable getBitmapIndexIterable(final Set<String> values, final BitmapIndex bitmapIndex)
   {
     return () -> new IntIterator()
     {
       final Iterator<String> iterator = values.iterator();

       @Override
       public boolean hasNext()
       {
         return iterator.hasNext();
       }

       @Override
       public int nextInt()
       {
         return bitmapIndex.getIndex(iterator.next());
       }
     };
   }

   @SuppressWarnings("ReturnValueIgnored")
   private static Predicate<String> createStringPredicate(final Set<String> values)
   {
     Preconditions.checkNotNull(values, "values");

     try {
       // Check to see if values.contains(null) will throw a NullPointerException. Jackson JSON deserialization won't
       // lead to this (it will create a HashSet, which can accept nulls). But when InDimFilters are created
       // programmatically as a result of optimizations like rewriting inner joins as filters, the passed-in Set may
       // not be able to accept nulls. We don't want to copy the Sets (since they may be large) so instead we'll wrap
       // it in a null-checking lambda if needed.
       values.contains(null);

       // Safe to do values.contains(null).
       return values::contains;
     }
     catch (NullPointerException ignored) {
       // Fall through
     }

     // Not safe to do values.contains(null); must return a wrapper.
     // Return false for null, since an exception means the set cannot accept null (and therefore does not include it).
     return value -> value != null && values.contains(value);
   }

   private static DruidLongPredicate createLongPredicate(final Set<String> values)
   {
     LongArrayList longs = new LongArrayList(values.size());
     for (String value : values) {
       final Long longValue = DimensionHandlerUtils.convertObjectToLong(value);
       if (longValue != null) {
         longs.add((long) longValue);
       }
     }

     final LongOpenHashSet longHashSet = new LongOpenHashSet(longs);
     return longHashSet::contains;
   }

   private static DruidFloatPredicate createFloatPredicate(final Set<String> values)
   {
     IntArrayList floatBits = new IntArrayList(values.size());
     for (String value : values) {
       Float floatValue = DimensionHandlerUtils.convertObjectToFloat(value);
       if (floatValue != null) {
         floatBits.add(Float.floatToIntBits(floatValue));
       }
     }

     final IntOpenHashSet floatBitsHashSet = new IntOpenHashSet(floatBits);
     return input -> floatBitsHashSet.contains(Float.floatToIntBits(input));
   }

   private static DruidDoublePredicate createDoublePredicate(final Set<String> values)
   {
     LongArrayList doubleBits = new LongArrayList(values.size());
     for (String value : values) {
       Double doubleValue = DimensionHandlerUtils.convertObjectToDouble(value);
       if (doubleValue != null) {
         doubleBits.add(Double.doubleToLongBits((doubleValue)));
       }
     }

     final LongOpenHashSet doubleBitsHashSet = new LongOpenHashSet(doubleBits);
     return input -> doubleBitsHashSet.contains(Double.doubleToLongBits(input));
   }

   @VisibleForTesting
   public static class InFilterDruidPredicateFactory implements DruidPredicateFactory
   {
     private final ExtractionFn extractionFn;
     private final Set<String> values;
     private final Supplier<Predicate<String>> stringPredicateSupplier;
     private final Supplier<DruidLongPredicate> longPredicateSupplier;
     private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
     private final Supplier<DruidDoublePredicate> doublePredicateSupplier;

     InFilterDruidPredicateFactory(
         final ExtractionFn extractionFn,
         final Set<String> values
     )
     {
       this.extractionFn = extractionFn;
       this.values = values;

       // As the set of filtered values can be large, parsing them as numbers should be done only if needed, and
       // only once. Pass in a common long predicate supplier to all filters created by .toFilter(), so that we only
       // compute the long hashset/array once per query. This supplier must be thread-safe, since this DimFilter will be
       // accessed in the query runners.
       this.stringPredicateSupplier = Suppliers.memoize(() -> createStringPredicate(values));
       this.longPredicateSupplier = Suppliers.memoize(() -> createLongPredicate(values));
       this.floatPredicateSupplier = Suppliers.memoize(() -> createFloatPredicate(values));
       this.doublePredicateSupplier = Suppliers.memoize(() -> createDoublePredicate(values));
     }

     @Override
     public Predicate<String> makeStringPredicate()
     {
       if (extractionFn != null) {
         final Predicate<String> stringPredicate = stringPredicateSupplier.get();
         return input -> stringPredicate.apply(extractionFn.apply(input));
       } else {
         return stringPredicateSupplier.get();
       }
     }

     @Override
     public DruidLongPredicate makeLongPredicate()
     {
       if (extractionFn != null) {
         final Predicate<String> stringPredicate = stringPredicateSupplier.get();
         return input -> stringPredicate.apply(extractionFn.apply(input));
       } else {
         return longPredicateSupplier.get();
       }
     }

     @Override
     public DruidFloatPredicate makeFloatPredicate()
     {
       if (extractionFn != null) {
         final Predicate<String> stringPredicate = stringPredicateSupplier.get();
         return input -> stringPredicate.apply(extractionFn.apply(input));
       } else {
         return floatPredicateSupplier.get();
       }
     }

     @Override
     public DruidDoublePredicate makeDoublePredicate()
     {
       if (extractionFn != null) {
         final Predicate<String> stringPredicate = stringPredicateSupplier.get();
         return input -> stringPredicate.apply(extractionFn.apply(input));
       } else {
         return doublePredicateSupplier.get();
       }
     }

     @Override
     public boolean equals(Object o)
     {
       if (this == o) {
         return true;
       }
       if (o == null || getClass() != o.getClass()) {
         return false;
       }
       InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o;
       return Objects.equals(extractionFn, that.extractionFn) &&
              Objects.equals(values, that.values);
     }

     @Override
     public int hashCode()
     {
       return Objects.hash(extractionFn, values);
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.druid.query.filter;

	import com.fasterxml.jackson.annotation.JsonCreator;
	import com.fasterxml.jackson.annotation.JsonIgnore;
	import com.fasterxml.jackson.annotation.JsonInclude;
	import com.fasterxml.jackson.annotation.JsonProperty;
	import com.google.common.annotations.VisibleForTesting;
	import com.google.common.base.Joiner;
	import com.google.common.base.Preconditions;
	import com.google.common.base.Predicate;
	import com.google.common.base.Supplier;
	import com.google.common.base.Suppliers;
	import com.google.common.collect.ImmutableSet;
	import com.google.common.collect.Iterables;
	import com.google.common.collect.Range;
	import com.google.common.collect.RangeSet;
	import com.google.common.collect.Sets;
	import com.google.common.collect.TreeRangeSet;
	import com.google.common.hash.Hasher;
	import com.google.common.hash.Hashing;
	import it.unimi.dsi.fastutil.ints.IntArrayList;
	import it.unimi.dsi.fastutil.ints.IntIterable;
	import it.unimi.dsi.fastutil.ints.IntIterator;
	import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
	import it.unimi.dsi.fastutil.longs.LongArrayList;
	import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
	import org.apache.druid.collections.bitmap.ImmutableBitmap;
	import org.apache.druid.common.config.NullHandling;
	import org.apache.druid.java.util.common.IAE;
	import org.apache.druid.java.util.common.StringUtils;
	import org.apache.druid.java.util.common.guava.Comparators;
	import org.apache.druid.query.BitmapResultFactory;
	import org.apache.druid.query.cache.CacheKeyBuilder;
	import org.apache.druid.query.extraction.ExtractionFn;
	import org.apache.druid.query.filter.vector.VectorValueMatcher;
	import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory;
	import org.apache.druid.query.lookup.LookupExtractionFn;
	import org.apache.druid.query.lookup.LookupExtractor;
	import org.apache.druid.segment.ColumnInspector;
	import org.apache.druid.segment.ColumnProcessors;
	import org.apache.druid.segment.ColumnSelector;
	import org.apache.druid.segment.ColumnSelectorFactory;
	import org.apache.druid.segment.DimensionHandlerUtils;
	import org.apache.druid.segment.IntIteratorUtils;
	import org.apache.druid.segment.column.BitmapIndex;
	import org.apache.druid.segment.filter.Filters;
	import org.apache.druid.segment.vector.VectorColumnSelectorFactory;

	import javax.annotation.Nullable;
	import java.nio.charset.StandardCharsets;
	import java.util.ArrayList;
	import java.util.Collection;
	import java.util.Comparator;
	import java.util.HashSet;
	import java.util.Iterator;
	import java.util.List;
	import java.util.Map;
	import java.util.Objects;
	import java.util.Set;
	import java.util.SortedSet;

	public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
	{
	// Values can contain `null` object
	private final Set<String> values;
	private final String dimension;
	@Nullable
	private final ExtractionFn extractionFn;
	@Nullable
	private final FilterTuning filterTuning;
	private final DruidPredicateFactory predicateFactory;

	@JsonIgnore
	private final Supplier<byte[]> cacheKeySupplier;

	/**
	* Creates a new filter.
	*
	* @param dimension column to search
	* @param values set of values to match. This collection may be reused to avoid copying a big collection.
	* Therefore, callers should <b>not</b> modify the collection after it is passed to this
	* constructor.
	* @param extractionFn extraction function to apply to the column before checking against "values"
	* @param filterTuning optional tuning
	*/
	@JsonCreator
	public InDimFilter(
	@JsonProperty("dimension") String dimension,
	// This 'values' collection instance can be reused if possible to avoid copying a big collection.
	// Callers should _not_ modify the collection after it is passed to this constructor.
	@JsonProperty("values") Set<String> values,
	@JsonProperty("extractionFn") @Nullable ExtractionFn extractionFn,
	@JsonProperty("filterTuning") @Nullable FilterTuning filterTuning
	)
	{
	this(
	dimension,
	values,
	extractionFn,
	filterTuning,
	null
	);
	}

	/**
	* Creates a new filter without an extraction function or any special filter tuning.
	*
	* @param dimension column to search
	* @param values set of values to match. This collection may be reused to avoid copying a big collection.
	* Therefore, callers should <b>not</b> modify the collection after it is passed to this
	* constructor.
	*/
	public InDimFilter(
	String dimension,
	Set<String> values
	)
	{
	this(
	dimension,
	values,
	null,
	null
	);
	}

	/**
	* This constructor should be called only in unit tests since accepting a Collection makes copying more likely.
	*/
	@VisibleForTesting
	public InDimFilter(String dimension, Collection<String> values, @Nullable ExtractionFn extractionFn)
	{
	this(
	dimension,
	values instanceof Set ? (Set<String>) values : new HashSet<>(values),
	extractionFn,
	null,
	null
	);
	}

	/**
	* Internal constructor.
	*/
	private InDimFilter(
	final String dimension,
	final Set<String> values,
	@Nullable final ExtractionFn extractionFn,
	@Nullable final FilterTuning filterTuning,
	@Nullable final DruidPredicateFactory predicateFactory
	)
	{
	Preconditions.checkNotNull(values, "values cannot be null");

	// The values set can be huge. Try to avoid copying the set if possible.
	// Note that we may still need to copy values to a list for caching. See getCacheKey().
	if (!NullHandling.sqlCompatible() && values.contains("")) {
	// In Non sql compatible mode, empty strings should be converted to nulls for the filter.
	// In sql compatible mode, empty strings and nulls should be treated differently
	this.values = Sets.newHashSetWithExpectedSize(values.size());
	for (String v : values) {
	this.values.add(NullHandling.emptyToNullIfNeeded(v));
	}
	} else {
	this.values = values;
	}

	this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null");
	this.extractionFn = extractionFn;
	this.filterTuning = filterTuning;

	if (predicateFactory != null) {
	this.predicateFactory = predicateFactory;
	} else {
	this.predicateFactory = new InFilterDruidPredicateFactory(extractionFn, this.values);
	}

	this.cacheKeySupplier = Suppliers.memoize(this::computeCacheKey);
	}

	@JsonProperty
	public String getDimension()
	{
	return dimension;
	}

	@JsonProperty
	public Set<String> getValues()
	{
	return values;
	}

	@Nullable
	@JsonInclude(JsonInclude.Include.NON_NULL)
	@JsonProperty
	public ExtractionFn getExtractionFn()
	{
	return extractionFn;
	}

	@Nullable
	@JsonInclude(JsonInclude.Include.NON_NULL)
	@JsonProperty
	public FilterTuning getFilterTuning()
	{
	return filterTuning;
	}

	@Override
	public byte[] getCacheKey()
	{
	return cacheKeySupplier.get();
	}

	@Override
	public DimFilter optimize()
	{
	InDimFilter inFilter = optimizeLookup();

	if (inFilter.values.isEmpty()) {
	return FalseDimFilter.instance();
	}
	if (inFilter.values.size() == 1) {
	return new SelectorDimFilter(
	inFilter.dimension,
	inFilter.values.iterator().next(),
	inFilter.getExtractionFn(),
	filterTuning
	);
	}
	return inFilter;
	}

	@Override
	public Filter toFilter()
	{
	return this;
	}

	@Nullable
	@Override
	public RangeSet<String> getDimensionRangeSet(String dimension)
	{
	if (!Objects.equals(getDimension(), dimension) \|\| getExtractionFn() != null) {
	return null;
	}
	RangeSet<String> retSet = TreeRangeSet.create();
	for (String value : values) {
	String valueEquivalent = NullHandling.nullToEmptyIfNeeded(value);
	if (valueEquivalent == null) {
	// Case when SQL compatible null handling is enabled
	// Range.singleton(null) is invalid, so use the fact that
	// only null values are less than empty string.
	retSet.add(Range.lessThan(""));
	} else {
	retSet.add(Range.singleton(valueEquivalent));
	}
	}
	return retSet;
	}

	@Override
	public Set<String> getRequiredColumns()
	{
	return ImmutableSet.of(dimension);
	}

	@Override
	public <T> T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory<T> bitmapResultFactory)
	{
	if (extractionFn == null) {
	final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension);
	return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(values, bitmapIndex));
	} else {
	return Filters.matchPredicate(
	dimension,
	selector,
	bitmapResultFactory,
	predicateFactory.makeStringPredicate()
	);
	}
	}

	@Override
	public double estimateSelectivity(BitmapIndexSelector indexSelector)
	{
	if (extractionFn == null) {
	final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension);
	return Filters.estimateSelectivity(
	bitmapIndex,
	IntIteratorUtils.toIntList(getBitmapIndexIterable(values, bitmapIndex).iterator()),
	indexSelector.getNumRows()
	);
	} else {
	return Filters.estimateSelectivity(
	dimension,
	indexSelector,
	predicateFactory.makeStringPredicate()
	);
	}
	}

	@Override
	public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
	{
	return Filters.makeValueMatcher(factory, dimension, predicateFactory);
	}

	@Override
	public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory)
	{
	return ColumnProcessors.makeVectorProcessor(
	dimension,
	VectorValueMatcherColumnProcessorFactory.instance(),
	factory
	).makeMatcher(predicateFactory);
	}

	@Override
	public boolean canVectorizeMatcher(ColumnInspector inspector)
	{
	return true;
	}

	@Override
	public boolean supportsRequiredColumnRewrite()
	{
	return true;
	}

	@Override
	public Filter rewriteRequiredColumns(Map<String, String> columnRewrites)
	{
	String rewriteDimensionTo = columnRewrites.get(dimension);
	if (rewriteDimensionTo == null) {
	throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension);
	}

	if (rewriteDimensionTo.equals(dimension)) {
	return this;
	} else {
	return new InDimFilter(
	rewriteDimensionTo,
	values,
	extractionFn,
	filterTuning,
	predicateFactory
	);
	}
	}

	@Override
	public boolean supportsBitmapIndex(BitmapIndexSelector selector)
	{
	return selector.getBitmapIndex(dimension) != null;
	}

	@Override
	public boolean shouldUseBitmapIndex(BitmapIndexSelector selector)
	{
	return Filters.shouldUseBitmapIndex(this, selector, filterTuning);
	}

	@Override
	public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector)
	{
	return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector);
	}

	@Override
	public String toString()
	{
	final DimFilterToStringBuilder builder = new DimFilterToStringBuilder();
	return builder.appendDimension(dimension, extractionFn)
	.append(" IN (")
	.append(Joiner.on(", ").join(Iterables.transform(values, StringUtils::nullToEmptyNonDruidDataString)))
	.append(")")
	.appendFilterTuning(filterTuning)
	.build();
	}

	@Override
	public boolean equals(Object o)
	{
	if (this == o) {
	return true;
	}
	if (o == null \|\| getClass() != o.getClass()) {
	return false;
	}
	InDimFilter that = (InDimFilter) o;
	return values.equals(that.values) &&
	dimension.equals(that.dimension) &&
	Objects.equals(extractionFn, that.extractionFn) &&
	Objects.equals(filterTuning, that.filterTuning);
	}

	@Override
	public int hashCode()
	{
	return Objects.hash(values, dimension, extractionFn, filterTuning);
	}

	private byte[] computeCacheKey()
	{
	final Collection<String> sortedValues;

	if (values instanceof SortedSet && isNaturalOrder(((SortedSet<String>) values).comparator())) {
	// Avoid copying "values" when it is already in the order we need for cache key computation.
	sortedValues = values;
	} else {
	final List<String> sortedValuesList = new ArrayList<>(values);
	sortedValuesList.sort(Comparators.naturalNullsFirst());
	sortedValues = sortedValuesList;
	}

	// Hash all values, in sorted order, as their length followed by their content.
	final Hasher hasher = Hashing.sha256().newHasher();
	for (String v : sortedValues) {
	if (v == null) {
	// Encode null as length -1, no content.
	hasher.putInt(-1);
	} else {
	hasher.putInt(v.length());
	hasher.putString(v, StandardCharsets.UTF_8);
	}
	}

	return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
	.appendString(dimension)
	.appendByte(DimFilterUtils.STRING_SEPARATOR)
	.appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey())
	.appendByte(DimFilterUtils.STRING_SEPARATOR)
	.appendByteArray(hasher.hash().asBytes())
	.build();
	}

	private InDimFilter optimizeLookup()
	{
	if (extractionFn instanceof LookupExtractionFn
	&& ((LookupExtractionFn) extractionFn).isOptimize()) {
	LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
	LookupExtractor lookup = exFn.getLookup();

	final Set<String> keys = new HashSet<>();
	for (String value : values) {

	// We cannot do an unapply()-based optimization if the selector value
	// and the replaceMissingValuesWith value are the same, since we have to match on
	// all values that are not present in the lookup.
	final String convertedValue = NullHandling.emptyToNullIfNeeded(value);
	if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
	return this;
	}
	keys.addAll(lookup.unapply(convertedValue));

	// If retainMissingValues is true and the selector value is not in the lookup map,
	// there may be row values that match the selector value but are not included
	// in the lookup map. Match on the selector value as well.
	// If the selector value is overwritten in the lookup map, don't add selector value to keys.
	if (exFn.isRetainMissingValue() && NullHandling.isNullOrEquivalent(lookup.apply(convertedValue))) {
	keys.add(convertedValue);
	}
	}

	if (keys.isEmpty()) {
	return this;
	} else {
	return new InDimFilter(dimension, keys, null, filterTuning);
	}
	}
	return this;
	}

	/**
	* Returns true if the comparator is null or the singleton {@link Comparators#naturalNullsFirst()}. Useful for
	* detecting if a sorted set is in natural order or not.
	*
	* May return false negatives (i.e. there are naturally-ordered comparators that will return false here).
	*/
	private static <T> boolean isNaturalOrder(@Nullable final Comparator<T> comparator)
	{
	return comparator == null \|\| Comparators.naturalNullsFirst().equals(comparator);
	}

	private static Iterable<ImmutableBitmap> getBitmapIterable(final Set<String> values, final BitmapIndex bitmapIndex)
	{
	return Filters.bitmapsFromIndexes(getBitmapIndexIterable(values, bitmapIndex), bitmapIndex);
	}

	private static IntIterable getBitmapIndexIterable(final Set<String> values, final BitmapIndex bitmapIndex)
	{
	return () -> new IntIterator()
	{
	final Iterator<String> iterator = values.iterator();

	@Override
	public boolean hasNext()
	{
	return iterator.hasNext();
	}

	@Override
	public int nextInt()
	{
	return bitmapIndex.getIndex(iterator.next());
	}
	};
	}

	@SuppressWarnings("ReturnValueIgnored")
	private static Predicate<String> createStringPredicate(final Set<String> values)
	{
	Preconditions.checkNotNull(values, "values");

	try {
	// Check to see if values.contains(null) will throw a NullPointerException. Jackson JSON deserialization won't
	// lead to this (it will create a HashSet, which can accept nulls). But when InDimFilters are created
	// programmatically as a result of optimizations like rewriting inner joins as filters, the passed-in Set may
	// not be able to accept nulls. We don't want to copy the Sets (since they may be large) so instead we'll wrap
	// it in a null-checking lambda if needed.
	values.contains(null);

	// Safe to do values.contains(null).
	return values::contains;
	}
	catch (NullPointerException ignored) {
	// Fall through
	}

	// Not safe to do values.contains(null); must return a wrapper.
	// Return false for null, since an exception means the set cannot accept null (and therefore does not include it).
	return value -> value != null && values.contains(value);
	}

	private static DruidLongPredicate createLongPredicate(final Set<String> values)
	{
	LongArrayList longs = new LongArrayList(values.size());
	for (String value : values) {
	final Long longValue = DimensionHandlerUtils.convertObjectToLong(value);
	if (longValue != null) {
	longs.add((long) longValue);
	}
	}

	final LongOpenHashSet longHashSet = new LongOpenHashSet(longs);
	return longHashSet::contains;
	}

	private static DruidFloatPredicate createFloatPredicate(final Set<String> values)
	{
	IntArrayList floatBits = new IntArrayList(values.size());
	for (String value : values) {
	Float floatValue = DimensionHandlerUtils.convertObjectToFloat(value);
	if (floatValue != null) {
	floatBits.add(Float.floatToIntBits(floatValue));
	}
	}

	final IntOpenHashSet floatBitsHashSet = new IntOpenHashSet(floatBits);
	return input -> floatBitsHashSet.contains(Float.floatToIntBits(input));
	}

	private static DruidDoublePredicate createDoublePredicate(final Set<String> values)
	{
	LongArrayList doubleBits = new LongArrayList(values.size());
	for (String value : values) {
	Double doubleValue = DimensionHandlerUtils.convertObjectToDouble(value);
	if (doubleValue != null) {
	doubleBits.add(Double.doubleToLongBits((doubleValue)));
	}
	}

	final LongOpenHashSet doubleBitsHashSet = new LongOpenHashSet(doubleBits);
	return input -> doubleBitsHashSet.contains(Double.doubleToLongBits(input));
	}

	@VisibleForTesting
	public static class InFilterDruidPredicateFactory implements DruidPredicateFactory
	{
	private final ExtractionFn extractionFn;
	private final Set<String> values;
	private final Supplier<Predicate<String>> stringPredicateSupplier;
	private final Supplier<DruidLongPredicate> longPredicateSupplier;
	private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
	private final Supplier<DruidDoublePredicate> doublePredicateSupplier;

	InFilterDruidPredicateFactory(
	final ExtractionFn extractionFn,
	final Set<String> values
	)
	{
	this.extractionFn = extractionFn;
	this.values = values;

	// As the set of filtered values can be large, parsing them as numbers should be done only if needed, and
	// only once. Pass in a common long predicate supplier to all filters created by .toFilter(), so that we only
	// compute the long hashset/array once per query. This supplier must be thread-safe, since this DimFilter will be
	// accessed in the query runners.
	this.stringPredicateSupplier = Suppliers.memoize(() -> createStringPredicate(values));
	this.longPredicateSupplier = Suppliers.memoize(() -> createLongPredicate(values));
	this.floatPredicateSupplier = Suppliers.memoize(() -> createFloatPredicate(values));
	this.doublePredicateSupplier = Suppliers.memoize(() -> createDoublePredicate(values));
	}

	@Override
	public Predicate<String> makeStringPredicate()
	{
	if (extractionFn != null) {
	final Predicate<String> stringPredicate = stringPredicateSupplier.get();
	return input -> stringPredicate.apply(extractionFn.apply(input));
	} else {
	return stringPredicateSupplier.get();
	}
	}

	@Override
	public DruidLongPredicate makeLongPredicate()
	{
	if (extractionFn != null) {
	final Predicate<String> stringPredicate = stringPredicateSupplier.get();
	return input -> stringPredicate.apply(extractionFn.apply(input));
	} else {
	return longPredicateSupplier.get();
	}
	}

	@Override
	public DruidFloatPredicate makeFloatPredicate()
	{
	if (extractionFn != null) {
	final Predicate<String> stringPredicate = stringPredicateSupplier.get();
	return input -> stringPredicate.apply(extractionFn.apply(input));
	} else {
	return floatPredicateSupplier.get();
	}
	}

	@Override
	public DruidDoublePredicate makeDoublePredicate()
	{
	if (extractionFn != null) {
	final Predicate<String> stringPredicate = stringPredicateSupplier.get();
	return input -> stringPredicate.apply(extractionFn.apply(input));
	} else {
	return doublePredicateSupplier.get();
	}
	}

	@Override
	public boolean equals(Object o)
	{
	if (this == o) {
	return true;
	}
	if (o == null \|\| getClass() != o.getClass()) {
	return false;
	}
	InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o;
	return Objects.equals(extractionFn, that.extractionFn) &&
	Objects.equals(values, that.values);
	}

	@Override
	public int hashCode()
	{
	return Objects.hash(extractionFn, values);
	}
	}
	}