blob: f9118aaf97ff0a3b0ea2f17dd9db7be2b6f82bac [file] [log] [blame]
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.TieredMergePolicy;
/**
 * A policy defining which queries should be cached.
*
* Implementations of this class must be thread-safe.
*
* @see UsageTrackingQueryCachingPolicy
* @see LRUQueryCache
* @lucene.experimental
*/
// TODO: add APIs for integration with IndexWriter.IndexReaderWarmer
public interface QueryCachingPolicy {

  /** A simple policy that caches all the provided queries on all segments. */
  QueryCachingPolicy ALWAYS_CACHE = new QueryCachingPolicy() {

    @Override
    public void onUse(Query query) {}

    @Override
    public boolean shouldCache(Query query, LeafReaderContext context) throws IOException {
      return true;
    }

  };

  /** A simple policy that only caches on the largest segments of an index.
   * The reasoning is that these segments likely account for most of the
   * execution time of queries and are also more likely to stay around longer
   * than small segments, which makes them more interesting for caching.
   */
  class CacheOnLargeSegments implements QueryCachingPolicy {

    /** {@link CacheOnLargeSegments} instance that only caches on segments that
     * account for more than 3% of the total index size. This should guarantee
     * that all segments from the upper {@link TieredMergePolicy tier} will be
     * cached while ensuring that at most {@code 33} segments can make it to
     * the cache (given that some implementations such as {@link LRUQueryCache}
     * perform better when the number of cached segments is low). */
    public static final CacheOnLargeSegments DEFAULT = new CacheOnLargeSegments(10000, 0.03f);

    private final int minIndexSize;
    private final float minSizeRatio;

    /**
     * Create a {@link CacheOnLargeSegments} instance that only caches on a
     * given segment if the total number of documents in the index is at
     * least {@code minIndexSize} and the number of documents in the segment
     * divided by the total number of documents in the index is greater than
     * or equal to {@code minSizeRatio}.
     *
     * @param minIndexSize minimum total number of documents the index must
     *                     have for any segment of it to be cached on
     * @param minSizeRatio minimum share of the index a segment must hold to
     *                     be cached on; must be in {@code ]0, 1[}
     * @throws IllegalArgumentException if {@code minSizeRatio} is not in
     *                                  {@code ]0, 1[}
     */
    public CacheOnLargeSegments(int minIndexSize, float minSizeRatio) {
      if (minSizeRatio <= 0 || minSizeRatio >= 1) {
        throw new IllegalArgumentException("minSizeRatio must be in ]0, 1[, got " + minSizeRatio);
      }
      this.minIndexSize = minIndexSize;
      this.minSizeRatio = minSizeRatio;
    }

    @Override
    public void onUse(Query query) {}

    @Override
    public boolean shouldCache(Query query, LeafReaderContext context) throws IOException {
      final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(context);
      // Never cache on small indices: entries would be cheap to recompute
      // and their segments are likely to be merged away soon.
      if (topLevelContext.reader().maxDoc() < minIndexSize) {
        return false;
      }
      // Cache only on segments that hold a large-enough share of the index.
      final float sizeRatio = (float) context.reader().maxDoc() / topLevelContext.reader().maxDoc();
      return sizeRatio >= minSizeRatio;
    }

  }

  /** Callback that is called every time that a cached query is used.
   * This is typically useful if the policy wants to track usage statistics
   * in order to make decisions. */
  void onUse(Query query);

  /** Whether the given {@link Query} should be cached on a given segment.
   * This method will be called on each leaf context to know if the query
   * should be cached on this particular leaf. The query cache will first
   * attempt to load the result from the cache. If it is not cached yet and
   * this method returns {@code true} then a cache entry will be generated.
   * Otherwise the query will be evaluated without caching. */
  boolean shouldCache(Query query, LeafReaderContext context) throws IOException;

}