| package org.apache.lucene.search; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.index.LeafReaderContext; |
| import org.apache.lucene.util.FrequencyTrackingRingBuffer; |
| |
| |
| /** |
| * A {@link QueryCachingPolicy} that tracks usage statistics of recently-used |
| * filters in order to decide on which filters are worth caching. |
| * |
| * It also uses some heuristics on segments, filters and the doc id sets that |
| * they produce in order to cache more aggressively when the execution cost |
| * significantly outweighs the caching overhead. |
| * |
| * @lucene.experimental |
| */ |
| public final class UsageTrackingQueryCachingPolicy implements QueryCachingPolicy { |
| |
| // the hash code that we use as a sentinel in the ring buffer. |
| private static final int SENTINEL = Integer.MIN_VALUE; |
| |
| static boolean isCostly(Query query) { |
| // This does not measure the cost of iterating over the filter (for this we |
| // already have the DocIdSetIterator#cost API) but the cost to build the |
| // DocIdSet in the first place |
| return query instanceof MultiTermQuery || |
| query instanceof MultiTermQueryConstantScoreWrapper; |
| } |
| |
| static boolean isCheap(Query query) { |
| // same for cheap queries |
| // these queries are so cheap that they usually do not need caching |
| return query instanceof TermQuery; |
| } |
| |
| private final QueryCachingPolicy.CacheOnLargeSegments segmentPolicy; |
| private final FrequencyTrackingRingBuffer recentlyUsedFilters; |
| |
| /** |
| * Create a new instance. |
| * |
| * @param minIndexSize the minimum size of the top-level index |
| * @param minSizeRatio the minimum size ratio for segments to be cached, see {@link QueryCachingPolicy.CacheOnLargeSegments} |
| * @param historySize the number of recently used filters to track |
| */ |
| public UsageTrackingQueryCachingPolicy( |
| int minIndexSize, |
| float minSizeRatio, |
| int historySize) { |
| this(new QueryCachingPolicy.CacheOnLargeSegments(minIndexSize, minSizeRatio), historySize); |
| } |
| |
| /** Create a new instance with an history size of 256. */ |
| public UsageTrackingQueryCachingPolicy() { |
| this(QueryCachingPolicy.CacheOnLargeSegments.DEFAULT, 256); |
| } |
| |
| private UsageTrackingQueryCachingPolicy( |
| QueryCachingPolicy.CacheOnLargeSegments segmentPolicy, |
| int historySize) { |
| this.segmentPolicy = segmentPolicy; |
| this.recentlyUsedFilters = new FrequencyTrackingRingBuffer(historySize, SENTINEL); |
| } |
| |
| /** |
| * For a given query, return how many times it should appear in the history |
| * before being cached. |
| */ |
| protected int minFrequencyToCache(Query query) { |
| if (isCostly(query)) { |
| return 2; |
| } else if (isCheap(query)) { |
| return 20; |
| } else { |
| // default: cache after the filter has been seen 5 times |
| return 5; |
| } |
| } |
| |
| @Override |
| public void onUse(Query query) { |
| assert query instanceof BoostQuery == false; |
| assert query instanceof ConstantScoreQuery == false; |
| |
| // call hashCode outside of sync block |
| // in case it's somewhat expensive: |
| int hashCode = query.hashCode(); |
| |
| // we only track hash codes to avoid holding references to possible |
| // large queries; this may cause rare false positives, but at worse |
| // this just means we cache a query that was not in fact used enough: |
| synchronized (this) { |
| recentlyUsedFilters.add(hashCode); |
| } |
| } |
| |
| int frequency(Query query) { |
| assert query instanceof BoostQuery == false; |
| assert query instanceof ConstantScoreQuery == false; |
| |
| // call hashCode outside of sync block |
| // in case it's somewhat expensive: |
| int hashCode = query.hashCode(); |
| |
| synchronized (this) { |
| return recentlyUsedFilters.frequency(hashCode); |
| } |
| } |
| |
| @Override |
| public boolean shouldCache(Query query, LeafReaderContext context) throws IOException { |
| if (query instanceof MatchAllDocsQuery |
| // MatchNoDocsQuery currently rewrites to a BooleanQuery, |
| // but who knows, it might get its own Weight one day |
| || query instanceof MatchNoDocsQuery) { |
| return false; |
| } |
| if (query instanceof BooleanQuery) { |
| BooleanQuery bq = (BooleanQuery) query; |
| if (bq.clauses().isEmpty()) { |
| return false; |
| } |
| } |
| if (query instanceof DisjunctionMaxQuery) { |
| DisjunctionMaxQuery dmq = (DisjunctionMaxQuery) query; |
| if (dmq.getDisjuncts().isEmpty()) { |
| return false; |
| } |
| } |
| if (segmentPolicy.shouldCache(query, context) == false) { |
| return false; |
| } |
| final int frequency = frequency(query); |
| final int minFrequency = minFrequencyToCache(query); |
| return frequency >= minFrequency; |
| } |
| |
| } |