| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.solr.search; |
| |
| /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */ |
| /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */ |
| |
| |
| import org.apache.lucene.search.*; |
| |
| import java.util.LinkedHashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.io.IOException; |
| |
| /** Utility which converts certain query clauses into {@link QueryFilter}s and |
| * caches these. Only required {@link TermQuery}s whose boost is zero and |
| * whose term occurs in at least a certain fraction of documents are converted |
| * to cached filters. This accelerates query constraints like language, |
| * document format, etc., which do not affect ranking but might otherwise slow |
| * search considerably. */ |
| // Taken from Nutch and modified - YCS |
| class LuceneQueryOptimizer { |
| private LinkedHashMap cache; // an LRU cache of QueryFilter |
| |
| private float threshold; |
| |
| /** Construct an optimizer that caches and uses filters for required {@link |
| * TermQuery}s whose boost is zero. |
| * @param cacheSize the number of QueryFilters to cache |
| * @param threshold the fraction of documents which must contain term |
| */ |
| public LuceneQueryOptimizer(final int cacheSize, float threshold) { |
| this.cache = new LinkedHashMap(cacheSize, 0.75f, true) { |
| protected boolean removeEldestEntry(Map.Entry eldest) { |
| return size() > cacheSize; // limit size of cache |
| } |
| }; |
| this.threshold = threshold; |
| } |
| |
| public TopDocs optimize(BooleanQuery original, |
| Searcher searcher, |
| int numHits, |
| Query[] queryOut, |
| Filter[] filterOut |
| ) |
| throws IOException { |
| |
| BooleanQuery query = new BooleanQuery(); |
| BooleanQuery filterQuery = null; |
| |
| for (BooleanClause c : (List<BooleanClause>)original.clauses()) { |
| |
| /*** |
| System.out.println("required="+c.required); |
| System.out.println("boost="+c.query.getBoost()); |
| System.out.println("isTermQuery="+(c.query instanceof TermQuery)); |
| if (c.query instanceof TermQuery) { |
| System.out.println("term="+((TermQuery)c.query).getTerm()); |
| System.out.println("docFreq="+searcher.docFreq(((TermQuery)c.query).getTerm())); |
| } |
| ***/ |
| Query q = c.getQuery(); |
| if (c.isRequired() // required |
| && q.getBoost() == 0.0f // boost is zero |
| && q instanceof TermQuery // TermQuery |
| && (searcher.docFreq(((TermQuery)q).getTerm()) |
| / (float)searcher.maxDoc()) >= threshold) { // check threshold |
| if (filterQuery == null) |
| filterQuery = new BooleanQuery(); |
| filterQuery.add(q, BooleanClause.Occur.MUST); // filter it |
| //System.out.println("WooHoo... qualified to be hoisted to a filter!"); |
| } else { |
| query.add(c); // query it |
| } |
| } |
| |
| Filter filter = null; |
| if (filterQuery != null) { |
| synchronized (cache) { // check cache |
| filter = (Filter)cache.get(filterQuery); |
| } |
| if (filter == null) { // miss |
| filter = new QueryFilter(filterQuery); // construct new entry |
| synchronized (cache) { |
| cache.put(filterQuery, filter); // cache it |
| } |
| } |
| } |
| |
| // YCS: added code to pass out optimized query and filter |
| // so they can be used with Hits |
| if (queryOut != null && filterOut != null) { |
| queryOut[0] = query; filterOut[0] = filter; |
| return null; |
| } else { |
| return searcher.search(query, filter, numHits); |
| } |
| |
| } |
| } |