| Index: lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (revision 1657845) |
| +++ lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (working copy) |
| @@ -162,378 +162,11 @@ |
| @Override |
| public final Iterator<BooleanClause> iterator() { return clauses().iterator(); } |
| |
| - /** |
| - * Expert: the Weight for BooleanQuery, used to |
| - * normalize, score and explain these queries. |
| - * |
| - * @lucene.experimental |
| - */ |
| - protected class BooleanWeight extends Weight { |
| - /** The Similarity implementation. */ |
| - protected Similarity similarity; |
| - protected ArrayList<Weight> weights; |
| - protected int maxCoord; // num optional + num required |
| - private final boolean disableCoord; |
| |
| - public BooleanWeight(IndexSearcher searcher, boolean disableCoord) |
| - throws IOException { |
| - this.similarity = searcher.getSimilarity(); |
| - this.disableCoord = disableCoord; |
| - weights = new ArrayList<>(clauses.size()); |
| - for (int i = 0 ; i < clauses.size(); i++) { |
| - BooleanClause c = clauses.get(i); |
| - Weight w = c.getQuery().createWeight(searcher); |
| - weights.add(w); |
| - if (!c.isProhibited()) { |
| - maxCoord++; |
| - } |
| - } |
| - } |
| |
| - @Override |
| - public Query getQuery() { return BooleanQuery.this; } |
| - |
| - @Override |
| - public float getValueForNormalization() throws IOException { |
| - float sum = 0.0f; |
| - for (int i = 0 ; i < weights.size(); i++) { |
| - // call sumOfSquaredWeights for all clauses in case of side effects |
| - float s = weights.get(i).getValueForNormalization(); // sum sub weights |
| - if (!clauses.get(i).isProhibited()) { |
| - // only add to sum for non-prohibited clauses |
| - sum += s; |
| - } |
| - } |
| - |
| - sum *= getBoost() * getBoost(); // boost each sub-weight |
| - |
| - return sum ; |
| - } |
| - |
| - public float coord(int overlap, int maxOverlap) { |
| - // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away, |
| - // so coord() is not applied. But when BQ cannot optimize itself away |
| - // for a single clause (minNrShouldMatch, prohibited clauses, etc), it's |
| - // important not to apply coord(1,1) for consistency, it might not be 1.0F |
| - return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap); |
| - } |
| - |
| - @Override |
| - public void normalize(float norm, float topLevelBoost) { |
| - topLevelBoost *= getBoost(); // incorporate boost |
| - for (Weight w : weights) { |
| - // normalize all clauses, (even if prohibited in case of side affects) |
| - w.normalize(norm, topLevelBoost); |
| - } |
| - } |
| - |
| - @Override |
| - public Explanation explain(LeafReaderContext context, int doc) |
| - throws IOException { |
| - final int minShouldMatch = |
| - BooleanQuery.this.getMinimumNumberShouldMatch(); |
| - ComplexExplanation sumExpl = new ComplexExplanation(); |
| - sumExpl.setDescription("sum of:"); |
| - int coord = 0; |
| - float sum = 0.0f; |
| - boolean fail = false; |
| - int shouldMatchCount = 0; |
| - Iterator<BooleanClause> cIter = clauses.iterator(); |
| - for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) { |
| - Weight w = wIter.next(); |
| - BooleanClause c = cIter.next(); |
| - if (w.scorer(context, context.reader().getLiveDocs(), true) == null) { |
| - if (c.isRequired()) { |
| - fail = true; |
| - Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); |
| - sumExpl.addDetail(r); |
| - } |
| - continue; |
| - } |
| - Explanation e = w.explain(context, doc); |
| - if (e.isMatch()) { |
| - if (!c.isProhibited()) { |
| - sumExpl.addDetail(e); |
| - sum += e.getValue(); |
| - coord++; |
| - } else { |
| - Explanation r = |
| - new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")"); |
| - r.addDetail(e); |
| - sumExpl.addDetail(r); |
| - fail = true; |
| - } |
| - if (c.getOccur() == Occur.SHOULD) { |
| - shouldMatchCount++; |
| - } |
| - } else if (c.isRequired()) { |
| - Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); |
| - r.addDetail(e); |
| - sumExpl.addDetail(r); |
| - fail = true; |
| - } |
| - } |
| - if (fail) { |
| - sumExpl.setMatch(Boolean.FALSE); |
| - sumExpl.setValue(0.0f); |
| - sumExpl.setDescription |
| - ("Failure to meet condition(s) of required/prohibited clause(s)"); |
| - return sumExpl; |
| - } else if (shouldMatchCount < minShouldMatch) { |
| - sumExpl.setMatch(Boolean.FALSE); |
| - sumExpl.setValue(0.0f); |
| - sumExpl.setDescription("Failure to match minimum number "+ |
| - "of optional clauses: " + minShouldMatch); |
| - return sumExpl; |
| - } |
| - |
| - sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE); |
| - sumExpl.setValue(sum); |
| - |
| - final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord); |
| - if (coordFactor == 1.0f) { |
| - return sumExpl; // eliminate wrapper |
| - } else { |
| - ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(), |
| - sum*coordFactor, |
| - "product of:"); |
| - result.addDetail(sumExpl); |
| - result.addDetail(new Explanation(coordFactor, |
| - "coord("+coord+"/"+maxCoord+")")); |
| - return result; |
| - } |
| - } |
| - |
| - /** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer} |
| - * cannot be used. */ |
| - // pkg-private for forcing use of BooleanScorer in tests |
| - BooleanScorer booleanScorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException { |
| - List<BulkScorer> optional = new ArrayList<BulkScorer>(); |
| - Iterator<BooleanClause> cIter = clauses.iterator(); |
| - for (Weight w : weights) { |
| - BooleanClause c = cIter.next(); |
| - BulkScorer subScorer = w.bulkScorer(context, acceptDocs, needsScores); |
| - if (subScorer == null) { |
| - if (c.isRequired()) { |
| - return null; |
| - } |
| - } else if (c.isRequired()) { |
| - // TODO: there are some cases where BooleanScorer |
| - // would handle conjunctions faster than |
| - // BooleanScorer2... |
| - return null; |
| - } else if (c.isProhibited()) { |
| - // TODO: there are some cases where BooleanScorer could do this faster |
| - return null; |
| - } else { |
| - optional.add(subScorer); |
| - } |
| - } |
| - |
| - if (optional.size() == 0) { |
| - return null; |
| - } |
| - |
| - if (minNrShouldMatch > optional.size()) { |
| - return null; |
| - } |
| - |
| - return new BooleanScorer(this, disableCoord, maxCoord, optional, Math.max(1, minNrShouldMatch)); |
| - } |
| - |
| - @Override |
| - public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException { |
| - final BooleanScorer bulkScorer = booleanScorer(context, acceptDocs, needsScores); |
| - if (bulkScorer != null) { // BooleanScorer is applicable |
| - // TODO: what is the right heuristic here? |
| - final long costThreshold; |
| - if (minNrShouldMatch <= 1) { |
| - // when all clauses are optional, use BooleanScorer aggressively |
| - // TODO: is there actually a threshold under which we should rather |
| - // use the regular scorer? |
| - costThreshold = -1; |
| - } else { |
| - // when a minimum number of clauses should match, BooleanScorer is |
| - // going to score all windows that have at least minNrShouldMatch |
| - // matches in the window. But there is no way to know if there is |
| - // an intersection (all clauses might match a different doc ID and |
| - // there will be no matches in the end) so we should only use |
| - // BooleanScorer if matches are very dense |
| - costThreshold = context.reader().maxDoc() / 3; |
| - } |
| - |
| - if (bulkScorer.cost() > costThreshold) { |
| - return bulkScorer; |
| - } |
| - } |
| - return super.bulkScorer(context, acceptDocs, needsScores); |
| - } |
| - |
| - @Override |
| - public Scorer scorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException { |
| - // initially the user provided value, |
| - // but if minNrShouldMatch == optional.size(), |
| - // we will optimize and move these to required, making this 0 |
| - int minShouldMatch = minNrShouldMatch; |
| - |
| - List<Scorer> required = new ArrayList<>(); |
| - List<Scorer> prohibited = new ArrayList<>(); |
| - List<Scorer> optional = new ArrayList<>(); |
| - Iterator<BooleanClause> cIter = clauses.iterator(); |
| - for (Weight w : weights) { |
| - BooleanClause c = cIter.next(); |
| - Scorer subScorer = w.scorer(context, acceptDocs, needsScores && c.isProhibited() == false); |
| - if (subScorer == null) { |
| - if (c.isRequired()) { |
| - return null; |
| - } |
| - } else if (c.isRequired()) { |
| - required.add(subScorer); |
| - } else if (c.isProhibited()) { |
| - prohibited.add(subScorer); |
| - } else { |
| - optional.add(subScorer); |
| - } |
| - } |
| - |
| - // scorer simplifications: |
| - |
| - if (optional.size() == minShouldMatch) { |
| - // any optional clauses are in fact required |
| - required.addAll(optional); |
| - optional.clear(); |
| - minShouldMatch = 0; |
| - } |
| - |
| - if (required.isEmpty() && optional.isEmpty()) { |
| - // no required and optional clauses. |
| - return null; |
| - } else if (optional.size() < minShouldMatch) { |
| - // either >1 req scorer, or there are 0 req scorers and at least 1 |
| - // optional scorer. Therefore if there are not enough optional scorers |
| - // no documents will be matched by the query |
| - return null; |
| - } |
| - |
| - // we don't need scores, so if we have required clauses, drop optional clauses completely |
| - if (!needsScores && minShouldMatch == 0 && required.size() > 0) { |
| - optional.clear(); |
| - } |
| - |
| - // three cases: conjunction, disjunction, or mix |
| - |
| - // pure conjunction |
| - if (optional.isEmpty()) { |
| - return excl(req(required, disableCoord), prohibited); |
| - } |
| - |
| - // pure disjunction |
| - if (required.isEmpty()) { |
| - return excl(opt(optional, minShouldMatch, disableCoord), prohibited); |
| - } |
| - |
| - // conjunction-disjunction mix: |
| - // we create the required and optional pieces with coord disabled, and then |
| - // combine the two: if minNrShouldMatch > 0, then it's a conjunction: because the |
| - // optional side must match. otherwise it's required + optional, factoring the |
| - // number of optional terms into the coord calculation |
| - |
| - Scorer req = excl(req(required, true), prohibited); |
| - Scorer opt = opt(optional, minShouldMatch, true); |
| - |
| - // TODO: clean this up: it's horrible |
| - if (disableCoord) { |
| - if (minShouldMatch > 0) { |
| - return new ConjunctionScorer(this, new Scorer[] { req, opt }, 1F); |
| - } else { |
| - return new ReqOptSumScorer(req, opt); |
| - } |
| - } else if (optional.size() == 1) { |
| - if (minShouldMatch > 0) { |
| - return new ConjunctionScorer(this, new Scorer[] { req, opt }, coord(required.size()+1, maxCoord)); |
| - } else { |
| - float coordReq = coord(required.size(), maxCoord); |
| - float coordBoth = coord(required.size() + 1, maxCoord); |
| - return new BooleanTopLevelScorers.ReqSingleOptScorer(req, opt, coordReq, coordBoth); |
| - } |
| - } else { |
| - if (minShouldMatch > 0) { |
| - return new BooleanTopLevelScorers.CoordinatingConjunctionScorer(this, coords(), req, required.size(), opt); |
| - } else { |
| - return new BooleanTopLevelScorers.ReqMultiOptScorer(req, opt, required.size(), coords()); |
| - } |
| - } |
| - } |
| - |
| - private Scorer req(List<Scorer> required, boolean disableCoord) { |
| - if (required.size() == 1) { |
| - Scorer req = required.get(0); |
| - if (!disableCoord && maxCoord > 1) { |
| - return new BooleanTopLevelScorers.BoostedScorer(req, coord(1, maxCoord)); |
| - } else { |
| - return req; |
| - } |
| - } else { |
| - return new ConjunctionScorer(this, |
| - required.toArray(new Scorer[required.size()]), |
| - disableCoord ? 1.0F : coord(required.size(), maxCoord)); |
| - } |
| - } |
| - |
| - private Scorer excl(Scorer main, List<Scorer> prohibited) throws IOException { |
| - if (prohibited.isEmpty()) { |
| - return main; |
| - } else if (prohibited.size() == 1) { |
| - return new ReqExclScorer(main, prohibited.get(0)); |
| - } else { |
| - float coords[] = new float[prohibited.size()+1]; |
| - Arrays.fill(coords, 1F); |
| - return new ReqExclScorer(main, |
| - new DisjunctionSumScorer(this, |
| - prohibited.toArray(new Scorer[prohibited.size()]), |
| - coords)); |
| - } |
| - } |
| - |
| - private Scorer opt(List<Scorer> optional, int minShouldMatch, boolean disableCoord) throws IOException { |
| - if (optional.size() == 1) { |
| - Scorer opt = optional.get(0); |
| - if (!disableCoord && maxCoord > 1) { |
| - return new BooleanTopLevelScorers.BoostedScorer(opt, coord(1, maxCoord)); |
| - } else { |
| - return opt; |
| - } |
| - } else { |
| - float coords[]; |
| - if (disableCoord) { |
| - coords = new float[optional.size()+1]; |
| - Arrays.fill(coords, 1F); |
| - } else { |
| - coords = coords(); |
| - } |
| - if (minShouldMatch > 1) { |
| - return new MinShouldMatchSumScorer(this, optional, minShouldMatch, coords); |
| - } else { |
| - return new DisjunctionSumScorer(this, |
| - optional.toArray(new Scorer[optional.size()]), |
| - coords); |
| - } |
| - } |
| - } |
| - |
| - private float[] coords() { |
| - float[] coords = new float[maxCoord+1]; |
| - coords[0] = 0F; |
| - for (int i = 1; i < coords.length; i++) { |
| - coords[i] = coord(i, maxCoord); |
| - } |
| - return coords; |
| - } |
| - } |
| - |
| @Override |
| public Weight createWeight(IndexSearcher searcher) throws IOException { |
| - return new BooleanWeight(searcher, disableCoord); |
| + return new BooleanWeight(this, searcher, disableCoord); |
| } |
| |
| @Override |
| Index: lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (revision 1657845) |
| +++ lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (working copy) |
| @@ -21,7 +21,6 @@ |
| import java.util.Arrays; |
| import java.util.Collection; |
| |
| -import org.apache.lucene.search.BooleanQuery.BooleanWeight; |
| import org.apache.lucene.util.PriorityQueue; |
| |
| /** |
| Index: lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java (revision 0) |
| +++ lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java (working copy) |
| @@ -0,0 +1,400 @@ |
| +package org.apache.lucene.search; |
| + |
| +/* |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.Arrays; |
| +import java.util.Iterator; |
| +import java.util.List; |
| + |
| +import org.apache.lucene.index.LeafReaderContext; |
| +import org.apache.lucene.search.BooleanClause.Occur; |
| +import org.apache.lucene.search.similarities.Similarity; |
| +import org.apache.lucene.util.Bits; |
| + |
| +/** |
| + * Expert: the Weight for BooleanQuery, used to |
| + * normalize, score and explain these queries. |
| + * |
| + * @lucene.experimental |
| + */ |
| +public class BooleanWeight extends Weight { |
| + /** The Similarity implementation. */ |
| + protected Similarity similarity; |
| + protected ArrayList<Weight> weights; |
| + protected final BooleanQuery query; |
| + protected int maxCoord; // num optional + num required |
| + private final boolean disableCoord; |
| + |
| + public BooleanWeight(BooleanQuery query, IndexSearcher searcher, boolean disableCoord) throws IOException { |
| + this.query = query; |
| + this.similarity = searcher.getSimilarity(); |
| + this.disableCoord = disableCoord; |
| + weights = new ArrayList<>(query.clauses().size()); |
| + for (int i = 0 ; i < query.clauses().size(); i++) { |
| + BooleanClause c = query.clauses().get(i); |
| + Weight w = c.getQuery().createWeight(searcher); |
| + weights.add(w); |
| + if (!c.isProhibited()) { |
| + maxCoord++; |
| + } |
| + } |
| + } |
| + |
| + @Override |
| + public Query getQuery() { |
| + return query; |
| + } |
| + |
| + @Override |
| + public float getValueForNormalization() throws IOException { |
| + float sum = 0.0f; |
| + for (int i = 0 ; i < weights.size(); i++) { |
| + // call getValueForNormalization for all clauses in case of side effects |
| + float s = weights.get(i).getValueForNormalization(); // sum sub weights |
| + if (!query.clauses().get(i).isProhibited()) { |
| + // only add to sum for non-prohibited clauses |
| + sum += s; |
| + } |
| + } |
| + |
| + sum *= query.getBoost() * query.getBoost(); // boost each sub-weight |
| + |
| + return sum ; |
| + } |
| + |
| + public float coord(int overlap, int maxOverlap) { |
| + // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away, |
| + // so coord() is not applied. But when BQ cannot optimize itself away |
| + // for a single clause (minNrShouldMatch, prohibited clauses, etc), it's |
| + // important not to apply coord(1,1) for consistency, it might not be 1.0F |
| + return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap); |
| + } |
| + |
| + @Override |
| + public void normalize(float norm, float topLevelBoost) { |
| + topLevelBoost *= query.getBoost(); // incorporate boost |
| + for (Weight w : weights) { |
| + // normalize all clauses (even if prohibited, in case of side effects) |
| + w.normalize(norm, topLevelBoost); |
| + } |
| + } |
| + |
| + @Override |
| + public Explanation explain(LeafReaderContext context, int doc) |
| + throws IOException { |
| + final int minShouldMatch = query.getMinimumNumberShouldMatch(); |
| + ComplexExplanation sumExpl = new ComplexExplanation(); |
| + sumExpl.setDescription("sum of:"); |
| + int coord = 0; |
| + float sum = 0.0f; |
| + boolean fail = false; |
| + int shouldMatchCount = 0; |
| + Iterator<BooleanClause> cIter = query.clauses().iterator(); |
| + for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) { |
| + Weight w = wIter.next(); |
| + BooleanClause c = cIter.next(); |
| + if (w.scorer(context, context.reader().getLiveDocs(), true) == null) { |
| + if (c.isRequired()) { |
| + fail = true; |
| + Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); |
| + sumExpl.addDetail(r); |
| + } |
| + continue; |
| + } |
| + Explanation e = w.explain(context, doc); |
| + if (e.isMatch()) { |
| + if (!c.isProhibited()) { |
| + sumExpl.addDetail(e); |
| + sum += e.getValue(); |
| + coord++; |
| + } else { |
| + Explanation r = |
| + new Explanation(0.0f, "match on prohibited clause (" + c.getQuery().toString() + ")"); |
| + r.addDetail(e); |
| + sumExpl.addDetail(r); |
| + fail = true; |
| + } |
| + if (c.getOccur() == Occur.SHOULD) { |
| + shouldMatchCount++; |
| + } |
| + } else if (c.isRequired()) { |
| + Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); |
| + r.addDetail(e); |
| + sumExpl.addDetail(r); |
| + fail = true; |
| + } |
| + } |
| + if (fail) { |
| + sumExpl.setMatch(Boolean.FALSE); |
| + sumExpl.setValue(0.0f); |
| + sumExpl.setDescription |
| + ("Failure to meet condition(s) of required/prohibited clause(s)"); |
| + return sumExpl; |
| + } else if (shouldMatchCount < minShouldMatch) { |
| + sumExpl.setMatch(Boolean.FALSE); |
| + sumExpl.setValue(0.0f); |
| + sumExpl.setDescription("Failure to match minimum number "+ |
| + "of optional clauses: " + minShouldMatch); |
| + return sumExpl; |
| + } |
| + |
| + sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE); |
| + sumExpl.setValue(sum); |
| + |
| + final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord); |
| + if (coordFactor == 1.0f) { |
| + return sumExpl; // eliminate wrapper |
| + } else { |
| + ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(), |
| + sum*coordFactor, |
| + "product of:"); |
| + result.addDetail(sumExpl); |
| + result.addDetail(new Explanation(coordFactor, |
| + "coord("+coord+"/"+maxCoord+")")); |
| + return result; |
| + } |
| + } |
| + |
| + /** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer} |
| + * cannot be used. */ |
| + // pkg-private for forcing use of BooleanScorer in tests |
| + BooleanScorer booleanScorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException { |
| + List<BulkScorer> optional = new ArrayList<BulkScorer>(); |
| + Iterator<BooleanClause> cIter = query.clauses().iterator(); |
| + for (Weight w : weights) { |
| + BooleanClause c = cIter.next(); |
| + BulkScorer subScorer = w.bulkScorer(context, acceptDocs, needsScores); |
| + if (subScorer == null) { |
| + if (c.isRequired()) { |
| + return null; |
| + } |
| + } else if (c.isRequired()) { |
| + // TODO: there are some cases where BooleanScorer |
| + // would handle conjunctions faster than |
| + // BooleanScorer2... |
| + return null; |
| + } else if (c.isProhibited()) { |
| + // TODO: there are some cases where BooleanScorer could do this faster |
| + return null; |
| + } else { |
| + optional.add(subScorer); |
| + } |
| + } |
| + |
| + if (optional.size() == 0) { |
| + return null; |
| + } |
| + |
| + if (query.minNrShouldMatch > optional.size()) { |
| + return null; |
| + } |
| + |
| + return new BooleanScorer(this, disableCoord, maxCoord, optional, Math.max(1, query.minNrShouldMatch)); |
| + } |
| + |
| + @Override |
| + public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException { |
| + final BooleanScorer bulkScorer = booleanScorer(context, acceptDocs, needsScores); |
| + if (bulkScorer != null) { // BooleanScorer is applicable |
| + // TODO: what is the right heuristic here? |
| + final long costThreshold; |
| + if (query.minNrShouldMatch <= 1) { |
| + // when all clauses are optional, use BooleanScorer aggressively |
| + // TODO: is there actually a threshold under which we should rather |
| + // use the regular scorer? |
| + costThreshold = -1; |
| + } else { |
| + // when a minimum number of clauses should match, BooleanScorer is |
| + // going to score all windows that have at least minNrShouldMatch |
| + // matches in the window. But there is no way to know if there is |
| + // an intersection (all clauses might match a different doc ID and |
| + // there will be no matches in the end) so we should only use |
| + // BooleanScorer if matches are very dense |
| + costThreshold = context.reader().maxDoc() / 3; |
| + } |
| + |
| + if (bulkScorer.cost() > costThreshold) { |
| + return bulkScorer; |
| + } |
| + } |
| + return super.bulkScorer(context, acceptDocs, needsScores); |
| + } |
| + |
| + @Override |
| + public Scorer scorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException { |
| + // initially the user provided value, |
| + // but if minNrShouldMatch == optional.size(), |
| + // we will optimize and move these to required, making this 0 |
| + int minShouldMatch = query.minNrShouldMatch; |
| + |
| + List<Scorer> required = new ArrayList<>(); |
| + List<Scorer> prohibited = new ArrayList<>(); |
| + List<Scorer> optional = new ArrayList<>(); |
| + Iterator<BooleanClause> cIter = query.clauses().iterator(); |
| + for (Weight w : weights) { |
| + BooleanClause c = cIter.next(); |
| + Scorer subScorer = w.scorer(context, acceptDocs, needsScores && c.isProhibited() == false); |
| + if (subScorer == null) { |
| + if (c.isRequired()) { |
| + return null; |
| + } |
| + } else if (c.isRequired()) { |
| + required.add(subScorer); |
| + } else if (c.isProhibited()) { |
| + prohibited.add(subScorer); |
| + } else { |
| + optional.add(subScorer); |
| + } |
| + } |
| + |
| + // scorer simplifications: |
| + |
| + if (optional.size() == minShouldMatch) { |
| + // any optional clauses are in fact required |
| + required.addAll(optional); |
| + optional.clear(); |
| + minShouldMatch = 0; |
| + } |
| + |
| + if (required.isEmpty() && optional.isEmpty()) { |
| + // no required and optional clauses. |
| + return null; |
| + } else if (optional.size() < minShouldMatch) { |
| + // either >=1 req scorer, or there are 0 req scorers and at least 1 |
| + // optional scorer. Therefore if there are not enough optional scorers |
| + // no documents will be matched by the query |
| + return null; |
| + } |
| + |
| + // we don't need scores, so if we have required clauses, drop optional clauses completely |
| + if (!needsScores && minShouldMatch == 0 && required.size() > 0) { |
| + optional.clear(); |
| + } |
| + |
| + // three cases: conjunction, disjunction, or mix |
| + |
| + // pure conjunction |
| + if (optional.isEmpty()) { |
| + return excl(req(required, disableCoord), prohibited); |
| + } |
| + |
| + // pure disjunction |
| + if (required.isEmpty()) { |
| + return excl(opt(optional, minShouldMatch, disableCoord), prohibited); |
| + } |
| + |
| + // conjunction-disjunction mix: |
| + // we create the required and optional pieces with coord disabled, and then |
| + // combine the two: if minNrShouldMatch > 0, then it's a conjunction: because the |
| + // optional side must match. otherwise it's required + optional, factoring the |
| + // number of optional terms into the coord calculation |
| + |
| + Scorer req = excl(req(required, true), prohibited); |
| + Scorer opt = opt(optional, minShouldMatch, true); |
| + |
| + // TODO: clean this up: it's horrible |
| + if (disableCoord) { |
| + if (minShouldMatch > 0) { |
| + return new ConjunctionScorer(this, new Scorer[] { req, opt }, 1F); |
| + } else { |
| + return new ReqOptSumScorer(req, opt); |
| + } |
| + } else if (optional.size() == 1) { |
| + if (minShouldMatch > 0) { |
| + return new ConjunctionScorer(this, new Scorer[] { req, opt }, coord(required.size()+1, maxCoord)); |
| + } else { |
| + float coordReq = coord(required.size(), maxCoord); |
| + float coordBoth = coord(required.size() + 1, maxCoord); |
| + return new BooleanTopLevelScorers.ReqSingleOptScorer(req, opt, coordReq, coordBoth); |
| + } |
| + } else { |
| + if (minShouldMatch > 0) { |
| + return new BooleanTopLevelScorers.CoordinatingConjunctionScorer(this, coords(), req, required.size(), opt); |
| + } else { |
| + return new BooleanTopLevelScorers.ReqMultiOptScorer(req, opt, required.size(), coords()); |
| + } |
| + } |
| + } |
| + |
| + private Scorer req(List<Scorer> required, boolean disableCoord) { |
| + if (required.size() == 1) { |
| + Scorer req = required.get(0); |
| + if (!disableCoord && maxCoord > 1) { |
| + return new BooleanTopLevelScorers.BoostedScorer(req, coord(1, maxCoord)); |
| + } else { |
| + return req; |
| + } |
| + } else { |
| + return new ConjunctionScorer(this, |
| + required.toArray(new Scorer[required.size()]), |
| + disableCoord ? 1.0F : coord(required.size(), maxCoord)); |
| + } |
| + } |
| + |
| + private Scorer excl(Scorer main, List<Scorer> prohibited) throws IOException { |
| + if (prohibited.isEmpty()) { |
| + return main; |
| + } else if (prohibited.size() == 1) { |
| + return new ReqExclScorer(main, prohibited.get(0)); |
| + } else { |
| + float coords[] = new float[prohibited.size()+1]; |
| + Arrays.fill(coords, 1F); |
| + return new ReqExclScorer(main, |
| + new DisjunctionSumScorer(this, |
| + prohibited.toArray(new Scorer[prohibited.size()]), |
| + coords)); |
| + } |
| + } |
| + |
| + private Scorer opt(List<Scorer> optional, int minShouldMatch, boolean disableCoord) throws IOException { |
| + if (optional.size() == 1) { |
| + Scorer opt = optional.get(0); |
| + if (!disableCoord && maxCoord > 1) { |
| + return new BooleanTopLevelScorers.BoostedScorer(opt, coord(1, maxCoord)); |
| + } else { |
| + return opt; |
| + } |
| + } else { |
| + float coords[]; |
| + if (disableCoord) { |
| + coords = new float[optional.size()+1]; |
| + Arrays.fill(coords, 1F); |
| + } else { |
| + coords = coords(); |
| + } |
| + if (minShouldMatch > 1) { |
| + return new MinShouldMatchSumScorer(this, optional, minShouldMatch, coords); |
| + } else { |
| + return new DisjunctionSumScorer(this, |
| + optional.toArray(new Scorer[optional.size()]), |
| + coords); |
| + } |
| + } |
| + } |
| + |
| + private float[] coords() { |
| + float[] coords = new float[maxCoord+1]; |
| + coords[0] = 0F; |
| + for (int i = 1; i < coords.length; i++) { |
| + coords[i] = coord(i, maxCoord); |
| + } |
| + return coords; |
| + } |
| +} |
| |
| Property changes on: lucene/core/src/java/org/apache/lucene/search/BooleanWeight.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| ## -0,0 +1 ## |
| +native |
| \ No newline at end of property |
| Index: lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java (revision 1657845) |
| +++ lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java (working copy) |
| @@ -283,7 +283,7 @@ |
| |
| @Override |
| public Weight createWeight(IndexSearcher searcher) throws IOException { |
| - return new BooleanWeight(searcher, false) { |
| + return new BooleanWeight(this, searcher, false) { |
| @Override |
| public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException { |
| Scorer scorer = scorer(context, acceptDocs, needsScores); |
| Index: lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java (revision 1657845) |
| +++ lucene/core/src/test/org/apache/lucene/search/TestMinShouldMatch2.java (working copy) |
| @@ -35,7 +35,6 @@ |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermContext; |
| -import org.apache.lucene.search.BooleanQuery.BooleanWeight; |
| import org.apache.lucene.search.similarities.DefaultSimilarity; |
| import org.apache.lucene.search.similarities.Similarity.SimScorer; |
| import org.apache.lucene.search.similarities.Similarity.SimWeight; |
| Index: lucene/queries/src/java/org/apache/lucene/queries/BoostingQuery.java |
| =================================================================== |
| --- lucene/queries/src/java/org/apache/lucene/queries/BoostingQuery.java (revision 1657845) |
| +++ lucene/queries/src/java/org/apache/lucene/queries/BoostingQuery.java (working copy) |
| @@ -55,7 +55,7 @@ |
| BooleanQuery result = new BooleanQuery() { |
| @Override |
| public Weight createWeight(IndexSearcher searcher) throws IOException { |
| - return new BooleanWeight(searcher, false) { |
| + return new BooleanWeight(this, searcher, false) { |
| |
| @Override |
| public float coord(int overlap, int max) { |