// blob: 59f225b0625ac635e0e103dbdffc720eab2e3f82
using J2N;
using J2N.Collections.Generic.Extensions;
using System;
using System.Collections;
using System.Collections.Generic;
#if FEATURE_SERIALIZABLE_EXCEPTIONS
using System.Runtime.Serialization;
#endif
using System.Text;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Search
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
using IBits = Lucene.Net.Util.IBits;
using IndexReader = Lucene.Net.Index.IndexReader;
using Occur_e = Lucene.Net.Search.Occur;
using Similarity = Lucene.Net.Search.Similarities.Similarity;
using Term = Lucene.Net.Index.Term;
using ToStringUtils = Lucene.Net.Util.ToStringUtils;
/// <summary>
/// A <see cref="Query"/> that matches documents matching boolean combinations of other
/// queries, e.g. <see cref="TermQuery"/>s, <see cref="PhraseQuery"/>s or other
/// <see cref="BooleanQuery"/>s.
/// <para/>
/// Collection initializer note: To create and populate a <see cref="BooleanQuery"/>
/// in a single statement, you can use the following example as a guide:
///
/// <code>
/// var booleanQuery = new BooleanQuery() {
/// { new WildcardQuery(new Term("field2", "foobar")), Occur.SHOULD },
/// { new MultiPhraseQuery() {
/// new Term("field", "microsoft"),
/// new Term("field", "office")
/// }, Occur.SHOULD }
/// };
///
/// // or
///
/// var booleanQuery = new BooleanQuery() {
/// new BooleanClause(new WildcardQuery(new Term("field2", "foobar")), Occur.SHOULD),
/// new BooleanClause(new MultiPhraseQuery() {
/// new Term("field", "microsoft"),
/// new Term("field", "office")
/// }, Occur.SHOULD)
/// };
/// </code>
/// </summary>
#if FEATURE_SERIALIZABLE
[Serializable]
#endif
public class BooleanQuery : Query, IEnumerable<BooleanClause>
{
private static int maxClauseCount = 1024;
/// <summary>
/// Raised by <see cref="Add(BooleanClause)"/> when a query already holds
/// <see cref="MaxClauseCount"/> clauses and one more is added. This typically happens if
/// a <see cref="PrefixQuery"/>, <see cref="FuzzyQuery"/>, <see cref="WildcardQuery"/>, or <see cref="TermRangeQuery"/>
/// is expanded to many terms during search.
/// </summary>
// LUCENENET: It is no longer good practice to use binary serialization.
// See: https://github.com/dotnet/corefx/issues/23584#issuecomment-325724568
#if FEATURE_SERIALIZABLE_EXCEPTIONS
[Serializable]
#endif
public class TooManyClausesException : Exception // LUCENENET specific - added Exception suffix
{
    /// <summary>
    /// Creates the exception with a message that reports the clause limit
    /// in effect at the time of construction.
    /// </summary>
    public TooManyClausesException()
        : base($"maxClauseCount is set to {maxClauseCount}")
    {
    }

#if FEATURE_SERIALIZABLE_EXCEPTIONS
    /// <summary>
    /// Initializes a new instance of this class with serialized data.
    /// </summary>
    /// <param name="info">The <see cref="SerializationInfo"/> that holds the serialized object data about the exception being thrown.</param>
    /// <param name="context">The <see cref="StreamingContext"/> that contains contextual information about the source or destination.</param>
    protected TooManyClausesException(SerializationInfo info, StreamingContext context)
        : base(info, context)
    {
    }
#endif
}
/// <summary>
/// Gets or sets the maximum number of clauses permitted, 1024 by default.
/// Attempts to add more than the permitted number of clauses cause
/// <see cref="TooManyClausesException"/> to be thrown.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">The assigned value is less than 1.</exception>
public static int MaxClauseCount
{
    get => maxClauseCount;
    set
    {
        if (value < 1)
        {
            // ArgumentOutOfRangeException derives from ArgumentException, so callers that
            // catch ArgumentException continue to work; the parameter name and rejected
            // value are now attached for diagnostics.
            throw new ArgumentOutOfRangeException(nameof(value), value, "maxClauseCount must be >= 1");
        }
        BooleanQuery.maxClauseCount = value;
    }
}
private IList<BooleanClause> clauses = new JCG.List<BooleanClause>();
private readonly bool disableCoord;
/// <summary>
/// Constructs an empty boolean query whose coord-disabling flag is <c>false</c>.
/// </summary>
public BooleanQuery()
    : this(false)
{
}
/// <summary>
/// Constructs an empty boolean query, optionally switching off the coordination factor.
/// <para/>
/// <see cref="Similarity.Coord(int,int)"/> may be disabled in scoring, as
/// appropriate. For example, this score factor does not make sense for most
/// automatically generated queries, like <see cref="WildcardQuery"/> and
/// <see cref="FuzzyQuery"/>.
/// </summary>
/// <param name="disableCoord"> <c>true</c> to disable <see cref="Similarity.Coord(int,int)"/> in scoring. </param>
public BooleanQuery(bool disableCoord) => this.disableCoord = disableCoord;
/// <summary>
/// Gets whether <see cref="Similarity.Coord(int,int)"/> is disabled in
/// scoring for this query instance (the flag supplied at construction).
/// </summary>
/// <seealso cref="BooleanQuery(bool)"/>
public virtual bool CoordDisabled // LUCENENET TODO: API Change to CoordEnabled? Per MSDN, properties should be in the affirmative.
{
    get { return disableCoord; }
}
/// <summary>
/// Gets or sets the minimum number of optional <see cref="BooleanClause"/>s
/// that must be satisfied.
///
/// <para>
/// By default no optional clauses are necessary for a match
/// (unless there are no required clauses). Once a value is assigned,
/// that many optional clauses are required.
/// </para>
/// <para>
/// This setting is totally independent of specifying that
/// any specific clauses are required (or prohibited). The number is
/// only compared against the number of matching optional clauses.
/// </para>
/// </summary>
/// <value> The number of optional clauses that must match </value>
public virtual int MinimumNumberShouldMatch
{
    get { return m_minNrShouldMatch; }
    set { m_minNrShouldMatch = value; }
}
protected int m_minNrShouldMatch = 0;
/// <summary>
/// Wraps <paramref name="query"/> and <paramref name="occur"/> in a new
/// <see cref="BooleanClause"/> and adds it via <see cref="Add(BooleanClause)"/>.
/// </summary>
/// <param name="query"> The sub-query for the new clause. </param>
/// <param name="occur"> How the sub-query participates in matching. </param>
/// <exception cref="TooManyClausesException"> If the new number of clauses exceeds the maximum clause number </exception>
/// <seealso cref="MaxClauseCount"/>
public virtual void Add(Query query, Occur occur)
{
    BooleanClause wrapped = new BooleanClause(query, occur);
    Add(wrapped);
}
/// <summary>
/// Appends <paramref name="clause"/> to this query's clause list.
/// </summary>
/// <param name="clause"> The clause to append. </param>
/// <exception cref="TooManyClausesException"> If the query already holds <see cref="MaxClauseCount"/> (or more) clauses </exception>
/// <seealso cref="MaxClauseCount"/>
public virtual void Add(BooleanClause clause)
{
    // The limit is checked before the insert, so the list never grows past it through this method.
    bool atCapacity = clauses.Count >= maxClauseCount;
    if (atCapacity)
    {
        throw new TooManyClausesException();
    }

    clauses.Add(clause);
}
/// <summary>
/// Returns the clauses of this query copied into an array.
/// </summary>
/// <returns> An array holding the current clauses, in list order. </returns>
public virtual BooleanClause[] GetClauses() => clauses.ToArray();
/// <summary>
/// Gets the list of clauses in this query. The backing list itself is returned, not a copy.
/// </summary>
public virtual IList<BooleanClause> Clauses
{
    get { return clauses; }
}
/// <summary>
/// Returns an enumerator over the clauses exposed by <see cref="Clauses"/>. It implements the
/// <see cref="T:IEnumerable{BooleanClause}"/> interface to make it possible to do:
/// <code>foreach (BooleanClause clause in booleanQuery) {}</code>
/// </summary>
public IEnumerator<BooleanClause> GetEnumerator() => Clauses.GetEnumerator();
/// <summary>
/// Non-generic enumeration; forwards to the generic <see cref="GetEnumerator()"/>.
/// </summary>
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
/// <summary>
/// Expert: the <see cref="Weight"/> for <see cref="BooleanQuery"/>, used to
/// normalize, score and explain these queries.
/// <para/>
/// @lucene.experimental
/// </summary>
public class BooleanWeight : Weight
{
private readonly BooleanQuery outerInstance;
/// <summary>
/// The <see cref="Similarities.Similarity"/> implementation. </summary>
protected Similarity m_similarity;
protected List<Weight> m_weights;
protected int m_maxCoord; // num optional + num required
private readonly bool disableCoord;
/// <summary>
/// Builds the weight for <paramref name="outerInstance"/> by creating one sub-weight per
/// clause (in clause order) and counting the non-prohibited clauses into <see cref="m_maxCoord"/>.
/// </summary>
/// <param name="outerInstance"> The query this weight belongs to. </param>
/// <param name="searcher"> Supplies the <see cref="Similarities.Similarity"/> and is passed to each clause query's <c>CreateWeight</c>. </param>
/// <param name="disableCoord"> When <c>true</c>, a coord factor of 1 is used instead of calling <see cref="Coord(int, int)"/>. </param>
public BooleanWeight(BooleanQuery outerInstance, IndexSearcher searcher, bool disableCoord)
{
    this.outerInstance = outerInstance;
    this.m_similarity = searcher.Similarity;
    this.disableCoord = disableCoord;

    IList<BooleanClause> clauseList = outerInstance.clauses;
    m_weights = new List<Weight>(clauseList.Count);
    for (int idx = 0; idx < clauseList.Count; idx++)
    {
        BooleanClause clause = clauseList[idx];
        // A weight is created for every clause, prohibited ones included,
        // so m_weights stays index-aligned with the clause list.
        m_weights.Add(clause.Query.CreateWeight(searcher));
        if (clause.IsProhibited)
        {
            continue;
        }
        m_maxCoord++;
    }
}
/// <summary>
/// Gets the similarity captured from the searcher when this weight was constructed.
/// </summary>
public Similarity Similarity
{
    get { return m_similarity; }
}
/// <summary>
/// Gets the number of non-prohibited clauses counted at construction (num optional + num required).
/// </summary>
public int MaxCoord
{
    get { return m_maxCoord; }
}
/// <summary>
/// Gets the <see cref="BooleanQuery"/> this weight was created for.
/// </summary>
public override Query Query
{
    get { return outerInstance; }
}
/// <summary>
/// Sums the normalization values of the sub-weights belonging to non-prohibited clauses
/// and scales the sum by the square of the query boost.
/// </summary>
/// <returns> The boosted sum of the non-prohibited sub-weight values. </returns>
public override float GetValueForNormalization()
{
    float total = 0.0f;
    int index = 0;
    while (index < m_weights.Count)
    {
        // Every sub-weight is asked for its value, prohibited or not, in case the call has side effects.
        float subValue = m_weights[index].GetValueForNormalization();
        bool prohibited = outerInstance.clauses[index].IsProhibited;
        if (!prohibited)
        {
            // only non-prohibited clauses contribute to the sum
            total += subValue;
        }
        index++;
    }

    total *= outerInstance.Boost * outerInstance.Boost; // boost each sub-weight
    return total;
}
/// <summary>
/// Computes the coordination factor for <paramref name="overlap"/> matching clauses out of
/// <paramref name="maxOverlap"/>, delegating to the similarity except when
/// <paramref name="maxOverlap"/> is 1.
/// </summary>
/// <param name="overlap"> Number of clauses that matched. </param>
/// <param name="maxOverlap"> Maximum number of clauses that could match. </param>
/// <returns> <c>1</c> when <paramref name="maxOverlap"/> is 1; otherwise the similarity's coord value. </returns>
public virtual float Coord(int overlap, int maxOverlap)
{
    // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away,
    // so coord() is not applied. But when BQ cannot optimize itself away
    // for a single clause (minNrShouldMatch, prohibited clauses, etc), it's
    // important not to apply coord(1,1) for consistency, it might not be 1.0F
    if (maxOverlap == 1)
    {
        return 1F;
    }

    return m_similarity.Coord(overlap, maxOverlap);
}
/// <summary>
/// Folds this query's boost into <paramref name="topLevelBoost"/> and forwards the
/// normalization to every sub-weight.
/// </summary>
/// <param name="norm"> Normalization factor, passed through unchanged. </param>
/// <param name="topLevelBoost"> Boost inherited from enclosing queries. </param>
public override void Normalize(float norm, float topLevelBoost)
{
    topLevelBoost *= outerInstance.Boost; // incorporate boost
    foreach (Weight subWeight in m_weights)
    {
        // normalize all clauses (even if prohibited, in case of side effects)
        subWeight.Normalize(norm, topLevelBoost);
    }
}
/// <summary>
/// Explains how document <paramref name="doc"/> scores against this boolean query by
/// combining the explanations of the individual clauses.
/// </summary>
/// <param name="context"> The reader context the document belongs to. </param>
/// <param name="doc"> The document id passed to each sub-weight's <c>Explain</c>. </param>
/// <returns>
/// A non-matching explanation when a required clause fails, a prohibited clause matches,
/// or too few optional clauses match; otherwise the "sum of:" explanation, wrapped in a
/// "product of:" explanation when the coord factor is not 1.
/// </returns>
public override Explanation Explain(AtomicReaderContext context, int doc)
{
    int neededOptionalMatches = outerInstance.MinimumNumberShouldMatch;
    ComplexExplanation summary = new ComplexExplanation { Description = "sum of:" };
    int matchedClauses = 0;      // non-prohibited clauses that matched
    float scoreTotal = 0.0f;
    bool rejected = false;       // a required clause missed or a prohibited clause hit
    int optionalMatches = 0;

    // m_weights was built in clause order, so the clause enumerator is advanced in lockstep with it.
    using (IEnumerator<BooleanClause> clauseIter = outerInstance.clauses.GetEnumerator())
    {
        foreach (Weight weight in m_weights)
        {
            clauseIter.MoveNext();
            BooleanClause clause = clauseIter.Current;

            bool hasScorer = weight.GetScorer(context, context.AtomicReader.LiveDocs) != null;
            if (!hasScorer)
            {
                if (clause.IsRequired)
                {
                    rejected = true;
                    Explanation noScorer = new Explanation(0.0f, $"no match on required clause ({clause.Query.ToString()})");
                    summary.AddDetail(noScorer);
                }
                continue;
            }

            Explanation detail = weight.Explain(context, doc);
            if (!detail.IsMatch)
            {
                if (clause.IsRequired)
                {
                    Explanation miss = new Explanation(0.0f, $"no match on required clause ({clause.Query.ToString()})");
                    miss.AddDetail(detail);
                    summary.AddDetail(miss);
                    rejected = true;
                }
                continue;
            }

            if (clause.IsProhibited)
            {
                Explanation hit = new Explanation(0.0f, $"match on prohibited clause ({clause.Query.ToString()})");
                hit.AddDetail(detail);
                summary.AddDetail(hit);
                rejected = true;
            }
            else
            {
                summary.AddDetail(detail);
                scoreTotal += detail.Value;
                matchedClauses++;
            }

            if (clause.Occur == Occur_e.SHOULD)
            {
                optionalMatches++;
            }
        }
    }

    // Required/prohibited failures take precedence over the optional-clause minimum.
    if (rejected || optionalMatches < neededOptionalMatches)
    {
        summary.Match = false;
        summary.Value = 0.0f;
        summary.Description = rejected
            ? "Failure to meet condition(s) of required/prohibited clause(s)"
            : "Failure to match minimum number of optional clauses: " + neededOptionalMatches;
        return summary;
    }

    summary.Match = matchedClauses > 0;
    summary.Value = scoreTotal;

    float coordFactor;
    if (disableCoord)
    {
        coordFactor = 1.0f;
    }
    else
    {
        coordFactor = Coord(matchedClauses, m_maxCoord);
    }

    if (coordFactor == 1.0f)
    {
        return summary; // eliminate wrapper
    }

    ComplexExplanation product = new ComplexExplanation(summary.IsMatch, scoreTotal * coordFactor, "product of:");
    product.AddDetail(summary);
    product.AddDetail(new Explanation(coordFactor, "coord(" + matchedClauses + "/" + m_maxCoord + ")"));
    return product;
}
/// <summary>
/// Returns a bulk scorer for this weight. The base implementation is used when in-order
/// scoring is requested, when more than one optional clause must match, or when a required
/// clause has a scorer; otherwise a <see cref="BooleanScorer"/> over the optional and
/// prohibited sub-scorers is returned.
/// </summary>
/// <param name="context"> The reader context to score. </param>
/// <param name="scoreDocsInOrder"> Whether documents must be scored in order. </param>
/// <param name="acceptDocs"> Passed through to the sub-weights and the base implementation. </param>
/// <returns> A bulk scorer, or <c>null</c> when a required clause yields no scorer. </returns>
public override BulkScorer GetBulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, IBits acceptDocs)
{
    int minShouldMatch = outerInstance.m_minNrShouldMatch;
    if (scoreDocsInOrder || minShouldMatch > 1)
    {
        // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch
        // but the same is even true of pure conjunctions...
        return base.GetBulkScorer(context, scoreDocsInOrder, acceptDocs);
    }

    IList<BulkScorer> prohibitedScorers = new List<BulkScorer>();
    IList<BulkScorer> optionalScorers = new List<BulkScorer>();

    // m_weights was built in clause order, so the clause enumerator is advanced in lockstep with it.
    using (IEnumerator<BooleanClause> clauseIter = outerInstance.clauses.GetEnumerator())
    {
        foreach (Weight weight in m_weights)
        {
            clauseIter.MoveNext();
            BooleanClause clause = clauseIter.Current;
            BulkScorer sub = weight.GetBulkScorer(context, false, acceptDocs);

            if (clause.IsRequired)
            {
                if (sub == null)
                {
                    // a required clause without a scorer means nothing can match
                    return null;
                }

                // TODO: there are some cases where BooleanScorer
                // would handle conjunctions faster than
                // BooleanScorer2...
                return base.GetBulkScorer(context, scoreDocsInOrder, acceptDocs);
            }

            if (sub == null)
            {
                continue;
            }

            if (clause.IsProhibited)
            {
                prohibitedScorers.Add(sub);
            }
            else
            {
                optionalScorers.Add(sub);
            }
        }
    }

    // Check if we can and should return a BooleanScorer
    return new BooleanScorer(this, disableCoord, minShouldMatch, optionalScorers, prohibitedScorers, m_maxCoord);
}
/// <summary>
/// Builds the scorer for this weight by collecting the sub-scorers of all clauses into
/// required, prohibited and optional groups and choosing a matching scorer implementation.
/// </summary>
/// <param name="context"> The reader context to score. </param>
/// <param name="acceptDocs"> Passed through to every sub-weight's <c>GetScorer</c>. </param>
/// <returns>
/// <c>null</c> when a required clause has no scorer, when there are neither required nor
/// optional scorers, or when fewer optional scorers exist than must match; otherwise a
/// <see cref="ConjunctionScorer"/>, <see cref="DisjunctionSumScorer"/> or <see cref="BooleanScorer2"/>.
/// </returns>
public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
{
    IList<Scorer> required = new List<Scorer>();
    IList<Scorer> prohibited = new List<Scorer>();
    IList<Scorer> optional = new List<Scorer>();

    // The enumerator is disposed deterministically (including on the early return below),
    // matching how Explain and GetBulkScorer walk the clauses.
    using (IEnumerator<BooleanClause> cIter = outerInstance.clauses.GetEnumerator())
    {
        foreach (Weight w in m_weights)
        {
            cIter.MoveNext();
            BooleanClause c = cIter.Current;
            Scorer subScorer = w.GetScorer(context, acceptDocs);
            if (subScorer == null)
            {
                if (c.IsRequired)
                {
                    // a required clause without a scorer means nothing can match
                    return null;
                }
            }
            else if (c.IsRequired)
            {
                required.Add(subScorer);
            }
            else if (c.IsProhibited)
            {
                prohibited.Add(subScorer);
            }
            else
            {
                optional.Add(subScorer);
            }
        }
    }

    if (required.Count == 0 && optional.Count == 0)
    {
        // no required and optional clauses.
        return null;
    }
    else if (optional.Count < outerInstance.m_minNrShouldMatch)
    {
        // either >= 1 req scorer, or there are 0 req scorers and at least 1
        // optional scorer. Therefore if there are not enough optional scorers
        // no documents will be matched by the query
        return null;
    }

    // simple conjunction
    if (optional.Count == 0 && prohibited.Count == 0)
    {
        float coord = disableCoord ? 1.0f : Coord(required.Count, m_maxCoord);
        return new ConjunctionScorer(this, required.ToArray(), coord);
    }

    // simple disjunction
    if (required.Count == 0 && prohibited.Count == 0 && outerInstance.m_minNrShouldMatch <= 1 && optional.Count > 1)
    {
        // one coord factor per possible number of matching optional clauses (0..optional.Count)
        var coord = new float[optional.Count + 1];
        for (int i = 0; i < coord.Length; i++)
        {
            coord[i] = disableCoord ? 1.0f : Coord(i, m_maxCoord);
        }
        return new DisjunctionSumScorer(this, optional.ToArray(), coord);
    }

    // Return a BooleanScorer2
    return new BooleanScorer2(this, disableCoord, outerInstance.m_minNrShouldMatch, required, prohibited, optional, m_maxCoord);
}
/// <summary>
/// Gets whether this weight may score documents out of order: <c>false</c> when more than
/// one optional clause must match or when any clause is required, <c>true</c> otherwise.
/// </summary>
public override bool ScoresDocsOutOfOrder
{
    get
    {
        // BS2 (in-order) will be used by scorer() in either of the two cases below
        bool inOrderOnly = outerInstance.m_minNrShouldMatch > 1;
        if (!inOrderOnly)
        {
            foreach (BooleanClause clause in outerInstance.clauses)
            {
                if (clause.IsRequired)
                {
                    inOrderOnly = true;
                    break;
                }
            }
        }

        // scorer() will return an out-of-order scorer if requested.
        return !inOrderOnly;
    }
}
}
/// <summary>
/// Creates a <see cref="BooleanWeight"/> for this query, carrying over its coord-disabling flag.
/// </summary>
/// <param name="searcher"> The searcher handed to the new weight. </param>
public override Weight CreateWeight(IndexSearcher searcher) => new BooleanWeight(this, searcher, disableCoord);
/// <summary>
/// Rewrites this query. A single non-prohibited clause (with no minimum-should-match)
/// is replaced by its own rewritten query; otherwise every clause is rewritten and a
/// clone is returned only if at least one clause changed.
/// </summary>
/// <param name="reader"> The reader passed to each clause query's <c>Rewrite</c>. </param>
/// <returns> The rewritten query, or this instance when nothing changed. </returns>
public override Query Rewrite(IndexReader reader)
{
    // optimize 1-clause queries
    if (m_minNrShouldMatch == 0 && clauses.Count == 1)
    {
        BooleanClause only = clauses[0];
        if (!only.IsProhibited) // just return clause
        {
            Query rewritten = only.Query.Rewrite(reader); // rewrite first
            if (Boost == 1.0f)
            {
                return rewritten;
            }

            // incorporate boost
            if (rewritten == only.Query) // if rewrite was no-op
            {
                rewritten = (Query)rewritten.Clone(); // then clone before boost
            }
            // Since the BooleanQuery only has 1 clause, the BooleanQuery will be
            // written out. Therefore the rewritten Query's boost must incorporate both
            // the clause's boost, and the boost of the BooleanQuery itself
            rewritten.Boost = Boost * rewritten.Boost;
            return rewritten;
        }
    }

    BooleanQuery rewrittenCopy = null; // recursively rewrite
    for (int i = 0; i < clauses.Count; i++)
    {
        BooleanClause original = clauses[i];
        Query rewritten = original.Query.Rewrite(reader);
        if (rewritten == original.Query)
        {
            continue; // clause did not rewrite
        }

        // The BooleanQuery clone is lazily initialized so only initialize
        // it if a rewritten clause differs from the original clause (and hasn't been
        // initialized already). If nothing differs, the clone isn't needlessly created
        if (rewrittenCopy == null)
        {
            rewrittenCopy = (BooleanQuery)this.Clone();
        }
        rewrittenCopy.clauses[i] = new BooleanClause(rewritten, original.Occur);
    }

    // some clauses rewrote -> the clone; no clauses rewrote -> this
    return rewrittenCopy ?? (Query)this;
}
/// <summary>
/// Adds to <paramref name="terms"/> the terms of every clause whose occurrence is not
/// <c>MUST_NOT</c>, by delegating to each clause query's <c>ExtractTerms</c>.
/// </summary>
/// <param name="terms"> The set that receives the extracted terms. </param>
public override void ExtractTerms(ISet<Term> terms)
{
    foreach (BooleanClause clause in clauses)
    {
        if (clause.Occur == Occur_e.MUST_NOT)
        {
            continue;
        }
        clause.Query.ExtractTerms(terms);
    }
}
/// <summary>
/// Clones this query. The clone receives its own clause list, populated with the same
/// <see cref="BooleanClause"/> instances, so list edits on one do not affect the other.
/// </summary>
/// <returns> The cloned <see cref="BooleanQuery"/>. </returns>
public override object Clone()
{
    var copy = (BooleanQuery)base.Clone();
    copy.clauses = new JCG.List<BooleanClause>(clauses);
    return copy;
}
/// <summary>
/// Prints a user-readable version of this query: clauses separated by spaces, prefixed
/// with <c>-</c> (prohibited) or <c>+</c> (required), followed by <c>~n</c> when a
/// minimum-should-match is set and by the boost when it is not 1.
/// </summary>
/// <param name="field"> Passed through to each sub-query's <c>ToString</c>. </param>
public override string ToString(string field)
{
    StringBuilder text = new StringBuilder();
    // The clause list is parenthesized whenever a suffix (~n or boost) will follow it.
    bool wrap = Boost != 1.0 || MinimumNumberShouldMatch > 0;
    if (wrap)
    {
        text.Append('(');
    }

    for (int i = 0; i < clauses.Count; i++)
    {
        if (i > 0)
        {
            text.Append(' ');
        }

        BooleanClause clause = clauses[i];
        if (clause.IsProhibited)
        {
            text.Append('-');
        }
        else if (clause.IsRequired)
        {
            text.Append('+');
        }

        Query inner = clause.Query;
        if (inner == null)
        {
            text.Append("null");
        }
        else if (inner is BooleanQuery) // wrap sub-bools in parens
        {
            text.Append('(').Append(inner.ToString(field)).Append(')');
        }
        else
        {
            text.Append(inner.ToString(field));
        }
    }

    if (wrap)
    {
        text.Append(')');
    }

    if (MinimumNumberShouldMatch > 0)
    {
        text.Append('~').Append(MinimumNumberShouldMatch);
    }

    if (Boost != 1.0f)
    {
        text.Append(ToStringUtils.Boost(Boost));
    }

    return text.ToString();
}
/// <summary>
/// Returns <c>true</c> if <paramref name="o"/> is a <see cref="BooleanQuery"/> with the same
/// boost, equal clause list, same minimum-should-match and same coord-disabling flag.
/// </summary>
/// <param name="o"> The object to compare with. </param>
public override bool Equals(object o)
{
    if (o is BooleanQuery other)
    {
        return this.Boost == other.Boost
            && this.clauses.Equals(other.clauses)
            && this.MinimumNumberShouldMatch == other.MinimumNumberShouldMatch
            && this.disableCoord == other.disableCoord;
    }

    return false;
}
/// <summary>
/// Returns a hash code combining the boost bits, the clause list hash, the
/// minimum-should-match value and the coord-disabling flag.
/// </summary>
public override int GetHashCode()
{
    int boostBits = BitConversion.SingleToInt32Bits(Boost);
    int clausesHash = clauses.GetHashCode();
    int coordSalt = disableCoord ? 17 : 0;

    // '+' binds tighter than '^', so the boost bits are XOR-ed with the whole sum.
    return boostBits ^ (clausesHash + MinimumNumberShouldMatch + coordSalt);
}
}
}