blob: 563ae3551eccfd59d7a65b1e4be7e3b58620767b [file] [log] [blame]
using System;
using System.Collections.Generic;
using System.Text;
namespace Lucene.Net.Search
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
using BooleanWeight = Lucene.Net.Search.BooleanQuery.BooleanWeight;
/// <summary>
/// Description from Doug Cutting (excerpted from
/// LUCENE-1483):
/// <para/>
/// <see cref="BooleanScorer"/> uses an array to score windows of
/// 2K docs. So it scores docs 0-2K first, then docs 2K-4K,
/// etc. For each window it iterates through all query terms
/// and accumulates a score in table[doc%2K]. It also stores
/// in the table a bitmask representing which terms
/// contributed to the score. Non-zero scores are chained in
/// a linked list. At the end of scoring each window it then
/// iterates through the linked list and, if the bitmask
/// matches the boolean constraints, collects a hit. For
/// boolean queries with lots of frequent terms this can be
/// much faster, since it does not need to update a priority
/// queue for each posting, instead performing constant-time
/// operations per posting. The only downside is that it
/// results in hits being delivered out-of-order within the
/// window, which means it cannot be nested within other
/// scorers. But it works well as a top-level scorer.
/// <para/>
/// The new BooleanScorer2 implementation instead works by
/// merging priority queues of postings, albeit with some
/// clever tricks. For example, a pure conjunction (all terms
/// required) does not require a priority queue. Instead it
/// sorts the posting streams at the start, then repeatedly
/// skips the first to to the last. If the first ever equals
/// the last, then there's a hit. When some terms are
/// required and some terms are optional, the conjunction can
/// be evaluated first, then the optional terms can all skip
/// to the match and be added to the score. Thus the
/// conjunction can reduce the number of priority queue
/// updates for the optional terms.
/// </summary>
internal sealed class BooleanScorer : BulkScorer
{
private sealed class BooleanScorerCollector : ICollector
{
private BucketTable bucketTable;
private int mask;
private Scorer scorer;
public BooleanScorerCollector(int mask, BucketTable bucketTable)
{
this.mask = mask;
this.bucketTable = bucketTable;
}
public void Collect(int doc)
{
BucketTable table = bucketTable;
int i = doc & BucketTable.MASK;
Bucket bucket = table.buckets[i];
if (bucket.Doc != doc) // invalid bucket
{
bucket.Doc = doc; // set doc
bucket.Score = scorer.GetScore(); // initialize score
bucket.Bits = mask; // initialize mask
bucket.Coord = 1; // initialize coord
bucket.Next = table.first; // push onto valid list
table.first = bucket;
} // valid bucket
else
{
bucket.Score += scorer.GetScore(); // increment score
bucket.Bits |= mask; // add bits in mask
bucket.Coord++; // increment coord
}
}
public void SetNextReader(AtomicReaderContext context)
{
// not needed by this implementation
}
public void SetScorer(Scorer scorer)
{
this.scorer = scorer;
}
public bool AcceptsDocsOutOfOrder => true;
}
internal sealed class Bucket
{
internal int Doc { get; set; } // tells if bucket is valid
internal double Score { get; set; } // incremental score
// TODO: break out bool anyProhibited, int
// numRequiredMatched; then we can remove 32 limit on
// required clauses
internal int Bits { get; set; } // used for bool constraints
internal int Coord { get; set; } // count of terms in score
internal Bucket Next { get; set; } // next valid bucket
public Bucket()
{
// Initialize properties
Doc = -1;
}
}
/// <summary>
/// A simple hash table of document scores within a range. </summary>
internal sealed class BucketTable
{
public static readonly int SIZE = 1 << 11;
public static readonly int MASK = SIZE - 1;
internal readonly Bucket[] buckets = new Bucket[SIZE];
internal Bucket first = null; // head of valid list
public BucketTable()
{
// Pre-fill to save the lazy init when collecting
// each sub:
for (int idx = 0; idx < SIZE; idx++)
{
buckets[idx] = new Bucket();
}
}
public ICollector NewCollector(int mask)
{
return new BooleanScorerCollector(mask, this);
}
public int Count => SIZE; // LUCENENET NOTE: This was size() in Lucene.
}
internal sealed class SubScorer
{
public BulkScorer Scorer { get; set; }
// TODO: re-enable this if BQ ever sends us required clauses
//public boolean required = false;
public bool Prohibited { get; set; }
public ICollector Collector { get; set; }
public SubScorer Next { get; set; }
public bool More { get; set; }
public SubScorer(BulkScorer scorer, bool required, bool prohibited, ICollector collector, SubScorer next)
{
if (required)
{
throw new ArgumentException("this scorer cannot handle required=true");
}
this.Scorer = scorer;
this.More = true;
// TODO: re-enable this if BQ ever sends us required clauses
//this.required = required;
this.Prohibited = prohibited;
this.Collector = collector;
this.Next = next;
}
}
private SubScorer scorers = null;
private BucketTable bucketTable = new BucketTable();
private readonly float[] coordFactors;
// TODO: re-enable this if BQ ever sends us required clauses
//private int requiredMask = 0;
private readonly int minNrShouldMatch;
private int end;
private Bucket current;
// Any time a prohibited clause matches we set bit 0:
private const int PROHIBITED_MASK = 1;
private readonly Weight weight;
internal BooleanScorer(BooleanWeight weight, bool disableCoord, int minNrShouldMatch, IList<BulkScorer> optionalScorers, IList<BulkScorer> prohibitedScorers, int maxCoord)
{
this.minNrShouldMatch = minNrShouldMatch;
this.weight = weight;
foreach (BulkScorer scorer in optionalScorers)
{
scorers = new SubScorer(scorer, false, false, bucketTable.NewCollector(0), scorers);
}
foreach (BulkScorer scorer in prohibitedScorers)
{
scorers = new SubScorer(scorer, false, true, bucketTable.NewCollector(PROHIBITED_MASK), scorers);
}
coordFactors = new float[optionalScorers.Count + 1];
for (int i = 0; i < coordFactors.Length; i++)
{
coordFactors[i] = disableCoord ? 1.0f : weight.Coord(i, maxCoord);
}
}
public override bool Score(ICollector collector, int max)
{
bool more;
Bucket tmp;
FakeScorer fs = new FakeScorer();
// The internal loop will set the score and doc before calling collect.
collector.SetScorer(fs);
do
{
bucketTable.first = null;
while (current != null) // more queued
{
// check prohibited & required
if ((current.Bits & PROHIBITED_MASK) == 0)
{
// TODO: re-enable this if BQ ever sends us required
// clauses
//&& (current.bits & requiredMask) == requiredMask) {
// NOTE: Lucene always passes max =
// Integer.MAX_VALUE today, because we never embed
// a BooleanScorer inside another (even though
// that should work)... but in theory an outside
// app could pass a different max so we must check
// it:
if (current.Doc >= max)
{
tmp = current;
current = current.Next;
tmp.Next = bucketTable.first;
bucketTable.first = tmp;
continue;
}
if (current.Coord >= minNrShouldMatch)
{
fs.score = (float)(current.Score * coordFactors[current.Coord]);
fs.doc = current.Doc;
fs.freq = current.Coord;
collector.Collect(current.Doc);
}
}
current = current.Next; // pop the queue
}
if (bucketTable.first != null)
{
current = bucketTable.first;
bucketTable.first = current.Next;
return true;
}
// refill the queue
more = false;
end += BucketTable.SIZE;
for (SubScorer sub = scorers; sub != null; sub = sub.Next)
{
if (sub.More)
{
sub.More = sub.Scorer.Score(sub.Collector, end);
more |= sub.More;
}
}
current = bucketTable.first;
} while (current != null || more);
return false;
}
public override string ToString()
{
StringBuilder buffer = new StringBuilder();
buffer.Append("boolean(");
for (SubScorer sub = scorers; sub != null; sub = sub.Next)
{
buffer.Append(sub.Scorer.ToString());
buffer.Append(" ");
}
buffer.Append(")");
return buffer.ToString();
}
}
}