src/Lucene.Net/Search/ExactPhraseScorer.cs - lucenenet - Git at Google

 using Lucene.Net.Diagnostics;
 using Lucene.Net.Index;
 using Lucene.Net.Support;

 namespace Lucene.Net.Search
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     using Similarity = Lucene.Net.Search.Similarities.Similarity;

     internal sealed class ExactPhraseScorer : Scorer
     {
         private readonly int endMinus1;

         private const int CHUNK = 4096;

         private int gen;
         private readonly int[] counts = new int[CHUNK];
         private readonly int[] gens = new int[CHUNK];

         internal bool noDocs;
         private readonly long cost;

         private sealed class ChunkState
         {
             internal DocsAndPositionsEnum PosEnum { get; private set; }
             internal int Offset { get; private set; }
             internal bool UseAdvance { get; private set; }
             internal int PosUpto { get; set; }
             internal int PosLimit { get; set; }
             internal int Pos { get; set; }
             internal int LastPos { get; set; }

             public ChunkState(DocsAndPositionsEnum posEnum, int offset, bool useAdvance)
             {
                 this.PosEnum = posEnum;
                 this.Offset = offset;
                 this.UseAdvance = useAdvance;
             }
         }

         private readonly ChunkState[] chunkStates;

         private int docID = -1;
         private int freq;

         private readonly Similarity.SimScorer docScorer;

         internal ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity.SimScorer docScorer)
             : base(weight)
         {
             this.docScorer = docScorer;

             chunkStates = new ChunkState[postings.Length];

             endMinus1 = postings.Length - 1;

             // min(cost)
             cost = postings[0].postings.GetCost();

             for (int i = 0; i < postings.Length; i++)
             {
                 // Coarse optimization: advance(target) is fairly
                 // costly, so, if the relative freq of the 2nd
                 // rarest term is not that much (> 1/5th) rarer than
                 // the first term, then we just use .nextDoc() when
                 // ANDing.  this buys ~15% gain for phrases where
                 // freq of rarest 2 terms is close:
                 bool useAdvance = postings[i].docFreq > 5 * postings[0].docFreq;
                 chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position, useAdvance);
                 if (i > 0 && postings[i].postings.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                 {
                     noDocs = true;
                     return;
                 }
             }
         }

         public override int NextDoc()
         {
             while (true)
             {
                 // first (rarest) term
                 int doc = chunkStates[0].PosEnum.NextDoc();
                 if (doc == DocIdSetIterator.NO_MORE_DOCS)
                 {
                     docID = doc;
                     return doc;
                 }

                 // not-first terms
                 int i = 1;
                 while (i < chunkStates.Length)
                 {
                     ChunkState cs = chunkStates[i];
                     int doc2 = cs.PosEnum.DocID;
                     if (cs.UseAdvance)
                     {
                         if (doc2 < doc)
                         {
                             doc2 = cs.PosEnum.Advance(doc);
                         }
                     }
                     else
                     {
                         int iter = 0;
                         while (doc2 < doc)
                         {
                             // safety net -- fallback to .advance if we've
                             // done too many .nextDocs
                             if (++iter == 50)
                             {
                                 doc2 = cs.PosEnum.Advance(doc);
                                 break;
                             }
                             else
                             {
                                 doc2 = cs.PosEnum.NextDoc();
                             }
                         }
                     }
                     if (doc2 > doc)
                     {
                         break;
                     }
                     i++;
                 }

                 if (i == chunkStates.Length)
                 {
                     // this doc has all the terms -- now test whether
                     // phrase occurs
                     docID = doc;

                     freq = PhraseFreq();
                     if (freq != 0)
                     {
                         return docID;
                     }
                 }
             }
         }

         public override int Advance(int target)
         {
             // first term
             int doc = chunkStates[0].PosEnum.Advance(target);
             if (doc == DocIdSetIterator.NO_MORE_DOCS)
             {
                 docID = DocIdSetIterator.NO_MORE_DOCS;
                 return doc;
             }

             while (true)
             {
                 // not-first terms
                 int i = 1;
                 while (i < chunkStates.Length)
                 {
                     int doc2 = chunkStates[i].PosEnum.DocID;
                     if (doc2 < doc)
                     {
                         doc2 = chunkStates[i].PosEnum.Advance(doc);
                     }
                     if (doc2 > doc)
                     {
                         break;
                     }
                     i++;
                 }

                 if (i == chunkStates.Length)
                 {
                     // this doc has all the terms -- now test whether
                     // phrase occurs
                     docID = doc;
                     freq = PhraseFreq();
                     if (freq != 0)
                     {
                         return docID;
                     }
                 }

                 doc = chunkStates[0].PosEnum.NextDoc();
                 if (doc == DocIdSetIterator.NO_MORE_DOCS)
                 {
                     docID = doc;
                     return doc;
                 }
             }
         }

         public override string ToString()
         {
             return "ExactPhraseScorer(" + m_weight + ")";
         }

         public override int Freq => freq;

         public override int DocID => docID;

         public override float GetScore()
         {
             return docScorer.Score(docID, freq);
         }

         private int PhraseFreq()
         {
             freq = 0;

             // init chunks
             for (int i = 0; i < chunkStates.Length; i++)
             {
                 ChunkState cs = chunkStates[i];
                 cs.PosLimit = cs.PosEnum.Freq;
                 cs.Pos = cs.Offset + cs.PosEnum.NextPosition();
                 cs.PosUpto = 1;
                 cs.LastPos = -1;
             }

             int chunkStart = 0;
             int chunkEnd = CHUNK;

             // process chunk by chunk
             bool end = false;

             // TODO: we could fold in chunkStart into offset and
             // save one subtract per pos incr

             while (!end)
             {
                 gen++;

                 if (gen == 0)
                 {
                     // wraparound
                     Arrays.Fill(gens, 0);
                     gen++;
                 }

                 // first term
                 {
                     ChunkState cs = chunkStates[0];
                     while (cs.Pos < chunkEnd)
                     {
                         if (cs.Pos > cs.LastPos)
                         {
                             cs.LastPos = cs.Pos;
                             int posIndex = cs.Pos - chunkStart;
                             counts[posIndex] = 1;
                             if (Debugging.AssertsEnabled) Debugging.Assert(gens[posIndex] != gen);
                             gens[posIndex] = gen;
                         }

                         if (cs.PosUpto == cs.PosLimit)
                         {
                             end = true;
                             break;
                         }
                         cs.PosUpto++;
                         cs.Pos = cs.Offset + cs.PosEnum.NextPosition();
                     }
                 }

                 // middle terms
                 bool any = true;
                 for (int t = 1; t < endMinus1; t++)
                 {
                     ChunkState cs = chunkStates[t];
                     any = false;
                     while (cs.Pos < chunkEnd)
                     {
                         if (cs.Pos > cs.LastPos)
                         {
                             cs.LastPos = cs.Pos;
                             int posIndex = cs.Pos - chunkStart;
                             if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t)
                             {
                                 // viable
                                 counts[posIndex]++;
                                 any = true;
                             }
                         }

                         if (cs.PosUpto == cs.PosLimit)
                         {
                             end = true;
                             break;
                         }
                         cs.PosUpto++;
                         cs.Pos = cs.Offset + cs.PosEnum.NextPosition();
                     }

                     if (!any)
                     {
                         break;
                     }
                 }

                 if (!any)
                 {
                     // petered out for this chunk
                     chunkStart += CHUNK;
                     chunkEnd += CHUNK;
                     continue;
                 }

                 // last term

                 {
                     ChunkState cs = chunkStates[endMinus1];
                     while (cs.Pos < chunkEnd)
                     {
                         if (cs.Pos > cs.LastPos)
                         {
                             cs.LastPos = cs.Pos;
                             int posIndex = cs.Pos - chunkStart;
                             if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1)
                             {
                                 freq++;
                             }
                         }

                         if (cs.PosUpto == cs.PosLimit)
                         {
                             end = true;
                             break;
                         }
                         cs.PosUpto++;
                         cs.Pos = cs.Offset + cs.PosEnum.NextPosition();
                     }
                 }

                 chunkStart += CHUNK;
                 chunkEnd += CHUNK;
             }

             return freq;
         }

         public override long GetCost()
         {
             return cost;
         }
     }
 }
	using Lucene.Net.Diagnostics;
	using Lucene.Net.Index;
	using Lucene.Net.Support;

	namespace Lucene.Net.Search
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	using Similarity = Lucene.Net.Search.Similarities.Similarity;

	internal sealed class ExactPhraseScorer : Scorer
	{
	private readonly int endMinus1;

	private const int CHUNK = 4096;

	private int gen;
	private readonly int[] counts = new int[CHUNK];
	private readonly int[] gens = new int[CHUNK];

	internal bool noDocs;
	private readonly long cost;

	private sealed class ChunkState
	{
	internal DocsAndPositionsEnum PosEnum { get; private set; }
	internal int Offset { get; private set; }
	internal bool UseAdvance { get; private set; }
	internal int PosUpto { get; set; }
	internal int PosLimit { get; set; }
	internal int Pos { get; set; }
	internal int LastPos { get; set; }

	public ChunkState(DocsAndPositionsEnum posEnum, int offset, bool useAdvance)
	{
	this.PosEnum = posEnum;
	this.Offset = offset;
	this.UseAdvance = useAdvance;
	}
	}

	private readonly ChunkState[] chunkStates;

	private int docID = -1;
	private int freq;

	private readonly Similarity.SimScorer docScorer;

	internal ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity.SimScorer docScorer)
	: base(weight)
	{
	this.docScorer = docScorer;

	chunkStates = new ChunkState[postings.Length];

	endMinus1 = postings.Length - 1;

	// min(cost)
	cost = postings[0].postings.GetCost();

	for (int i = 0; i < postings.Length; i++)
	{
	// Coarse optimization: advance(target) is fairly
	// costly, so, if the relative freq of the 2nd
	// rarest term is not that much (> 1/5th) rarer than
	// the first term, then we just use .nextDoc() when
	// ANDing. this buys ~15% gain for phrases where
	// freq of rarest 2 terms is close:
	bool useAdvance = postings[i].docFreq > 5 * postings[0].docFreq;
	chunkStates[i] = new ChunkState(postings[i].postings, -postings[i].position, useAdvance);
	if (i > 0 && postings[i].postings.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
	{
	noDocs = true;
	return;
	}
	}
	}

	public override int NextDoc()
	{
	while (true)
	{
	// first (rarest) term
	int doc = chunkStates[0].PosEnum.NextDoc();
	if (doc == DocIdSetIterator.NO_MORE_DOCS)
	{
	docID = doc;
	return doc;
	}

	// not-first terms
	int i = 1;
	while (i < chunkStates.Length)
	{
	ChunkState cs = chunkStates[i];
	int doc2 = cs.PosEnum.DocID;
	if (cs.UseAdvance)
	{
	if (doc2 < doc)
	{
	doc2 = cs.PosEnum.Advance(doc);
	}
	}
	else
	{
	int iter = 0;
	while (doc2 < doc)
	{
	// safety net -- fallback to .advance if we've
	// done too many .nextDocs
	if (++iter == 50)
	{
	doc2 = cs.PosEnum.Advance(doc);
	break;
	}
	else
	{
	doc2 = cs.PosEnum.NextDoc();
	}
	}
	}
	if (doc2 > doc)
	{
	break;
	}
	i++;
	}

	if (i == chunkStates.Length)
	{
	// this doc has all the terms -- now test whether
	// phrase occurs
	docID = doc;

	freq = PhraseFreq();
	if (freq != 0)
	{
	return docID;
	}
	}
	}
	}

	public override int Advance(int target)
	{
	// first term
	int doc = chunkStates[0].PosEnum.Advance(target);
	if (doc == DocIdSetIterator.NO_MORE_DOCS)
	{
	docID = DocIdSetIterator.NO_MORE_DOCS;
	return doc;
	}

	while (true)
	{
	// not-first terms
	int i = 1;
	while (i < chunkStates.Length)
	{
	int doc2 = chunkStates[i].PosEnum.DocID;
	if (doc2 < doc)
	{
	doc2 = chunkStates[i].PosEnum.Advance(doc);
	}
	if (doc2 > doc)
	{
	break;
	}
	i++;
	}

	if (i == chunkStates.Length)
	{
	// this doc has all the terms -- now test whether
	// phrase occurs
	docID = doc;
	freq = PhraseFreq();
	if (freq != 0)
	{
	return docID;
	}
	}

	doc = chunkStates[0].PosEnum.NextDoc();
	if (doc == DocIdSetIterator.NO_MORE_DOCS)
	{
	docID = doc;
	return doc;
	}
	}
	}

	public override string ToString()
	{
	return "ExactPhraseScorer(" + m_weight + ")";
	}

	public override int Freq => freq;

	public override int DocID => docID;

	public override float GetScore()
	{
	return docScorer.Score(docID, freq);
	}

	private int PhraseFreq()
	{
	freq = 0;

	// init chunks
	for (int i = 0; i < chunkStates.Length; i++)
	{
	ChunkState cs = chunkStates[i];
	cs.PosLimit = cs.PosEnum.Freq;
	cs.Pos = cs.Offset + cs.PosEnum.NextPosition();
	cs.PosUpto = 1;
	cs.LastPos = -1;
	}

	int chunkStart = 0;
	int chunkEnd = CHUNK;

	// process chunk by chunk
	bool end = false;

	// TODO: we could fold in chunkStart into offset and
	// save one subtract per pos incr

	while (!end)
	{
	gen++;

	if (gen == 0)
	{
	// wraparound
	Arrays.Fill(gens, 0);
	gen++;
	}

	// first term
	{
	ChunkState cs = chunkStates[0];
	while (cs.Pos < chunkEnd)
	{
	if (cs.Pos > cs.LastPos)
	{
	cs.LastPos = cs.Pos;
	int posIndex = cs.Pos - chunkStart;
	counts[posIndex] = 1;
	if (Debugging.AssertsEnabled) Debugging.Assert(gens[posIndex] != gen);
	gens[posIndex] = gen;
	}

	if (cs.PosUpto == cs.PosLimit)
	{
	end = true;
	break;
	}
	cs.PosUpto++;
	cs.Pos = cs.Offset + cs.PosEnum.NextPosition();
	}
	}

	// middle terms
	bool any = true;
	for (int t = 1; t < endMinus1; t++)
	{
	ChunkState cs = chunkStates[t];
	any = false;
	while (cs.Pos < chunkEnd)
	{
	if (cs.Pos > cs.LastPos)
	{
	cs.LastPos = cs.Pos;
	int posIndex = cs.Pos - chunkStart;
	if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t)
	{
	// viable
	counts[posIndex]++;
	any = true;
	}
	}

	if (cs.PosUpto == cs.PosLimit)
	{
	end = true;
	break;
	}
	cs.PosUpto++;
	cs.Pos = cs.Offset + cs.PosEnum.NextPosition();
	}

	if (!any)
	{
	break;
	}
	}

	if (!any)
	{
	// petered out for this chunk
	chunkStart += CHUNK;
	chunkEnd += CHUNK;
	continue;
	}

	// last term

	{
	ChunkState cs = chunkStates[endMinus1];
	while (cs.Pos < chunkEnd)
	{
	if (cs.Pos > cs.LastPos)
	{
	cs.LastPos = cs.Pos;
	int posIndex = cs.Pos - chunkStart;
	if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1)
	{
	freq++;
	}
	}

	if (cs.PosUpto == cs.PosLimit)
	{
	end = true;
	break;
	}
	cs.PosUpto++;
	cs.Pos = cs.Offset + cs.PosEnum.NextPosition();
	}
	}

	chunkStart += CHUNK;
	chunkEnd += CHUNK;
	}

	return freq;
	}

	public override long GetCost()
	{
	return cost;
	}
	}
	}