blob: 68199c82431abac611ff89f1af1171401ab8febe [file] [log] [blame]
using Lucene.Net.Diagnostics;
using Lucene.Net.Index;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
namespace Lucene.Net.Codecs
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using BytesRef = Lucene.Net.Util.BytesRef;
using DocsAndPositionsEnum = Lucene.Net.Index.DocsAndPositionsEnum;
using DocsEnum = Lucene.Net.Index.DocsEnum;
using IndexOptions = Lucene.Net.Index.IndexOptions;
using FieldInfo = Lucene.Net.Index.FieldInfo; // javadocs
using FixedBitSet = Lucene.Net.Util.FixedBitSet;
using MergeState = Lucene.Net.Index.MergeState;
using MultiDocsAndPositionsEnum = Lucene.Net.Index.MultiDocsAndPositionsEnum;
using MultiDocsEnum = Lucene.Net.Index.MultiDocsEnum;
using TermsEnum = Lucene.Net.Index.TermsEnum;
/// <summary>
/// Abstract API that consumes terms for an individual field.
/// <para/>
/// The lifecycle is:
/// <list type="number">
/// <item><description>TermsConsumer is returned for each field
/// by <see cref="FieldsConsumer.AddField(FieldInfo)"/>.</description></item>
/// <item><description>TermsConsumer returns a <see cref="PostingsConsumer"/> for
/// each term in <see cref="StartTerm(BytesRef)"/>.</description></item>
/// <item><description>When the producer (e.g. IndexWriter)
/// is done adding documents for the term, it calls
/// <see cref="FinishTerm(BytesRef, TermStats)"/>, passing in
/// the accumulated term statistics.</description></item>
/// <item><description>Producer calls <see cref="Finish(long, long, int)"/> with
/// the accumulated collection statistics when it is finished
/// adding terms to the field.</description></item>
/// </list>
/// <para/>
/// @lucene.experimental
/// </summary>
public abstract class TermsConsumer
{
/// <summary>
/// Sole constructor. (For invocation by subclass
/// constructors, typically implicit.)
/// </summary>
protected internal TermsConsumer()
{
}
/// <summary>
/// Starts a new term in this field; this may be called
/// with no corresponding call to finish if the term had
/// no docs.
/// </summary>
public abstract PostingsConsumer StartTerm(BytesRef text);
/// <summary>
/// Finishes the current term; numDocs must be &gt; 0.
/// <c>stats.TotalTermFreq</c> will be -1 when term
/// frequencies are omitted for the field.
/// </summary>
public abstract void FinishTerm(BytesRef text, TermStats stats);
/// <summary>
/// Called when we are done adding terms to this field.
/// <paramref name="sumTotalTermFreq"/> will be -1 when term
/// frequencies are omitted for the field.
/// </summary>
public abstract void Finish(long sumTotalTermFreq, long sumDocFreq, int docCount);
/// <summary>
/// Gets the <see cref="T:IComparer{BytesRef}"/> used to sort terms
/// before feeding to this API.
/// </summary>
public abstract IComparer<BytesRef> Comparer { get; }
private MappingMultiDocsEnum docsEnum;
private MappingMultiDocsEnum docsAndFreqsEnum;
private MappingMultiDocsAndPositionsEnum postingsEnum;
/// <summary>
/// Default merge impl. </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
public virtual void Merge(MergeState mergeState, IndexOptions indexOptions, TermsEnum termsEnum)
{
BytesRef term;
if (Debugging.AssertsEnabled) Debugging.Assert(termsEnum != null);
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
long sumDFsinceLastAbortCheck = 0;
FixedBitSet visitedDocs = new FixedBitSet(mergeState.SegmentInfo.DocCount);
if (indexOptions == IndexOptions.DOCS_ONLY)
{
if (docsEnum == null)
{
docsEnum = new MappingMultiDocsEnum();
}
docsEnum.MergeState = mergeState;
MultiDocsEnum docsEnumIn = null;
while (termsEnum.MoveNext())
{
term = termsEnum.Term;
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
docsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsEnumIn, DocsFlags.NONE);
if (docsEnumIn != null)
{
docsEnum.Reset(docsEnumIn);
PostingsConsumer postingsConsumer = StartTerm(term);
TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, docsEnum, visitedDocs);
if (stats.DocFreq > 0)
{
FinishTerm(term, stats);
sumTotalTermFreq += stats.DocFreq;
sumDFsinceLastAbortCheck += stats.DocFreq;
sumDocFreq += stats.DocFreq;
if (sumDFsinceLastAbortCheck > 60000)
{
mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}
}
}
else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
{
if (docsAndFreqsEnum == null)
{
docsAndFreqsEnum = new MappingMultiDocsEnum();
}
docsAndFreqsEnum.MergeState = mergeState;
MultiDocsEnum docsAndFreqsEnumIn = null;
while (termsEnum.MoveNext())
{
term = termsEnum.Term;
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
docsAndFreqsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsAndFreqsEnumIn);
if (Debugging.AssertsEnabled) Debugging.Assert(docsAndFreqsEnumIn != null);
docsAndFreqsEnum.Reset(docsAndFreqsEnumIn);
PostingsConsumer postingsConsumer = StartTerm(term);
TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, docsAndFreqsEnum, visitedDocs);
if (stats.DocFreq > 0)
{
FinishTerm(term, stats);
sumTotalTermFreq += stats.TotalTermFreq;
sumDFsinceLastAbortCheck += stats.DocFreq;
sumDocFreq += stats.DocFreq;
if (sumDFsinceLastAbortCheck > 60000)
{
mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}
}
else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
{
if (postingsEnum == null)
{
postingsEnum = new MappingMultiDocsAndPositionsEnum();
}
postingsEnum.MergeState = mergeState;
MultiDocsAndPositionsEnum postingsEnumIn = null;
while (termsEnum.MoveNext())
{
term = termsEnum.Term;
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn, DocsAndPositionsFlags.PAYLOADS);
if (Debugging.AssertsEnabled) Debugging.Assert(postingsEnumIn != null);
postingsEnum.Reset(postingsEnumIn);
PostingsConsumer postingsConsumer = StartTerm(term);
TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs);
if (stats.DocFreq > 0)
{
FinishTerm(term, stats);
sumTotalTermFreq += stats.TotalTermFreq;
sumDFsinceLastAbortCheck += stats.DocFreq;
sumDocFreq += stats.DocFreq;
if (sumDFsinceLastAbortCheck > 60000)
{
mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}
}
else
{
if (Debugging.AssertsEnabled) Debugging.Assert(indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
if (postingsEnum == null)
{
postingsEnum = new MappingMultiDocsAndPositionsEnum();
}
postingsEnum.MergeState = mergeState;
MultiDocsAndPositionsEnum postingsEnumIn = null;
while (termsEnum.MoveNext())
{
term = termsEnum.Term;
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn);
if (Debugging.AssertsEnabled) Debugging.Assert(postingsEnumIn != null);
postingsEnum.Reset(postingsEnumIn);
PostingsConsumer postingsConsumer = StartTerm(term);
TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, postingsEnum, visitedDocs);
if (stats.DocFreq > 0)
{
FinishTerm(term, stats);
sumTotalTermFreq += stats.TotalTermFreq;
sumDFsinceLastAbortCheck += stats.DocFreq;
sumDocFreq += stats.DocFreq;
if (sumDFsinceLastAbortCheck > 60000)
{
mergeState.CheckAbort.Work(sumDFsinceLastAbortCheck / 5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}
}
Finish(indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.Cardinality());
}
}
}