using J2N.Numerics;
using J2N.Text;
using Lucene.Net.Diagnostics;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
namespace Lucene.Net.Codecs
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AtomicReader = Lucene.Net.Index.AtomicReader;
using IBits = Lucene.Net.Util.IBits;
using BytesRef = Lucene.Net.Util.BytesRef;
using DataInput = Lucene.Net.Store.DataInput;
using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
using DocsAndPositionsEnum = Lucene.Net.Index.DocsAndPositionsEnum;
using FieldInfo = Lucene.Net.Index.FieldInfo;
using FieldInfos = Lucene.Net.Index.FieldInfos;
using Fields = Lucene.Net.Index.Fields;
using MergeState = Lucene.Net.Index.MergeState;
using Terms = Lucene.Net.Index.Terms;
using TermsEnum = Lucene.Net.Index.TermsEnum;
/// <summary>
/// Codec API for writing term vectors:
/// <para/>
/// <list type="number">
/// <item><description>For every document, <see cref="StartDocument(int)"/> is called,
/// informing the <see cref="Codec"/> how many fields will be written.</description></item>
/// <item><description><see cref="StartField(FieldInfo, int, bool, bool, bool)"/> is called for
/// each field in the document, informing the codec how many terms
/// will be written for that field, and whether or not positions,
/// offsets, or payloads are enabled.</description></item>
/// <item><description>Within each field, <see cref="StartTerm(BytesRef, int)"/> is called
/// for each term.</description></item>
/// <item><description>If offsets and/or positions are enabled, then
/// <see cref="AddPosition(int, int, int, BytesRef)"/> will be called for each term
/// occurrence.</description></item>
/// <item><description>After all documents have been written, <see cref="Finish(FieldInfos, int)"/>
/// is called for verification/sanity-checks.</description></item>
/// <item><description>Finally the writer is disposed (<see cref="Dispose(bool)"/>); the full
/// call sequence is sketched in the example below.</description></item>
/// </list>
/// <para/>
/// @lucene.experimental
/// </summary>
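/// <example>
/// A minimal sketch of the expected call sequence. <c>FooTermVectorsWriter</c>,
/// <c>fieldInfo</c>, and <c>fieldInfos</c> are hypothetical placeholders, not part of this API:
/// <code>
/// TermVectorsWriter writer = new FooTermVectorsWriter();  // hypothetical subclass
/// writer.StartDocument(1);                            // 1 vector field in this document
/// writer.StartField(fieldInfo, 2, true, true, false); // 2 terms; positions + offsets, no payloads
/// writer.StartTerm(new BytesRef("apple"), 1);         // term "apple" occurs once
/// writer.AddPosition(0, 0, 5, null);                  // position 0, offsets [0, 5), no payload
/// writer.FinishTerm();
/// writer.StartTerm(new BytesRef("banana"), 1);        // terms must arrive in Comparer order
/// writer.AddPosition(1, 6, 12, null);
/// writer.FinishTerm();
/// writer.FinishField();
/// writer.FinishDocument();
/// writer.Finish(fieldInfos, 1);                       // must match the number of StartDocument calls
/// writer.Dispose();
/// </code>
/// </example>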
public abstract class TermVectorsWriter : IDisposable
{
/// <summary>
/// Sole constructor. (For invocation by subclass
/// constructors, typically implicit.)
/// </summary>
protected internal TermVectorsWriter()
{
}
/// <summary>
/// Called before writing the term vectors of the document.
/// <see cref="StartField(FieldInfo, int, bool, bool, bool)"/> will
/// be called <paramref name="numVectorFields"/> times. Note that if term
/// vectors are enabled, this is called even if the document
/// has no vector fields; in this case <paramref name="numVectorFields"/>
/// will be zero.
/// </summary>
public abstract void StartDocument(int numVectorFields);
/// <summary>
/// Called after a doc and all its fields have been added. </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
public virtual void FinishDocument()
{
}
/// <summary>
/// Called before writing the terms of the field.
/// <see cref="StartTerm(BytesRef, int)"/> will be called <paramref name="numTerms"/> times.
/// </summary>
public abstract void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads);
/// <summary>
/// Called after a field and all its terms have been added. </summary>
public virtual void FinishField()
{
}
/// <summary>
/// Adds a <paramref name="term"/> and its term frequency <paramref name="freq"/>.
/// If this field has positions and/or offsets enabled, then
/// <see cref="AddPosition(int, int, int, BytesRef)"/> will be called
/// <paramref name="freq"/> times.
/// </summary>
public abstract void StartTerm(BytesRef term, int freq);
/// <summary>
/// Called after a term and all its positions have been added. </summary>
public virtual void FinishTerm()
{
}
/// <summary>
/// Adds a term <paramref name="position"/> and offsets. </summary>
public abstract void AddPosition(int position, int startOffset, int endOffset, BytesRef payload);
/// <summary>
/// Aborts writing entirely; the implementation should remove
/// any partially-written files, etc.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
public abstract void Abort();
/// <summary>
/// Called before <see cref="Dispose(bool)"/>, passing in the number
/// of documents that were written. Note that this is
/// intentionally redundant (equivalent to the number of
/// calls to <see cref="StartDocument(int)"/>), but a <see cref="Codec"/> should
/// check that this is the case to detect the bug described
/// in LUCENE-1282.
/// </summary>
public abstract void Finish(FieldInfos fis, int numDocs);
/// <summary>
/// Called by <see cref="Index.IndexWriter"/> when writing new segments.
/// <para/>
/// This is an expert API that allows the codec to consume
/// positions and offsets directly from the indexer.
/// <para/>
/// The default implementation calls <see cref="AddPosition(int, int, int, BytesRef)"/>,
/// but subclasses can override this if they want to efficiently write
/// all the positions, then all the offsets, for example.
/// <para/>
/// NOTE: this API is extremely expert and subject to change or removal!!!
/// <para/>
/// @lucene.internal
/// </summary>
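/// <example>
/// A rough sketch (an assumption for illustration, not taken from the indexer) of the
/// encoding this default implementation decodes, produced here with
/// <see cref="Lucene.Net.Store.ByteArrayDataOutput"/>; <c>writer</c> stands for any
/// <see cref="TermVectorsWriter"/> instance:
/// <code>
/// var posBuf = new byte[32];
/// var posOut = new ByteArrayDataOutput(posBuf);
/// // per occurrence: VInt of (positionDelta * 2) + payloadBit, then,
/// // when the bit is set, a VInt payload length followed by the payload bytes
/// posOut.WriteVInt32(3 * 2);            // delta 3, no payload
/// posOut.WriteVInt32(2 * 2 + 1);        // delta 2, payload follows
/// posOut.WriteVInt32(1);                // payload length
/// posOut.WriteByte((byte)0x7F);         // payload byte
///
/// var offBuf = new byte[32];
/// var offOut = new ByteArrayDataOutput(offBuf);
/// // per occurrence: VInt delta from the previous end offset, then VInt length
/// offOut.WriteVInt32(0); offOut.WriteVInt32(5);  // offsets [0, 5)
/// offOut.WriteVInt32(1); offOut.WriteVInt32(6);  // offsets [6, 12)
///
/// writer.AddProx(2, new ByteArrayDataInput(posBuf), new ByteArrayDataInput(offBuf));
/// </code>
/// </example>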
// TODO: we should probably nuke this and make a more efficient 4.x format
// PreFlex-RW could then be slow and buffer (it's only used in tests...)
public virtual void AddProx(int numProx, DataInput positions, DataInput offsets)
{
int position = 0;
int lastOffset = 0;
BytesRef payload = null;
for (int i = 0; i < numProx; i++)
{
int startOffset;
int endOffset;
BytesRef thisPayload;
if (positions == null)
{
position = -1;
thisPayload = null;
}
else
{
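// the low bit of the VInt flags a payload; the remaining bits are the
// delta from the previous position (TripleShift is an unsigned right shift)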
int code = positions.ReadVInt32();
position += code.TripleShift(1);
if ((code & 1) != 0)
{
// this position has a payload
int payloadLength = positions.ReadVInt32();
if (payload == null)
{
payload = new BytesRef();
payload.Bytes = new byte[payloadLength];
}
else if (payload.Bytes.Length < payloadLength)
{
payload.Grow(payloadLength);
}
positions.ReadBytes(payload.Bytes, 0, payloadLength);
payload.Length = payloadLength;
thisPayload = payload;
}
else
{
thisPayload = null;
}
}
if (offsets == null)
{
startOffset = endOffset = -1;
}
else
{
startOffset = lastOffset + offsets.ReadVInt32();
endOffset = startOffset + offsets.ReadVInt32();
lastOffset = endOffset;
}
AddPosition(position, startOffset, endOffset, thisPayload);
}
}
/// <summary>
/// Merges in the term vectors from the readers in
/// <paramref name="mergeState"/>. The default implementation skips
/// over deleted documents, and uses <see cref="StartDocument(int)"/>,
/// <see cref="StartField(FieldInfo, int, bool, bool, bool)"/>,
/// <see cref="StartTerm(BytesRef, int)"/>, <see cref="AddPosition(int, int, int, BytesRef)"/>,
/// and <see cref="Finish(FieldInfos, int)"/>,
/// returning the number of documents that were written.
/// Implementations can override this method for more sophisticated
/// merging (bulk-byte copying, etc).
/// </summary>
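/// <example>
/// A minimal sketch of an override, falling back to the safe default logic:
/// <code>
/// public override int Merge(MergeState mergeState)
/// {
///     // a real codec might bulk-copy raw vector bytes here when the
///     // source and destination formats match; this sketch simply
///     // delegates to the default doc-by-doc implementation
///     return base.Merge(mergeState);
/// }
/// </code>
/// </example>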
[MethodImpl(MethodImplOptions.NoInlining)]
public virtual int Merge(MergeState mergeState)
{
int docCount = 0;
for (int i = 0; i < mergeState.Readers.Count; i++)
{
AtomicReader reader = mergeState.Readers[i];
int maxDoc = reader.MaxDoc;
IBits liveDocs = reader.LiveDocs;
for (int docID = 0; docID < maxDoc; docID++)
{
if (liveDocs != null && !liveDocs.Get(docID))
{
// skip deleted docs
continue;
}
// NOTE: it's very important to first assign to vectors then pass it to
// AddAllDocVectors; see LUCENE-1282
Fields vectors = reader.GetTermVectors(docID);
AddAllDocVectors(vectors, mergeState);
docCount++;
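// credit 300 units of work per copied document; this throws if the merge has been aborted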
mergeState.CheckAbort.Work(300);
}
}
Finish(mergeState.FieldInfos, docCount);
return docCount;
}
/// <summary>
/// Safe (but slowish) default method to write every
/// vector field in the document.
/// </summary>
protected void AddAllDocVectors(Fields vectors, MergeState mergeState)
{
if (vectors == null)
{
StartDocument(0);
FinishDocument();
return;
}
int numFields = vectors.Count;
if (numFields == -1)
{
// count manually! TODO: Maybe enforce that Fields.Count returns something valid?
numFields = 0;
foreach (string _ in vectors)
{
numFields++;
}
}
StartDocument(numFields);
string lastFieldName = null;
TermsEnum termsEnum = null;
DocsAndPositionsEnum docsAndPositionsEnum = null;
int fieldCount = 0;
foreach (string fieldName in vectors)
{
fieldCount++;
FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName);
if (Debugging.AssertsEnabled) Debugging.Assert(lastFieldName == null || fieldName.CompareToOrdinal(lastFieldName) > 0, "lastFieldName={0} fieldName={1}", lastFieldName, fieldName);
lastFieldName = fieldName;
Terms terms = vectors.GetTerms(fieldName);
if (terms == null)
{
// the Fields enumerator shouldn't lie...
continue;
}
bool hasPositions = terms.HasPositions;
bool hasOffsets = terms.HasOffsets;
bool hasPayloads = terms.HasPayloads;
if (Debugging.AssertsEnabled) Debugging.Assert(!hasPayloads || hasPositions);
int numTerms = (int)terms.Count;
if (numTerms == -1)
{
// count manually; Terms.Count is not a mandatory statistic and may return -1
numTerms = 0;
termsEnum = terms.GetEnumerator(termsEnum);
while (termsEnum.MoveNext())
{
numTerms++;
}
}
StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
termsEnum = terms.GetEnumerator(termsEnum);
int termCount = 0;
while (termsEnum.MoveNext())
{
termCount++;
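// a term vector is a single-document inverted index, so the total
// term frequency equals the term's frequency within this document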
int freq = (int)termsEnum.TotalTermFreq;
StartTerm(termsEnum.Term, freq);
if (hasPositions || hasOffsets)
{
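// advance the enum to the single document backing this term vector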
docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
if (Debugging.AssertsEnabled) Debugging.Assert(docsAndPositionsEnum != null);
int docID = docsAndPositionsEnum.NextDoc();
if (Debugging.AssertsEnabled)
{
Debugging.Assert(docID != DocIdSetIterator.NO_MORE_DOCS);
Debugging.Assert(docsAndPositionsEnum.Freq == freq);
}
for (int posUpto = 0; posUpto < freq; posUpto++)
{
int pos = docsAndPositionsEnum.NextPosition();
int startOffset = docsAndPositionsEnum.StartOffset;
int endOffset = docsAndPositionsEnum.EndOffset;
BytesRef payload = docsAndPositionsEnum.GetPayload();
if (Debugging.AssertsEnabled) Debugging.Assert(!hasPositions || pos >= 0);
AddPosition(pos, startOffset, endOffset, payload);
}
}
FinishTerm();
}
if (Debugging.AssertsEnabled) Debugging.Assert(termCount == numTerms);
FinishField();
}
if (Debugging.AssertsEnabled) Debugging.Assert(fieldCount == numFields);
FinishDocument();
}
/// <summary>
/// Return the <see cref="T:IComparer{BytesRef}"/> used to sort terms
/// before feeding to this API.
/// </summary>
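/// <example>
/// A sketch of a typical implementation; the stock 4.x formats sort terms
/// in UTF-8 byte order:
/// <code>
/// public override IComparer&lt;BytesRef&gt; Comparer
/// {
///     get { return BytesRef.UTF8SortedAsUnicodeComparer; }
/// }
/// </code>
/// </example>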
public abstract IComparer<BytesRef> Comparer { get; }
/// <summary>
/// Disposes all resources used by this object.
/// </summary>
public void Dispose()
{
Dispose(true);
GC.SuppressFinalize(this);
}
/// <summary>
/// Implementations must override and should dispose all resources used by this instance.
/// </summary>
protected abstract void Dispose(bool disposing);
}
}