using J2N.Text;
using Lucene.Net.Support;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;

namespace Lucene.Net.Index
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using ArrayUtil = Lucene.Net.Util.ArrayUtil;
    using BytesRef = Lucene.Net.Util.BytesRef;
    using FlushInfo = Lucene.Net.Store.FlushInfo;
    using IOContext = Lucene.Net.Store.IOContext;
    using IOUtils = Lucene.Net.Util.IOUtils;
    using RamUsageEstimator = Lucene.Net.Util.RamUsageEstimator;
    using TermVectorsWriter = Lucene.Net.Codecs.TermVectorsWriter;

    internal sealed class TermVectorsConsumer : TermsHashConsumer
    {
        internal TermVectorsWriter writer;
        internal readonly DocumentsWriterPerThread docWriter;
        internal readonly DocumentsWriterPerThread.DocState docState;
        internal readonly BytesRef flushTerm = new BytesRef();

        // Used by perField when serializing the term vectors
        internal readonly ByteSliceReader vectorSliceReaderPos = new ByteSliceReader();
        internal readonly ByteSliceReader vectorSliceReaderOff = new ByteSliceReader();

        internal bool hasVectors;     // set when at least one field with term vectors has been seen
        internal int numVectorFields; // number of perFields entries in use for the current document
        internal int lastDocID;       // docID of the next document to write to the vectors file
        private TermVectorsConsumerPerField[] perFields = new TermVectorsConsumerPerField[1];

        public TermVectorsConsumer(DocumentsWriterPerThread docWriter)
        {
            this.docWriter = docWriter;
            docState = docWriter.docState;
        }
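
        /// <summary>
        /// Flushes any buffered term vectors for this segment, back-filling
        /// empty entries for trailing docs without vectors, then resets the
        /// per-field consumers that were used during this run.
        /// </summary>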
        // LUCENENET specific - original was internal, but FreqProxTermsWriter requires public (little point, since both are internal classes)
        [MethodImpl(MethodImplOptions.NoInlining)]
        public override void Flush(IDictionary<string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state)
        {
            if (writer != null)
            {
                int numDocs = state.SegmentInfo.DocCount;
                Debug.Assert(numDocs > 0);
                // At least one doc in this run had term vectors enabled
                try
                {
                    Fill(numDocs);
                    Debug.Assert(state.SegmentInfo != null);
                    writer.Finish(state.FieldInfos, numDocs);
                }
                finally
                {
                    IOUtils.Dispose(writer);
                    writer = null;
                    lastDocID = 0;
                    hasVectors = false;
                }
            }

            foreach (TermsHashConsumerPerField field in fieldsToFlush.Values)
            {
                TermVectorsConsumerPerField perField = (TermVectorsConsumerPerField)field;
                perField.termsHashPerField.Reset();
                perField.ShrinkHash();
            }
        }

        /// <summary>
        /// Fills in no-term-vectors for all docs we haven't seen
        /// since the last doc that had term vectors.
        /// </summary>
        internal void Fill(int docID)
        {
            while (lastDocID < docID)
            {
                writer.StartDocument(0);
                writer.FinishDocument();
                lastDocID++;
            }
        }
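
        /// <summary>
        /// Lazily opens the codec's <see cref="TermVectorsWriter"/> the first
        /// time a document with term vectors must be written.
        /// </summary>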
        [MethodImpl(MethodImplOptions.NoInlining)]
        private void InitTermVectorsWriter()
        {
            if (writer == null)
            {
                IOContext context = new IOContext(new FlushInfo(docWriter.NumDocsInRAM, docWriter.BytesUsed));
                writer = docWriter.codec.TermVectorsFormat.VectorsWriter(docWriter.directory, docWriter.SegmentInfo, context);
                lastDocID = 0;
            }
        }
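
        /// <summary>
        /// Writes the term vectors buffered for the current document, first
        /// back-filling empty entries for any preceding docs without vectors.
        /// Does nothing if no field in this document had vectors enabled.
        /// </summary>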
        [MethodImpl(MethodImplOptions.NoInlining)]
        internal override void FinishDocument(TermsHash termsHash)
        {
            Debug.Assert(docWriter.TestPoint("TermVectorsTermsWriter.finishDocument start"));

            if (!hasVectors)
            {
                return;
            }

            InitTermVectorsWriter();

            Fill(docState.docID);

            // Append term vectors to the real outputs:
            writer.StartDocument(numVectorFields);
            for (int i = 0; i < numVectorFields; i++)
            {
                perFields[i].FinishDocument();
            }
            writer.FinishDocument();

            Debug.Assert(lastDocID == docState.docID, "lastDocID=" + lastDocID + " docState.docID=" + docState.docID);

            lastDocID++;

            termsHash.Reset();
            Reset();
            Debug.Assert(docWriter.TestPoint("TermVectorsTermsWriter.finishDocument end"));
        }
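
        /// <summary>
        /// Discards all buffered vector state when the current segment is
        /// aborted; any partially written vectors output is abandoned.
        /// </summary>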
        [MethodImpl(MethodImplOptions.NoInlining)]
        public override void Abort()
        {
            hasVectors = false;

            if (writer != null)
            {
                writer.Abort();
                writer = null;
            }

            lastDocID = 0;
            Reset();
        }

        internal void Reset()
        {
            Arrays.Fill(perFields, null); // don't hang onto stuff from previous doc
            numVectorFields = 0;
        }
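
        /// <summary>
        /// Creates the per-field consumer that gathers term vector data for a single field.
        /// </summary>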
        public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
        {
            return new TermVectorsConsumerPerField(termsHashPerField, this, fieldInfo);
        }
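
        /// <summary>
        /// Registers a per-field consumer that has vectors for the current
        /// document, growing <see cref="perFields"/> as needed.
        /// </summary>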
        [MethodImpl(MethodImplOptions.NoInlining)]
        internal void AddFieldToFlush(TermVectorsConsumerPerField fieldToFlush)
        {
            if (numVectorFields == perFields.Length)
            {
                int newSize = ArrayUtil.Oversize(numVectorFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
                TermVectorsConsumerPerField[] newArray = new TermVectorsConsumerPerField[newSize];
                Array.Copy(perFields, 0, newArray, 0, numVectorFields);
                perFields = newArray;
            }

            perFields[numVectorFields++] = fieldToFlush;
        }

        internal override void StartDocument()
        {
            Debug.Assert(ClearLastVectorFieldName());
            Reset();
        }

        // Called only by assert
        internal bool ClearLastVectorFieldName()
        {
            lastVectorFieldName = null;
            return true;
        }

        // Called only by assert
        internal string lastVectorFieldName;
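
        /// <summary>
        /// Debug-assert helper: verifies that vector fields are added in
        /// increasing field-name order, recording the last name seen.
        /// </summary>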
        internal bool VectorFieldsInOrder(FieldInfo fi)
        {
            try
            {
                return lastVectorFieldName == null || lastVectorFieldName.CompareToOrdinal(fi.Name) < 0;
            }
            finally
            {
                lastVectorFieldName = fi.Name;
            }
        }
    }
}