| using System; |
| using System.Collections.Generic; |
| using System.Diagnostics; |
| |
| namespace Lucene.Net.Index |
| { |
| using Lucene.Net.Support; |
| using ArrayUtil = Lucene.Net.Util.ArrayUtil; |
| using BytesRef = Lucene.Net.Util.BytesRef; |
| using FlushInfo = Lucene.Net.Store.FlushInfo; |
| using IOContext = Lucene.Net.Store.IOContext; |
| using IOUtils = Lucene.Net.Util.IOUtils; |
| using RamUsageEstimator = Lucene.Net.Util.RamUsageEstimator; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using TermVectorsWriter = Lucene.Net.Codecs.TermVectorsWriter; |
| |
/// <summary>
/// <see cref="TermsHashConsumer"/> that writes per-document term vectors through a
/// <see cref="TermVectorsWriter"/>. The writer is created lazily, the first time
/// <see cref="FinishDocument(TermsHash)"/> runs while <see cref="HasVectors"/> is set,
/// and is finished and closed by <see cref="Flush"/>.
/// </summary>
public sealed class TermVectorsConsumer : TermsHashConsumer
{
    /// <summary>
    /// Writer for the segment being built; <c>null</c> until
    /// <see cref="InitTermVectorsWriter"/> creates it and again after
    /// <see cref="Flush"/> or <see cref="Abort"/>.
    /// </summary>
    internal TermVectorsWriter Writer;

    /// <summary>The <see cref="DocumentsWriterPerThread"/> passed to the constructor.</summary>
    internal readonly DocumentsWriterPerThread DocWriter;

    /// <summary>The <c>docState</c> of <see cref="DocWriter"/>; its <c>DocID</c> is read when finishing a document.</summary>
    internal readonly DocumentsWriterPerThread.DocState DocState;

    /// <summary>
    /// Shared <see cref="BytesRef"/> instance. Not referenced inside this class;
    /// presumably scratch space for the per-field consumer (verify against callers).
    /// </summary>
    internal readonly BytesRef FlushTerm = new BytesRef();

    // Used by perField when serializing the term vectors
    internal readonly ByteSliceReader VectorSliceReaderPos = new ByteSliceReader();

    internal readonly ByteSliceReader VectorSliceReaderOff = new ByteSliceReader();

    /// <summary>
    /// When <c>false</c>, <see cref="FinishDocument(TermsHash)"/> returns without writing.
    /// This class only ever clears the flag (in <see cref="Flush"/> and <see cref="Abort"/>);
    /// it is set from outside this class.
    /// </summary>
    internal bool HasVectors;

    /// <summary>Number of leading entries of <see cref="PerFields"/> that are in use for the current document.</summary>
    internal int NumVectorFields;

    /// <summary>
    /// Number of documents already handed to <see cref="Writer"/>, i.e. the doc ID the
    /// writer expects next. Reset to 0 whenever the writer is created, flushed or aborted.
    /// </summary>
    internal int LastDocID;

    /// <summary>Per-field consumers registered via <see cref="AddFieldToFlush"/> for the current document; grown on demand.</summary>
    private TermVectorsConsumerPerField[] PerFields = new TermVectorsConsumerPerField[1];

    /// <summary>
    /// Name of the field most recently passed to <see cref="VectorFieldsInOrder"/>,
    /// or <c>null</c> if none since the last <see cref="ClearLastVectorFieldName"/>.
    /// Used only by assertions.
    /// </summary>
    internal string LastVectorFieldName;

    /// <summary>
    /// Creates a consumer bound to <paramref name="docWriter"/> and its <c>docState</c>.
    /// </summary>
    /// <param name="docWriter">The owning per-thread documents writer.</param>
    public TermVectorsConsumer(DocumentsWriterPerThread docWriter)
    {
        this.DocWriter = docWriter;
        DocState = docWriter.docState;
    }

    /// <summary>
    /// If a writer was opened, pads it with empty documents up to the segment's doc count,
    /// finishes and closes it, then resets and shrinks the hash of every flushed field.
    /// </summary>
    /// <param name="fieldsToFlush">Per-field consumers to reset; each value must be a <see cref="TermVectorsConsumerPerField"/>.</param>
    /// <param name="state">Write state supplying the segment's doc count and field infos.</param>
    public override void Flush(IDictionary<string, TermsHashConsumerPerField> fieldsToFlush, SegmentWriteState state)
    {
        if (Writer != null)
        {
            int numDocs = state.SegmentInfo.DocCount;
            Debug.Assert(numDocs > 0);
            // At least one doc in this run had term vectors enabled
            try
            {
                Fill(numDocs);
                Debug.Assert(state.SegmentInfo != null);
                Writer.Finish(state.FieldInfos, numDocs);
            }
            finally
            {
                // Always release the writer and return to the "no writer" state,
                // even when Fill/Finish throws.
                IOUtils.Close(Writer);
                Writer = null;
                LastDocID = 0;
                HasVectors = false;
            }
        }

        foreach (TermsHashConsumerPerField field in fieldsToFlush.Values)
        {
            TermVectorsConsumerPerField perField = (TermVectorsConsumerPerField)field;
            perField.TermsHashPerField.Reset();
            perField.ShrinkHash();
        }
    }

    /// <summary>
    /// Fills in no-term-vectors for all docs we haven't seen
    /// since the last doc that had term vectors.
    /// </summary>
    /// <param name="docID">Exclusive upper bound: empty documents are written until <see cref="LastDocID"/> reaches this value.</param>
    internal void Fill(int docID)
    {
        while (LastDocID < docID)
        {
            // A document with zero vector fields.
            Writer.StartDocument(0);
            Writer.FinishDocument();
            LastDocID++;
        }
    }

    /// <summary>
    /// Creates <see cref="Writer"/> from the codec's term vectors format if it does not
    /// exist yet, and restarts doc ID tracking at 0.
    /// </summary>
    private void InitTermVectorsWriter()
    {
        if (Writer == null)
        {
            IOContext context = new IOContext(new FlushInfo(DocWriter.NumDocsInRAM, DocWriter.BytesUsed()));
            Writer = DocWriter.Codec.TermVectorsFormat().VectorsWriter(DocWriter.Directory, DocWriter.SegmentInfo, context);
            LastDocID = 0;
        }
    }

    /// <summary>
    /// Writes the current document's term vectors, if it has any: opens the writer on
    /// first use, pads skipped documents, lets every registered per-field consumer write
    /// its data, then resets <paramref name="termsHash"/> and this consumer.
    /// </summary>
    /// <param name="termsHash">The terms hash to reset once the document has been written.</param>
    public override void FinishDocument(TermsHash termsHash)
    {
        Debug.Assert(DocWriter.TestPoint("TermVectorsTermsWriter.finishDocument start"));

        if (!HasVectors)
        {
            return;
        }

        InitTermVectorsWriter();

        // Emit empty entries for any docs between the last written one and this one.
        Fill(DocState.DocID);

        // Append term vectors to the real outputs:
        Writer.StartDocument(NumVectorFields);
        for (int i = 0; i < NumVectorFields; i++)
        {
            PerFields[i].FinishDocument();
        }
        Writer.FinishDocument();

        Debug.Assert(LastDocID == DocState.DocID, "lastDocID=" + LastDocID + " docState.docID=" + DocState.DocID);

        LastDocID++;

        termsHash.Reset();
        Reset();
        Debug.Assert(DocWriter.TestPoint("TermVectorsTermsWriter.finishDocument end"));
    }

    /// <summary>
    /// Discards all in-progress state: clears <see cref="HasVectors"/>, aborts and drops
    /// the writer if one is open, and resets doc ID and per-field tracking.
    /// </summary>
    public override void Abort()
    {
        HasVectors = false;

        if (Writer != null)
        {
            Writer.Abort();
            Writer = null;
        }

        LastDocID = 0;
        Reset();
    }

    /// <summary>
    /// Forgets the per-field consumers registered for the current document.
    /// </summary>
    internal void Reset()
    {
        Arrays.Fill(PerFields, null); // don't hang onto stuff from previous doc
        NumVectorFields = 0;
    }

    /// <summary>
    /// Creates the per-field consumer for <paramref name="fieldInfo"/>, tied to this consumer.
    /// </summary>
    /// <param name="termsHashPerField">The per-field terms hash the new consumer works with.</param>
    /// <param name="fieldInfo">The field the new consumer handles.</param>
    /// <returns>A new <see cref="TermVectorsConsumerPerField"/>.</returns>
    public override TermsHashConsumerPerField AddField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo)
    {
        return new TermVectorsConsumerPerField(termsHashPerField, this, fieldInfo);
    }

    /// <summary>
    /// Registers <paramref name="fieldToFlush"/> to be written by the next
    /// <see cref="FinishDocument(TermsHash)"/>, growing the backing array when it is full.
    /// </summary>
    /// <param name="fieldToFlush">The per-field consumer to append.</param>
    internal void AddFieldToFlush(TermVectorsConsumerPerField fieldToFlush)
    {
        if (NumVectorFields == PerFields.Length)
        {
            int newSize = ArrayUtil.Oversize(NumVectorFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
            TermVectorsConsumerPerField[] newArray = new TermVectorsConsumerPerField[newSize];
            Array.Copy(PerFields, 0, newArray, 0, NumVectorFields);
            PerFields = newArray;
        }

        PerFields[NumVectorFields++] = fieldToFlush;
    }

    /// <summary>
    /// Prepares for a new document by clearing the per-field registrations (and, when
    /// assertions are compiled in, the last-seen field name).
    /// </summary>
    public override void StartDocument()
    {
        Debug.Assert(ClearLastVectorFieldName());
        Reset();
    }

    /// <summary>
    /// Clears <see cref="LastVectorFieldName"/>. Called only by assert.
    /// </summary>
    /// <returns>Always <c>true</c>, so the call can be wrapped in an assertion.</returns>
    internal bool ClearLastVectorFieldName()
    {
        LastVectorFieldName = null;
        return true;
    }

    /// <summary>
    /// Checks that <paramref name="fi"/> sorts strictly after the previously checked field
    /// name, then records <paramref name="fi"/>'s name as the new previous name.
    /// </summary>
    /// <param name="fi">The field about to be processed.</param>
    /// <returns>
    /// <c>true</c> if no field name was recorded yet or the recorded name is strictly
    /// less than <c>fi.Name</c> in ordinal (UTF-16 code unit) order; otherwise <c>false</c>.
    /// </returns>
    internal bool VectorFieldsInOrder(FieldInfo fi)
    {
        string previousName = LastVectorFieldName;
        string currentName = fi.Name;

        // Record the name unconditionally so the next call compares against this field.
        LastVectorFieldName = currentName;

        // Ordinal comparison on purpose: string.CompareTo is culture-sensitive in .NET,
        // so it could report a different order depending on the current culture.
        return previousName == null || string.CompareOrdinal(previousName, currentName) < 0;
    }
}
| } |