/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using RAMOutputStream = Lucene.Net.Store.RAMOutputStream;
using ArrayUtil = Lucene.Net.Util.ArrayUtil;
namespace Lucene.Net.Index
{
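/// <summary>Writes term vectors for each document to the doc store,
/// producing the three term vector files: the index (tvx), documents
/// (tvd) and fields (tvf) files.
/// </summary>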
sealed class TermVectorsTermsWriter : TermsHashConsumer
{
private void InitBlock()
{
docFreeList = new PerDoc[1];
}
internal DocumentsWriter docWriter;
internal TermVectorsWriter termVectorsWriter;
internal PerDoc[] docFreeList;
internal int freeCount;
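// Outputs for the three term vector files: tvx (index), tvd
// (documents) and tvf (fields).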
internal IndexOutput tvx;
internal IndexOutput tvd;
internal IndexOutput tvf;
internal int lastDocID;
public TermVectorsTermsWriter(DocumentsWriter docWriter)
{
InitBlock();
this.docWriter = docWriter;
}
public override TermsHashConsumerPerThread AddThread(TermsHashPerThread termsHashPerThread)
{
return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
}
internal override void CreatePostings(RawPostingList[] postings, int start, int count)
{
int end = start + count;
for (int i = start; i < end; i++)
postings[i] = new PostingList();
}
public override void Flush(IDictionary<TermsHashConsumerPerThread, ICollection<TermsHashConsumerPerField>> threadsAndFields, SegmentWriteState state)
{
lock (this)
{
// NOTE: it's possible that all documents seen in this segment
// hit non-aborting exceptions, in which case we will
// not have yet init'd the TermVectorsWriter. This is
// actually OK (unlike in the stored fields case)
// because, although FieldInfos.hasVectors() will return
// true, the TermVectorsReader gracefully handles
// non-existence of the term vectors files.
if (tvx != null)
{
if (state.numDocsInStore > 0)
// In case there are some final documents that we
// didn't see (because they hit a non-aborting exception):
Fill(state.numDocsInStore - docWriter.DocStoreOffset);
tvx.Flush();
tvd.Flush();
tvf.Flush();
}
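// Reset all per-thread and per-field state so the next segment
// starts from a clean slate.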
foreach (var entry in threadsAndFields)
{
foreach (var field in entry.Value)
{
TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField)field;
perField.termsHashPerField.Reset();
perField.ShrinkHash();
}
TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.Key;
perThread.termsHashPerThread.Reset(true);
}
}
}
internal override void CloseDocStore(SegmentWriteState state)
{
lock (this)
{
if (tvx != null)
{
// At least one doc in this run had term vectors
// enabled
Fill(state.numDocsInStore - docWriter.DocStoreOffset);
tvx.Close();
tvf.Close();
tvd.Close();
tvx = null;
System.Diagnostics.Debug.Assert(state.docStoreSegmentName != null);
System.String fileName = state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION;
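// tvx contains a 4-byte format header followed by two 8-byte
// pointers (into tvd and tvf) per document, so its expected
// length is 4 + numDocsInStore * 16.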
if (4 + ((long) state.numDocsInStore) * 16 != state.directory.FileLength(fileName))
throw new System.SystemException("after flush: tvx size mismatch: " + state.numDocsInStore + " docs vs " + state.directory.FileLength(fileName) + " length in bytes of " + fileName + " file exists?=" + state.directory.FileExists(fileName));
state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
state.flushedFiles.Add(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
docWriter.RemoveOpenFile(state.docStoreSegmentName + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
lastDocID = 0;
}
}
}
internal int allocCount;
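// Returns a recycled PerDoc if one is available; otherwise
// allocates a new instance, growing the free list up front so
// every outstanding PerDoc can later be recycled by Free().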
internal PerDoc GetPerDoc()
{
lock (this)
{
if (freeCount == 0)
{
allocCount++;
if (allocCount > docFreeList.Length)
{
// Grow our free list up front to make sure we have
// enough space to recycle all outstanding PerDoc
// instances
System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
}
return new PerDoc(this);
}
else
return docFreeList[--freeCount];
}
}
/// <summary>Fills in empty term-vector entries for all docs we haven't
/// seen since the last doc that had term vectors.
/// </summary>
internal void Fill(int docID)
{
int docStoreOffset = docWriter.DocStoreOffset;
int end = docID + docStoreOffset;
if (lastDocID < end)
{
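// Each missing doc gets a zero-field entry in tvd, plus tvx
// pointers to the current tvd and tvf positions.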
long tvfPosition = tvf.FilePointer;
while (lastDocID < end)
{
tvx.WriteLong(tvd.FilePointer);
tvd.WriteVInt(0);
tvx.WriteLong(tvfPosition);
lastDocID++;
}
}
}
internal void InitTermVectorsWriter()
{
lock (this)
{
if (tvx == null)
{
System.String docStoreSegment = docWriter.DocStoreSegment;
if (docStoreSegment == null)
return;
// If we hit an exception while init'ing the term
// vector output files, we must abort this segment
// because those files will be in an unknown
// state:
tvx = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
tvd = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
tvf = docWriter.directory.CreateOutput(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
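// Stamp each file with the current format version so readers
// know how to decode it.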
tvx.WriteInt(TermVectorsReader.FORMAT_CURRENT);
tvd.WriteInt(TermVectorsReader.FORMAT_CURRENT);
tvf.WriteInt(TermVectorsReader.FORMAT_CURRENT);
docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
docWriter.AddOpenFile(docStoreSegment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
lastDocID = 0;
}
}
}
internal void FinishDocument(PerDoc perDoc)
{
lock (this)
{
System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument start"));
InitTermVectorsWriter();
Fill(perDoc.docID);
// Append term vectors to the real outputs:
tvx.WriteLong(tvd.FilePointer);
tvx.WriteLong(tvf.FilePointer);
tvd.WriteVInt(perDoc.numVectorFields);
if (perDoc.numVectorFields > 0)
{
for (int i = 0; i < perDoc.numVectorFields; i++)
tvd.WriteVInt(perDoc.fieldNumbers[i]);
System.Diagnostics.Debug.Assert(0 == perDoc.fieldPointers[0]);
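// Field pointers into tvf are delta-coded as VLongs against the
// previous field's pointer.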
long lastPos = perDoc.fieldPointers[0];
for (int i = 1; i < perDoc.numVectorFields; i++)
{
long pos = perDoc.fieldPointers[i];
tvd.WriteVLong(pos - lastPos);
lastPos = pos;
}
perDoc.perDocTvf.WriteTo(tvf);
perDoc.numVectorFields = 0;
}
System.Diagnostics.Debug.Assert(lastDocID == perDoc.docID + docWriter.DocStoreOffset);
lastDocID++;
perDoc.Reset();
Free(perDoc);
System.Diagnostics.Debug.Assert(docWriter.writer.TestPoint("TermVectorsTermsWriter.finishDocument end"));
}
}
public bool FreeRAM()
{
// We don't hold any state beyond one doc, so we don't
// free persistent RAM here
return false;
}
public override void Abort()
{
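// Best-effort close of any open term vector outputs; exceptions
// are ignored since the files are being discarded anyway.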
if (tvx != null)
{
try
{
tvx.Close();
}
catch (System.Exception)
{
}
tvx = null;
}
if (tvd != null)
{
try
{
tvd.Close();
}
catch (System.Exception)
{
}
tvd = null;
}
if (tvf != null)
{
try
{
tvf.Close();
}
catch (System.Exception)
{
}
tvf = null;
}
lastDocID = 0;
}
internal void Free(PerDoc doc)
{
lock (this)
{
System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
docFreeList[freeCount++] = doc;
}
}
internal class PerDoc : DocumentsWriter.DocWriter
{
public PerDoc(TermVectorsTermsWriter enclosingInstance)
{
InitBlock(enclosingInstance);
}
private void InitBlock(TermVectorsTermsWriter enclosingInstance)
{
this.enclosingInstance = enclosingInstance;
buffer = enclosingInstance.docWriter.NewPerDocBuffer();
perDocTvf = new RAMOutputStream(buffer);
}
private TermVectorsTermsWriter enclosingInstance;
public TermVectorsTermsWriter Enclosing_Instance
{
get
{
return enclosingInstance;
}
}
internal DocumentsWriter.PerDocBuffer buffer;
internal RAMOutputStream perDocTvf;
internal int numVectorFields;
internal int[] fieldNumbers = new int[1];
internal long[] fieldPointers = new long[1];
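// Clears this PerDoc for reuse, returning its buffer to the
// DocumentsWriter's pool.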
internal void Reset()
{
perDocTvf.Reset();
buffer.Recycle();
numVectorFields = 0;
}
public override void Abort()
{
Reset();
Enclosing_Instance.Free(this);
}
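// Records the field number and the field's start position in the
// per-doc tvf buffer; the two parallel arrays grow together.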
internal void AddField(int fieldNumber)
{
if (numVectorFields == fieldNumbers.Length)
{
fieldNumbers = ArrayUtil.Grow(fieldNumbers);
fieldPointers = ArrayUtil.Grow(fieldPointers);
}
fieldNumbers[numVectorFields] = fieldNumber;
fieldPointers[numVectorFields] = perDocTvf.FilePointer;
numVectorFields++;
}
public override long SizeInBytes()
{
return buffer.SizeInBytes;
}
public override void Finish()
{
Enclosing_Instance.FinishDocument(this);
}
}
internal sealed class PostingList : RawPostingList
{
internal int freq; // How many times this term occurred in the current doc
internal int lastOffset; // Last offset we saw
internal int lastPosition; // Last position where this term occurred
}
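// Bytes used per posting: the shared RawPostingList fields plus
// the three ints in PostingList (freq, lastOffset, lastPosition).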
internal override int BytesPerPosting()
{
return RawPostingList.BYTES_SIZE + 3 * DocumentsWriter.INT_NUM_BYTE;
}
}
}