// blob: be6f34641525d379b9338a99c986527df00d5cfe [file] [log] [blame]
using J2N.Text;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using Debug = Lucene.Net.Diagnostics.Debug; // LUCENENET NOTE: We cannot use System.Diagnostics.Debug because those calls will be optimized out of the release!
namespace Lucene.Net.Codecs.Lucene3x
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma warning disable 612, 618
/// <summary>
/// <see cref="TermVectorsWriter"/> that writes term vectors for a segment into three
/// outputs: the vectors index (<c>tvx</c>), the vectors documents (<c>tvd</c>) and the
/// vectors fields (<c>tvf</c>) files, each prefixed with
/// <see cref="Lucene3xTermVectorsReader.FORMAT_CURRENT"/>.
/// </summary>
internal sealed class PreFlexRWTermVectorsWriter : TermVectorsWriter
{
    private readonly Directory directory;
    private readonly string segment;
    private IndexOutput tvx = null, tvd = null, tvf = null;

    private long[] fps = new long[10]; // pointers to the tvf before writing each field
    private int fieldCount = 0; // number of fields we have written so far for this document
    private int numVectorFields = 0; // total number of fields we will write for this document
    private string lastFieldName; // name of the previous field of the current document (ordering check only)

    private readonly BytesRef lastTerm = new BytesRef(10); // previous term of the current field, for prefix compression

    // NOTE (carried over from the original source): we override addProx, so we don't
    // need to buffer when indexing. we also don't buffer during bulk merges.
    private int[] offsetStartBuffer = new int[10];
    private int[] offsetEndBuffer = new int[10];
    private int offsetIndex = 0; // number of offsets buffered so far for the current term
    private int offsetFreq = 0; // freq of the current term, i.e. how many offsets to buffer before flushing
    private bool positions = false;
    private bool offsets = false;

    internal int lastPosition = 0;
    internal int lastOffset = 0;

    /// <summary>
    /// Creates the three term vector outputs for <paramref name="segment"/> in
    /// <paramref name="directory"/> and writes the format header to each of them.
    /// If anything fails, <see cref="Abort()"/> is invoked before the exception propagates.
    /// </summary>
    /// <param name="directory">Directory in which the outputs are created (and deleted from on abort).</param>
    /// <param name="segment">Segment name used to derive the three file names.</param>
    /// <param name="context">I/O context passed through to <c>CreateOutput</c>.</param>
    public PreFlexRWTermVectorsWriter(Directory directory, string segment, IOContext context)
    {
        this.directory = directory;
        this.segment = segment;
        bool success = false;
        try
        {
            // Open files for TermVector storage
            tvx = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION), context);
            tvx.WriteInt32(Lucene3xTermVectorsReader.FORMAT_CURRENT);
            tvd = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), context);
            tvd.WriteInt32(Lucene3xTermVectorsReader.FORMAT_CURRENT);
            tvf = directory.CreateOutput(IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION), context);
            tvf.WriteInt32(Lucene3xTermVectorsReader.FORMAT_CURRENT);
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Close whatever was opened and remove the partially written files.
                Abort();
            }
        }
    }

    /// <summary>
    /// Begins a new document: records the current tvd and tvf file pointers in tvx
    /// (two 64-bit values per document) and writes the field count to tvd.
    /// </summary>
    /// <param name="numVectorFields">Number of fields that will be written for this document.</param>
    public override void StartDocument(int numVectorFields)
    {
        lastFieldName = null;
        this.numVectorFields = numVectorFields;
        tvx.WriteInt64(tvd.GetFilePointer());
        tvx.WriteInt64(tvf.GetFilePointer());
        tvd.WriteVInt32(numVectorFields);
        fieldCount = 0;
        // Make sure there is one tvf pointer slot per field of this document.
        fps = ArrayUtil.Grow(fps, numVectorFields);
    }

    /// <summary>
    /// Begins a new field of the current document: writes the field number to tvd and the
    /// term count plus the positions/offsets flag byte to tvf. After the last field of the
    /// document, the deltas between consecutive tvf field pointers are appended to tvd.
    /// </summary>
    /// <param name="info">Field being written; fields must arrive in increasing ordinal name order.</param>
    /// <param name="numTerms">Number of terms that will be written for this field.</param>
    /// <param name="positions">Whether positions are stored for this field.</param>
    /// <param name="offsets">Whether offsets are stored for this field.</param>
    /// <param name="payloads">Must be <c>false</c>; payloads are not supported.</param>
    /// <exception cref="System.NotSupportedException">If <paramref name="payloads"/> is <c>true</c>.</exception>
    public override void StartField(FieldInfo info, int numTerms, bool positions, bool offsets, bool payloads)
    {
        Debug.Assert(lastFieldName == null || info.Name.CompareToOrdinal(lastFieldName) > 0, "fieldName=" + info.Name + " lastFieldName=" + lastFieldName);
        lastFieldName = info.Name;
        if (payloads)
        {
            throw new System.NotSupportedException("3.x codec does not support payloads on vectors!");
        }
        this.positions = positions;
        this.offsets = offsets;
        // Reset prefix compression: the first term of a field shares nothing with a previous term.
        lastTerm.Length = 0;
        fps[fieldCount++] = tvf.GetFilePointer();
        tvd.WriteVInt32(info.Number);
        tvf.WriteVInt32(numTerms);
        sbyte bits = 0x0;
        if (positions)
        {
            bits |= Lucene3xTermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
        }
        if (offsets)
        {
            bits |= Lucene3xTermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
        }
        tvf.WriteByte((byte)bits);

        Debug.Assert(fieldCount <= numVectorFields);
        if (fieldCount == numVectorFields)
        {
            // last field of the document
            // this is crazy because the file format is crazy!
            for (int i = 1; i < fieldCount; i++)
            {
                tvd.WriteVInt64(fps[i] - fps[i - 1]);
            }
        }
    }

    /// <summary>
    /// Begins a new term of the current field: writes the term to tvf, prefix-compressed
    /// against the previous term, followed by its frequency.
    /// </summary>
    /// <param name="term">Term bytes.</param>
    /// <param name="freq">Frequency of the term; also the number of offsets buffered when
    /// both positions and offsets are stored.</param>
    public override void StartTerm(BytesRef term, int freq)
    {
        int prefix = StringHelper.BytesDifference(lastTerm, term);
        int suffix = term.Length - prefix;
        tvf.WriteVInt32(prefix);
        tvf.WriteVInt32(suffix);
        tvf.WriteBytes(term.Bytes, term.Offset + prefix, suffix);
        tvf.WriteVInt32(freq);
        lastTerm.CopyBytes(term);
        lastPosition = lastOffset = 0;

        if (offsets && positions)
        {
            // we might need to buffer if its a non-bulk merge
            offsetStartBuffer = ArrayUtil.Grow(offsetStartBuffer, freq);
            offsetEndBuffer = ArrayUtil.Grow(offsetEndBuffer, freq);
            offsetIndex = 0;
            offsetFreq = freq;
        }
    }

    /// <summary>
    /// Adds one occurrence of the current term. Positions and offsets are written as deltas.
    /// When both are stored, position deltas are written immediately while offsets are
    /// buffered and written together once all occurrences of the term have been added.
    /// </summary>
    /// <param name="position">Position of the occurrence (used only if positions are stored).</param>
    /// <param name="startOffset">Start offset of the occurrence (used only if offsets are stored).</param>
    /// <param name="endOffset">End offset of the occurrence (used only if offsets are stored).</param>
    /// <param name="payload">Must be <c>null</c> (asserted).</param>
    public override void AddPosition(int position, int startOffset, int endOffset, BytesRef payload)
    {
        Debug.Assert(payload == null);
        if (positions && offsets)
        {
            // write position delta
            tvf.WriteVInt32(position - lastPosition);
            lastPosition = position;

            // buffer offsets
            offsetStartBuffer[offsetIndex] = startOffset;
            offsetEndBuffer[offsetIndex] = endOffset;
            offsetIndex++;

            // dump buffer if we are done
            if (offsetIndex == offsetFreq)
            {
                for (int i = 0; i < offsetIndex; i++)
                {
                    tvf.WriteVInt32(offsetStartBuffer[i] - lastOffset);
                    tvf.WriteVInt32(offsetEndBuffer[i] - offsetStartBuffer[i]);
                    lastOffset = offsetEndBuffer[i];
                }
            }
        }
        else if (positions)
        {
            // write position delta
            tvf.WriteVInt32(position - lastPosition);
            lastPosition = position;
        }
        else if (offsets)
        {
            // write offset deltas
            tvf.WriteVInt32(startOffset - lastOffset);
            tvf.WriteVInt32(endOffset - startOffset);
            lastOffset = endOffset;
        }
    }

    /// <summary>
    /// Best-effort cleanup: disposes this writer, ignoring any exception, then deletes
    /// the three term vector files of the segment, also ignoring exceptions.
    /// </summary>
    public override void Abort()
    {
        try
        {
            Dispose();
        }
        catch (Exception)
        {
            // Deliberately ignored: abort must still go on to delete the files.
        }
        IOUtils.DeleteFilesIgnoringExceptions(directory, IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION), IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION), IndexFileNames.SegmentFileName(segment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
    }

    /// <summary>
    /// Verifies that the tvx file has exactly the size expected for
    /// <paramref name="numDocs"/> documents: a 4-byte format header plus 16 bytes
    /// (two 64-bit pointers) per document.
    /// </summary>
    /// <param name="fis">Field infos of the segment (not used by this check).</param>
    /// <param name="numDocs">Number of documents that should have been written.</param>
    /// <exception cref="Exception">If the tvx size does not match <paramref name="numDocs"/>.</exception>
    public override void Finish(FieldInfos fis, int numDocs)
    {
        long tvxSize = tvx.GetFilePointer();
        if (4 + ((long)numDocs) * 16 != tvxSize)
        {
            // this is most likely a bug in Sun JRE 1.6.0_04/_05;
            // we detect that the bug has struck, here, and
            // throw an exception to prevent the corruption from
            // entering the index. See LUCENE-1282 for
            // details.
            throw new Exception("tvx size mismatch: mergedDocs is " + numDocs + " but tvx size is " + tvxSize + " file=" + tvx.ToString() + "; now aborting this merge to prevent index corruption");
        }
    }

    /// <summary>
    /// Close all streams. </summary>
    /// <param name="disposing"><c>true</c> when called from <c>Dispose()</c>; the managed
    /// outputs are only closed in that case.</param>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            // make an effort to close all streams we can but remember and re-throw
            // the first exception encountered in this process
            IOUtils.Dispose(tvx, tvd, tvf);
            tvx = tvd = tvf = null;
        }
    }

    /// <summary>
    /// Comparer describing the term order of this writer:
    /// <see cref="BytesRef.UTF8SortedAsUTF16Comparer"/>.
    /// </summary>
    public override IComparer<BytesRef> Comparer => BytesRef.UTF8SortedAsUTF16Comparer;
}
#pragma warning restore 612, 618
}