blob: d7afaa0ec8ae623d0ab15c3b937d948f1afbcc85 [file] [log] [blame]
using Lucene.Net.Diagnostics;
using System;
using System.Diagnostics;
using System.Globalization;
using System.Reflection;
using System.Text;
namespace Lucene.Net.Codecs.SimpleText
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using ArrayUtil = Util.ArrayUtil;
using BufferedChecksumIndexInput = Store.BufferedChecksumIndexInput;
using BytesRef = Util.BytesRef;
using CharsRef = Util.CharsRef;
using ChecksumIndexInput = Store.ChecksumIndexInput;
using Directory = Store.Directory;
using FieldInfo = Index.FieldInfo;
using FieldInfos = Index.FieldInfos;
using IndexFileNames = Index.IndexFileNames;
using IndexInput = Store.IndexInput;
using IOContext = Store.IOContext;
using IOUtils = Util.IOUtils;
using SegmentInfo = Index.SegmentInfo;
using StoredFieldVisitor = Index.StoredFieldVisitor;
using StringHelper = Util.StringHelper;
using UnicodeUtil = Util.UnicodeUtil;
/// <summary>
/// Reads plain text stored fields.
/// <para>
/// <b><font color="red">FOR RECREATIONAL USE ONLY</font></b>
/// </para>
/// @lucene.experimental
/// </summary>
public class SimpleTextStoredFieldsReader : StoredFieldsReader
{
private long[] _offsets; // docid -> offset in .fld file
private IndexInput _input;
private readonly BytesRef _scratch = new BytesRef();
private readonly CharsRef _scratchUtf16 = new CharsRef();
private readonly FieldInfos _fieldInfos;
public SimpleTextStoredFieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context)
{
_fieldInfos = fn;
var success = false;
try
{
_input =
directory.OpenInput(
IndexFileNames.SegmentFileName(si.Name, "", SimpleTextStoredFieldsWriter.FIELDS_EXTENSION),
context);
success = true;
}
finally
{
if (!success)
{
try
{
Dispose();
}
catch
{
// ensure we throw our original exception
}
}
}
ReadIndex(si.DocCount);
}
/// <remarks>Used by clone.</remarks>
internal SimpleTextStoredFieldsReader(long[] offsets, IndexInput input, FieldInfos fieldInfos)
{
_offsets = offsets;
_input = input;
_fieldInfos = fieldInfos;
}
/// <remarks>
/// We don't actually write a .fdx-like index, instead we read the
/// stored fields file in entirety up-front and save the offsets
/// so we can seek to the documents later.
/// </remarks>
private void ReadIndex(int size)
{
ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);
_offsets = new long[size];
var upto = 0;
while (!_scratch.Equals(SimpleTextStoredFieldsWriter.END))
{
SimpleTextUtil.ReadLine(input, _scratch);
if (StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.DOC))
{
_offsets[upto] = input.GetFilePointer();
upto++;
}
}
SimpleTextUtil.CheckFooter(input);
if (Debugging.AssertsEnabled) Debugging.Assert(upto == _offsets.Length);
}
public override void VisitDocument(int n, StoredFieldVisitor visitor)
{
_input.Seek(_offsets[n]);
ReadLine();
if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NUM));
var numFields = ParseInt32At(SimpleTextStoredFieldsWriter.NUM.Length);
for (var i = 0; i < numFields; i++)
{
ReadLine();
if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.FIELD));
int fieldNumber = ParseInt32At(SimpleTextStoredFieldsWriter.FIELD.Length);
FieldInfo fieldInfo = _fieldInfos.FieldInfo(fieldNumber);
ReadLine();
if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.NAME));
ReadLine();
if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.TYPE));
BytesRef type;
if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_STRING, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
{
type = SimpleTextStoredFieldsWriter.TYPE_STRING;
}
else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_BINARY, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
{
type = SimpleTextStoredFieldsWriter.TYPE_BINARY;
}
else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_INT, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
{
type = SimpleTextStoredFieldsWriter.TYPE_INT;
}
else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_LONG, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
{
type = SimpleTextStoredFieldsWriter.TYPE_LONG;
}
else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_FLOAT, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
{
type = SimpleTextStoredFieldsWriter.TYPE_FLOAT;
}
else if (EqualsAt(SimpleTextStoredFieldsWriter.TYPE_DOUBLE, _scratch, SimpleTextStoredFieldsWriter.TYPE.Length))
{
type = SimpleTextStoredFieldsWriter.TYPE_DOUBLE;
}
else
{
throw new Exception("unknown field type");
}
switch (visitor.NeedsField(fieldInfo))
{
case StoredFieldVisitor.Status.YES:
ReadField(type, fieldInfo, visitor);
break;
case StoredFieldVisitor.Status.NO:
ReadLine();
if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));
break;
case StoredFieldVisitor.Status.STOP:
return;
}
}
}
private void ReadField(BytesRef type, FieldInfo fieldInfo, StoredFieldVisitor visitor)
{
ReadLine();
if (Debugging.AssertsEnabled) Debugging.Assert(StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.VALUE));
if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_STRING))
{
visitor.StringField(fieldInfo,
Encoding.UTF8.GetString(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length,
_scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length));
}
else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_BINARY))
{
var copy = new byte[_scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length];
Array.Copy(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, copy, 0, copy.Length);
visitor.BinaryField(fieldInfo, copy);
}
else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_INT))
{
UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length,
_scratchUtf16);
visitor.Int32Field(fieldInfo, Convert.ToInt32(_scratchUtf16.ToString(), CultureInfo.InvariantCulture));
}
else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_LONG))
{
UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length,
_scratchUtf16);
visitor.Int64Field(fieldInfo, Convert.ToInt64(_scratchUtf16.ToString(), CultureInfo.InvariantCulture));
}
else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_FLOAT))
{
UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length,
_scratchUtf16);
visitor.SingleField(fieldInfo, Convert.ToSingle(_scratchUtf16.ToString(), CultureInfo.InvariantCulture));
}
else if (Equals(type, SimpleTextStoredFieldsWriter.TYPE_DOUBLE))
{
UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + SimpleTextStoredFieldsWriter.VALUE.Length, _scratch.Length - SimpleTextStoredFieldsWriter.VALUE.Length,
_scratchUtf16);
visitor.DoubleField(fieldInfo, Convert.ToDouble(_scratchUtf16.ToString(), CultureInfo.InvariantCulture));
}
}
public override object Clone()
{
if (_input == null)
{
throw new ObjectDisposedException(this.GetType().FullName, "this FieldsReader is closed");
}
return new SimpleTextStoredFieldsReader(_offsets, (IndexInput) _input.Clone(), _fieldInfos);
}
protected override void Dispose(bool disposing)
{
if (!disposing) return;
try
{
IOUtils.Dispose(_input);
}
finally
{
_input = null;
_offsets = null;
}
}
private void ReadLine()
{
SimpleTextUtil.ReadLine(_input, _scratch);
}
/// <summary>
/// NOTE: This was parseIntAt() in Lucene.
/// </summary>
private int ParseInt32At(int offset)
{
UnicodeUtil.UTF8toUTF16(_scratch.Bytes, _scratch.Offset + offset, _scratch.Length - offset, _scratchUtf16);
return ArrayUtil.ParseInt32(_scratchUtf16.Chars, 0, _scratchUtf16.Length);
}
private bool EqualsAt(BytesRef a, BytesRef b, int bOffset)
{
return a.Length == b.Length - bOffset &&
ArrayUtil.Equals(a.Bytes, a.Offset, b.Bytes, b.Offset + bOffset, b.Length - bOffset);
}
public override long RamBytesUsed()
{
return 0;
}
public override void CheckIntegrity()
{
}
}
}