/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.IO;
using Lucene.Net.Support;
using Lucene.Net.Util;
using TokenStream = Lucene.Net.Analysis.TokenStream;
using Lucene.Net.Documents;
using AlreadyClosedException = Lucene.Net.Store.AlreadyClosedException;
using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
namespace Lucene.Net.Index
{
/// <summary> Class responsible for access to stored document fields.
/// <p/>
/// It uses &lt;segment&gt;.fdt and &lt;segment&gt;.fdx files.
///
/// </summary>
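/// <remarks> The &lt;segment&gt;.fdx file is a flat array of 8-byte pointers, one per document,
/// each giving the position in the &lt;segment&gt;.fdt file where that document's stored fields begin;
/// this is why the document count below is derived by dividing the index length by 8.
/// </remarks>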
public sealed class FieldsReader : ICloneable, IDisposable
{
private readonly FieldInfos fieldInfos;
// The main fieldStream, used only for cloning.
private readonly IndexInput cloneableFieldsStream;
// This is a clone of cloneableFieldsStream used for reading documents.
// It should not be cloned outside of a synchronized context.
private readonly IndexInput fieldsStream;
private readonly IndexInput cloneableIndexStream;
private readonly IndexInput indexStream;
private readonly int numTotalDocs;
private readonly int size;
private bool closed;
private readonly int format;
private readonly int formatSize;
// The docID offset where our docs begin in the index
// file. This will be 0 if we have our own private file.
private readonly int docStoreOffset;
private readonly CloseableThreadLocal<IndexInput> fieldsStreamTL = new CloseableThreadLocal<IndexInput>();
private readonly bool isOriginal = false;
/// <summary>Returns a cloned FieldsReader that shares open
/// IndexInputs with the original one. It is the caller's
/// job not to close the original FieldsReader until all
/// clones are closed (e.g., currently SegmentReader manages
/// this logic).
/// </summary>
public System.Object Clone()
{
EnsureOpen();
return new FieldsReader(fieldInfos, numTotalDocs, size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream);
}
// Used only by clone
private FieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, int format, int formatSize, int docStoreOffset, IndexInput cloneableFieldsStream, IndexInput cloneableIndexStream)
{
this.fieldInfos = fieldInfos;
this.numTotalDocs = numTotalDocs;
this.size = size;
this.format = format;
this.formatSize = formatSize;
this.docStoreOffset = docStoreOffset;
this.cloneableFieldsStream = cloneableFieldsStream;
this.cloneableIndexStream = cloneableIndexStream;
fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
indexStream = (IndexInput) cloneableIndexStream.Clone();
}
public /*internal*/ FieldsReader(Directory d, String segment, FieldInfos fn):this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE, - 1, 0)
{
}
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize):this(d, segment, fn, readBufferSize, - 1, 0)
{
}
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
{
bool success = false;
isOriginal = true;
try
{
fieldInfos = fn;
cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);
// First version of fdx did not include a format
// header, but, the first int will always be 0 in that
// case
int firstInt = cloneableIndexStream.ReadInt();
format = firstInt;
if (format > FieldsWriter.FORMAT_CURRENT)
throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
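// Formats newer than the original start the .fdx file with a 4-byte header (the version int
// just read); formatSize records that so offsets into the index account for it.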
formatSize = format > FieldsWriter.FORMAT ? 4 : 0;
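// Indexes older than the UTF-8-length-in-bytes format stored strings in modified UTF-8;
// switch the stream into that decoding mode before it is cloned below.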
if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
cloneableFieldsStream.SetModifiedUTF8StringsMode();
fieldsStream = (IndexInput) cloneableFieldsStream.Clone();
long indexSize = cloneableIndexStream.Length() - formatSize;
if (docStoreOffset != - 1)
{
// We read only a slice out of this shared fields file
this.docStoreOffset = docStoreOffset;
this.size = size;
// Verify the file is long enough to hold all of our
// docs
System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
}
else
{
this.docStoreOffset = 0;
this.size = (int) (indexSize >> 3);
}
indexStream = (IndexInput) cloneableIndexStream.Clone();
numTotalDocs = (int) (indexSize >> 3);
success = true;
}
finally
{
// With lock-less commits, it's entirely possible (and
// fine) to hit a FileNotFound exception above. In
// this case, we want to explicitly close any subset
// of things that were opened so that we don't have to
// wait for a GC to do so.
if (!success)
{
Dispose();
}
}
}
/// <throws> AlreadyClosedException if this FieldsReader is closed </throws>
internal void EnsureOpen()
{
if (closed)
{
throw new AlreadyClosedException("this FieldsReader is closed");
}
}
/// <summary> Closes the underlying <see cref="Lucene.Net.Store.IndexInput" /> streams, including any associated with a
/// lazy implementation of a Field. Once closed, the Field values will no longer be accessible.
///
/// </summary>
/// <throws> IOException </throws>
public void Dispose()
{
// Move to protected method if class becomes unsealed
if (!closed)
{
if (fieldsStream != null)
{
fieldsStream.Close();
}
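// Only the original reader owns the cloneable streams; clones share them and must not close them (see Clone()).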
if (isOriginal)
{
if (cloneableFieldsStream != null)
{
cloneableFieldsStream.Close();
}
if (cloneableIndexStream != null)
{
cloneableIndexStream.Close();
}
}
if (indexStream != null)
{
indexStream.Close();
}
fieldsStreamTL.Close();
closed = true;
}
}
public /*internal*/ int Size()
{
return size;
}
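// Positions indexStream at the 8-byte pointer for docID, past the optional format header
// and offset by docStoreOffset when this segment shares a doc store with others.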
private void SeekIndex(int docID)
{
indexStream.Seek(formatSize + (docID + docStoreOffset) * 8L);
}
internal bool CanReadRawDocs()
{
// Disable reading raw docs in 2.x format, because of the removal of compressed
// fields in 3.0. We don't want rawDocs() to decode field bits to figure out
// if a field was compressed, hence we enforce ordinary (non-raw) stored field merges
// for <3.0 indexes.
return format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
}
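/// <summary> Loads the stored fields of document <c>n</c>, using <paramref name="fieldSelector"/> (when non-null)
/// to decide per field whether to load it eagerly, lazily, by size only, or not at all.
/// A minimal usage sketch (illustrative; the Directory, segment name, and FieldInfos are assumed to come
/// from the owning SegmentReader):
/// <code>
/// // dir, "_0", and fieldInfos are placeholders for an existing segment's data
/// FieldsReader reader = new FieldsReader(dir, "_0", fieldInfos);
/// Document doc = reader.Doc(0, null); // a null selector loads every stored field
/// reader.Dispose(); // closes the underlying .fdt/.fdx streams
/// </code>
/// </summary>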
public /*internal*/ Document Doc(int n, FieldSelector fieldSelector)
{
SeekIndex(n);
long position = indexStream.ReadLong();
fieldsStream.Seek(position);
var doc = new Document();
int numFields = fieldsStream.ReadVInt();
for (int i = 0; i < numFields; i++)
{
int fieldNumber = fieldsStream.ReadVInt();
FieldInfo fi = fieldInfos.FieldInfo(fieldNumber);
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.Accept(fi.name);
byte bits = fieldsStream.ReadByte();
System.Diagnostics.Debug.Assert(bits <= FieldsWriter.FIELD_IS_COMPRESSED + FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY);
bool compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
System.Diagnostics.Debug.Assert(
(!compressed || (format < FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS)),
"compressed fields are only allowed in indexes of version <= 2.9");
bool tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
bool binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
//TODO: Find an alternative approach here if this list continues to grow beyond the
//5 or 6 cases currently here. See LUCENE-762 for discussion.
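// LOAD and LOAD_AND_BREAK read the field eagerly, LAZY_LOAD defers reading until the value is
// requested, SIZE and SIZE_AND_BREAK record only the field's length, and anything else skips it.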
if (acceptField.Equals(FieldSelectorResult.LOAD))
{
AddField(doc, fi, binary, compressed, tokenize);
}
else if (acceptField.Equals(FieldSelectorResult.LOAD_AND_BREAK))
{
AddField(doc, fi, binary, compressed, tokenize);
break; //Get out of this loop
}
else if (acceptField.Equals(FieldSelectorResult.LAZY_LOAD))
{
AddFieldLazy(doc, fi, binary, compressed, tokenize);
}
else if (acceptField.Equals(FieldSelectorResult.SIZE))
{
SkipField(binary, compressed, AddFieldSize(doc, fi, binary, compressed));
}
else if (acceptField.Equals(FieldSelectorResult.SIZE_AND_BREAK))
{
AddFieldSize(doc, fi, binary, compressed);
break;
}
else
{
SkipField(binary, compressed);
}
}
return doc;
}
/// <summary>Returns the length in bytes of each raw document in a
/// contiguous range of length numDocs starting with
/// startDocID. Returns the IndexInput (the fieldsStream),
/// already positioned at the starting point for startDocID.
/// </summary>
internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs)
{
SeekIndex(startDocID);
long startOffset = indexStream.ReadLong();
long lastOffset = startOffset;
int count = 0;
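// Each doc's length is the difference between consecutive .fdx pointers; the last document
// in the file ends at the length of the .fdt file.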
while (count < numDocs)
{
long offset;
int docID = docStoreOffset + startDocID + count + 1;
System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
if (docID < numTotalDocs)
offset = indexStream.ReadLong();
else
offset = fieldsStream.Length();
lengths[count++] = (int) (offset - lastOffset);
lastOffset = offset;
}
fieldsStream.Seek(startOffset);
return fieldsStream;
}
/// <summary> Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
/// This will have the most payoff on large fields.
/// </summary>
private void SkipField(bool binary, bool compressed)
{
SkipField(binary, compressed, fieldsStream.ReadVInt());
}
private void SkipField(bool binary, bool compressed, int toRead)
{
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed)
{
fieldsStream.Seek(fieldsStream.FilePointer + toRead);
}
else
{
// We need to skip chars because the stored length is in chars, not bytes. This will slow
// us down, but it is still cheaper than materializing the string just to discard it.
fieldsStream.SkipChars(toRead);
}
}
private void AddFieldLazy(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
{
if (binary)
{
int toRead = fieldsStream.ReadVInt();
long pointer = fieldsStream.FilePointer;
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
doc.Add(new LazyField(this, fi.name, Field.Store.YES, toRead, pointer, binary, compressed));
//Need to move the pointer ahead by toRead positions
fieldsStream.Seek(pointer + toRead);
}
else
{
const Field.Store store = Field.Store.YES;
Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
AbstractField f;
if (compressed)
{
int toRead = fieldsStream.ReadVInt();
long pointer = fieldsStream.FilePointer;
f = new LazyField(this, fi.name, store, toRead, pointer, binary, compressed);
//skip over the part that we aren't loading
fieldsStream.Seek(pointer + toRead);
f.OmitNorms = fi.omitNorms;
f.OmitTermFreqAndPositions = fi.omitTermFreqAndPositions;
}
else
{
int length = fieldsStream.ReadVInt();
long pointer = fieldsStream.FilePointer;
//Skip ahead of where we are by the length of what is stored
if (format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
{
fieldsStream.Seek(pointer + length);
}
else
{
fieldsStream.SkipChars(length);
}
f = new LazyField(this, fi.name, store, index, termVector, length, pointer, binary, compressed)
{OmitNorms = fi.omitNorms, OmitTermFreqAndPositions = fi.omitTermFreqAndPositions};
}
doc.Add(f);
}
}
private void AddField(Document doc, FieldInfo fi, bool binary, bool compressed, bool tokenize)
{
//we have a binary stored field, and it may be compressed
if (binary)
{
int toRead = fieldsStream.ReadVInt();
var b = new byte[toRead];
fieldsStream.ReadBytes(b, 0, b.Length);
doc.Add(compressed ? new Field(fi.name, Uncompress(b), Field.Store.YES) : new Field(fi.name, b, Field.Store.YES));
}
else
{
const Field.Store store = Field.Store.YES;
Field.Index index = FieldExtensions.ToIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = FieldExtensions.ToTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
AbstractField f;
if (compressed)
{
int toRead = fieldsStream.ReadVInt();
var b = new byte[toRead];
fieldsStream.ReadBytes(b, 0, b.Length);
f = new Field(fi.name, false, System.Text.Encoding.GetEncoding("UTF-8").GetString(Uncompress(b)), store, index,
termVector) {OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};
}
else
{
f = new Field(fi.name, false, fieldsStream.ReadString(), store, index, termVector)
{OmitTermFreqAndPositions = fi.omitTermFreqAndPositions, OmitNorms = fi.omitNorms};
}
doc.Add(f);
}
}
// Add the size of the field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; a char counts as 2 bytes)
// Read just the size -- caller must skip the field content to continue reading fields
// Return the size in bytes or chars, depending on field type
private int AddFieldSize(Document doc, FieldInfo fi, bool binary, bool compressed)
{
int size = fieldsStream.ReadVInt(), bytesize = binary || compressed ? size : 2 * size;
var sizebytes = new byte[4];
sizebytes[0] = (byte) (Number.URShift(bytesize, 24));
sizebytes[1] = (byte) (Number.URShift(bytesize, 16));
sizebytes[2] = (byte) (Number.URShift(bytesize, 8));
sizebytes[3] = (byte) bytesize;
doc.Add(new Field(fi.name, sizebytes, Field.Store.YES));
return size;
}
/// <summary> A lazy implementation of Fieldable that defers loading of field values until they are
/// asked for, instead of loading them when the Document is loaded.
/// </summary>
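/// <remarks> Only the field's length and its file pointer are recorded up front; the value itself is
/// re-read from a per-thread clone of the fields stream the first time it is requested.
/// </remarks>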
[Serializable]
private sealed class LazyField : AbstractField
{
private void InitBlock(FieldsReader enclosingInstance)
{
this.Enclosing_Instance = enclosingInstance;
}
private FieldsReader Enclosing_Instance { get; set; }
private int toRead;
private long pointer;
[Obsolete("Only kept for backward-compatbility with <3.0 indexes. Will be removed in 4.0.")]
private readonly Boolean isCompressed;
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, int toRead, long pointer, bool isBinary, bool isCompressed):base(name, store, Field.Index.NO, Field.TermVector.NO)
{
InitBlock(enclosingInstance);
this.toRead = toRead;
this.pointer = pointer;
this.internalIsBinary = isBinary;
if (isBinary)
internalBinaryLength = toRead;
lazy = true;
this.isCompressed = isCompressed;
}
public LazyField(FieldsReader enclosingInstance, System.String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, bool isBinary, bool isCompressed):base(name, store, index, termVector)
{
InitBlock(enclosingInstance);
this.toRead = toRead;
this.pointer = pointer;
this.internalIsBinary = isBinary;
if (isBinary)
internalBinaryLength = toRead;
lazy = true;
this.isCompressed = isCompressed;
}
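// Lazy fields may be resolved long after Doc() returned, possibly from another thread, so each
// thread reads from its own clone of the fields stream rather than the shared, unsynchronized one.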
private IndexInput GetFieldStream()
{
IndexInput localFieldsStream = Enclosing_Instance.fieldsStreamTL.Get();
if (localFieldsStream == null)
{
localFieldsStream = (IndexInput) Enclosing_Instance.cloneableFieldsStream.Clone();
Enclosing_Instance.fieldsStreamTL.Set(localFieldsStream);
}
return localFieldsStream;
}
/// <summary>The value of the field as a Reader, or null. If null, the String value,
/// binary value, or TokenStream value is used. Exactly one of StringValue(),
/// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
/// </summary>
public override TextReader ReaderValue
{
get
{
Enclosing_Instance.EnsureOpen();
return null;
}
}
/// <summary>The value of the field as a TokenStream, or null. If null, the Reader value,
/// String value, or binary value is used. Exactly one of StringValue(),
/// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
/// </summary>
public override TokenStream TokenStreamValue
{
get
{
Enclosing_Instance.EnsureOpen();
return null;
}
}
/// <summary>The value of the field as a String, or null. If null, the Reader value,
/// binary value, or TokenStream value is used. Exactly one of StringValue(),
/// ReaderValue(), GetBinaryValue(), and TokenStreamValue() must be set.
/// </summary>
public override string StringValue
{
get
{
Enclosing_Instance.EnsureOpen();
if (internalIsBinary)
return null;
if (fieldsData == null)
{
IndexInput localFieldsStream = GetFieldStream();
try
{
localFieldsStream.Seek(pointer);
if (isCompressed)
{
var b = new byte[toRead];
localFieldsStream.ReadBytes(b, 0, b.Length);
fieldsData =
System.Text.Encoding.GetEncoding("UTF-8").GetString(Enclosing_Instance.Uncompress(b));
}
else
{
if (Enclosing_Instance.format >= FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
{
var bytes = new byte[toRead];
localFieldsStream.ReadBytes(bytes, 0, toRead);
fieldsData = System.Text.Encoding.GetEncoding("UTF-8").GetString(bytes);
}
else
{
//read in chars b/c we already know the length we need to read
var chars = new char[toRead];
localFieldsStream.ReadChars(chars, 0, toRead);
fieldsData = new System.String(chars);
}
}
}
catch (System.IO.IOException e)
{
throw new FieldReaderException(e);
}
}
return (System.String) fieldsData;
}
}
public long Pointer
{
get
{
Enclosing_Instance.EnsureOpen();
return pointer;
}
set
{
Enclosing_Instance.EnsureOpen();
this.pointer = value;
}
}
public int ToRead
{
get
{
Enclosing_Instance.EnsureOpen();
return toRead;
}
set
{
Enclosing_Instance.EnsureOpen();
this.toRead = value;
}
}
public override byte[] GetBinaryValue(byte[] result)
{
Enclosing_Instance.EnsureOpen();
if (internalIsBinary)
{
if (fieldsData == null)
{
// Allocate new buffer if result is null or too small
byte[] b;
if (result == null || result.Length < toRead)
b = new byte[toRead];
else
b = result;
IndexInput localFieldsStream = GetFieldStream();
// Wrap any IOException in a FieldReaderException; callers already handle exceptions from
// IndexReader.Document, so surfacing a failure here is not a big change for them.
try
{
localFieldsStream.Seek(pointer);
localFieldsStream.ReadBytes(b, 0, toRead);
fieldsData = isCompressed ? Enclosing_Instance.Uncompress(b) : b;
}
catch (IOException e)
{
throw new FieldReaderException(e);
}
internalbinaryOffset = 0;
internalBinaryLength = toRead;
}
return (byte[]) fieldsData;
}
return null;
}
}
private byte[] Uncompress(byte[] b)
{
try
{
return CompressionTools.Decompress(b);
}
catch (Exception e)
{
// this will happen if the field is not compressed
throw new CorruptIndexException("field data are in wrong format: " + e, e);
}
}
}
}