blob: 83193ece74c71d6feeb4b64499acb1aeb01dc001 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using BufferedIndexInput = Lucene.Net.Store.BufferedIndexInput;
using Directory = Lucene.Net.Store.Directory;
namespace Lucene.Net.Index
{
/// <summary>This stores a monotonically increasing set of <Term, TermInfo> pairs in a
/// Directory. Pairs are accessed either by Term or by ordinal position the
/// set.
/// </summary>
public sealed class TermInfosReader
{
private Directory directory;
private System.String segment;
private FieldInfos fieldInfos;
private System.LocalDataStoreSlot enumerators = System.Threading.Thread.AllocateDataSlot();
private SegmentTermEnum origEnum;
private long size;
private Term[] indexTerms = null;
private TermInfo[] indexInfos;
private long[] indexPointers;
private SegmentTermEnum indexEnum;
private int indexDivisor = 1;
private int totalIndexInterval;
internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis) : this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE)
{
}
internal TermInfosReader(Directory dir, System.String seg, FieldInfos fis, int readBufferSize)
{
bool success = false;
try
{
directory = dir;
segment = seg;
fieldInfos = fis;
origEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tis", readBufferSize), fieldInfos, false);
size = origEnum.size;
totalIndexInterval = origEnum.indexInterval;
indexEnum = new SegmentTermEnum(directory.OpenInput(segment + ".tii", readBufferSize), fieldInfos, true);
success = true;
}
finally
{
// With lock-less commits, it's entirely possible (and
// fine) to hit a FileNotFound exception above. In
// this case, we want to explicitly close any subset
// of things that were opened so that we don't have to
// wait for a GC to do so.
if (!success)
{
Close();
}
}
}
public int GetSkipInterval()
{
return origEnum.skipInterval;
}
public int GetMaxSkipLevels()
{
return origEnum.maxSkipLevels;
}
/// <summary> <p>Sets the indexDivisor, which subsamples the number
/// of indexed terms loaded into memory. This has a
/// similar effect as {@link
/// IndexWriter#setTermIndexInterval} except that setting
/// must be done at indexing time while this setting can be
/// set per reader. When set to N, then one in every
/// N*termIndexInterval terms in the index is loaded into
/// memory. By setting this to a value > 1 you can reduce
/// memory usage, at the expense of higher latency when
/// loading a TermInfo. The default value is 1.</p>
///
/// <b>NOTE:</b> you must call this before the term
/// index is loaded. If the index is already loaded,
/// an IllegalStateException is thrown.
///
/// + @throws IllegalStateException if the term index has
/// already been loaded into memory.
/// </summary>
public void SetIndexDivisor(int indexDivisor)
{
if (indexDivisor < 1)
throw new System.ArgumentException("indexDivisor must be > 0: got " + indexDivisor);
if (indexTerms != null)
throw new System.SystemException("index terms are already loaded");
this.indexDivisor = indexDivisor;
totalIndexInterval = origEnum.indexInterval * indexDivisor;
}
/// <summary>Returns the indexDivisor.</summary>
/// <seealso cref="setIndexDivisor">
/// </seealso>
public int GetIndexDivisor()
{
return indexDivisor;
}
public void Close()
{
if (origEnum != null)
origEnum.Close();
if (indexEnum != null)
indexEnum.Close();
}
/// <summary>Returns the number of term/value pairs in the set. </summary>
internal long Size()
{
return size;
}
private SegmentTermEnum GetEnum()
{
SegmentTermEnum termEnum = (SegmentTermEnum) System.Threading.Thread.GetData(enumerators);
if (termEnum == null)
{
termEnum = Terms();
System.Threading.Thread.SetData(enumerators, termEnum);
}
return termEnum;
}
private void EnsureIndexIsRead()
{
lock (this)
{
if (indexTerms != null)
// index already read
return ; // do nothing
try
{
int indexSize = 1 + ((int) indexEnum.size - 1) / indexDivisor; // otherwise read index
indexTerms = new Term[indexSize];
indexInfos = new TermInfo[indexSize];
indexPointers = new long[indexSize];
for (int i = 0; indexEnum.Next(); i++)
{
indexTerms[i] = indexEnum.Term();
indexInfos[i] = indexEnum.TermInfo();
indexPointers[i] = indexEnum.indexPointer;
for (int j = 1; j < indexDivisor; j++)
if (!indexEnum.Next())
break;
}
}
finally
{
indexEnum.Close();
indexEnum = null;
}
}
}
/// <summary>Returns the offset of the greatest index entry which is less than or equal to term.</summary>
private int GetIndexOffset(Term term)
{
int lo = 0; // binary search indexTerms[]
int hi = indexTerms.Length - 1;
while (hi >= lo)
{
int mid = (lo + hi) >> 1;
int delta = term.CompareTo(indexTerms[mid]);
if (delta < 0)
hi = mid - 1;
else if (delta > 0)
lo = mid + 1;
else
return mid;
}
return hi;
}
private void SeekEnum(int indexOffset)
{
GetEnum().Seek(indexPointers[indexOffset], (indexOffset * totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]);
}
/// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
public TermInfo Get(Term term)
{
if (size == 0)
return null;
EnsureIndexIsRead();
// optimize sequential access: first try scanning cached enum w/o seeking
SegmentTermEnum enumerator = GetEnum();
if (enumerator.Term() != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term()) >= 0))
{
int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
return ScanEnum(term); // no need to seek
}
// random-access: must seek
SeekEnum(GetIndexOffset(term));
return ScanEnum(term);
}
/// <summary>Scans within block for matching term. </summary>
private TermInfo ScanEnum(Term term)
{
SegmentTermEnum enumerator = GetEnum();
enumerator.ScanTo(term);
if (enumerator.Term() != null && term.CompareTo(enumerator.Term()) == 0)
return enumerator.TermInfo();
else
return null;
}
/// <summary>Returns the nth term in the set. </summary>
internal Term Get(int position)
{
if (size == 0)
return null;
SegmentTermEnum enumerator = GetEnum();
if (enumerator != null && enumerator.Term() != null && position >= enumerator.position && position < (enumerator.position + totalIndexInterval))
return ScanEnum(position); // can avoid seek
SeekEnum(position / totalIndexInterval); // must seek
return ScanEnum(position);
}
private Term ScanEnum(int position)
{
SegmentTermEnum enumerator = GetEnum();
while (enumerator.position < position)
if (!enumerator.Next())
return null;
return enumerator.Term();
}
/// <summary>Returns the position of a Term in the set or -1. </summary>
internal long GetPosition(Term term)
{
if (size == 0)
return - 1;
EnsureIndexIsRead();
int indexOffset = GetIndexOffset(term);
SeekEnum(indexOffset);
SegmentTermEnum enumerator = GetEnum();
while (term.CompareTo(enumerator.Term()) > 0 && enumerator.Next())
{
}
if (term.CompareTo(enumerator.Term()) == 0)
return enumerator.position;
else
return - 1;
}
/// <summary>Returns an enumeration of all the Terms and TermInfos in the set. </summary>
public SegmentTermEnum Terms()
{
return (SegmentTermEnum) origEnum.Clone();
}
/// <summary>Returns an enumeration of terms starting at or after the named term. </summary>
public SegmentTermEnum Terms(Term term)
{
Get(term);
return (SegmentTermEnum) GetEnum().Clone();
}
}
}