blob: fc0d201365af82a160676b041b5fb638b92604c0 [file] [log] [blame]
using J2N.Text;
using Lucene.Net.Diagnostics;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using BytesRef = Lucene.Net.Util.BytesRef;
using Directory = Lucene.Net.Store.Directory;
namespace Lucene.Net.Codecs.Lucene3x
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using FieldInfos = Lucene.Net.Index.FieldInfos;
using IndexFileNames = Lucene.Net.Index.IndexFileNames;
using IOContext = Lucene.Net.Store.IOContext;
using IOUtils = Lucene.Net.Util.IOUtils;
using Term = Lucene.Net.Index.Term;
/// <summary>
/// This stores a monotonically increasing set of <c>Term, TermInfo</c> pairs in a
/// Directory. Pairs are accessed either by <see cref="Term"/> or by ordinal position in the
/// set.
/// <para/>
/// @lucene.experimental
/// </summary>
[Obsolete("(4.0) this class has been replaced by FormatPostingsTermsDictReader, except for reading old segments.")]
internal sealed class TermInfosReader : IDisposable
// Directory the terms file (and, optionally, the terms index file) is opened from.
private readonly Directory directory;
// Segment name used to build the term file names.
private readonly string segment;
// Field metadata handed to every SegmentTermEnum this reader creates.
private readonly FieldInfos fieldInfos;
#pragma warning disable CA2213 // Disposable fields should be disposed
// Per-thread ThreadResources; populated lazily by GetThreadResources().
// NOTE(review): both fields below are released through IOUtils.Dispose in Dispose(),
// which is presumably why the analyzer warning is suppressed - confirm.
private readonly DisposableThreadLocal<ThreadResources> threadResources = new DisposableThreadLocal<ThreadResources>();
// Enumerator over the terms file opened in the constructor; Terms() hands out clones of it.
private readonly SegmentTermEnum origEnum;
#pragma warning restore CA2213 // Disposable fields should be disposed
// Copied from origEnum.size in the constructor and exposed through Count.
private readonly long size;
// In-memory terms index; null when the reader was created with indexDivisor == -1.
private readonly TermInfosReaderIndex index;
// index.Length, or -1 when the terms index was not loaded.
private readonly int indexLength;
// origEnum.indexInterval * indexDivisor, or -1 when the terms index was not loaded.
private readonly int totalIndexInterval;
// Size argument passed to the termsCache constructor.
private const int DEFAULT_CACHE_SIZE = 1024;
/// <summary>
/// A <see cref="TermInfo"/> that additionally carries the ordinal of its term.
/// </summary>
public sealed class TermInfoAndOrd : TermInfo
{
    /// <summary>Ordinal of the term; asserted to be non-negative on construction.</summary>
    internal readonly long termOrd;

    /// <summary>
    /// Copies <paramref name="ti"/> through the base-class copy constructor and records the ordinal.
    /// </summary>
    /// <param name="ti">Term info to copy.</param>
    /// <param name="termOrd">Ordinal of the term; must be &gt;= 0.</param>
    public TermInfoAndOrd(TermInfo ti, long termOrd)
        : base(ti)
    {
        if (Debugging.AssertsEnabled) Debugging.Assert(termOrd >= 0);
        this.termOrd = termOrd;
    }
}
/// <summary>
/// Cache key wrapping a <see cref="Term"/>; equality and hashing are delegated to the wrapped term.
/// </summary>
private class CloneableTerm : DoubleBarrelLRUCache.CloneableKey
{
    internal Term term;

    public CloneableTerm(Term t)
    {
        this.term = t;
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="other"/> is a <see cref="CloneableTerm"/>
    /// wrapping an equal term. A <c>null</c> or foreign-typed argument yields <c>false</c>
    /// rather than throwing.
    /// </summary>
    public override bool Equals(object other)
    {
        CloneableTerm t = other as CloneableTerm;
        return t != null && this.term.Equals(t.term);
    }

    public override int GetHashCode()
    {
        return term.GetHashCode();
    }

    /// <summary>
    /// Creates a new key that wraps the same <see cref="Term"/> instance (the term itself is not copied).
    /// </summary>
    public override object Clone()
    {
        return new CloneableTerm(term);
    }
}
// Instance-wide (not per-thread) cache from term to its TermInfo plus ordinal,
// constructed with DEFAULT_CACHE_SIZE.
private readonly DoubleBarrelLRUCache<CloneableTerm, TermInfoAndOrd> termsCache = new DoubleBarrelLRUCache<CloneableTerm, TermInfoAndOrd>(DEFAULT_CACHE_SIZE);
/// <summary>
/// Per-thread resources managed by ThreadLocal.
/// </summary>
private sealed class ThreadResources
{
    /// <summary>This thread's private clone of the original term enumerator.</summary>
    internal SegmentTermEnum termEnum;
}
/// <summary>
/// Opens the terms file of segment <paramref name="seg"/> and, unless
/// <paramref name="indexDivisor"/> is -1, loads the terms index into memory.
/// </summary>
/// <param name="dir">Directory containing the segment files.</param>
/// <param name="seg">Segment name.</param>
/// <param name="fis">Field infos passed to the term enumerators.</param>
/// <param name="context">I/O context used when opening the files.</param>
/// <param name="indexDivisor">-1 to skip loading the terms index, otherwise a value &gt;= 1.</param>
/// <exception cref="ArgumentException"><paramref name="indexDivisor"/> is neither -1 nor positive.</exception>
internal TermInfosReader(Directory dir, string seg, FieldInfos fis, IOContext context, int indexDivisor)
{
    bool success = false;

    if (indexDivisor < 1 && indexDivisor != -1)
    {
        throw new ArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
    }

    try
    {
        directory = dir;
        segment = seg;
        fieldInfos = fis;

        origEnum = new SegmentTermEnum(directory.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_EXTENSION), context), fieldInfos, false);
        size = origEnum.size;

        if (indexDivisor != -1)
        {
            // Load terms index
            totalIndexInterval = origEnum.indexInterval * indexDivisor;

            string indexFileName = IndexFileNames.SegmentFileName(segment, "", Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION);
            SegmentTermEnum indexEnum = new SegmentTermEnum(directory.OpenInput(indexFileName, context), fieldInfos, true);

            try
            {
                index = new TermInfosReaderIndex(indexEnum, indexDivisor, dir.FileLength(indexFileName), totalIndexInterval);
                indexLength = index.Length;
            }
            finally
            {
                // The index enumerator is only needed while building the in-memory index;
                // release its input whether or not construction succeeded.
                indexEnum.Dispose();
            }
        }
        else
        {
            // Do not load terms index:
            totalIndexInterval = -1;
            index = null;
            indexLength = -1;
        }
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            // NOTE(review): origEnum may still be null here; this relies on
            // IOUtils.Dispose tolerating null entries - confirm.
            Dispose();
        }
    }
}
/// <summary>Skip interval reported by the original term enumerator.</summary>
public int SkipInterval
{
    get { return origEnum.skipInterval; }
}

/// <summary>Maximum number of skip levels reported by the original term enumerator.</summary>
public int MaxSkipLevels
{
    get { return origEnum.maxSkipLevels; }
}
/// <summary>
/// Releases the original term enumerator and the per-thread resources.
/// </summary>
public void Dispose()
{
    IOUtils.Dispose(origEnum, threadResources);
}
/// <summary>
/// Gets the number of term/value pairs held in the set.
/// <para/>
/// NOTE: This was size() in Lucene.
/// </summary>
internal long Count
{
    get { return size; }
}
/// <summary>
/// Returns the calling thread's <see cref="ThreadResources"/>, creating it on first use
/// with a fresh enumerator obtained from <see cref="Terms()"/>.
/// </summary>
private ThreadResources GetThreadResources()
{
    ThreadResources resources = threadResources.Value;
    if (resources == null)
    {
        resources = new ThreadResources();
        resources.termEnum = Terms();
        threadResources.Value = resources;
    }
    return resources;
}
private static readonly IComparer<BytesRef> legacyComparer = BytesRef.UTF8SortedAsUTF16Comparer;

/// <summary>
/// Orders two terms by field name (ordinal comparison) and, when the fields are equal,
/// by their bytes using <see cref="legacyComparer"/>.
/// </summary>
/// <returns>Negative, zero or positive as <paramref name="term1"/> sorts before, equal to or after <paramref name="term2"/>.</returns>
private static int CompareAsUTF16(Term term1, Term term2) // LUCENENET: CA1822: Mark members as static
{
    if (term1.Field.Equals(term2.Field, StringComparison.Ordinal))
    {
        return legacyComparer.Compare(term1.Bytes, term2.Bytes);
    }
    else
    {
        return term1.Field.CompareToOrdinal(term2.Field);
    }
}
/// <summary>
/// Returns the <see cref="TermInfo"/> for a <see cref="Term"/> in the set, or <c>null</c>.
/// A cached entry is returned without seeking the thread's enumerator.
/// </summary>
internal TermInfo Get(Term term)
{
    return Get(term, false);
}
/// <summary>
/// Returns the <see cref="TermInfo"/> for a <see cref="Term"/> in the set, or <c>null</c>.
/// </summary>
/// <param name="term">Term to look up.</param>
/// <param name="mustSeekEnum">
/// When <c>true</c>, the calling thread's enumerator is always positioned via
/// <c>SeekEnum</c>, even if the term is already cached.
/// </param>
/// <exception cref="InvalidOperationException">A seek is required but the terms index was not loaded.</exception>
private TermInfo Get(Term term, bool mustSeekEnum)
{
    if (size == 0)
    {
        return null;
    }

    TermInfoAndOrd tiOrd = termsCache.Get(new CloneableTerm(term));
    ThreadResources resources = GetThreadResources();

    if (!mustSeekEnum && tiOrd != null)
    {
        return tiOrd;
    }

    // SeekEnum dereferences the terms index; fail with a descriptive exception
    // instead of a NullReferenceException when it was never loaded.
    EnsureIndexIsRead();
    return SeekEnum(resources.termEnum, term, tiOrd, true);
}
/// <summary>
/// Stores the enumerator's current term, term info and position in the terms cache.
/// </summary>
/// <param name="enumerator">Enumerator positioned on the term to cache.</param>
public void CacheCurrentTerm(SegmentTermEnum enumerator)
{
    termsCache.Put(new CloneableTerm(enumerator.Term()), new TermInfoAndOrd(enumerator.termInfo, enumerator.position));
}
/// <summary>
/// Creates a new <see cref="Term"/> with the same field and a deep copy of the bytes of <paramref name="other"/>.
/// </summary>
internal static Term DeepCopyOf(Term other)
{
    return new Term(other.Field, BytesRef.DeepCopyOf(other.Bytes));
}
/// <summary>
/// Positions <paramref name="enumerator"/> on <paramref name="term"/> and returns its
/// <see cref="TermInfo"/>, or <c>null</c> if the term is not in the set.
/// </summary>
/// <param name="enumerator">Enumerator to position.</param>
/// <param name="term">Term to seek to.</param>
/// <param name="useCache">When <c>true</c>, the terms cache is consulted first and may be updated.</param>
internal TermInfo SeekEnum(SegmentTermEnum enumerator, Term term, bool useCache)
{
    if (useCache)
    {
        // The lookup key wraps a deep copy of the term rather than the caller's instance.
        return SeekEnum(enumerator, term, termsCache.Get(new CloneableTerm(DeepCopyOf(term))), useCache);
    }
    else
    {
        return SeekEnum(enumerator, term, null, useCache);
    }
}
/// <summary>
/// Positions <paramref name="enumerator"/> on <paramref name="term"/> and returns its
/// <see cref="TermInfo"/>, or <c>null</c> if the set is empty or the term is absent.
/// <para/>
/// A forward scan from the enumerator's current position is tried first when the term lies
/// at or after it and within the current index block. Otherwise the enumerator is seeked to
/// the index entry for the term and scanned from there.
/// </summary>
/// <param name="enumerator">Enumerator to position.</param>
/// <param name="term">Term to seek to.</param>
/// <param name="tiOrd">Cached entry for the term, or <c>null</c> if none is known.</param>
/// <param name="useCache">When <c>true</c>, a found term with no cached entry is added to the cache.</param>
internal TermInfo SeekEnum(SegmentTermEnum enumerator, Term term, TermInfoAndOrd tiOrd, bool useCache)
{
    if (size == 0)
    {
        return null;
    }

    // optimize sequential access: first try scanning cached enum w/o seeking
    if (enumerator.Term() != null && ((enumerator.Prev() != null && CompareAsUTF16(term, enumerator.Prev()) > 0) || CompareAsUTF16(term, enumerator.Term()) >= 0)) // term is at or past current
    {
        int enumOffset = (int)(enumerator.position / totalIndexInterval) + 1;
        if (indexLength == enumOffset || index.CompareTo(term, enumOffset) < 0) // but before end of block
        {
            // no need to seek
            TermInfo ti;
            int numScans = enumerator.ScanTo(term);
            if (enumerator.Term() != null && CompareAsUTF16(term, enumerator.Term()) == 0)
            {
                ti = enumerator.termInfo;
                if (numScans > 1)
                {
                    // we only want to put this TermInfo into the cache if
                    // scanEnum skipped more than one dictionary entry.
                    // this prevents RangeQueries or WildcardQueries to
                    // wipe out the cache when they iterate over a large numbers
                    // of terms in order
                    if (tiOrd == null)
                    {
                        if (useCache)
                        {
                            termsCache.Put(new CloneableTerm(DeepCopyOf(term)), new TermInfoAndOrd(ti, enumerator.position));
                        }
                    }
                    else if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(SameTermInfo(ti, tiOrd, enumerator));
                        // Compared without narrowing to int, matching the random-access path below.
                        Debugging.Assert(enumerator.position == tiOrd.termOrd);
                    }
                }
            }
            else
            {
                ti = null;
            }
            return ti;
        }
    }

    // random-access: must seek
    int indexPos;
    if (tiOrd != null)
    {
        indexPos = (int)(tiOrd.termOrd / totalIndexInterval);
    }
    else
    {
        // Must do binary search:
        indexPos = index.GetIndexOffset(term);
    }

    index.SeekEnum(enumerator, indexPos);
    // The seek only lands on the index entry at indexPos; scan forward from there
    // to the requested term, as the sequential path above does.
    enumerator.ScanTo(term);

    TermInfo ti_;
    if (enumerator.Term() != null && CompareAsUTF16(term, enumerator.Term()) == 0)
    {
        ti_ = enumerator.termInfo;
        if (tiOrd == null)
        {
            if (useCache)
            {
                termsCache.Put(new CloneableTerm(DeepCopyOf(term)), new TermInfoAndOrd(ti_, enumerator.position));
            }
        }
        else if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(SameTermInfo(ti_, tiOrd, enumerator));
            Debugging.Assert(enumerator.position == tiOrd.termOrd);
        }
    }
    else
    {
        ti_ = null;
    }
    return ti_;
}
// called only from asserts
/// <summary>
/// Returns <c>true</c> when both term infos agree on doc frequency, freq pointer and prox
/// pointer, and on skip offset when the doc frequency reaches the enumerator's skip interval.
/// </summary>
private static bool SameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) // LUCENENET: CA1822: Mark members as static
{
    if (ti1.DocFreq != ti2.DocFreq)
    {
        return false;
    }
    if (ti1.FreqPointer != ti2.FreqPointer)
    {
        return false;
    }
    if (ti1.ProxPointer != ti2.ProxPointer)
    {
        return false;
    }
    // skipOffset is only valid when docFreq >= skipInterval:
    if (ti1.DocFreq >= enumerator.skipInterval && ti1.SkipOffset != ti2.SkipOffset)
    {
        return false;
    }
    return true;
}
/// <summary>
/// Throws if the terms index was not loaded when this reader was constructed.
/// </summary>
/// <exception cref="InvalidOperationException">The terms index is <c>null</c>.</exception>
private void EnsureIndexIsRead()
{
    if (index == null)
    {
        throw new InvalidOperationException("terms index was not loaded when this reader was created");
    }
}
/// <summary>
/// Returns the position of a <see cref="Term"/> in the set or -1.
/// </summary>
/// <param name="term">Term to locate.</param>
/// <exception cref="InvalidOperationException">The terms index was not loaded.</exception>
internal long GetPosition(Term term)
{
    if (size == 0)
    {
        return -1;
    }

    // The lookup below dereferences the terms index; fail with a descriptive
    // exception instead of a NullReferenceException when it was never loaded.
    EnsureIndexIsRead();
    int indexOffset = index.GetIndexOffset(term);

    SegmentTermEnum enumerator = GetThreadResources().termEnum;
    index.SeekEnum(enumerator, indexOffset);

    // Advance until the enumerator reaches or passes the term, or runs out of entries.
    while (CompareAsUTF16(term, enumerator.Term()) > 0 && enumerator.Next())
    {
    }

    // SeekEnum null-checks Term() after scanning, so the current term is
    // presumably null once the enumerator is exhausted; guard the same way here.
    Term current = enumerator.Term();
    if (current != null && CompareAsUTF16(term, current) == 0)
    {
        return enumerator.position;
    }
    else
    {
        return -1;
    }
}
/// <summary>
/// Returns an enumeration of all the <see cref="Term"/>s and <see cref="TermInfo"/>s in the set.
/// Each call returns a new clone of the original enumerator.
/// </summary>
public SegmentTermEnum Terms()
{
    return (SegmentTermEnum)origEnum.Clone();
}
/// <summary>
/// Returns an enumeration of terms starting at or after the named term.
/// The calling thread's enumerator is seeked to <paramref name="term"/> and a clone of it is returned.
/// </summary>
public SegmentTermEnum Terms(Term term)
{
    // Forces a seek of this thread's enumerator; the returned TermInfo is not needed here.
    Get(term, true);
    return (SegmentTermEnum)GetThreadResources().termEnum.Clone();
}
/// <summary>
/// Returns the value reported by the terms index's <c>RamBytesUsed()</c>, or 0 when the index was not loaded.
/// </summary>
internal long RamBytesUsed()
{
    return index == null ? 0 : index.RamBytesUsed();
}