// blob: 9e013f18fffe975823facfe84d10d2406761fc8c [file] [log] [blame]
using Lucene.Net.Diagnostics;
using System;
using System.Collections.Generic;
using System.IO;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// javadocs
using Directory = Lucene.Net.Store.Directory;
/// <summary>
/// <see cref="DirectoryReader"/> is an implementation of <see cref="CompositeReader"/>
/// that can read indexes in a <see cref="Store.Directory"/>.
///
/// <para/><see cref="DirectoryReader"/> instances are usually constructed with a call to
/// one of the static <c>Open()</c> methods, e.g. <see cref="Open(Directory)"/>.
///
/// <para/> For efficiency, in this API documents are often referred to via
/// <i>document numbers</i>, non-negative integers which each name a unique
/// document in the index. These document numbers are ephemeral -- they may change
/// as documents are added to and deleted from an index. Clients should thus not
/// rely on a given document having the same number between sessions.
///
/// <para/><b>NOTE</b>:
/// <see cref="IndexReader"/> instances are completely thread
/// safe, meaning multiple threads can call any of its methods,
/// concurrently. If your application requires external
/// synchronization, you should <b>not</b> synchronize on the
/// <see cref="IndexReader"/> instance; use your own
/// (non-Lucene) objects instead.
/// </summary>
public abstract class DirectoryReader : BaseCompositeReader<AtomicReader>
{
/// <summary>
/// Default termInfosIndexDivisor (<c>1</c>). The <c>Open</c> overloads that do not
/// take an explicit <c>termInfosIndexDivisor</c> argument pass this value through.
/// </summary>
public static readonly int DEFAULT_TERMS_INDEX_DIVISOR = 1;
/// <summary>
/// The index directory. Assigned once by the constructor and exposed through
/// <see cref="DirectoryReader.Directory"/>. </summary>
protected readonly Directory m_directory;
/// <summary>
/// Opens an <see cref="IndexReader"/> over the index stored in the given
/// <see cref="Store.Directory"/>, using <see cref="DEFAULT_TERMS_INDEX_DIVISOR"/>
/// as the terms index divisor.
/// </summary>
/// <param name="directory"> the index directory </param>
/// <returns> the newly opened <see cref="DirectoryReader"/> </returns>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public static new DirectoryReader Open(Directory directory)
    => StandardDirectoryReader.Open(directory, null, DEFAULT_TERMS_INDEX_DIVISOR);
/// <summary>
/// Expert: opens an <see cref="IndexReader"/> over the index stored in the given
/// <see cref="Store.Directory"/>, applying the supplied <paramref name="termInfosIndexDivisor"/>.
/// </summary>
/// <param name="directory"> the index directory </param>
/// <param name="termInfosIndexDivisor"> Subsamples which indexed terms are loaded into RAM.
/// This has the same effect as setting <see cref="LiveIndexWriterConfig.TermIndexInterval"/>
/// (on <see cref="IndexWriterConfig"/>), except that the latter must be chosen at
/// indexing time whereas this value can be chosen per reader. When set to N, one in
/// every N*termIndexInterval terms in the index is loaded into memory. A value &gt; 1
/// reduces memory usage at the expense of higher latency when loading a TermInfo.
/// The default value is 1. Pass -1 to skip loading the terms index entirely.
/// <b>NOTE:</b> divisor settings &gt; 1 do not apply to all <see cref="Codecs.PostingsFormat"/>
/// implementations, including the default one in this release. It only makes
/// sense for terms indexes that can efficiently re-sample terms at load time. </param>
/// <returns> the newly opened <see cref="DirectoryReader"/> </returns>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public static new DirectoryReader Open(Directory directory, int termInfosIndexDivisor)
    => StandardDirectoryReader.Open(directory, null, termInfosIndexDivisor);
/// <summary>
/// Opens a near real time <see cref="IndexReader"/> from the given <see cref="IndexWriter"/>
/// by delegating to the writer's <c>GetReader</c> method.
/// <para/>
/// @lucene.experimental
/// </summary>
/// <param name="writer"> The <see cref="IndexWriter"/> to open from </param>
/// <param name="applyAllDeletes"> If <c>true</c>, all buffered deletes will
/// be applied (made visible) in the returned reader. If <c>false</c>, the
/// deletes are not applied but remain buffered (in <see cref="IndexWriter"/>)
/// so that they will be applied in the future. Applying deletes can be costly,
/// so if your app can tolerate deleted documents being returned you might
/// gain some performance by passing <c>false</c>. </param>
/// <returns> The new <see cref="IndexReader"/> </returns>
/// <exception cref="CorruptIndexException"> if the index is corrupt </exception>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
/// <seealso cref="OpenIfChanged(DirectoryReader, IndexWriter, bool)"/>
public static new DirectoryReader Open(IndexWriter writer, bool applyAllDeletes)
    => writer.GetReader(applyAllDeletes);
/// <summary>
/// Expert: opens an <see cref="IndexReader"/> over the index at the given
/// <see cref="Index.IndexCommit"/>, read from the commit's own directory and
/// using <see cref="DEFAULT_TERMS_INDEX_DIVISOR"/>. </summary>
/// <param name="commit"> the commit point to open </param>
/// <returns> the newly opened <see cref="DirectoryReader"/> </returns>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public static new DirectoryReader Open(IndexCommit commit)
    => StandardDirectoryReader.Open(commit.Directory, commit, DEFAULT_TERMS_INDEX_DIVISOR);
/// <summary>
/// Expert: opens an <see cref="IndexReader"/> over the index at the given
/// <see cref="Index.IndexCommit"/>, read from the commit's own directory and
/// applying the supplied <paramref name="termInfosIndexDivisor"/>. </summary>
/// <param name="commit"> the commit point to open </param>
/// <param name="termInfosIndexDivisor"> Subsamples which indexed terms are loaded into RAM.
/// This has the same effect as setting <see cref="LiveIndexWriterConfig.TermIndexInterval"/>
/// (on <see cref="IndexWriterConfig"/>), except that the latter must be chosen at
/// indexing time whereas this value can be chosen per reader. When set to N, one in
/// every N*termIndexInterval terms in the index is loaded into memory. A value &gt; 1
/// reduces memory usage at the expense of higher latency when loading a TermInfo.
/// The default value is 1. Pass -1 to skip loading the terms index entirely.
/// <b>NOTE:</b> divisor settings &gt; 1 do not apply to all <see cref="Codecs.PostingsFormat"/>
/// implementations, including the default one in this release. It only makes
/// sense for terms indexes that can efficiently re-sample terms at load time. </param>
/// <returns> the newly opened <see cref="DirectoryReader"/> </returns>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public static new DirectoryReader Open(IndexCommit commit, int termInfosIndexDivisor)
    => StandardDirectoryReader.Open(commit.Directory, commit, termInfosIndexDivisor);
/// <summary>
/// Reopens the provided reader if the index has changed since it was opened:
/// returns a new reader in that case, or <c>null</c> otherwise. The new reader,
/// if not <c>null</c>, will be the same type of reader as the previous one,
/// ie a near-real-time (NRT) reader will open a new NRT reader, a
/// <see cref="MultiReader"/> will open a new <see cref="MultiReader"/>, etc.
///
/// <para/>This method is typically far less costly than opening a
/// fully new <see cref="DirectoryReader"/> as it shares
/// resources (for example sub-readers) with the provided
/// <see cref="DirectoryReader"/>, when possible.
///
/// <para/>The provided reader is not disposed (you are responsible
/// for doing so); if a new reader is returned you also
/// must eventually dispose it. Be sure to never dispose a
/// reader while other threads are still using it; see
/// <see cref="Search.SearcherManager"/> to simplify managing this.
/// </summary>
/// <param name="oldReader"> the reader to reopen; its <see cref="DoOpenIfChanged()"/>
/// implementation performs the actual work </param>
/// <exception cref="CorruptIndexException"> if the index is corrupt </exception>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
/// <returns> <c>null</c> if there are no changes; else, a new
/// <see cref="DirectoryReader"/> instance which you must eventually dispose </returns>
public static DirectoryReader OpenIfChanged(DirectoryReader oldReader)
{
    DirectoryReader reopened = oldReader.DoOpenIfChanged();
    if (Debugging.AssertsEnabled)
    {
        // A reopen must never hand back the very instance it was given.
        Debugging.Assert(reopened != oldReader);
    }
    return reopened;
}
/// <summary>
/// Reopens the provided reader against a specific <see cref="Index.IndexCommit"/>:
/// if the commit differs from what the reader is searching, a new reader is
/// opened and returned; else, <c>null</c> is returned.
/// </summary>
/// <param name="oldReader"> the reader to reopen; its
/// <see cref="DoOpenIfChanged(IndexCommit)"/> implementation performs the actual work </param>
/// <param name="commit"> the commit point to compare against and open </param>
/// <returns> <c>null</c> if there are no changes; else, a new
/// <see cref="DirectoryReader"/> instance </returns>
/// <seealso cref="OpenIfChanged(DirectoryReader)"/>
public static DirectoryReader OpenIfChanged(DirectoryReader oldReader, IndexCommit commit)
{
    DirectoryReader reopened = oldReader.DoOpenIfChanged(commit);
    if (Debugging.AssertsEnabled)
    {
        // A reopen must never hand back the very instance it was given.
        Debugging.Assert(reopened != oldReader);
    }
    return reopened;
}
/// <summary>
/// Expert: If there are changes (committed or not) in the
/// <see cref="IndexWriter"/> versus what the provided reader is
/// searching, then open and return a new
/// <see cref="IndexReader"/> searching both committed and uncommitted
/// changes from the writer; else, return <c>null</c> (though, the
/// current implementation never returns <c>null</c>).
///
/// <para/>This provides "near real-time" searching, in that
/// changes made during an <see cref="IndexWriter"/> session can be
/// quickly made available for searching without closing
/// the writer nor calling <see cref="IndexWriter.Commit()"/>.
///
/// <para>It's <i>near</i> real-time because there is no hard
/// guarantee on how quickly you can get a new reader after
/// making changes with <see cref="IndexWriter"/>. You'll have to
/// experiment in your situation to determine if it's
/// fast enough. As this is a new and experimental
/// feature, please report back on your findings so we can
/// learn, improve and iterate.</para>
///
/// <para>The very first time this method is called, this
/// writer instance will make every effort to pool the
/// readers that it opens for doing merges, applying
/// deletes, etc. This means additional resources (RAM,
/// file descriptors, CPU time) will be consumed.</para>
///
/// <para>For lower latency on reopening a reader, you should
/// set <see cref="LiveIndexWriterConfig.MergedSegmentWarmer"/> (on <see cref="IndexWriterConfig"/>) to
/// pre-warm a newly merged segment before it's committed
/// to the index. This is important for minimizing
/// index-to-search delay after a large merge. </para>
///
/// <para>If an AddIndexes* call is running in another thread,
/// then this reader will only search those segments from
/// the foreign index that have been successfully copied
/// over, so far.</para>
///
/// <para><b>NOTE</b>: Once the writer is disposed, any
/// outstanding readers may continue to be used. However,
/// if you attempt to reopen any of those readers, you'll
/// hit an <see cref="ObjectDisposedException"/>.</para>
///
/// @lucene.experimental
/// </summary>
/// <returns> <see cref="DirectoryReader"/> that covers entire index plus all
/// changes made so far by this <see cref="IndexWriter"/> instance, or
/// <c>null</c> if there are no new changes
/// </returns>
/// <param name="oldReader"> the reader to reopen; its
/// <see cref="DoOpenIfChanged(IndexWriter, bool)"/> implementation performs the actual work
/// </param>
/// <param name="writer"> The <see cref="IndexWriter"/> to open from
/// </param>
/// <param name="applyAllDeletes"> If <c>true</c>, all buffered deletes will
/// be applied (made visible) in the returned reader. If
/// <c>false</c>, the deletes are not applied but remain buffered
/// (in <see cref="IndexWriter"/>) so that they will be applied in the
/// future. Applying deletes can be costly, so if your app
/// can tolerate deleted documents being returned you might
/// gain some performance by passing <c>false</c>.
/// </param>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public static DirectoryReader OpenIfChanged(DirectoryReader oldReader, IndexWriter writer, bool applyAllDeletes)
{
    DirectoryReader reopened = oldReader.DoOpenIfChanged(writer, applyAllDeletes);
    if (Debugging.AssertsEnabled)
    {
        // A reopen must never hand back the very instance it was given.
        Debugging.Assert(reopened != oldReader);
    }
    return reopened;
}
/// <summary>
/// Lists all commit points that exist in the given <see cref="Store.Directory"/>.
/// Normally, because the default is
/// <see cref="KeepOnlyLastCommitDeletionPolicy"/>, there would be only
/// one commit point. But if you're using a custom
/// <see cref="IndexDeletionPolicy"/> then there could be many commits.
/// Once you have a given commit, you can open a reader on
/// it by calling <see cref="DirectoryReader.Open(IndexCommit)"/>.
/// There must be at least one commit in
/// the <see cref="Store.Directory"/>, else this method throws
/// <see cref="IndexNotFoundException"/>. Note that if a commit is in
/// progress while this method is running, that commit
/// may or may not be returned.
/// </summary>
/// <param name="dir"> the directory whose commit points are listed </param>
/// <returns> a sorted list of <see cref="Index.IndexCommit"/>s, from oldest
/// to latest. </returns>
public static IList<IndexCommit> ListCommits(Directory dir)
{
    string[] fileNames = dir.ListAll();

    List<IndexCommit> result = new List<IndexCommit>();

    // The latest commit is always part of the result; its generation is the
    // upper bound used to pick out the older segments files below.
    SegmentInfos newest = new SegmentInfos();
    newest.Read(dir);
    long newestGen = newest.Generation;
    result.Add(new StandardDirectoryReader.ReaderCommit(newest, dir));

    foreach (string name in fileNames)
    {
        // Skip anything that is not a segments file, the segments-gen file
        // itself, and every generation that is not older than the latest one.
        if (!name.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal)
            || name.Equals(IndexFileNames.SEGMENTS_GEN, StringComparison.Ordinal)
            || SegmentInfos.GenerationFromSegmentsFileName(name) >= newestGen)
        {
            continue;
        }

        SegmentInfos older = new SegmentInfos();
        try
        {
            // IOException allowed to throw there, in case
            // segments_N is corrupt
            older.Read(dir, name);
        }
        catch (Exception e) when (e is FileNotFoundException || e is DirectoryNotFoundException)
        {
            // LUCENE-948: on NFS (and maybe others), if
            // you have writers switching back and forth
            // between machines, it's very likely that the
            // dir listing will be stale and will claim a
            // file segments_X exists when in fact it
            // doesn't. So, we catch this and handle it
            // as if the file does not exist.
            //
            // LUCENENET specific - .NET only has one FileNotFoundException, so
            // Java's NoSuchFileException needs no counterpart here. Since
            // NoSuchDirectoryException subclasses FileNotFoundException in
            // Lucene, DirectoryNotFoundException is handled too, to be on the
            // safe side.
            continue;
        }

        result.Add(new StandardDirectoryReader.ReaderCommit(older, dir));
    }

    // Ensure that the commit points are sorted in ascending order.
    result.Sort();
    return result;
}
/// <summary>
/// Returns <c>true</c> if an index likely exists at the specified directory.
/// <para/>
/// The check is deliberately lenient: it only looks at the file names returned
/// by the directory listing. Note that if a corrupt index exists, or if an
/// index is in the process of committing, this method may therefore still
/// return <c>true</c>.
/// </summary>
/// <param name="directory"> the directory to check for an index </param>
/// <returns> <c>true</c> if an index exists; <c>false</c> otherwise </returns>
public static bool IndexExists(Directory directory)
{
    // LUCENE-2812, LUCENE-2727, LUCENE-4738: this logic will
    // return true in cases that should arguably be false,
    // such as only IW.prepareCommit has been called, or a
    // corrupt first commit, but it's too deadly to make
    // this logic "smarter" and risk accidentally returning
    // false due to various cases like file descriptor
    // exhaustion, access denied, etc., because in that
    // case IndexWriter may delete the entire index. It's
    // safer to err towards "index exists" than try to be
    // smart about detecting not-yet-fully-committed or
    // corrupt indices. This means that IndexWriter will
    // throw an exception on such indices and the app must
    // resolve the situation manually:
    string[] files;
    try
    {
        files = directory.ListAll();
    }
    catch (DirectoryNotFoundException)
    {
        // Directory does not exist --> no index exists
        return false;
    }

    // Defensive: maybe a Directory impl returns null
    // instead of throwing NoSuchDirectoryException:
    if (files == null)
    {
        return false;
    }

    // Any "segments_*" file, or the segments-gen file, counts as evidence of an index.
    string prefix = IndexFileNames.SEGMENTS + "_";
    foreach (string file in files)
    {
        if (file.StartsWith(prefix, StringComparison.Ordinal) || file.Equals(IndexFileNames.SEGMENTS_GEN, StringComparison.Ordinal))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Expert: Constructs a <see cref="DirectoryReader"/> on the given <paramref name="segmentReaders"/>,
/// remembering <paramref name="directory"/> as the index directory. </summary>
/// <param name="directory"> the directory this index resides in; stored as-is and
/// later returned by <see cref="DirectoryReader.Directory"/> </param>
/// <param name="segmentReaders"> the wrapped atomic index segment readers. This array is
/// returned by <see cref="CompositeReader.GetSequentialSubReaders"/> and used to resolve the correct
/// subreader for docID-based methods. <b>Please note:</b> this array is <b>not</b>
/// cloned and not protected for modification outside of this reader.
/// Subclasses of <see cref="DirectoryReader"/> should take care to not allow
/// modification of this internal array, e.g. <see cref="DoOpenIfChanged()"/>. </param>
protected DirectoryReader(Directory directory, AtomicReader[] segmentReaders)
    : base(segmentReaders)
{
    m_directory = directory;
}
/// <summary>
/// Gets the directory this index resides in. </summary>
public Directory Directory
{
    get
    {
        // Don't ensureOpen here -- in certain cases, when a
        // cloned/reopened reader needs to commit, it may call
        // this method on the closed original reader
        return m_directory;
    }
}
/// <summary>
/// Implement this method to support <see cref="OpenIfChanged(DirectoryReader)"/>,
/// which calls it on the reader being reopened.
/// If this reader does not support reopen, return <c>null</c>, so
/// client code is happy. This should be consistent with <see cref="IsCurrent()"/>
/// (should always return <c>true</c>) if reopen is not supported.
/// <para/>
/// When asserts are enabled, <see cref="OpenIfChanged(DirectoryReader)"/> asserts that
/// the returned reader is not this same instance. </summary>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
/// <returns> <c>null</c> if there are no changes; else, a new
/// <see cref="DirectoryReader"/> instance. </returns>
protected internal abstract DirectoryReader DoOpenIfChanged();
/// <summary>
/// Implement this method to support <see cref="OpenIfChanged(DirectoryReader, IndexCommit)"/>,
/// which calls it on the reader being reopened.
/// If this reader does not support reopen from a specific <see cref="Index.IndexCommit"/>,
/// throw <see cref="NotSupportedException"/>.
/// <para/>
/// When asserts are enabled, <see cref="OpenIfChanged(DirectoryReader, IndexCommit)"/> asserts
/// that the returned reader is not this same instance. </summary>
/// <param name="commit"> the commit point forwarded from
/// <see cref="OpenIfChanged(DirectoryReader, IndexCommit)"/> </param>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
/// <returns> <c>null</c> if there are no changes; else, a new
/// <see cref="DirectoryReader"/> instance. </returns>
protected internal abstract DirectoryReader DoOpenIfChanged(IndexCommit commit);
/// <summary>
/// Implement this method to support <see cref="OpenIfChanged(DirectoryReader, IndexWriter, bool)"/>,
/// which calls it on the reader being reopened.
/// If this reader does not support reopen from <see cref="IndexWriter"/>,
/// throw <see cref="NotSupportedException"/>.
/// <para/>
/// When asserts are enabled, <see cref="OpenIfChanged(DirectoryReader, IndexWriter, bool)"/>
/// asserts that the returned reader is not this same instance. </summary>
/// <param name="writer"> the <see cref="IndexWriter"/> forwarded from
/// <see cref="OpenIfChanged(DirectoryReader, IndexWriter, bool)"/> </param>
/// <param name="applyAllDeletes"> forwarded unchanged from
/// <see cref="OpenIfChanged(DirectoryReader, IndexWriter, bool)"/>; if <c>true</c>, all
/// buffered deletes are expected to be applied (made visible) in the returned reader </param>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
/// <returns> <c>null</c> if there are no changes; else, a new
/// <see cref="DirectoryReader"/> instance. </returns>
protected internal abstract DirectoryReader DoOpenIfChanged(IndexWriter writer, bool applyAllDeletes);
/// <summary>
/// Version number when this <see cref="IndexReader"/> was opened.
///
/// <para>This property
/// returns the version recorded in the commit that the
/// reader opened. This version is advanced every time
/// a change is made with <see cref="IndexWriter"/>.</para>
/// </summary>
public abstract long Version { get; }
/// <summary>
/// Check whether any new changes have occurred to the
/// index since this reader was opened.
///
/// <para>If this reader was created by calling an overload of <see cref="Open(Directory)"/>,
/// then this method checks if any further commits
/// (see <see cref="IndexWriter.Commit()"/>) have occurred in the
/// directory.</para>
///
/// <para>If instead this reader is a near real-time reader
/// (ie, obtained by a call to
/// <see cref="DirectoryReader.Open(IndexWriter, bool)"/>, or by calling an overload of <see cref="OpenIfChanged(DirectoryReader)"/>
/// on a near real-time reader), then this method checks if
/// either a new commit has occurred, or any new
/// uncommitted changes have taken place via the writer.
/// Note that even if the writer has only performed
/// merging, this method will still return <c>false</c>.</para>
///
/// <para>In any event, if this returns <c>false</c>, you should call
/// an overload of <see cref="OpenIfChanged(DirectoryReader)"/> to get a new reader that sees the
/// changes.</para>
/// </summary>
/// <returns> <c>false</c> if changes have occurred and a reopen is needed to see them
/// (see remarks above); presumably <c>true</c> when the reader is still up to
/// date -- the concrete check lives in the implementing subclass </returns>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public abstract bool IsCurrent();
/// <summary>
/// Expert: Gets the <see cref="Index.IndexCommit"/> that this reader has opened.
/// A commit can in turn be passed to <see cref="Open(IndexCommit)"/> or
/// <see cref="OpenIfChanged(DirectoryReader, IndexCommit)"/>.
/// <para/>
/// @lucene.experimental
/// </summary>
public abstract IndexCommit IndexCommit { get; }
}
}