blob: bdfc679df27ae72028a033d1d74b8c0dce8737e7 [file] [log] [blame]
using J2N;
using J2N.Collections.Generic.Extensions;
using Lucene.Net.Diagnostics;
using Lucene.Net.Support;
using Lucene.Net.Support.IO;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Text;
using System.Threading;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Index
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using ChecksumIndexInput = Lucene.Net.Store.ChecksumIndexInput;
using Codec = Lucene.Net.Codecs.Codec;
using CodecUtil = Lucene.Net.Codecs.CodecUtil;
using Directory = Lucene.Net.Store.Directory;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using IOContext = Lucene.Net.Store.IOContext;
using IOUtils = Lucene.Net.Util.IOUtils;
using Lucene3xCodec = Lucene.Net.Codecs.Lucene3x.Lucene3xCodec;
using Lucene3xSegmentInfoFormat = Lucene.Net.Codecs.Lucene3x.Lucene3xSegmentInfoFormat;
using Lucene3xSegmentInfoReader = Lucene.Net.Codecs.Lucene3x.Lucene3xSegmentInfoReader;
using StringHelper = Lucene.Net.Util.StringHelper;
/// <summary>
/// A collection of segmentInfo objects with methods for operating on
/// those segments in relation to the file system.
/// <para>
/// The active segments in the index are stored in the segment info file,
/// <c>segments_N</c>. There may be one or more <c>segments_N</c> files in the
/// index; however, the one with the largest generation is the active one (when
/// older segments_N files are present it's because they temporarily cannot be
/// deleted, or, a writer is in the process of committing, or a custom
/// <see cref="Lucene.Net.Index.IndexDeletionPolicy"/>
/// is in use). This file lists each segment by name and has details about the
/// codec and generation of deletes.
/// </para>
/// <para>There is also a file <c>segments.gen</c>. This file contains
/// the current generation (the <c>_N</c> in <c>segments_N</c>) of the index.
/// This is used only as a fallback in case the current generation cannot be
/// accurately determined by directory listing alone (as is the case for some NFS
/// clients with time-based directory cache expiration). This file simply contains
/// an <see cref="Store.DataOutput.WriteInt32(int)"/> version header
/// (<see cref="FORMAT_SEGMENTS_GEN_CURRENT"/>), followed by the
/// generation recorded as <see cref="Store.DataOutput.WriteInt64(long)"/>, written twice.</para>
/// <para>
/// Files:
/// <list type="bullet">
/// <item><description><c>segments.gen</c>: GenHeader, Generation, Generation, Footer</description></item>
/// <item><description><c>segments_N</c>: Header, Version, NameCounter, SegCount,
/// &lt;SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, UpdatesFiles&gt;<sup>SegCount</sup>,
/// CommitUserData, Footer</description></item>
/// </list>
/// </para>
/// Data types:
/// <para>
/// <list type="bullet">
/// <item><description>Header --&gt; <see cref="CodecUtil.WriteHeader(Store.DataOutput, string, int)"/></description></item>
/// <item><description>GenHeader, NameCounter, SegCount, DeletionCount --&gt; <see cref="Store.DataOutput.WriteInt32(int)"/></description></item>
/// <item><description>Generation, Version, DelGen, Checksum, FieldInfosGen --&gt; <see cref="Store.DataOutput.WriteInt64(long)"/></description></item>
/// <item><description>SegName, SegCodec --&gt; <see cref="Store.DataOutput.WriteString(string)"/></description></item>
/// <item><description>CommitUserData --&gt; <see cref="Store.DataOutput.WriteStringStringMap(IDictionary{string, string})"/></description></item>
/// <item><description>UpdatesFiles --&gt; <see cref="Store.DataOutput.WriteStringSet(ISet{string})"/></description></item>
/// <item><description>Footer --&gt; <see cref="CodecUtil.WriteFooter(IndexOutput)"/></description></item>
/// </list>
/// </para>
/// Field Descriptions:
/// <para>
/// <list type="bullet">
/// <item><description>Version counts how often the index has been changed by adding or deleting
/// documents.</description></item>
/// <item><description>NameCounter is used to generate names for new segment files.</description></item>
/// <item><description>SegName is the name of the segment, and is used as the file name prefix for
/// all of the files that compose the segment's index.</description></item>
/// <item><description>DelGen is the generation count of the deletes file. If this is -1,
/// there are no deletes. Anything above zero means there are deletes
/// stored by <see cref="Codecs.LiveDocsFormat"/>.</description></item>
/// <item><description>DeletionCount records the number of deleted documents in this segment.</description></item>
/// <item><description>SegCodec is the <see cref="Codec.Name"/> of the <see cref="Codec"/> that encoded
/// this segment.</description></item>
/// <item><description>CommitUserData stores an optional user-supplied opaque
/// <see cref="T:IDictionary{string, string}"/> that was passed to
/// <see cref="IndexWriter.SetCommitData(IDictionary{string, string})"/>.</description></item>
/// <item><description>FieldInfosGen is the generation count of the fieldInfos file. If this is -1,
/// there are no updates to the fieldInfos in that segment. Anything above zero
/// means there are updates to fieldInfos stored by <see cref="Codecs.FieldInfosFormat"/>.</description></item>
/// <item><description>UpdatesFiles stores the list of files that were updated in that segment.</description></item>
/// </list>
/// </para>
/// @lucene.experimental
/// </summary>
public sealed class SegmentInfos : IEnumerable<SegmentCommitInfo>
, System.ICloneable
/// <summary>
/// The file format version for the segments_N codec header, up to 4.5.
/// This is the lowest version accepted when a segments_N header is checked.
/// </summary>
public static readonly int VERSION_40 = 0;
/// <summary>
/// The file format version for the segments_N codec header, since 4.6+.
/// Files at or above this version carry a field-infos generation and
/// per-generation update file sets for every segment.
/// </summary>
public static readonly int VERSION_46 = 1;
/// <summary>
/// The file format version for the segments_N codec header, since 4.8+.
/// Files at or above this version end with a checksum that is verified on read;
/// it is also the version written for new segments_N files.
/// </summary>
public const int VERSION_48 = 2;
// Used for the segments.gen file only!
// Whenever you add a new format, make it 1 smaller (negative version logic)!
private const int FORMAT_SEGMENTS_GEN_47 = -2;
private const int FORMAT_SEGMENTS_GEN_CHECKSUM = -3;
// NOTE(review): a doc comment reading "Current format of segments.gen" preceded this
// property with no declaration of its own in this view. It presumably belongs to
// FORMAT_SEGMENTS_GEN_CURRENT, which other members reference - confirm that the
// constant is declared.
/// <summary>
/// Used to name new segments. The value is read from and written to the
/// segments_N file together with the segment list.
/// </summary>
public int Counter { get; set; }
// LUCENENET specific: Version made into property (see below)

// Generation of the "segments_N" for the next commit.
private long generation;
// Generation of the "segments_N" file we last successfully read
// or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit.
private long lastGeneration;
/// <summary>
/// Opaque <see cref="T:IDictionary{string, string}"/> that user can specify during <see cref="IndexWriter.Commit()"/>.
/// Starts out as an empty map.
/// </summary>
private IDictionary<string, string> userData = Collections.EmptyMap<string, string>();
// The segments held by this instance, in order; starts out empty.
private IList<SegmentCommitInfo> segments = new JCG.List<SegmentCommitInfo>();
/// <summary>
/// If non-null, information about loading segments_N files
/// will be printed here. Shared by all instances (static).</summary>
/// <seealso cref="InfoStream"/>
private static TextWriter infoStream = null;
/// <summary>
/// Sole constructor. Typically you call this and then
/// use <see cref="Read(Directory)"/> or
/// <see cref="Read(Directory, string)"/> to populate each
/// <see cref="SegmentCommitInfo"/>. Alternatively, you can add/remove your
/// own <see cref="SegmentCommitInfo"/>s.
/// <para/>
/// The segment list and the user data start out empty, as set by the
/// field initializers.
/// </summary>
public SegmentInfos()
/// <summary>
/// Looks up a single <see cref="SegmentCommitInfo"/> by its position in the
/// segment list.
/// </summary>
/// <param name="i"> zero-based position of the segment </param>
/// <returns> the <see cref="SegmentCommitInfo"/> stored at position <paramref name="i"/> </returns>
public SegmentCommitInfo Info(int i) => segments[i]; // LUCENENET TODO: API - add indexer for this class
/// <summary>
/// Scans a list of file names and reports the highest commit generation
/// among the segments_N files it contains (N in the segments_N file).
/// </summary>
/// <param name="files"> array of file names to check; may be <c>null</c> </param>
/// <returns> the largest generation found, or -1 when <paramref name="files"/> is
/// <c>null</c> or holds no segments_N file </returns>
public static long GetLastCommitGeneration(string[] files)
{
    long newest = -1;
    if (files == null)
    {
        return newest;
    }

    foreach (string candidate in files)
    {
        // Only names with the segments prefix count, and segments.gen is not a commit file.
        if (!candidate.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal)
            || candidate.Equals(IndexFileNames.SEGMENTS_GEN, StringComparison.Ordinal))
        {
            continue;
        }

        newest = Math.Max(newest, GenerationFromSegmentsFileName(candidate));
    }

    return newest;
}
/// <summary>
/// Get the generation of the most recent commit to the
/// index in this directory (N in the segments_N file).
/// </summary>
/// <param name="directory"> directory to search for the latest segments_N file </param>
/// <returns> the highest generation among the directory's segments_N files, or -1
/// when there is none or the directory does not exist </returns>
public static long GetLastCommitGeneration(Directory directory)
{
    try
    {
        return GetLastCommitGeneration(directory.ListAll());
    }
    catch (DirectoryNotFoundException)
    {
        // A missing directory is reported the same way as "no commit found".
        return -1;
    }
}
/// <summary>
/// Builds the name of the segments_N file belonging to the most recent
/// commit found in a list of index files.
/// </summary>
/// <param name="files"> array of file names to check </param>
/// <returns> the file name derived from the highest generation in <paramref name="files"/> </returns>
public static string GetLastCommitSegmentsFileName(string[] files)
{
    long latestGeneration = GetLastCommitGeneration(files);
    return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", latestGeneration);
}
/// <summary>
/// Builds the name of the segments_N file belonging to the most recent
/// commit to the index in the given <see cref="Directory"/>.
/// </summary>
/// <param name="directory"> directory to search for the latest segments_N file </param>
/// <returns> the file name derived from the highest generation found in <paramref name="directory"/> </returns>
public static string GetLastCommitSegmentsFileName(Directory directory)
{
    long latestGeneration = GetLastCommitGeneration(directory);
    return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", latestGeneration);
}
/// <summary>
/// Returns the segments_N file name that corresponds to the last generation
/// this instance successfully read or wrote.
/// </summary>
public string GetSegmentsFileName()
    => IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", lastGeneration);
/// <summary>
/// Extracts the generation encoded in a segments file name.
/// </summary>
/// <param name="fileName"> name of a segments file </param>
/// <returns> 0 when <paramref name="fileName"/> is exactly the bare segments name; otherwise
/// the suffix after the separator character, parsed with radix <see cref="Character.MaxRadix"/> </returns>
/// <exception cref="ArgumentException"> if <paramref name="fileName"/> does not start with the segments prefix </exception>
public static long GenerationFromSegmentsFileName(string fileName)
{
    if (fileName.Equals(IndexFileNames.SEGMENTS, StringComparison.Ordinal))
    {
        return 0;
    }

    if (!fileName.StartsWith(IndexFileNames.SEGMENTS, StringComparison.Ordinal))
    {
        throw new ArgumentException("fileName \"" + fileName + "\" is not a segments file");
    }

    // Skip the prefix plus the one separator character that follows it.
    string encodedGeneration = fileName.Substring(1 + IndexFileNames.SEGMENTS.Length);
    return Number.Parse(encodedGeneration, Character.MaxRadix);
}
/// <summary>
/// A utility for writing the <see cref="IndexFileNames.SEGMENTS_GEN"/> file to a
/// <see cref="Directory"/>.
/// <para/>
/// Failures are swallowed on purpose: the file is only a fallback used when
/// locating the current segments file.
/// <para/>
/// <b>NOTE:</b> this is an internal utility which is kept public so that it's
/// accessible by code from other packages. You should avoid calling this
/// method unless you're absolutely sure what you're doing!
/// <para/>
/// @lucene.internal
/// </summary>
/// <param name="dir"> directory in which the file is created and synced </param>
/// <param name="generation"> generation to record </param>
public static void WriteSegmentsGen(Directory dir, long generation)
// Create (or replace) segments.gen in the target directory.
IndexOutput genOutput = dir.CreateOutput(IndexFileNames.SEGMENTS_GEN, IOContext.READ_ONCE);
// Ask the directory to sync the freshly written file.
dir.Sync(new JCG.HashSet<string> { IndexFileNames.SEGMENTS_GEN });
catch (Exception)
// It's OK if we fail to write this file since it's
// used only as one of the retry fallbacks.
catch (Exception)
// Ignore; this file is only used in a retry
// fallback on init.
/// <summary>
/// Get the next segments_N filename that will be written.
/// </summary>
/// <returns> the file name for generation 1 when the current generation is -1,
/// otherwise the file name for the current generation plus one </returns>
public string GetNextSegmentFileName()
{
    // A generation of -1 means no generation has been set yet, so numbering starts at 1.
    long nextGeneration = generation == -1 ? 1 : generation + 1;
    return IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", nextGeneration);
}
/// <summary>
/// Read a particular <paramref name="segmentFileName"/>. Note that this may
/// throw an <see cref="IOException"/> if a commit is in process.
/// <para/>
/// The generation parsed from <paramref name="segmentFileName"/> becomes both the
/// current and the last generation of this instance.
/// </summary>
/// <param name="directory"> directory containing the segments file </param>
/// <param name="segmentFileName"> segment file to load </param>
/// <exception cref="CorruptIndexException"> if the index is corrupt </exception>
/// <exception cref="IOException"> if there is a low-level IO error </exception>
public void Read(Directory directory, string segmentFileName)
var success = false;
// Clear any previous segments:
// Both generation fields follow the file that is being loaded.
generation = GenerationFromSegmentsFileName(segmentFileName);
lastGeneration = generation;
// A checksumming input lets the stored checksum be compared further down.
var input = directory.OpenChecksumInput(segmentFileName, IOContext.READ);
// The leading int distinguishes codec-header files (4.0+) from legacy ones.
int format = input.ReadInt32();
int actualFormat;
if (format == CodecUtil.CODEC_MAGIC)
// 4.0+
actualFormat = CodecUtil.CheckHeaderNoMagic(input, "segments", VERSION_40, VERSION_48);
Version = input.ReadInt64();
Counter = input.ReadInt32();
int numSegments = input.ReadInt32();
if (numSegments < 0)
throw new CorruptIndexException("invalid segment count: " + numSegments + " (resource: " + input + ")");
for (var seg = 0; seg < numSegments; seg++)
// Each entry starts with the segment name followed by the name of its codec.
var segName = input.ReadString();
var codec = Codec.ForName(input.ReadString());
//System.out.println(" seg=" + seg + " codec=" + codec);
var info = codec.SegmentInfoFormat.SegmentInfoReader.Read(directory, segName, IOContext.READ);
info.Codec = codec;
long delGen = input.ReadInt64();
int delCount = input.ReadInt32();
// A deletion count outside [0, docCount] can only come from a damaged file.
if (delCount < 0 || delCount > info.DocCount)
throw new CorruptIndexException("invalid deletion count: " + delCount + " vs docCount=" + info.DocCount + " (resource: " + input + ")");
// -1 is the default when the file predates the field-infos generation.
long fieldInfosGen = -1;
if (actualFormat >= VERSION_46)
fieldInfosGen = input.ReadInt64();
var siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen);
if (actualFormat >= VERSION_46)
// Per-generation sets of update files: a count, then (generation, file set) pairs.
int numGensUpdatesFiles = input.ReadInt32();
IDictionary<long, ISet<string>> genUpdatesFiles;
if (numGensUpdatesFiles == 0)
genUpdatesFiles = Collections.EmptyMap<long, ISet<string>>();
genUpdatesFiles = new Dictionary<long, ISet<string>>(numGensUpdatesFiles);
for (int i = 0; i < numGensUpdatesFiles; i++)
genUpdatesFiles[input.ReadInt64()] = input.ReadStringSet();
userData = input.ReadStringStringMap();
// Legacy file: no codec header, so there is no header version to record.
actualFormat = -1;
Lucene3xSegmentInfoReader.ReadLegacyInfos(this, directory, input, format);
// Every segment read from a legacy file is attributed to the Lucene3x codec.
Codec codec = Codec.ForName("Lucene3x");
foreach (SegmentCommitInfo info in segments)
info.Info.Codec = codec;
if (actualFormat >= VERSION_48)
// Compare the checksum computed while reading with the one stored in the file.
long checksumNow = input.Checksum;
long checksumThen = input.ReadInt64();
if (checksumNow != checksumThen)
throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")");
#pragma warning disable 612, 618
#pragma warning restore 612, 618
success = true;
if (!success)
// Clear any segment infos we had loaded so we
// have a clean slate on retry:
/// <summary>
/// Locates the latest commit (<c>segments_N file</c>) in the directory and
/// loads all of its <see cref="SegmentCommitInfo"/>s into this instance.
/// Both generation fields are reset to -1 before the search starts.
/// </summary>
/// <param name="directory"> directory holding the index </param>
public void Read(Directory directory)
{
    lastGeneration = -1;
    generation = -1;

    var finder = new FindSegmentsFileAnonymousInnerClassHelper(this, directory);
    finder.Run();
}
/// <summary>
/// <see cref="FindSegmentsFile"/> implementation that loads whichever segments file
/// the search settles on into the <see cref="SegmentInfos"/> it was created for.
/// </summary>
private class FindSegmentsFileAnonymousInnerClassHelper : FindSegmentsFile
{
    private readonly SegmentInfos owner;

    public FindSegmentsFileAnonymousInnerClassHelper(SegmentInfos outerInstance, Directory directory)
        : base(directory)
    {
        owner = outerInstance;
    }

    /// <summary>
    /// Reads <paramref name="segmentFileName"/> into the owning instance; there is
    /// no result to hand back, so <c>null</c> is returned.
    /// </summary>
    protected internal override object DoBody(string segmentFileName)
    {
        owner.Read(directory, segmentFileName);
        return null;
    }
}
// Output for the segments_N file of a commit that is still in progress.
// Only non-null after PrepareCommit has been called and
// before FinishCommit (or RollbackCommit) has been called.
internal IndexOutput pendingSegnOutput;
// NOTE(review): nothing in this view reads the two constants below; presumably they
// identify the marker file written when a pre-4.0 segment is upgraded - confirm.
private const string SEGMENT_INFO_UPGRADE_CODEC = "SegmentInfo3xUpgrade";
private const int SEGMENT_INFO_UPGRADE_VERSION = 0;
/// <summary>
/// Writes the next segments_N file into <paramref name="directory"/> and keeps the
/// still-open output in <see cref="pendingSegnOutput"/>. Segments older than 4.0
/// additionally get a one-time upgraded info file plus a marker file.
/// </summary>
/// <param name="directory"> directory to write into </param>
/// <exception cref="InvalidOperationException"> if a segment's deletion count is negative
/// or larger than its document count </exception>
private void Write(Directory directory)
string segmentsFileName = GetNextSegmentFileName();
// Always advance the generation on write:
if (generation == -1)
generation = 1;
IndexOutput segnOutput = null;
bool success = false;
// Files created for upgraded pre-4.0 segments; consulted by the failure path below.
var upgradedSIFiles = new JCG.HashSet<string>();
segnOutput = directory.CreateOutput(segmentsFileName, IOContext.DEFAULT);
CodecUtil.WriteHeader(segnOutput, "segments", VERSION_48);
segnOutput.WriteInt32(Counter); // write counter
segnOutput.WriteInt32(Count); // write infos
foreach (SegmentCommitInfo siPerCommit in segments)
SegmentInfo si = siPerCommit.Info;
int delCount = siPerCommit.DelCount;
// Refuse to persist a deletion count that cannot be valid.
if (delCount < 0 || delCount > si.DocCount)
throw new InvalidOperationException("cannot write segment: invalid docCount segment=" + si.Name + " docCount=" + si.DocCount + " delCount=" + delCount);
IDictionary<long, ISet<string>> genUpdatesFiles = siPerCommit.UpdatesFiles;
foreach (KeyValuePair<long, ISet<string>> e in genUpdatesFiles)
if (Debugging.AssertsEnabled) Debugging.Assert(si.Dir == directory);
// If this segment is pre-4.x, perform a one-time
// "upgrade" to write the .si file for it:
string version = si.Version;
if (version == null || StringHelper.VersionComparer.Compare(version, "4.0") < 0)
if (!SegmentWasUpgraded(directory, si))
string markerFileName = IndexFileNames.SegmentFileName(si.Name, "upgraded", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION);
#pragma warning disable 612, 618
string segmentFileName = Write3xInfo(directory, si, IOContext.DEFAULT);
#pragma warning restore 612, 618
directory.Sync(/*Collections.singletonList(*/new[] { segmentFileName }/*)*/);
// Write separate marker file indicating upgrade
// is completed. This way, if there is a JVM
// kill/crash, OS crash, power loss, etc. while
// writing the upgraded file, the marker file
// will be missing:
IndexOutput @out = directory.CreateOutput(markerFileName, IOContext.DEFAULT);
directory.Sync(/*Collections.SingletonList(*/new[] { markerFileName }/*)*/);
// Hand the open output over to the pending commit.
pendingSegnOutput = segnOutput;
success = true;
if (!success)
// We hit an exception above; try to close the file
// but suppress any exception:
foreach (string fileName in upgradedSIFiles)
catch (Exception)
// Suppress so we keep throwing the original exception
// Try not to leave a truncated segments_N file in
// the index:
catch (Exception)
// Suppress so we keep throwing the original exception
/// <summary>
/// Checks whether the "upgraded" marker file of the given segment can be opened
/// in <paramref name="directory"/>.
/// </summary>
/// <param name="directory"> directory that would hold the marker file </param>
/// <param name="si"> segment whose marker file is looked up </param>
/// <returns> <c>true</c> when the marker file was read without an <see cref="IOException"/>;
/// <c>false</c> otherwise, in which case the caller upgrades the segment again </returns>
private static bool SegmentWasUpgraded(Directory directory, SegmentInfo si)
// Check marker file:
string markerFileName = IndexFileNames.SegmentFileName(si.Name, "upgraded", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION);
IndexInput @in = null;
@in = directory.OpenInput(markerFileName, IOContext.READ_ONCE);
return true;
catch (IOException)
// Ignore: if something is wrong w/ the marker file,
// we will just upgrade again
if (@in != null)
return false;
/// <summary>
/// Writes an upgraded segment info file for a segment whose codec is
/// <see cref="Lucene3xCodec"/> and returns the name of the file that was created.
/// </summary>
/// <param name="dir"> directory to create the file in </param>
/// <param name="si"> segment to describe; its codec must be <see cref="Lucene3xCodec"/> </param>
/// <param name="context"> IO context passed to <see cref="Directory.CreateOutput(string, IOContext)"/> </param>
/// <returns> name of the written file </returns>
/// <exception cref="InvalidOperationException"> if the codec of <paramref name="si"/> is not
/// <see cref="Lucene3xCodec"/> </exception>
public static string Write3xInfo(Directory dir, SegmentInfo si, IOContext context)
// NOTE: this is NOT how 3.x is really written...
string fileName = IndexFileNames.SegmentFileName(si.Name, "", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION);
//System.out.println("UPGRADE write " + fileName);
bool success = false;
IndexOutput output = dir.CreateOutput(fileName, context);
// we are about to write this SI in 3.x format, dropping all codec information, etc.
// so it had better be a 3.x segment or you will get very confusing errors later.
if ((si.Codec is Lucene3xCodec) == false)
throw new InvalidOperationException("cannot write 3x SegmentInfo unless codec is Lucene3x (got: " + si.Codec + ")");
CodecUtil.WriteHeader(output, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME, Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT);
// Write the Lucene version that created this segment, since 3.1
// The compound-file flag is stored as a single byte (YES or NO).
output.WriteByte((byte)(sbyte)(si.UseCompoundFile ? SegmentInfo.YES : SegmentInfo.NO));
success = true;
if (!success)
catch (Exception)
// Suppress so we keep throwing the original exception
return fileName;
/// <summary>
/// Returns a copy of this instance, also copying each
/// <see cref="SegmentInfo"/>.
/// <para/>
/// The copy starts as a memberwise clone and then receives its own segment list
/// and its own user data dictionary, so the collections are not shared with this
/// instance.
/// </summary>
/// <returns> the new <see cref="SegmentInfos"/>, typed as <see cref="object"/> </returns>
public object Clone()
var sis = (SegmentInfos)base.MemberwiseClone();
// deep clone, first recreate all collections:
sis.segments = new List<SegmentCommitInfo>(Count);
foreach (SegmentCommitInfo info in segments)
if (Debugging.AssertsEnabled) Debugging.Assert(info.Info.Codec != null);
// don't directly access segments, use add method!!!
sis.userData = new Dictionary<string, string>(userData);
return sis;
// LUCENENET specific property for accessing segments private field
public IList<SegmentCommitInfo> Segments
{
    get { return segments; }
}

/// <summary>
/// Version number recorded when this <see cref="SegmentInfos"/> was generated.
/// </summary>
public long Version { get; internal set; }

/// <summary>
/// The current generation. </summary>
public long Generation
{
    get { return generation; }
}

/// <summary>
/// The generation that was last successfully read or written. </summary>
public long LastGeneration
{
    get { return lastGeneration; }
}
/// <summary>
/// Destination for diagnostic output about retries while loading the
/// segments file; nothing is printed while this is <c>null</c>.
/// </summary>
public static TextWriter InfoStream
{
    get { return infoStream; }
    set
    {
        // LUCENENET specific - use a SafeTextWriterWrapper to ensure that if the TextWriter
        // is disposed by the caller (using block) we don't get any exceptions if we keep using it.
        if (value == null || value is SafeTextWriterWrapper)
        {
            // Nothing to wrap: either output is being switched off or the writer is already safe.
            infoStream = value;
        }
        else
        {
            infoStream = new SafeTextWriterWrapper(value);
        }
    }
}
/// <summary>
/// Advanced configuration of the retry logic used when loading the
/// segments_N file: upper bound for look-ahead attempts. Defaults to 10.
/// </summary>
private static int defaultGenLookaheadCount = 10;

/// <summary>
/// Gets or Sets the <see cref="defaultGenLookaheadCount"/>.
/// <para/>
/// Advanced: set how many times to try incrementing the
/// gen when loading the segments file. This only runs if
/// the primary (listing directory) and secondary (opening
/// segments.gen file) methods fail to find the segments
/// file.
/// <para/>
/// @lucene.experimental
/// </summary>
public static int DefaultGenLookaheadCount // LUCENENET specific: corrected spelling issue with the getter
{
    get { return defaultGenLookaheadCount; }
    set { defaultGenLookaheadCount = value; }
}
/// <summary>
/// Writes one line to the <see cref="InfoStream"/>, prefixed with <c>SIS</c> and the
/// name of the current thread. The stream is not checked for <c>null</c> here:
/// callers test it first, which also spares them from building the (usually
/// expensive) message when nothing would be printed.
/// </summary>
/// <param name="message"> text to print </param>
private static void Message(string message)
{
    string threadName = Thread.CurrentThread.Name;
    infoStream.WriteLine($"SIS [{threadName}]: {message}");
}
/// <summary>
/// Utility class for executing code that needs to do
/// something with the current segments file. This is
/// necessary with lock-less commits because from the time
/// you locate the current segments file name, until you
/// actually open it, read its contents, or check modified
/// time, etc., it could have been deleted due to a writer
/// commit finishing.
/// <para/>
/// Subclasses supply the work in <see cref="DoBody(string)"/>; <see cref="Run()"/>
/// decides which segments file it is applied to.
/// </summary>
public abstract class FindSegmentsFile
// Directory that is searched for segments files; also used by subclasses.
internal readonly Directory directory;
/// <summary>
/// Sole constructor. </summary>
/// <param name="directory"> directory in which the segments file is looked up </param>
protected FindSegmentsFile(Directory directory) // LUCENENET: CA1012: Abstract types should not have constructors (marked protected)
{
    // The field and the parameter share a name, so the field needs the explicit qualifier.
    this.directory = directory;
}
/// <summary>
/// Finds the most recent <c>segments</c> file and applies
/// <see cref="DoBody(string)"/> to it. Same as calling
/// <see cref="Run(IndexCommit)"/> without a commit.
/// </summary>
/// <returns> whatever <see cref="DoBody(string)"/> returned </returns>
public virtual object Run() => Run(null);
/// <summary>
/// Run <see cref="DoBody(string)"/> on the provided commit.
/// <para/>
/// With a non-null <paramref name="commit"/> the commit's own segments file is used
/// directly. Otherwise the current segments file is located by listing the
/// directory and reading <c>segments.gen</c>, retrying while a concurrent commit
/// may be moving the generation forward.
/// </summary>
/// <param name="commit"> commit to run on, or <c>null</c> to locate the latest one </param>
/// <returns> the value returned by <see cref="DoBody(string)"/> </returns>
/// <exception cref="IOException"> if <paramref name="commit"/> belongs to a different
/// directory, or if no attempt to process a segments file succeeded </exception>
/// <exception cref="IndexNotFoundException"> if neither the directory listing nor
/// <c>segments.gen</c> yields a generation </exception>
public virtual object Run(IndexCommit commit)
if (commit != null)
// An explicit commit must live in the directory this finder was created for.
if (directory != commit.Directory)
throw new IOException("the specified commit does not match the specified Directory");
return DoBody(commit.SegmentsFileName);
string segmentFileName/* = null*/; // LUCENENET: IDE0059: Remove unnecessary value assignment
long lastGen = -1;
long gen = 0;
int genLookaheadCount = 0;
IOException exc = null;
int retryCount = 0;
bool useFirstMethod = true;
// Loop until we succeed in calling doBody() without
// hitting an IOException. An IOException most likely
// means a commit was in process and has finished, in
// the time it took us to load the now-old infos files
// (and segments files). It's also possible it's a
// true error (corrupt index). To distinguish these,
// on each retry we must see "forward progress" on
// which generation we are trying to load. If we
// don't, then the original error is real and we throw
// it.
// We have three methods for determining the current
// generation. We try the first two in parallel (when
// useFirstMethod is true), and fall back to the third
// when necessary.
while (true)
if (useFirstMethod)
// List the directory and use the highest
// segments_N file. This method works well as long
// as there is no stale caching on the directory
// contents (NOTE: NFS clients often have such stale
// caching):
string[] files = directory.ListAll(); // LUCENENET: IDE0059: Remove unnecessary value assignment
long genA = -1;
if (files != null)
genA = GetLastCommitGeneration(files);
if (infoStream != null)
Message("directory listing genA=" + genA);
// Also open segments.gen and read its
// contents. Then we take the larger of the two
// gens. This way, if either approach is hitting
// a stale cache (NFS) we have a better chance of
// getting the right generation.
long genB = -1;
ChecksumIndexInput genInput = null;
genInput = directory.OpenChecksumInput(IndexFileNames.SEGMENTS_GEN, IOContext.READ_ONCE);
catch (IOException e)
if (infoStream != null)
Message("segments.gen open: IOException " + e);
if (genInput != null)
// File layout: a version int followed by the generation stored twice.
int version = genInput.ReadInt32();
long gen0 = genInput.ReadInt64();
long gen1 = genInput.ReadInt64();
if (infoStream != null)
Message("fallback check: " + gen0 + "; " + gen1);
#pragma warning disable 612, 618
#pragma warning restore 612, 618
if (gen0 == gen1)
// The file is consistent.
genB = gen0;
throw new IndexFormatTooNewException(genInput, version, FORMAT_SEGMENTS_GEN_START, FORMAT_SEGMENTS_GEN_CURRENT);
catch (IOException err2)
// rethrow any format exception
if (err2 is CorruptIndexException)
throw; // LUCENENET: CA2200: Rethrow to preserve stack details
if (infoStream != null)
Message(IndexFileNames.SEGMENTS_GEN + " check: genB=" + genB);
// Pick the larger of the two gen's:
gen = Math.Max(genA, genB);
if (gen == -1)
// Neither approach found a generation
throw new IndexNotFoundException("no segments* file found in " + directory + ": files: " + Arrays.ToString(files));
if (useFirstMethod && lastGen == gen && retryCount >= 2)
// Give up on first method -- this is 3rd cycle on
// listing directory and checking gen file to
// attempt to locate the segments file.
useFirstMethod = false;
// Second method: since both directory cache and
// file contents cache seem to be stale, just
// advance the generation.
if (!useFirstMethod)
// Look-ahead is bounded by the configurable default.
if (genLookaheadCount < defaultGenLookaheadCount)
if (infoStream != null)
Message("look ahead increment gen to " + gen);
// All attempts have failed -- throw first exc:
throw exc;
else if (lastGen == gen)
// This means we're about to try the same
// segments_N last tried.
// Segment file has advanced since our last loop
// (we made "progress"), so reset retryCount:
retryCount = 0;
lastGen = gen;
segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen);
object v = DoBody(segmentFileName);
if (infoStream != null)
Message("success on " + segmentFileName);
return v;
catch (IOException err)
// Save the original root cause:
if (exc == null)
exc = err;
if (infoStream != null)
Message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retryCount=" + retryCount + "; gen = " + gen);
if (gen > 1 && useFirstMethod && retryCount == 1)
// This is our second time trying this same segments
// file (because retryCount is 1), and, there is
// possibly a segments_(N-1) (because gen > 1).
// So, check if the segments_(N-1) exists and
// try it if so:
string prevSegmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen - 1);
bool prevExists;
// Existence probe: open the previous file and dispose it right away.
directory.OpenInput(prevSegmentFileName, IOContext.DEFAULT).Dispose();
prevExists = true;
catch (IOException) // LUCENENET: IDE0059: Remove unnecessary value assignment
prevExists = false;
if (prevExists)
if (infoStream != null)
Message("fallback to prior segment file '" + prevSegmentFileName + "'");
object v = DoBody(prevSegmentFileName);
if (infoStream != null)
Message("success on fallback " + prevSegmentFileName);
return v;
catch (IOException err2)
if (infoStream != null)
Message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry");
/// <summary>
/// Subclass must implement this. The assumption is an
/// <see cref="IOException"/> will be thrown if something goes wrong
/// during the processing that could have been caused by
/// a writer committing.
/// </summary>
/// <param name="segmentFileName"> name of the segments file to process </param>
/// <returns> a result that <see cref="Run(IndexCommit)"/> passes back to its caller </returns>
protected internal abstract object DoBody(string segmentFileName);
/// <summary>
/// Carries over the generation numbers from another <see cref="SegmentInfos"/>.
/// </summary>
/// <param name="other"> instance whose current and last generation are copied </param>
internal void UpdateGeneration(SegmentInfos other)
{
    generation = other.generation;
    lastGeneration = other.lastGeneration;
}
/// <summary>
/// Aborts a commit started with <see cref="PrepareCommit(Directory)"/>: the pending
/// output is dropped and the segments file of the current generation is deleted
/// from <paramref name="dir"/>, ignoring any exception raised by the deletion.
/// Does nothing when no commit is pending.
/// </summary>
/// <param name="dir"> directory holding the partially written segments file </param>
internal void RollbackCommit(Directory dir)
if (pendingSegnOutput != null)
// Suppress so we keep throwing the original exception
// in our caller
pendingSegnOutput = null;
// Must carefully compute fileName from "generation"
// since lastGeneration isn't incremented:
string segmentFileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
// Suppress so we keep throwing the original exception
// in our caller
IOUtils.DeleteFilesIgnoringExceptions(dir, segmentFileName);
/// <summary>
/// Call this to start a commit. This writes the new
/// segments file, but writes an invalid checksum at the
/// end, so that it is not visible to readers. Once this
/// is called you must call <see cref="FinishCommit(Directory)"/> to complete
/// the commit or <see cref="RollbackCommit(Directory)"/> to abort it.
/// <para>
/// Note: <see cref="Changed()"/> should be called prior to this
/// method if changes have been made to this <see cref="SegmentInfos"/> instance
/// </para>
/// </summary>
/// <param name="dir"> directory to commit to </param>
/// <exception cref="InvalidOperationException"> if a commit is already pending </exception>
internal void PrepareCommit(Directory dir)
// A non-null pending output means an earlier commit is still in progress.
if (pendingSegnOutput != null)
throw new InvalidOperationException("prepareCommit was already called");
/// <summary>
/// Returns all file names referenced by <see cref="SegmentInfo"/>
/// instances matching the provided <see cref="Directory"/> (ie files
/// associated with any "external" segments are skipped).
/// The returned collection is recomputed on each
/// invocation.
/// </summary>
/// <param name="dir"> directory whose segments are of interest </param>
/// <param name="includeSegmentsFile"> whether the segments file of the last generation
/// is taken into account as well </param>
/// <returns> a newly created set of file names </returns>
public ICollection<string> GetFiles(Directory dir, bool includeSegmentsFile)
var files = new JCG.HashSet<string>();
if (includeSegmentsFile)
string segmentFileName = GetSegmentsFileName();
if (segmentFileName != null)
var size = Count;
for (int i = 0; i < size; i++)
var info = Info(i);
if (Debugging.AssertsEnabled) Debugging.Assert(info.Info.Dir == dir);
// Only segments that live in the requested directory are considered.
if (info.Info.Dir == dir)
return files;
/// <summary>
/// Completes a commit started with <see cref="PrepareCommit(Directory)"/>: the
/// segments file of the current generation is synced, the last generation is
/// moved up to the current one and <c>segments.gen</c> is rewritten.
/// </summary>
/// <param name="dir"> directory being committed to </param>
/// <exception cref="InvalidOperationException"> if no commit is pending </exception>
internal void FinishCommit(Directory dir)
if (pendingSegnOutput == null)
throw new InvalidOperationException("prepareCommit was not called");
bool success = false;
success = true;
if (!success)
// Closes pendingSegnOutput & deletes partial segments_N:
success = false;
success = true;
if (!success)
// Closes pendingSegnOutput & deletes partial segments_N:
pendingSegnOutput = null;
// NOTE: if we crash here, we have left a segments_N
// file in the directory in a possibly corrupt state (if
// some bytes made it to stable storage and others
// didn't). But, the segments_N file includes checksum
// at the end, which should catch this case. So when a
// reader tries to read it, it will throw a
// CorruptIndexException, which should cause the retry
// logic in SegmentInfos to kick in and load the last
// good (previous) segments_N-1 file.
var fileName = IndexFileNames.FileNameFromGeneration(IndexFileNames.SEGMENTS, "", generation);
success = false;
dir.Sync(new JCG.HashSet<string> { fileName });
success = true;
if (!success)
catch (Exception)
// Suppress so we keep throwing the original exception
// The commit is durable now: record it and refresh the segments.gen fallback.
lastGeneration = generation;
WriteSegmentsGen(dir, generation);
/// <summary>
/// Writes &amp; syncs to the Directory dir, taking care to
/// remove the segments file on exception.
/// <para>
/// Note: <see cref="Changed()"/> should be called prior to this
/// method if changes have been made to this <see cref="SegmentInfos"/> instance.
/// </para>
/// </summary>
/// <param name="dir">Directory that is written to and synced.</param>
internal void Commit(Directory dir)
/// <summary>
/// Builds a readable one-line description: the segments file name, a colon,
/// and then the description of every contained segment separated by single spaces.
/// </summary>
/// <param name="directory">Directory handed to each segment's own <c>ToString</c>.</param>
/// <returns>The assembled description.</returns>
public string ToString(Directory directory)
{
    var text = new StringBuilder();
    text.Append(GetSegmentsFileName());
    text.Append(": ");

    int total = Count;
    int index = 0;
    while (index < total)
    {
        // Separator goes in front of every entry except the first one.
        if (index != 0)
        {
            text.Append(' ');
        }

        SegmentCommitInfo current = Info(index);
        text.Append(current.ToString(directory, 0));
        index++;
    }

    return text.ToString();
}
/// <summary>
/// Gets <see cref="userData"/> saved with this commit.
/// The setter is internal and substitutes an empty map when given <c>null</c>.
/// </summary>
/// <seealso cref="IndexWriter.Commit()"/>
public IDictionary<string, string> UserData
get => userData;
internal set
// null is replaced by an empty map.
if (value == null)
userData = Collections.EmptyMap<string, string>();
// NOTE(review): as extracted, this assignment looks unconditional; presumably it
// sits in an else branch that is not visible in this excerpt - confirm.
userData = value;
/// <summary>
/// Replaces all segments in this instance, but keeps
/// generation, version, counter so that future commits
/// remain write once.
/// </summary>
/// <param name="other">Instance whose <c>lastGeneration</c> is copied into this one.</param>
internal void Replace(SegmentInfos other)
// NOTE(review): only the lastGeneration copy is visible in this excerpt; the
// statement that actually swaps the segment list is not shown.
lastGeneration = other.lastGeneration;
/// <summary>
/// Gets the sum of <c>Info.DocCount</c> over every contained segment.
/// Note that this does not include deletions.
/// </summary>
public int TotalDocCount
{
    get
    {
        int total = 0;
        // Walk the same enumerator a foreach over this instance would use.
        using (IEnumerator<SegmentCommitInfo> cursor = GetEnumerator())
        {
            while (cursor.MoveNext())
            {
                total += cursor.Current.Info.DocCount;
            }
        }
        return total;
    }
}
/// <summary>
/// Call this before committing if changes have been made to the
/// segments. The documentation of <see cref="Commit(Directory)"/> states the
/// same requirement from the committing side.
/// </summary>
public void Changed()
/// <summary>
/// Applies all changes caused by committing a merge to this <see cref="SegmentInfos"/>.
/// </summary>
/// <param name="merge">The merge being committed; its <c>Segments</c> are the ones looked up as merged away.</param>
/// <param name="dropSegment">When <c>true</c>, the insertion branches guarded by <c>!dropSegment</c> are skipped.</param>
internal void ApplyMergeChanges(MergePolicy.OneMerge merge, bool dropSegment)
// Set of the merge's source segments, used for the membership test in the loop.
var mergedAway = new JCG.HashSet<SegmentCommitInfo>(merge.Segments);
bool inserted = false;
// newSegIdx is the write position; the assert below checks it never passes segIdx.
int newSegIdx = 0;
// cnt is captured once, so the loop bound is the list size at loop entry.
for (int segIdx = 0, cnt = segments.Count; segIdx < cnt; segIdx++)
if (Debugging.AssertsEnabled) Debugging.Assert(segIdx >= newSegIdx);
SegmentCommitInfo info = segments[segIdx];
if (mergedAway.Contains(info))
// Only the first merged-away position is eligible, and only if the result is kept.
if (!inserted && !dropSegment)
// NOTE(review): the right-hand side of the next assignment is missing in this
// excerpt; restore it from the original file.
segments[segIdx] =;
inserted = true;
// NOTE(review): the branch structure and the statements advancing newSegIdx are
// not visible in this excerpt.
segments[newSegIdx] = info;
// the rest of the segments in list are duplicates, so don't remove from map, only list!
segments.SubList(newSegIdx, segments.Count).Clear();
// Either we found place to insert segment, or, we did
// not, but only because all segments we merged became
// deleted while we are merging, in which case it should
// be the case that the new segment is also all deleted,
// we insert it at the beginning if it should not be dropped:
// NOTE(review): the statement guarded by this check is not visible in this excerpt.
if (!inserted && !dropSegment)
/// <summary>
/// Creates and returns a new list, pre-sized to <see cref="Count"/>, built while
/// iterating over the contained segments.
/// </summary>
/// <returns>The newly created list.</returns>
internal IList<SegmentCommitInfo> CreateBackupSegmentInfos()
var list = new List<SegmentCommitInfo>(Count);
foreach (var info in segments)
// Every segment is expected to carry a codec when asserts are enabled.
if (Debugging.AssertsEnabled) Debugging.Assert(info.Info.Codec != null);
// NOTE(review): the statement that adds to list is not visible in this excerpt;
// presumably each info (or a copy of it) is appended - confirm.
return list;
// NOTE(review): the body is not visible in this excerpt. Presumably this restores the
// segment list from infos, e.g. a list obtained from CreateBackupSegmentInfos - confirm.
internal void RollbackSegmentInfos(IList<SegmentCommitInfo> infos)
/// <summary>
/// Enumerates the contained segments in order. The <see cref="T:IEnumerator{SegmentCommitInfo}"/>
/// is obtained from the <b>unmodifiable</b> view returned by <see cref="AsList()"/>.
/// </summary>
/// <returns>An enumerator over the read-only segment view.</returns>
public IEnumerator<SegmentCommitInfo> GetEnumerator() => AsList().GetEnumerator();
// Non-generic enumeration simply forwards to the generic GetEnumerator().
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
/// <summary>
/// Exposes the contained segments as an <b>unmodifiable</b>
/// <see cref="T:IList{SegmentCommitInfo}"/> view, obtained via <c>segments.AsReadOnly()</c>.
/// </summary>
/// <returns>The read-only view of the segment list.</returns>
public IList<SegmentCommitInfo> AsList() => segments.AsReadOnly();
/// <summary>
/// Gets the number of <see cref="SegmentCommitInfo"/>s currently held.
/// <para/>
/// NOTE: This was size() in Lucene.
/// </summary>
public int Count
{
    get { return segments.Count; }
}
/// <summary>
/// Appends the provided <see cref="SegmentCommitInfo"/>. </summary>
/// <param name="si">The segment to append.</param>
public void Add(SegmentCommitInfo si)
/// <summary>
/// Appends the provided <see cref="SegmentCommitInfo"/>s. </summary>
/// <param name="sis">The segments to append; the sequence is enumerated once by this method.</param>
public void AddAll(IEnumerable<SegmentCommitInfo> sis)
// NOTE(review): the loop body is not visible in this excerpt.
foreach (var si in sis)
/// <summary>
/// Clears all <see cref="SegmentCommitInfo"/>s. </summary>
public void Clear()
/// <summary>
/// Remove the provided <see cref="SegmentCommitInfo"/>.
/// <para/><b>WARNING</b>: O(N) cost
/// </summary>
/// <param name="si">The segment to remove.</param>
public void Remove(SegmentCommitInfo si)
/// <summary>
/// Remove the <see cref="SegmentCommitInfo"/> at the
/// provided index.
/// <para/><b>WARNING</b>: O(N) cost
/// </summary>
/// <param name="index">Position of the segment to remove.</param>
internal void Remove(int index)
/// <summary>
/// Tells whether the provided <see cref="SegmentCommitInfo"/> is
/// present in the segment list.
/// <para/><b>WARNING</b>: O(N) cost
/// </summary>
/// <param name="si">The segment to look for.</param>
/// <returns>The result of <c>segments.Contains(si)</c>.</returns>
internal bool Contains(SegmentCommitInfo si) => segments.Contains(si);
/// <summary>
/// Looks up the position of the provided
/// <see cref="SegmentCommitInfo"/> in the segment list.
/// <para/><b>WARNING</b>: O(N) cost
/// </summary>
/// <param name="si">The segment to locate.</param>
/// <returns>The result of <c>segments.IndexOf(si)</c>.</returns>
internal int IndexOf(SegmentCommitInfo si) => segments.IndexOf(si);