using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Text;
using System.Threading;
namespace Lucene.Net.Index
{
using Lucene.Net.Util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Directory = Lucene.Net.Store.Directory;
using FixedBitSet = Lucene.Net.Util.FixedBitSet;
using MergeInfo = Lucene.Net.Store.MergeInfo;
//using AlreadySetException = Lucene.Net.Util.SetOnce.AlreadySetException;
/// <summary>
/// <p>Expert: a MergePolicy determines the sequence of
/// primitive merge operations.</p>
///
/// <p>Whenever the segments in an index have been altered by
/// <seealso cref="IndexWriter"/>, either through the addition of a newly
/// flushed segment, the addition of many segments from
/// addIndexes* calls, or a previous merge that may now need
/// to cascade, <seealso cref="IndexWriter"/> invokes
/// <seealso cref="FindMerges"/> to give the MergePolicy a chance to pick
/// merges that are now required. This method returns a
/// <seealso cref="MergeSpecification"/> instance describing the set of
/// merges that should be done, or null if no merges are
/// necessary. When IndexWriter.ForceMerge is called, it calls
/// <seealso cref="FindForcedMerges"/> and the MergePolicy should
/// then return the necessary merges.</p>
///
/// <p>Note that the policy can return more than one merge at
/// a time. In this case, if the writer is using
/// <seealso cref="SerialMergeScheduler"/>, the merges will be run
/// sequentially, but if it is using
/// <seealso cref="ConcurrentMergeScheduler"/> they will be run concurrently.</p>
///
/// <p>The default MergePolicy is
/// <seealso cref="TieredMergePolicy"/>.</p>
///
/// @lucene.experimental
/// </summary>
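/// <example>
/// The compound-file options defined on this base class can be tuned on any concrete
/// policy before it is handed to the writer configuration. A minimal, illustrative
/// sketch (the chosen values are arbitrary):
/// <code>
/// MergePolicy policy = new TieredMergePolicy();
/// policy.NoCFSRatio = 0.1;          // merged segments above 10% of the index stay non-compound
/// policy.MaxCFSSegmentSizeMB = 512; // ... as do merged segments larger than 512 MB
/// </code>
/// </example>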
public abstract class MergePolicy : IDisposable, ICloneable
{
/// <summary>
/// A map of doc IDs. </summary>
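/// <example>
/// A minimal sketch of a custom DocMap that writes documents in reverse order
/// (hypothetical, for illustration only):
/// <code>
/// internal sealed class ReverseDocMap : MergePolicy.DocMap
/// {
///     private readonly int maxDoc;
///
///     public ReverseDocMap(int maxDoc)
///     {
///         this.maxDoc = maxDoc;
///     }
///
///     // Old doc ID i ends up at position maxDoc - 1 - i in the merged segment.
///     public override int Map(int old)
///     {
///         return maxDoc - 1 - old;
///     }
/// }
/// </code>
/// </example>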
public abstract class DocMap
{
/// <summary>
/// Sole constructor, typically invoked from subclass constructors. </summary>
protected internal DocMap()
{
}
/// <summary>
/// Return the new doc ID according to its old value. </summary>
public abstract int Map(int old);
/// <summary>
/// Useful from an assert. </summary>
internal virtual bool IsConsistent(int maxDoc)
{
FixedBitSet targets = new FixedBitSet(maxDoc);
for (int i = 0; i < maxDoc; ++i)
{
int target = Map(i);
if (target < 0 || target >= maxDoc)
{
Debug.Assert(false, "out of range: " + target + " not in [0-" + maxDoc + "[");
return false;
}
else if (targets.Get(target))
{
Debug.Assert(false, target + " is already taken (" + i + ")");
return false;
}
targets.Set(target); // mark this target as used so a second mapping to it is detected
}
return true;
}
}
/// <summary>
/// OneMerge provides the information necessary to perform
/// an individual primitive merge operation, resulting in
/// a single new segment. The merge spec includes the
/// subset of segments to be merged as well as whether the
/// new segment should use the compound file format.
/// </summary>
public class OneMerge
{
internal SegmentCommitInfo info; // used by IndexWriter
internal bool RegisterDone; // used by IndexWriter
internal long MergeGen; // used by IndexWriter
internal bool IsExternal; // used by IndexWriter
public int MaxNumSegments = -1; // used by IndexWriter
/// <summary>
/// Estimated size in bytes of the merged segment. </summary>
public long EstimatedMergeBytes; // used by IndexWriter
// Sum of sizeInBytes of all SegmentInfos; set by IW.mergeInit
internal long TotalMergeBytes;
internal IList<SegmentReader> Readers; // used by IndexWriter
/// <summary>
/// Segments to be merged. </summary>
public readonly IList<SegmentCommitInfo> Segments;
/// <summary>
/// Total number of documents in the segments to be merged, not accounting for deletions. </summary>
public readonly int TotalDocCount;
internal bool Aborted_Renamed;
internal Exception Error;
internal bool Paused;
/// <summary>
/// Sole constructor. </summary>
/// <param name="segments"> List of <seealso cref="SegmentCommitInfo"/>s
/// to be merged. </param>
public OneMerge(IList<SegmentCommitInfo> segments)
{
if (0 == segments.Count)
{
throw new Exception("segments must include at least one segment");
}
// clone the list, as the input list may be based off the original SegmentInfos and may be modified
this.Segments = new List<SegmentCommitInfo>(segments);
int count = 0;
foreach (SegmentCommitInfo info in segments)
{
count += info.Info.DocCount;
}
TotalDocCount = count;
}
/// <summary>
/// Expert: Get the list of readers to merge. Note that this list does not
/// necessarily match the list of segments to merge and should only be used
/// to feed SegmentMerger to initialize a merge. When a <seealso cref="OneMerge"/>
/// reorders doc IDs, it must override <seealso cref="GetDocMap(MergeState)"/> too so that
/// deletes that happened during the merge can be applied to the newly
/// merged segment.
/// </summary>
public virtual IList<AtomicReader> MergeReaders
{
get
{
if (Readers == null)
{
throw new InvalidOperationException("IndexWriter has not initialized readers from the segment infos yet");
}
IList<AtomicReader> readers = new List<AtomicReader>(this.Readers.Count);
foreach (AtomicReader reader in this.Readers)
{
if (reader.NumDocs > 0)
{
readers.Add(reader);
}
}
return readers;
}
}
/// <summary>
/// Expert: Sets the <seealso cref="SegmentCommitInfo"/> of this <seealso cref="OneMerge"/>.
/// Allows sub-classes to e.g. set diagnostics properties.
/// </summary>
public virtual SegmentCommitInfo Info
{
set
{
this.info = value;
}
get
{
return info;
}
}
/// <summary>
/// Expert: If <seealso cref="MergeReaders"/> reorders document IDs, this method
/// must be overridden to return a mapping from the <i>natural</i> doc ID
/// (the doc ID that would result from a natural merge) to the actual doc
/// ID. This mapping is used to apply deletions that happened during the
/// merge to the new segment.
/// </summary>
public virtual DocMap GetDocMap(MergeState mergeState)
{
return new DocMapAnonymousInnerClassHelper(this);
}
private class DocMapAnonymousInnerClassHelper : DocMap
{
private readonly OneMerge OuterInstance;
public DocMapAnonymousInnerClassHelper(OneMerge outerInstance)
{
this.OuterInstance = outerInstance;
}
public override int Map(int docID)
{
return docID;
}
}
/// <summary>
/// Records (when set) or returns (when read) an exception that occurred
/// while executing this merge.
/// </summary>
internal virtual Exception Exception
{
set
{
lock (this)
{
this.Error = value;
}
}
get
{
lock (this)
{
return Error;
}
}
}
/// <summary>
/// Mark this merge as aborted. If this is called
/// before the merge is committed then the merge will
/// not be committed.
/// </summary>
internal virtual void Abort()
{
lock (this)
{
Aborted_Renamed = true;
Monitor.PulseAll(this);
}
}
/// <summary>
/// Returns true if this merge was aborted. </summary>
internal virtual bool Aborted
{
get
{
lock (this)
{
return Aborted_Renamed;
}
}
}
/// <summary>
/// Called periodically by <seealso cref="IndexWriter"/> while
/// merging to see if the merge is aborted.
/// </summary>
public virtual void CheckAborted(Directory dir)
{
lock (this)
{
if (Aborted_Renamed)
{
throw new MergeAbortedException("merge is aborted: " + SegString(dir));
}
while (Paused)
{
try
{
// In theory we could wait() indefinitely, but we
// do 1000 msec, defensively
Monitor.Wait(this, TimeSpan.FromMilliseconds(1000));
}
catch (ThreadInterruptedException ie)
{
throw new Exception(ie.Message, ie);
}
if (Aborted_Renamed)
{
throw new MergeAbortedException("merge is aborted: " + SegString(dir));
}
}
}
}
/// <summary>
/// Gets or sets whether this merge is paused (for example,
/// <seealso cref="ConcurrentMergeScheduler"/> will pause merges
/// if too many are running).
/// </summary>
public virtual bool Pause
{
set
{
lock (this)
{
this.Paused = value;
if (!value)
{
// Wakeup merge thread, if it's waiting
Monitor.PulseAll(this);
}
}
}
get
{
lock (this)
{
return Paused;
}
}
}
/// <summary>
/// Returns a readable description of the current merge
/// state.
/// </summary>
public virtual string SegString(Directory dir)
{
StringBuilder b = new StringBuilder();
int numSegments = Segments.Count;
for (int i = 0; i < numSegments; i++)
{
if (i > 0)
{
b.Append(' ');
}
b.Append(Segments[i].ToString(dir, 0));
}
if (info != null)
{
b.Append(" into ").Append(info.Info.Name);
}
if (MaxNumSegments != -1)
{
b.Append(" [maxNumSegments=" + MaxNumSegments + "]");
}
if (Aborted_Renamed)
{
b.Append(" [ABORTED]");
}
return b.ToString();
}
/// <summary>
/// Returns the total size in bytes of this merge. Note that this does not
/// indicate the size of the merged segment, but the
/// input total size. This is only set once the merge is
/// initialized by IndexWriter.
/// </summary>
public virtual long TotalBytesSize()
{
return TotalMergeBytes;
}
/// <summary>
/// Returns the total number of documents that are included with this merge.
/// Note that this does not indicate the number of documents after the merge.
///
/// </summary>
public virtual int TotalNumDocs()
{
int total = 0;
foreach (SegmentCommitInfo info in Segments)
{
total += info.Info.DocCount;
}
return total;
}
/// <summary>
/// Return <seealso cref="MergeInfo"/> describing this merge. </summary>
public virtual MergeInfo MergeInfo
{
get
{
return new MergeInfo(TotalDocCount, EstimatedMergeBytes, IsExternal, MaxNumSegments);
}
}
}
/// <summary>
/// A MergeSpecification instance provides the information
/// necessary to perform multiple merges. It simply
/// contains a list of <seealso cref="OneMerge"/> instances.
/// </summary>
public class MergeSpecification
{
/// <summary>
/// The list of primitive merge operations (<seealso cref="OneMerge"/> instances) to perform.
/// </summary>
public readonly IList<OneMerge> Merges = new List<OneMerge>();
/// <summary>
/// Sole constructor. Use <seealso cref="Add(OneMerge)"/> to add merges.
/// </summary>
public MergeSpecification()
{
}
/// <summary>
/// Adds the provided <seealso cref="OneMerge"/> to this
/// specification.
/// </summary>
public virtual void Add(OneMerge merge)
{
Merges.Add(merge);
}
/// <summary>
/// Returns a description of the merges in this
/// specification.
/// </summary>
public virtual string SegString(Directory dir)
{
StringBuilder b = new StringBuilder();
b.Append("MergeSpec:\n");
int count = Merges.Count;
for (int i = 0; i < count; i++)
{
b.Append(" ").Append(1 + i).Append(": ").Append(Merges[i].SegString(dir));
}
return b.ToString();
}
}
/// <summary>
/// Exception thrown if there are any problems while
/// executing a merge.
/// </summary>
public class MergeException : Exception
{
internal Directory Dir;
/// <summary>
/// Create a <c>MergeException</c>. </summary>
public MergeException(string message, Directory dir)
: base(message)
{
this.Dir = dir;
}
/// <summary>
/// Create a <c>MergeException</c>. </summary>
public MergeException(Exception exc, Directory dir)
: base(exc.Message)
{
this.Dir = dir;
}
/// <summary>
/// Returns the <seealso cref="Directory"/> of the index that hit
/// the exception.
/// </summary>
public virtual Directory Directory
{
get
{
return Dir;
}
}
}
/// <summary>
/// Thrown when a merge was explicitly aborted because
/// <seealso cref="IndexWriter.Dispose(bool)"/> was called with
/// <code>false</code>. Normally this exception is
/// privately caught and suppressed by <seealso cref="IndexWriter"/>.
/// </summary>
public class MergeAbortedException : System.IO.IOException
{
/// <summary>
/// Create a <seealso cref="MergeAbortedException"/>. </summary>
public MergeAbortedException()
: base("merge is aborted")
{
}
/// <summary>
/// Create a <seealso cref="MergeAbortedException"/> with a
/// specified message.
/// </summary>
public MergeAbortedException(string message)
: base(message)
{
}
}
/// <summary>
/// Default ratio for compound file system usage. Set to <c>1.0</c>: always use
/// the compound file system.
/// </summary>
protected internal const double DEFAULT_NO_CFS_RATIO = 1.0;
/// <summary>
/// Default max segment size in order to use the compound file system. Set to <c>long.MaxValue</c>.
/// </summary>
protected internal static readonly long DEFAULT_MAX_CFS_SEGMENT_SIZE = long.MaxValue;
/// <summary>
/// <seealso cref="IndexWriter"/> that contains this instance. </summary>
protected internal SetOnce<IndexWriter> Writer;
/// <summary>
/// If the size of the merged segment exceeds this ratio of
/// the total index size then it will remain in
/// non-compound format.
/// </summary>
protected internal double NoCFSRatio_Renamed = DEFAULT_NO_CFS_RATIO;
/// <summary>
/// If the size of the merged segment exceeds
/// this value then it will not use compound file format.
/// </summary>
protected internal long MaxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE;
public virtual object Clone()
{
MergePolicy clone;
clone = (MergePolicy)base.MemberwiseClone();
clone.Writer = new SetOnce<IndexWriter>();
return clone;
}
/// <summary>
/// Creates a new merge policy instance. Note that if you intend to use it
/// without passing it to <seealso cref="IndexWriter"/>, you should set the
/// <seealso cref="IndexWriter"/> property first.
/// </summary>
public MergePolicy()
: this(DEFAULT_NO_CFS_RATIO, DEFAULT_MAX_CFS_SEGMENT_SIZE)
{
}
/// <summary>
/// Creates a new merge policy instance with default settings for noCFSRatio
/// and maxCFSSegmentSize. This constructor should be used by subclasses that use
/// different defaults than <seealso cref="MergePolicy"/>.
/// </summary>
protected internal MergePolicy(double defaultNoCFSRatio, long defaultMaxCFSSegmentSize)
{
Writer = new SetOnce<IndexWriter>();
this.NoCFSRatio_Renamed = defaultNoCFSRatio;
this.MaxCFSSegmentSize = defaultMaxCFSSegmentSize;
}
/// <summary>
/// Sets the <seealso cref="IndexWriter"/> to be used by this merge policy. This property is
/// allowed to be set only once, and is usually set by IndexWriter. If it is
/// set more than once, <seealso cref="AlreadySetException"/> is thrown.
/// </summary>
/// <seealso cref= SetOnce </seealso>
public virtual IndexWriter IndexWriter
{
set
{
this.Writer.Set(value);
}
}
/// <summary>
/// Determine what set of merge operations are now necessary on the index.
/// <seealso cref="IndexWriter"/> calls this whenever there is a change to the segments.
/// This call is always synchronized on the <seealso cref="IndexWriter"/> instance so
/// only one thread at a time will call this method. </summary>
/// <param name="mergeTrigger"> the event that triggered the merge </param>
/// <param name="segmentInfos">
/// the total set of segments in the index </param>
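/// <example>
/// A deliberately simplified sketch of an implementation that merges every segment in
/// the index into a single new segment whenever there is more than one segment
/// (illustrative only, not a production policy):
/// <code>
/// public override MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos)
/// {
///     // Gather all segments currently in the index.
///     var allSegments = new List&lt;SegmentCommitInfo&gt;(segmentInfos.Segments);
///     if (allSegments.Count &lt; 2)
///     {
///         return null; // nothing to merge
///     }
///     MergeSpecification spec = new MergeSpecification();
///     spec.Add(new OneMerge(allSegments));
///     return spec;
/// }
/// </code>
/// </example>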
public abstract MergeSpecification FindMerges(MergeTrigger? mergeTrigger, SegmentInfos segmentInfos);
/// <summary>
/// Determine what set of merge operations is necessary in
/// order to merge to no more than the specified segment count. <seealso cref="IndexWriter"/> calls this when its
/// <seealso cref="IndexWriter.ForceMerge(int)"/> method is called. This call is always
/// synchronized on the <seealso cref="IndexWriter"/> instance so only one thread at a
/// time will call this method.
/// </summary>
/// <param name="segmentInfos">
/// the total set of segments in the index </param>
/// <param name="maxSegmentCount">
/// requested maximum number of segments in the index (currently this
/// is always 1) </param>
/// <param name="segmentsToMerge">
/// contains the specific SegmentInfo instances that must be merged
/// away. This may be a subset of all
/// SegmentInfos. If the value is <c>true</c> for a
/// given SegmentInfo, that means this segment was
/// an original segment present in the
/// to-be-merged index; else, it was a segment
/// produced by a cascaded merge. </param>
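/// <example>
/// A sketch of how an implementation might collect the segments that still have to be
/// merged away (illustrative only):
/// <code>
/// var pending = new List&lt;SegmentCommitInfo&gt;();
/// foreach (SegmentCommitInfo info in segmentInfos.Segments)
/// {
///     if (segmentsToMerge.ContainsKey(info))
///     {
///         pending.Add(info); // this segment was requested to be merged away
///     }
/// }
/// </code>
/// </example>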
public abstract MergeSpecification FindForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, IDictionary<SegmentCommitInfo, bool?> segmentsToMerge);
/// <summary>
/// Determine what set of merge operations is necessary in order to expunge all
/// deletes from the index.
/// </summary>
/// <param name="segmentInfos">
/// the total set of segments in the index </param>
public abstract MergeSpecification FindForcedDeletesMerges(SegmentInfos segmentInfos);
/// <summary>
/// Release all resources for the policy.
/// </summary>
public abstract void Dispose();
/// <summary>
/// Returns true if a new segment (regardless of its origin) should use the
/// compound file format. The default implementation returns <code>true</code>
/// iff the size of the given mergedInfo is less than or equal to
/// <seealso cref="MaxCFSSegmentSizeMB"/> and less than or equal to
/// the total index size * <seealso cref="NoCFSRatio"/>; otherwise it returns <code>false</code>.
/// </summary>
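/// <remarks>
/// For example, with <seealso cref="NoCFSRatio"/> set to 0.1 and a total index size of
/// 1000 MB, an 80 MB merged segment would use the compound format (80 &lt;= 0.1 * 1000),
/// while a 200 MB merged segment would not.
/// </remarks>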
public virtual bool UseCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo)
{
if (NoCFSRatio == 0.0)
{
return false;
}
long mergedInfoSize = Size(mergedInfo);
if (mergedInfoSize > MaxCFSSegmentSize)
{
return false;
}
if (NoCFSRatio >= 1.0)
{
return true;
}
long totalSize = 0;
foreach (SegmentCommitInfo info in infos.Segments)
{
totalSize += Size(info);
}
return mergedInfoSize <= NoCFSRatio * totalSize;
}
/// <summary>
/// Return the byte size of the provided
/// <seealso cref="SegmentCommitInfo"/>, pro-rated by the percentage of
/// non-deleted documents.
/// </summary>
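/// <remarks>
/// For example, a segment occupying 100 MB on disk in which 20% of the documents
/// are deleted is counted as 80 MB by this method.
/// </remarks>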
protected internal virtual long Size(SegmentCommitInfo info)
{
long byteSize = info.SizeInBytes();
int delCount = Writer.Get().NumDeletedDocs(info);
double delRatio = (info.Info.DocCount <= 0 ? 0.0f : ((float)delCount / (float)info.Info.DocCount));
Debug.Assert(delRatio <= 1.0);
return (info.Info.DocCount <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio)));
}
/// <summary>
/// Returns true if this single info is already fully merged (has no
/// pending deletes, is in the same dir as the
/// writer, and matches the current compound file setting).
/// </summary>
protected internal bool IsMerged(SegmentInfos infos, SegmentCommitInfo info)
{
IndexWriter w = Writer.Get();
Debug.Assert(w != null);
bool hasDeletions = w.NumDeletedDocs(info) > 0;
return !hasDeletions && !info.Info.HasSeparateNorms() && info.Info.Dir == w.Directory && UseCompoundFile(infos, info) == info.Info.UseCompoundFile;
}
/// <summary>
/// Gets or sets the current <c>noCFSRatio</c>: if the size of the merged segment
/// exceeds this ratio of the total index size, the segment is left in
/// non-compound format.
/// </summary>
public virtual double NoCFSRatio
{
get
{
return NoCFSRatio_Renamed;
}
set
{
if (value < 0.0 || value > 1.0)
{
throw new System.ArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + value);
}
this.NoCFSRatio_Renamed = value;
}
}
/// <summary>
/// Gets or sets the largest size (in MB) allowed for a compound file segment. </summary>
public virtual double MaxCFSSegmentSizeMB
{
get
{
return MaxCFSSegmentSize / 1024 / 1024.0;
}
set
{
if (value < 0.0)
{
throw new System.ArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + value + ")");
}
value *= 1024 * 1024;
this.MaxCFSSegmentSize = (value > long.MaxValue) ? long.MaxValue : (long)value;
}
}
}
}