using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Index;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Store;
using J2N.Threading.Atomic;
using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.IO;
using System.Reflection;
namespace Lucene.Net.Facet.Taxonomy.Directory
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AtomicReader = Lucene.Net.Index.AtomicReader;
using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
using LockObtainFailedException = Lucene.Net.Store.LockObtainFailedException; // javadocs
using BytesRef = Lucene.Net.Util.BytesRef;
using Cl2oTaxonomyWriterCache = Lucene.Net.Facet.Taxonomy.WriterCache.Cl2oTaxonomyWriterCache;
using Directory = Lucene.Net.Store.Directory;
using CorruptIndexException = Lucene.Net.Index.CorruptIndexException; // javadocs
using DirectoryReader = Lucene.Net.Index.DirectoryReader;
using DocsEnum = Lucene.Net.Index.DocsEnum;
using Document = Lucene.Net.Documents.Document;
using Field = Lucene.Net.Documents.Field;
using FieldType = Lucene.Net.Documents.FieldType;
using IndexWriter = Lucene.Net.Index.IndexWriter;
using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig;
using LogByteSizeMergePolicy = Lucene.Net.Index.LogByteSizeMergePolicy;
using OpenMode = Lucene.Net.Index.OpenMode;
using ReaderManager = Lucene.Net.Index.ReaderManager;
using SegmentInfos = Lucene.Net.Index.SegmentInfos;
using StringField = Lucene.Net.Documents.StringField;
using ITaxonomyWriterCache = Lucene.Net.Facet.Taxonomy.WriterCache.ITaxonomyWriterCache;
using Terms = Lucene.Net.Index.Terms;
using TermsEnum = Lucene.Net.Index.TermsEnum;
using TextField = Lucene.Net.Documents.TextField;
using TieredMergePolicy = Lucene.Net.Index.TieredMergePolicy;
using TokenStream = Lucene.Net.Analysis.TokenStream;
/// <summary>
/// <see cref="ITaxonomyWriter"/> which uses a <see cref="Store.Directory"/> to store the taxonomy
/// information on disk, and keeps an additional in-memory cache of some or all
/// categories.
/// <para>
/// In addition to the permanently-stored information in the <see cref="Store.Directory"/>,
/// efficiency dictates that we also keep an in-memory cache of <b>recently
/// seen</b> or <b>all</b> categories, so that we do not need to go back to disk
/// for every category addition to see which ordinal this category already has,
/// if any. A <see cref="ITaxonomyWriterCache"/> object determines the specific caching
/// algorithm used.
/// </para>
/// <para>
/// This class offers some hooks for extending classes to control the
/// <see cref="IndexWriter"/> instance that is used. See <see cref="OpenIndexWriter"/>.
///
/// @lucene.experimental
/// </para>
/// </summary>
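/// <example>
/// A minimal usage sketch (the directory type and the category are
/// illustrative; any <see cref="Store.Directory"/> implementation will do):
/// <code>
/// var taxoDir = new RAMDirectory();
/// using (var taxoWriter = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE))
/// {
///     int ordinal = taxoWriter.AddCategory(new FacetLabel("Author", "Mark Twain"));
///     taxoWriter.Commit(); // Dispose() also commits; shown here for clarity
/// }
/// </code>
/// </example>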
public class DirectoryTaxonomyWriter : ITaxonomyWriter
{
/// <summary>
/// Property name of user commit data that contains the index epoch. The epoch
/// changes whenever the taxonomy is recreated (i.e. opened with
/// <see cref="OpenMode.CREATE"/>.
/// <para>
/// Applications should not use this property in their commit data because it
/// will be overridden by this taxonomy writer.
/// </para>
/// </summary>
public const string INDEX_EPOCH = "index.epoch";
private readonly Directory dir;
private readonly IndexWriter indexWriter;
private readonly ITaxonomyWriterCache cache;
private readonly AtomicInt32 cacheMisses = new AtomicInt32(0);
// Records the taxonomy index epoch, updated on replaceTaxonomy as well.
private long indexEpoch;
private SinglePositionTokenStream parentStream = new SinglePositionTokenStream(Consts.PAYLOAD_PARENT);
private Field parentStreamField;
private Field fullPathField;
private int cacheMissesUntilFill = 11;
private bool shouldFillCache = true;
// Even though lazily initialized, this is not volatile so that access to it
// is faster; we keep a volatile boolean (initializedReaderManager) instead.
private ReaderManager readerManager;
private volatile bool initializedReaderManager = false;
private volatile bool shouldRefreshReaderManager;
/// <summary>
/// We call the cache "complete" if we know that every category in our
/// taxonomy is in the cache. When the cache is <b>not</b> complete, and
/// we can't find a category in the cache, we still need to look for it
/// in the on-disk index; therefore, when the cache is not complete, we
/// need to open a "reader" to the taxonomy index.
/// The cache becomes incomplete if it was never filled with the existing
/// categories, or if a Put() to the cache ever returned true (meaning
/// that some of the cached data was cleared).
/// </summary>
private volatile bool cacheIsComplete;
private volatile bool isClosed = false;
private volatile TaxonomyIndexArrays taxoArrays;
private volatile int nextID;
/// <summary>
/// Reads the commit data from a <see cref="Store.Directory"/>. </summary>
private static IDictionary<string, string> ReadCommitData(Directory dir)
{
SegmentInfos infos = new SegmentInfos();
infos.Read(dir);
return infos.UserData;
}
/// <summary>
/// Forcibly unlocks the taxonomy in the named directory.
/// <para/>
/// Caution: this should only be used by failure recovery code, when it is
/// known that no other process nor thread is in fact currently accessing
/// this taxonomy.
/// <para/>
/// This method is unnecessary if your <see cref="Store.Directory"/> uses a
/// <see cref="NativeFSLockFactory"/> instead of the default
/// <see cref="SimpleFSLockFactory"/>. When the "native" lock is used, a lock
/// does not stay behind forever when the process using it dies.
/// </summary>
public static void Unlock(Directory directory)
{
IndexWriter.Unlock(directory);
}
/// <summary>
/// Construct a Taxonomy writer.
/// </summary>
/// <param name="directory">
/// The <see cref="Store.Directory"/> in which to store the taxonomy. Note that
/// the taxonomy is written directly to that directory (not to a
/// subdirectory of it). </param>
/// <param name="openMode">
/// Specifies how to open a taxonomy for writing: <see cref="OpenMode.APPEND"/>
/// means open an existing index for append (failing if the index does
/// not yet exist). <see cref="OpenMode.CREATE"/> means create a new index (first
/// deleting the old one if it already existed).
/// <see cref="OpenMode.CREATE_OR_APPEND"/> appends to an existing index if there
/// is one, otherwise it creates a new index. </param>
/// <param name="cache">
/// A <see cref="ITaxonomyWriterCache"/> implementation which determines
/// the in-memory caching policy. See for example
/// <see cref="WriterCache.LruTaxonomyWriterCache"/> and <see cref="Cl2oTaxonomyWriterCache"/>.
/// If null or missing, <see cref="DefaultTaxonomyWriterCache()"/> is used. </param>
/// <exception cref="CorruptIndexException">
/// if the taxonomy is corrupted. </exception>
/// <exception cref="LockObtainFailedException">
/// if the taxonomy is locked by another writer. If it is known
/// that no other concurrent writer is active, the lock might
/// have been left around by an old dead process, and should be
/// removed using <see cref="Unlock(Directory)"/>. </exception>
/// <exception cref="IOException">
/// if another error occurred. </exception>
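/// <example>
/// A sketch of supplying a non-default cache (the LRU capacity of 4096 is
/// illustrative):
/// <code>
/// var cache = new LruTaxonomyWriterCache(4096);
/// var writer = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE_OR_APPEND, cache);
/// </code>
/// </example>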
public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode,
ITaxonomyWriterCache cache)
{
dir = directory;
IndexWriterConfig config = CreateIndexWriterConfig(openMode);
indexWriter = OpenIndexWriter(dir, config);
// verify (to some extent) that merge policy in effect would preserve category docids
if (indexWriter != null)
{
Debug.Assert(!(indexWriter.Config.MergePolicy is TieredMergePolicy), "for preserving category docids, merging non-adjacent segments is not allowed");
}
// after we opened the writer, and the index is locked, it's safe to check
// the commit data and read the index epoch
openMode = config.OpenMode;
if (!DirectoryReader.IndexExists(directory))
{
indexEpoch = 1;
}
else
{
string epochStr = null;
IDictionary<string, string> commitData = ReadCommitData(directory);
if (commitData != null && commitData.TryGetValue(INDEX_EPOCH, out string value))
{
epochStr = value;
}
// no commit data, or no epoch in it means an old taxonomy, so set its epoch to 1, for lack
// of a better value.
indexEpoch = epochStr == null ? 1 : Convert.ToInt64(epochStr, 16);
}
if (openMode == OpenMode.CREATE)
{
++indexEpoch;
}
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.OmitNorms = true;
parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream, ft);
fullPathField = new StringField(Consts.FULL, "", Field.Store.YES);
if (indexWriter == null)
return;
nextID = indexWriter.MaxDoc;
if (cache == null)
{
cache = DefaultTaxonomyWriterCache();
}
this.cache = cache;
if (nextID == 0)
{
cacheIsComplete = true;
// Make sure that the taxonomy always contains the root category
// with category id 0.
AddCategory(new FacetLabel());
}
else
{
// There are some categories on the disk, which we have not yet
// read into the cache, and therefore the cache is incomplete.
// We choose not to read all the categories into the cache now,
// to avoid terrible performance when a taxonomy index is opened
// to add just a single category. We will do it later, after we
// notice a few cache misses.
cacheIsComplete = false;
}
}
/// <summary>
/// Open internal index writer, which contains the taxonomy data.
/// <para/>
/// Extensions may provide their own <see cref="IndexWriter"/> implementation or instance.
/// <para/>
/// <b>NOTE:</b> the instance this method returns will be disposed when
/// <see cref="Dispose()"/> is called.
/// <para/>
/// <b>NOTE:</b> the merge policy in effect must not merge non-adjacent segments. See
/// comment in <see cref="CreateIndexWriterConfig(OpenMode)"/> for the logic behind this.
/// </summary>
/// <seealso cref="CreateIndexWriterConfig(OpenMode)"/>
/// <param name="directory">
/// the <see cref="Store.Directory"/> on top of which an <see cref="IndexWriter"/>
/// should be opened. </param>
/// <param name="config">
/// configuration for the internal index writer. </param>
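/// <example>
/// A sketch of overriding this hook in a subclass (the body shown simply
/// delegates; a real extension might wrap or instrument the writer):
/// <code>
/// protected override IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)
/// {
///     // inspect or adjust the config here before the writer is created
///     return base.OpenIndexWriter(directory, config);
/// }
/// </code>
/// </example>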
protected virtual IndexWriter OpenIndexWriter(Directory directory, IndexWriterConfig config)
{
return new IndexWriter(directory, config);
}
/// <summary>
/// Create the <see cref="IndexWriterConfig"/> that would be used for opening the internal index writer.
/// <para/>
/// Extensions can configure the <see cref="IndexWriter"/> as they see fit,
/// including setting a <see cref="Index.MergeScheduler"/>, or
/// <see cref="Index.IndexDeletionPolicy"/>, different RAM size
/// etc.
/// <para/>
/// <b>NOTE:</b> internal docids of the configured index must not be altered.
/// To that end, categories are never deleted from the taxonomy index.
/// In addition, the merge policy in effect must not merge non-adjacent segments.
/// </summary>
/// <seealso cref="OpenIndexWriter(Directory, IndexWriterConfig)"/>
/// <param name="openMode"> see <see cref="OpenMode"/> </param>
protected virtual IndexWriterConfig CreateIndexWriterConfig(OpenMode openMode)
{
// TODO: should we use a more optimized Codec, e.g. Pulsing (or write custom)?
// The taxonomy has a unique structure, where each term is associated with one document
// :Post-Release-Update-Version.LUCENE_XY:
// Make sure we use a MergePolicy which always merges adjacent segments and thus
// keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
return (new IndexWriterConfig(LuceneVersion.LUCENE_48, null)).SetOpenMode(openMode).SetMergePolicy(new LogByteSizeMergePolicy());
}
/// <summary>
/// Opens a <see cref="ReaderManager"/> from the internal <see cref="IndexWriter"/>.
/// </summary>
private void InitReaderManager()
{
if (!initializedReaderManager)
{
lock (this)
{
// verify that the taxo-writer hasn't been closed on us.
EnsureOpen();
if (!initializedReaderManager)
{
readerManager = new ReaderManager(indexWriter, false);
shouldRefreshReaderManager = false;
initializedReaderManager = true;
}
}
}
}
/// <summary>
/// Creates a new instance with a default cache as defined by
/// <see cref="DefaultTaxonomyWriterCache()"/>.
/// </summary>
public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode = OpenMode.CREATE_OR_APPEND)
: this(directory, openMode, DefaultTaxonomyWriterCache())
{
}
/// <summary>
/// Defines the default <see cref="ITaxonomyWriterCache"/> to use in constructors
/// which do not specify one.
/// <para>
/// The current default is <see cref="Cl2oTaxonomyWriterCache"/> constructed
/// with the parameters (1024, 0.15f, 3), i.e., the entire taxonomy is
/// cached in memory while building it.
/// </para>
/// </summary>
public static ITaxonomyWriterCache DefaultTaxonomyWriterCache()
{
return new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
}
/// <summary>
/// Frees used resources as well as closes the underlying <see cref="IndexWriter"/>,
/// which commits whatever changes made to it to the underlying
/// <see cref="Store.Directory"/>.
/// </summary>
public void Dispose()
{
lock (this)
{
if (!isClosed)
{
Commit();
DoClose();
}
}
}
private void DoClose()
{
indexWriter.Dispose();
isClosed = true;
CloseResources();
}
/// <summary>
/// A hook for extending classes to close additional resources that were used.
/// The default implementation closes the <see cref="Index.IndexReader"/> as well as the
/// <see cref="ITaxonomyWriterCache"/> instances that were used.
/// <para>
/// <b>NOTE:</b> if you override this method, you should include a
/// <c>base.CloseResources()</c> call in your implementation.
/// </para>
/// </summary>
protected virtual void CloseResources()
{
lock (this)
{
if (initializedReaderManager)
{
readerManager.Dispose();
readerManager = null;
initializedReaderManager = false;
}
if (cache != null)
{
cache.Dispose();
}
}
}
/// <summary>
/// Look up the given category in the cache and/or the on-disk storage,
/// returning the category's ordinal, or a negative number in case the
/// category does not yet exist in the taxonomy.
/// </summary>
protected virtual int FindCategory(FacetLabel categoryPath)
{
lock (this)
{
// If we can find the category in the cache, or we know the cache is
// complete, we can return the response directly from it
int res = cache.Get(categoryPath);
if (res >= 0 || cacheIsComplete)
{
return res;
}
cacheMisses.IncrementAndGet();
// After a few cache misses, it makes sense to read all the categories
// from disk and into the cache. The reason not to do this on the first
// cache miss (or even when opening the writer) is that it will
// significantly slow down the case when a taxonomy is opened just to
// add one category. The idea of only spending a long time on reading
// after enough time was spent on cache misses is known as an "online
// algorithm".
PerhapsFillCache();
res = cache.Get(categoryPath);
if (res >= 0 || cacheIsComplete)
{
// if after filling the cache from the info on disk, the category is in it
// or the cache is complete, return whatever cache.get returned.
return res;
}
// if we get here, it means the category is not in the cache, and it is not
// complete, and therefore we must look for the category on disk.
// We need to get an answer from the on-disk index.
InitReaderManager();
int doc = -1;
DirectoryReader reader = readerManager.Acquire();
try
{
BytesRef catTerm = new BytesRef(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length));
TermsEnum termsEnum = null; // reuse
DocsEnum docs = null; // reuse
foreach (AtomicReaderContext ctx in reader.Leaves)
{
Terms terms = ctx.AtomicReader.GetTerms(Consts.FULL);
if (terms != null)
{
termsEnum = terms.GetIterator(termsEnum);
if (termsEnum.SeekExact(catTerm))
{
// liveDocs=null because the taxonomy has no deletes
docs = termsEnum.Docs(null, docs, 0); // freqs not required
// if the term was found, we know it has exactly one document.
doc = docs.NextDoc() + ctx.DocBase;
break;
}
}
}
}
finally
{
readerManager.Release(reader);
}
if (doc > 0)
{
AddToCache(categoryPath, doc);
}
return doc;
}
}
public virtual int AddCategory(FacetLabel categoryPath)
{
EnsureOpen();
// check the cache outside the synchronized block. this results in better
// concurrency when categories are there.
int res = cache.Get(categoryPath);
if (res < 0)
{
// the category is not in the cache - following code cannot be executed in parallel.
lock (this)
{
res = FindCategory(categoryPath);
if (res < 0)
{
// This is a new category, and we need to insert it into the index
// (and the cache). Actually, we might also need to add some of
// the category's ancestors before we can add the category itself
// (while keeping the invariant that a parent is always added to
// the taxonomy before its child). internalAddCategory() does all
// this recursively
res = InternalAddCategory(categoryPath);
}
}
}
return res;
}
/// <summary>
/// Add a new category into the index (and the cache), and return its new
/// ordinal.
/// <para>
/// Actually, we might also need to add some of the category's ancestors
/// before we can add the category itself (while keeping the invariant that a
/// parent is always added to the taxonomy before its child). We do this by
/// recursion.
/// </para>
/// </summary>
private int InternalAddCategory(FacetLabel cp)
{
// Find our parent's ordinal (recursively adding the parent category
// to the taxonomy if it's not already there). Then add the parent
// ordinal as payloads (rather than a stored field; payloads can be
// more efficiently read into memory in bulk by LuceneTaxonomyReader)
int parent;
if (cp.Length > 1)
{
FacetLabel parentPath = cp.Subpath(cp.Length - 1);
parent = FindCategory(parentPath);
if (parent < 0)
{
parent = InternalAddCategory(parentPath);
}
}
else if (cp.Length == 1)
{
parent = TaxonomyReader.ROOT_ORDINAL;
}
else
{
parent = TaxonomyReader.INVALID_ORDINAL;
}
int id = AddCategoryDocument(cp, parent);
return id;
}
/// <summary>
/// Verifies that this instance wasn't closed, or throws
/// <see cref="ObjectDisposedException"/> if it is.
/// </summary>
protected internal void EnsureOpen()
{
if (isClosed)
{
throw new ObjectDisposedException(this.GetType().GetTypeInfo().FullName, "The taxonomy writer has already been closed");
}
}
/// <summary>
/// Note that the methods calling <see cref="AddCategoryDocument"/> are synchornized, so
/// this method is effectively synchronized as well.
/// </summary>
private int AddCategoryDocument(FacetLabel categoryPath, int parent)
{
// Before Lucene 2.9, position increments >=0 were supported, so we
// added 1 to parent to allow the parent -1 (the parent of the root).
// Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
// no longer enough, since 0 is not encoded consistently either (see
// comment in SinglePositionTokenStream). But because we must be
// backward-compatible with existing indexes, we can't just fix what
// we write here (e.g., to write parent+2), and need to do a workaround
// in the reader (which knows that anyway only category 0 has a parent
// -1).
parentStream.Set(Math.Max(parent + 1, 1));
Document d = new Document();
d.Add(parentStreamField);
fullPathField.SetStringValue(FacetsConfig.PathToString(categoryPath.Components, categoryPath.Length));
d.Add(fullPathField);
// Note that we do not pass an Analyzer here because the fields that are
// added to the Document are untokenized or contain their own TokenStream.
// Therefore the IndexWriter's Analyzer has no effect.
indexWriter.AddDocument(d);
int id = nextID++;
// added a category document, mark that ReaderManager is not up-to-date
shouldRefreshReaderManager = true;
// also add to the parent array
taxoArrays = GetTaxoArrays().Add(id, parent);
// NOTE: this line must be executed last, or else the cache gets updated
// before the parents array (LUCENE-4596)
AddToCache(categoryPath, id);
return id;
}
private class SinglePositionTokenStream : TokenStream
{
private ICharTermAttribute termAtt;
private IPositionIncrementAttribute posIncrAtt;
private bool returned;
private int val;
private readonly string word;
public SinglePositionTokenStream(string word)
{
termAtt = AddAttribute<ICharTermAttribute>();
posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
this.word = word;
returned = true;
}
/// <summary>
/// Set the value we want to keep, as the position increment.
/// Note that when TermPositions.NextPosition() is later used to
/// retrieve this value, val-1 will be returned, not val.
/// <para>
/// IMPORTANT NOTE: Before Lucene 2.9, val>=0 were safe (for val==0,
/// the retrieved position would be -1). But starting with Lucene 2.9,
/// this unfortunately changed, and only val>0 are safe. val=0 can
/// still be used, but don't count on the value you retrieve later
/// (it could be 0 or -1, depending on circumstances or versions).
/// This change is described in Lucene's JIRA: LUCENE-1542.
/// </para>
/// </summary>
public virtual void Set(int val)
{
this.val = val;
returned = false;
}
public sealed override bool IncrementToken()
{
if (returned)
{
return false;
}
ClearAttributes();
posIncrAtt.PositionIncrement = val;
termAtt.SetEmpty();
termAtt.Append(word);
returned = true;
return true;
}
}
private void AddToCache(FacetLabel categoryPath, int id)
{
if (cache.Put(categoryPath, id))
{
// If cache.put() returned true, it means the cache was limited in
// size, became full, and parts of it had to be evicted. It is
// possible that a relatively-new category that isn't yet visible
// to our 'reader' was evicted, and therefore we must now refresh
// the reader.
RefreshReaderManager();
cacheIsComplete = false;
}
}
private void RefreshReaderManager()
{
lock (this)
{
// this method is synchronized since it cannot happen concurrently with
// addCategoryDocument -- when this method returns, we must know that the
// reader manager's state is current. also, it sets shouldRefresh to false,
// and this cannot overlap with addCatDoc too.
// NOTE: since this method is sync'ed, it can call maybeRefresh, instead of
// maybeRefreshBlocking. If ever this is changed, make sure to change the
// call too.
if (shouldRefreshReaderManager && initializedReaderManager)
{
readerManager.MaybeRefresh();
shouldRefreshReaderManager = false;
}
}
}
public virtual void Commit()
{
lock (this)
{
EnsureOpen();
// LUCENE-4972: if we always call setCommitData, we create empty commits
string epochStr = null;
indexWriter.CommitData.TryGetValue(INDEX_EPOCH, out epochStr);
if (epochStr == null || Convert.ToInt64(epochStr, 16) != indexEpoch)
{
indexWriter.SetCommitData(CombinedCommitData(indexWriter.CommitData));
}
indexWriter.Commit();
}
}
/// <summary>
/// Combine original user data with the taxonomy epoch.
/// </summary>
private IDictionary<string, string> CombinedCommitData(IDictionary<string, string> commitData)
{
IDictionary<string, string> m = new Dictionary<string, string>();
if (commitData != null)
{
m.PutAll(commitData);
}
m[INDEX_EPOCH] = Convert.ToString(indexEpoch, 16);
return m;
}
public virtual void SetCommitData(IDictionary<string, string> commitUserData)
{
indexWriter.SetCommitData(CombinedCommitData(commitUserData));
}
public virtual IDictionary<string, string> CommitData
{
get
{
return CombinedCommitData(indexWriter.CommitData);
}
}
/// <summary>
/// Prepares most of the work needed for a two-phase commit.
/// See <see cref="IndexWriter.PrepareCommit"/>.
/// </summary>
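/// <example>
/// A sketch of a two-phase commit across a search index and its taxonomy
/// (writer names are illustrative); both writers prepare before either commits:
/// <code>
/// indexWriter.PrepareCommit();
/// taxoWriter.PrepareCommit();
/// indexWriter.Commit();
/// taxoWriter.Commit();
/// </code>
/// </example>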
public virtual void PrepareCommit()
{
lock (this)
{
EnsureOpen();
// LUCENE-4972: if we always call setCommitData, we create empty commits
string epochStr;
if (!indexWriter.CommitData.TryGetValue(INDEX_EPOCH, out epochStr)
|| epochStr == null
|| Convert.ToInt64(epochStr, 16) != indexEpoch)
{
indexWriter.SetCommitData(CombinedCommitData(indexWriter.CommitData));
}
indexWriter.PrepareCommit();
}
}
public virtual int Count
{
get
{
EnsureOpen();
return nextID;
}
}
/// <summary>
/// Set the number of cache misses before an attempt is made to read the entire
/// taxonomy into the in-memory cache.
/// <para>
/// This taxonomy writer holds an in-memory cache of recently seen categories
/// to speed up operation. On each cache-miss, the on-disk index needs to be
/// consulted. When an existing taxonomy is opened, a lot of slow disk reads
/// like that are needed until the cache is filled, so it is more efficient to
/// read the entire taxonomy into memory at once. We do this complete read
/// after a certain number (defined by this method) of cache misses.
/// </para>
/// <para>
/// If the number is set to <c>0</c>, the entire taxonomy is read into the
/// cache on first use, without fetching individual categories first.
/// </para>
/// <para>
/// NOTE: it is assumed that this method is called immediately after the
/// taxonomy writer has been created.
/// </para>
/// </summary>
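/// <example>
/// A sketch (per the note above, call this right after construction):
/// <code>
/// var writer = new DirectoryTaxonomyWriter(taxoDir);
/// writer.SetCacheMissesUntilFill(0); // read the entire taxonomy on first use
/// </code>
/// </example>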
public virtual void SetCacheMissesUntilFill(int i)
{
EnsureOpen();
cacheMissesUntilFill = i;
}
// we need to guarantee that if several threads call this concurrently, only
// one executes it, and after it returns, the cache is updated and is either
// complete or not.
private void PerhapsFillCache()
{
lock (this)
{
if (cacheMisses < cacheMissesUntilFill)
{
return;
}
if (!shouldFillCache)
{
// we already filled the cache once, there's no need to re-fill it
return;
}
shouldFillCache = false;
InitReaderManager();
bool aborted = false;
DirectoryReader reader = readerManager.Acquire();
try
{
TermsEnum termsEnum = null;
DocsEnum docsEnum = null;
foreach (AtomicReaderContext ctx in reader.Leaves)
{
Terms terms = ctx.AtomicReader.GetTerms(Consts.FULL);
if (terms != null) // cannot really happen, but be on the safe side
{
termsEnum = terms.GetIterator(termsEnum);
while (termsEnum.Next() != null)
{
if (!cache.IsFull)
{
BytesRef t = termsEnum.Term;
// Since we guarantee uniqueness of categories, each term has exactly
// one document. Also, since we do not allow removing categories (and
// hence documents), there are no deletions in the index. Therefore, it
// is sufficient to call next(), and then doc(), exactly once with no
// 'validation' checks.
FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(t.Utf8ToString()));
docsEnum = termsEnum.Docs(null, docsEnum, DocsFlags.NONE);
bool res = cache.Put(cp, docsEnum.NextDoc() + ctx.DocBase);
Debug.Assert(!res, "entries should not have been evicted from the cache");
}
else
{
// the cache is full and the next put() will evict entries from it, therefore abort the iteration.
aborted = true;
break;
}
}
}
if (aborted)
{
break;
}
}
}
finally
{
readerManager.Release(reader);
}
cacheIsComplete = !aborted;
if (cacheIsComplete)
{
lock (this)
{
// everything is in the cache, so no need to keep readerManager open.
// this block is executed in a sync block so that it works well with
// initReaderManager called in parallel.
readerManager.Dispose();
readerManager = null;
initializedReaderManager = false;
}
}
}
}
private TaxonomyIndexArrays GetTaxoArrays()
{
if (taxoArrays == null)
{
lock (this)
{
if (taxoArrays == null)
{
InitReaderManager();
DirectoryReader reader = readerManager.Acquire();
try
{
// according to Java Concurrency, this might perform better on some
// JVMs, since the object initialization doesn't happen on the
// volatile member.
TaxonomyIndexArrays tmpArrays = new TaxonomyIndexArrays(reader);
taxoArrays = tmpArrays;
}
finally
{
readerManager.Release(reader);
}
}
}
}
return taxoArrays;
}
public virtual int GetParent(int ordinal)
{
EnsureOpen();
// Note: the following if() just enforces that a user can never ask
// for the parent of a nonexistent category - even if the parent array
// was allocated bigger than it really needs to be.
if (ordinal >= nextID)
{
throw new System.IndexOutOfRangeException("requested ordinal is bigger than the largest ordinal in the taxonomy");
}
int[] parents = GetTaxoArrays().Parents;
Debug.Assert(ordinal < parents.Length, "requested ordinal (" + ordinal + "); parents.length (" + parents.Length + ") !");
return parents[ordinal];
}
/// <summary>
/// Takes the categories from the given taxonomy directory, and adds the
/// missing ones to this taxonomy. Additionally, it fills the given
/// <see cref="IOrdinalMap"/> with a mapping from the original ordinal to the new
/// ordinal.
/// </summary>
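/// <example>
/// A sketch of merging another taxonomy into this one, keeping the ordinal
/// remapping in memory (<c>otherTaxoDir</c> is illustrative):
/// <code>
/// var map = new MemoryOrdinalMap();
/// writer.AddTaxonomy(otherTaxoDir, map);
/// int[] oldToNew = map.GetMap(); // oldToNew[origOrdinal] == ordinal in this taxonomy
/// </code>
/// </example>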
public virtual void AddTaxonomy(Directory taxoDir, IOrdinalMap map)
{
EnsureOpen();
DirectoryReader r = DirectoryReader.Open(taxoDir);
try
{
int size = r.NumDocs;
IOrdinalMap ordinalMap = map;
ordinalMap.SetSize(size);
int @base = 0;
TermsEnum te = null;
DocsEnum docs = null;
foreach (AtomicReaderContext ctx in r.Leaves)
{
AtomicReader ar = ctx.AtomicReader;
Terms terms = ar.GetTerms(Consts.FULL);
te = terms.GetIterator(te);
while (te.Next() != null)
{
FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(te.Term.Utf8ToString()));
int ordinal = AddCategory(cp);
docs = te.Docs(null, docs, DocsFlags.NONE);
ordinalMap.AddMapping(docs.NextDoc() + @base, ordinal);
}
@base += ar.MaxDoc; // no deletions, so we're ok
}
ordinalMap.AddDone();
}
finally
{
r.Dispose();
}
}
/// <summary>
/// Mapping from old ordinals to new ordinals, used when merging indexes
/// with separate taxonomies.
/// <para/>
/// <see cref="AddMapping"/> merges one or more taxonomies into the given taxonomy
/// (this). An <see cref="IOrdinalMap"/> is filled for each of the added taxonomies,
/// containing the new ordinal (in the merged taxonomy) of each of the
/// categories in the old taxonomy.
/// <para/>
/// There exist two implementations of <see cref="IOrdinalMap"/>: <see cref="MemoryOrdinalMap"/> and
/// <see cref="DiskOrdinalMap"/>. As their names suggest, the former keeps the map in
/// memory and the latter in a temporary disk file. Because these maps will
/// later be needed one by one (to remap the counting lists), not all at the
/// same time, it is recommended to put the first taxonomy's map in memory,
/// and all the rest on disk (later to be automatically read into memory one
/// by one, when needed).
/// </summary>
public interface IOrdinalMap
{
/// <summary>
/// Set the size of the map. This MUST be called before <see cref="AddMapping"/>.
/// It is assumed (but not verified) that <see cref="AddMapping"/> will then be
/// called exactly 'size' times, with different <c>origOrdinals</c> between 0
/// and size - 1.
/// </summary>
void SetSize(int taxonomySize);
/// <summary>
/// Record a mapping. </summary>
void AddMapping(int origOrdinal, int newOrdinal);
/// <summary>
/// Call <see cref="AddDone()"/> to say that all <see cref="AddMapping"/> have been done.
/// In some implementations this might free some resources.
/// </summary>
void AddDone();
/// <summary>
/// Return the map from the taxonomy's original (consecutive) ordinals
/// to the new taxonomy's ordinals. If the map has to be read from disk
/// and ordered appropriately, this is done when <see cref="GetMap()"/> is called.
/// <see cref="GetMap()"/> should only be called once, and only when the map is actually
/// needed. Calling it will also free all resources that the map might
/// be holding (such as temporary disk space), other than the returned int[].
/// </summary>
int[] GetMap();
}
/// <summary>
/// <see cref="IOrdinalMap"/> maintained in memory
/// </summary>
public sealed class MemoryOrdinalMap : IOrdinalMap
{
internal int[] map;
/// <summary>
/// Sole constructor.
/// </summary>
public MemoryOrdinalMap()
{
map = new int[] { };
}
public void SetSize(int taxonomySize)
{
map = new int[taxonomySize];
}
public void AddMapping(int origOrdinal, int newOrdinal)
{
if (map.Length - 1 >= origOrdinal)
{
map[origOrdinal] = newOrdinal;
}
else
{
Array.Resize(ref map, origOrdinal + 1);
map[origOrdinal] = newOrdinal;
}
}
public void AddDone() // nothing to do
{
}
public int[] GetMap()
{
return (int[])map.Clone(); // LUCENENET specific: Since this is clearly not meant to be written to, we are cloning the array https://msdn.microsoft.com/en-us/library/0fss9skc.aspx
}
}
/// <summary>
/// <see cref="IOrdinalMap"/> maintained on file system
/// </summary>
public sealed class DiskOrdinalMap : IOrdinalMap
{
internal string tmpfile;
internal OutputStreamDataOutput @out;
/// <summary>
/// Sole constructor.
/// </summary>
public DiskOrdinalMap(string tmpfile)
{
this.tmpfile = tmpfile;
var outfs = new FileStream(tmpfile, FileMode.OpenOrCreate, FileAccess.Write);
@out = new OutputStreamDataOutput(outfs);
}
public void AddMapping(int origOrdinal, int newOrdinal)
{
@out.WriteInt32(origOrdinal);
@out.WriteInt32(newOrdinal);
}
public void SetSize(int taxonomySize)
{
@out.WriteInt32(taxonomySize);
}
public void AddDone()
{
if (@out != null)
{
@out.Dispose();
@out = null;
}
}
int[] map = null;
public int[] GetMap()
{
if (map != null)
{
return map;
}
AddDone(); // in case this wasn't previously called
var ifs = new FileStream(tmpfile, FileMode.OpenOrCreate, FileAccess.Read);
var @in = new InputStreamDataInput(ifs);
map = new int[@in.ReadInt32()];
// NOTE: The current code assumes here that the map is complete,
// i.e., every ordinal gets one and only one value. Otherwise,
// we may run into an EOF here, or conversely, not read everything.
for (int i = 0; i < map.Length; i++)
{
int origordinal = @in.ReadInt32();
int newordinal = @in.ReadInt32();
map[origordinal] = newordinal;
}
@in.Dispose();
// Delete the temporary file, which is no longer needed.
if (File.Exists(tmpfile))
{
File.Delete(tmpfile);
}
return map;
}
}
/// <summary>
/// Rolls back changes to the taxonomy writer and closes the instance. Following
/// this method the instance becomes unusable (calling any of its API methods
/// will yield an <see cref="ObjectDisposedException"/>).
/// </summary>
public virtual void Rollback()
{
lock (this)
{
EnsureOpen();
indexWriter.Rollback();
DoClose();
}
}
/// <summary>
/// Replaces the current taxonomy with the given one. This method should
/// generally be called in conjunction with
/// <see cref="IndexWriter.AddIndexes(Directory[])"/> to replace both the taxonomy
/// as well as the search index content.
/// </summary>
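/// <example>
/// A sketch of replacing both the search index and its taxonomy (writer and
/// directory names are illustrative, as is the exact ordering):
/// <code>
/// indexWriter.DeleteAll();
/// indexWriter.AddIndexes(newContentDir);
/// taxoWriter.ReplaceTaxonomy(newTaxoDir);
/// indexWriter.Commit();
/// taxoWriter.Commit();
/// </code>
/// </example>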
public virtual void ReplaceTaxonomy(Directory taxoDir)
{
lock (this)
{
// replace the taxonomy by doing IW optimized operations
indexWriter.DeleteAll();
indexWriter.AddIndexes(taxoDir);
shouldRefreshReaderManager = true;
InitReaderManager(); // ensure that it's initialized
RefreshReaderManager();
nextID = indexWriter.MaxDoc;
taxoArrays = null; // must nullify so that it's re-computed next time it's needed
// need to clear the cache, so that addCategory won't accidentally return
// old categories that are in the cache.
cache.Clear();
cacheIsComplete = false;
shouldFillCache = true;
cacheMisses.Value = 0;
// update indexEpoch, as replacing the taxonomy is just like recreating it
++indexEpoch;
}
}
/// <summary>
/// Returns the <see cref="Store.Directory"/> of this taxonomy writer.
/// </summary>
public virtual Directory Directory
{
get
{
return dir;
}
}
/// <summary>
/// Used by <see cref="DirectoryTaxonomyReader"/> to support NRT.
/// <para>
/// <b>NOTE:</b> you should not use the obtained <see cref="IndexWriter"/> in any
/// way other than opening an <see cref="Index.IndexReader"/> on it; otherwise, the
/// taxonomy index may become corrupt!
/// </para>
/// </summary>
internal IndexWriter InternalIndexWriter
{
get
{
return indexWriter;
}
}
/// <summary>
/// Expert: returns current index epoch, if this is a
/// near-real-time reader. Used by
/// <see cref="DirectoryTaxonomyReader"/> to support NRT.
///
/// @lucene.internal
/// </summary>
public long TaxonomyEpoch
{
get
{
return indexEpoch;
}
}
}
}