src/Lucene.Net.Misc/Index/PKIndexSplitter.cs - lucenenet - Git at Google

 using Lucene.Net.Diagnostics;
 using Lucene.Net.Search;
 using Lucene.Net.Store;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
 using System.Diagnostics;

 namespace Lucene.Net.Index
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// Splits an index into two new indexes based on a <see cref="Filter"/>:
     /// documents accepted by the filter are written to the first target directory,
     /// the remaining documents to the second one. Documents that are already
     /// deleted in the source index are not carried over to either target.
     /// </summary>
     public class PKIndexSplitter
     {
         private readonly Filter docsInFirstIndex;
         private readonly Directory input;
         private readonly Directory dir1;
         private readonly Directory dir2;
         private readonly IndexWriterConfig config1;
         private readonly IndexWriterConfig config2;

         /// <summary>
         /// Split an index based on a <see cref="Filter"/>. All documents that match the filter
         /// are sent to dir1, remaining ones to dir2. Both targets are written with a
         /// default configuration created for <paramref name="version"/>.
         /// </summary>
         /// <param name="version">Version used to build the two default writer configurations.</param>
         /// <param name="input">Directory holding the index to split.</param>
         /// <param name="dir1">Target directory for documents matching the filter.</param>
         /// <param name="dir2">Target directory for documents not matching the filter.</param>
         /// <param name="docsInFirstIndex">Filter selecting the documents that go to <paramref name="dir1"/>.</param>
         /// <exception cref="ArgumentNullException">
         /// <paramref name="input"/>, <paramref name="dir1"/>, <paramref name="dir2"/> or
         /// <paramref name="docsInFirstIndex"/> is <c>null</c>.
         /// </exception>
         public PKIndexSplitter(LuceneVersion version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex)
             : this(input, dir1, dir2, docsInFirstIndex, NewDefaultConfig(version), NewDefaultConfig(version))
         {
         }

         /// <summary>
         /// Creates a fresh writer configuration with no analyzer (<c>null</c>) and
         /// <see cref="OpenMode.CREATE"/>. A new instance is returned on every call so
         /// that each target index gets its own configuration object.
         /// </summary>
         private static IndexWriterConfig NewDefaultConfig(LuceneVersion version)
         {
             return new IndexWriterConfig(version, null) { OpenMode = OpenMode.CREATE };
         }

         /// <summary>
         /// Builds the filter used by the <see cref="Term"/>-based constructors: a
         /// <see cref="TermRangeFilter"/> on the term's field with no lower bound and
         /// <paramref name="midTerm"/> as the excluded upper bound, so the middle term
         /// itself ends up in the second index.
         /// </summary>
         /// <exception cref="ArgumentNullException"><paramref name="midTerm"/> is <c>null</c>.</exception>
         private static Filter NewMidTermFilter(Term midTerm)
         {
             // Fail with a descriptive exception instead of a NullReferenceException
             // raised from inside the constructor initializer.
             if (midTerm == null)
             {
                 throw new ArgumentNullException(nameof(midTerm));
             }

             return new TermRangeFilter(midTerm.Field, null, midTerm.Bytes, true, false);
         }

         /// <summary>
         /// Split an index based on a <see cref="Filter"/>, using caller-supplied writer
         /// configurations. All documents that match the filter are sent to
         /// <paramref name="dir1"/>, remaining ones to <paramref name="dir2"/>.
         /// </summary>
         /// <param name="input">Directory holding the index to split.</param>
         /// <param name="dir1">Target directory for documents matching the filter.</param>
         /// <param name="dir2">Target directory for documents not matching the filter.</param>
         /// <param name="docsInFirstIndex">Filter selecting the documents that go to <paramref name="dir1"/>.</param>
         /// <param name="config1">Writer configuration used for <paramref name="dir1"/>.</param>
         /// <param name="config2">
         /// Writer configuration used for <paramref name="dir2"/>. NOTE(review): per the comment in
         /// <see cref="Split"/>, a configuration cannot be reused, so this should be a different
         /// instance than <paramref name="config1"/>.
         /// </param>
         /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
         public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2)
         {
             // Validate eagerly: without these guards a null argument is only detected
             // later, somewhere inside Split(), far away from the faulty call site.
             if (input == null)
             {
                 throw new ArgumentNullException(nameof(input));
             }
             if (dir1 == null)
             {
                 throw new ArgumentNullException(nameof(dir1));
             }
             if (dir2 == null)
             {
                 throw new ArgumentNullException(nameof(dir2));
             }
             if (docsInFirstIndex == null)
             {
                 throw new ArgumentNullException(nameof(docsInFirstIndex));
             }
             if (config1 == null)
             {
                 throw new ArgumentNullException(nameof(config1));
             }
             if (config2 == null)
             {
                 throw new ArgumentNullException(nameof(config2));
             }

             this.input = input;
             this.dir1 = dir1;
             this.dir2 = dir2;
             this.docsInFirstIndex = docsInFirstIndex;
             this.config1 = config1;
             this.config2 = config2;
         }

         /// <summary>
         /// Split an index based on a  given primary key term
         /// and a 'middle' term.  If the middle term is present, it's
         /// sent to dir2.
         /// </summary>
         /// <param name="version">Version used to build the two default writer configurations.</param>
         /// <param name="input">Directory holding the index to split.</param>
         /// <param name="dir1">Target directory for documents whose key sorts before <paramref name="midTerm"/>.</param>
         /// <param name="dir2">Target directory for all remaining documents.</param>
         /// <param name="midTerm">Field and value marking the split point.</param>
         /// <exception cref="ArgumentNullException">
         /// <paramref name="midTerm"/>, <paramref name="input"/>, <paramref name="dir1"/> or
         /// <paramref name="dir2"/> is <c>null</c>.
         /// </exception>
         public PKIndexSplitter(LuceneVersion version, Directory input, Directory dir1, Directory dir2, Term midTerm)
             : this(version, input, dir1, dir2, NewMidTermFilter(midTerm))
         {
         }

         /// <summary>
         /// Split an index based on a given primary key term and a 'middle' term, using
         /// caller-supplied writer configurations. If the middle term is present, it's
         /// sent to <paramref name="dir2"/>.
         /// </summary>
         /// <param name="input">Directory holding the index to split.</param>
         /// <param name="dir1">Target directory for documents whose key sorts before <paramref name="midTerm"/>.</param>
         /// <param name="dir2">Target directory for all remaining documents.</param>
         /// <param name="midTerm">Field and value marking the split point.</param>
         /// <param name="config1">Writer configuration used for <paramref name="dir1"/>.</param>
         /// <param name="config2">Writer configuration used for <paramref name="dir2"/>.</param>
         /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
         public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2)
             : this(input, dir1, dir2, NewMidTermFilter(midTerm), config1, config2)
         {
         }

         /// <summary>
         /// Performs the split: opens a reader on the input directory, writes the documents
         /// matching the filter to the first target and the non-matching ones to the second,
         /// and finally disposes the reader.
         /// </summary>
         public virtual void Split()
         {
             bool success = false;
             DirectoryReader reader = DirectoryReader.Open(input);
             try
             {
                 // pass an individual config in here since one config can not be reused!
                 CreateIndex(config1, dir1, reader, docsInFirstIndex, false);
                 CreateIndex(config2, dir2, reader, docsInFirstIndex, true);
                 success = true;
             }
             finally
             {
                 // On failure use the exception-tolerant dispose variant (presumably so that
                 // a secondary error during cleanup does not hide the original exception).
                 if (success)
                 {
                     IOUtils.Dispose(reader);
                 }
                 else
                 {
                     IOUtils.DisposeWhileHandlingException(reader);
                 }
             }
         }

         /// <summary>
         /// Writes one of the two target indexes: every leaf of <paramref name="reader"/> is
         /// wrapped in a <see cref="DocumentFilteredAtomicIndexReader"/> and the wrapped
         /// readers are added to a new <see cref="IndexWriter"/> opened on <paramref name="target"/>.
         /// </summary>
         /// <param name="config">Writer configuration for the target index.</param>
         /// <param name="target">Directory the new index is written to.</param>
         /// <param name="reader">Reader over the source index.</param>
         /// <param name="preserveFilter">Filter deciding which documents are considered.</param>
         /// <param name="negateFilter">
         /// <c>false</c> to keep the documents matching <paramref name="preserveFilter"/>,
         /// <c>true</c> to keep the ones that do not match.
         /// </param>
         private void CreateIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, bool negateFilter)
         {
             bool success = false;
             IndexWriter w = new IndexWriter(target, config);
             try
             {
                 IList<AtomicReaderContext> leaves = reader.Leaves;
                 IndexReader[] subReaders = new IndexReader[leaves.Count];
                 int i = 0;
                 foreach (AtomicReaderContext ctx in leaves)
                 {
                     subReaders[i++] = new DocumentFilteredAtomicIndexReader(ctx, preserveFilter, negateFilter);
                 }
                 w.AddIndexes(subReaders);
                 success = true;
             }
             finally
             {
                 // Same dispose pattern as in Split(): tolerant cleanup only on the failure path.
                 if (success)
                 {
                     IOUtils.Dispose(w);
                 }
                 else
                 {
                     IOUtils.DisposeWhileHandlingException(w);
                 }
             }
         }

         /// <summary>
         /// Reader wrapper that reports only a filtered subset of the wrapped reader's
         /// documents as live. The subset is computed once in the constructor.
         /// </summary>
         private sealed class DocumentFilteredAtomicIndexReader : FilterAtomicReader
         {
             private readonly IBits liveDocs;
             private readonly int numDocs;

             /// <summary>
             /// Computes the live-document bit set for the leaf described by <paramref name="context"/>.
             /// </summary>
             /// <param name="context">Leaf context whose reader is wrapped.</param>
             /// <param name="preserveFilter">Filter evaluated against the leaf.</param>
             /// <param name="negateFilter">When <c>true</c>, the filter result is inverted.</param>
             public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter)
                 : base(context.AtomicReader)
             {
                 int maxDoc = m_input.MaxDoc;
                 FixedBitSet bits = new FixedBitSet(maxDoc);
                 // ignore livedocs here, as we filter them later:
                 DocIdSet docs = preserveFilter.GetDocIdSet(context, null);
                 // Both the doc id set and its iterator may be null; in that case no bit is set.
                 if (docs != null)
                 {
                     DocIdSetIterator it = docs.GetIterator();
                     if (it != null)
                     {
                         bits.Or(it);
                     }
                 }
                 if (negateFilter)
                 {
                     bits.Flip(0, maxDoc);
                 }

                 // Remove documents that are already deleted in the wrapped reader.
                 if (m_input.HasDeletions)
                 {
                     IBits oldLiveDocs = m_input.LiveDocs;
                     if (Debugging.AssertsEnabled) Debugging.Assert(oldLiveDocs != null);
                     DocIdSetIterator it = bits.GetIterator();
                     // Presumably NextDoc() yields a sentinel >= maxDoc once exhausted, ending the loop.
                     for (int i = it.NextDoc(); i < maxDoc; i = it.NextDoc())
                     {
                         if (!oldLiveDocs.Get(i))
                         {
                             // we can safely modify the current bit, as the iterator already stepped over it:
                             bits.Clear(i);
                         }
                     }
                 }

                 this.liveDocs = bits;
                 this.numDocs = bits.Cardinality();
             }

             /// <summary>Number of documents that passed the filter and are not deleted.</summary>
             public override int NumDocs => numDocs;

             /// <summary>Bit set with one set bit per document that passed the filter and is not deleted.</summary>
             public override IBits LiveDocs => liveDocs;
         }
     }
 }
	using Lucene.Net.Diagnostics;
	using Lucene.Net.Search;
	using Lucene.Net.Store;
	using Lucene.Net.Util;
	using System.Collections.Generic;
	using System.Diagnostics;

	namespace Lucene.Net.Index
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/// <summary>
	/// Splits an index into two new indexes based on a <see cref="Filter"/>:
	/// documents accepted by the filter are written to the first target directory,
	/// the remaining documents to the second one. Documents that are already
	/// deleted in the source index are not carried over to either target.
	/// </summary>
	public class PKIndexSplitter
	{
	    private readonly Filter docsInFirstIndex;
	    private readonly Directory input;
	    private readonly Directory dir1;
	    private readonly Directory dir2;
	    private readonly IndexWriterConfig config1;
	    private readonly IndexWriterConfig config2;

	    /// <summary>
	    /// Split an index based on a <see cref="Filter"/>. All documents that match the filter
	    /// are sent to dir1, remaining ones to dir2. Both targets are written with a
	    /// default configuration created for <paramref name="version"/>.
	    /// </summary>
	    /// <param name="version">Version used to build the two default writer configurations.</param>
	    /// <param name="input">Directory holding the index to split.</param>
	    /// <param name="dir1">Target directory for documents matching the filter.</param>
	    /// <param name="dir2">Target directory for documents not matching the filter.</param>
	    /// <param name="docsInFirstIndex">Filter selecting the documents that go to <paramref name="dir1"/>.</param>
	    /// <exception cref="ArgumentNullException">
	    /// <paramref name="input"/>, <paramref name="dir1"/>, <paramref name="dir2"/> or
	    /// <paramref name="docsInFirstIndex"/> is <c>null</c>.
	    /// </exception>
	    public PKIndexSplitter(LuceneVersion version, Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex)
	        : this(input, dir1, dir2, docsInFirstIndex, NewDefaultConfig(version), NewDefaultConfig(version))
	    {
	    }

	    /// <summary>
	    /// Creates a fresh writer configuration with no analyzer (<c>null</c>) and
	    /// <see cref="OpenMode.CREATE"/>. A new instance is returned on every call so
	    /// that each target index gets its own configuration object.
	    /// </summary>
	    private static IndexWriterConfig NewDefaultConfig(LuceneVersion version)
	    {
	        return new IndexWriterConfig(version, null) { OpenMode = OpenMode.CREATE };
	    }

	    /// <summary>
	    /// Builds the filter used by the <see cref="Term"/>-based constructors: a
	    /// <see cref="TermRangeFilter"/> on the term's field with no lower bound and
	    /// <paramref name="midTerm"/> as the excluded upper bound, so the middle term
	    /// itself ends up in the second index.
	    /// </summary>
	    /// <exception cref="ArgumentNullException"><paramref name="midTerm"/> is <c>null</c>.</exception>
	    private static Filter NewMidTermFilter(Term midTerm)
	    {
	        // Fail with a descriptive exception instead of a NullReferenceException
	        // raised from inside the constructor initializer.
	        if (midTerm == null)
	        {
	            throw new ArgumentNullException(nameof(midTerm));
	        }

	        return new TermRangeFilter(midTerm.Field, null, midTerm.Bytes, true, false);
	    }

	    /// <summary>
	    /// Split an index based on a <see cref="Filter"/>, using caller-supplied writer
	    /// configurations. All documents that match the filter are sent to
	    /// <paramref name="dir1"/>, remaining ones to <paramref name="dir2"/>.
	    /// </summary>
	    /// <param name="input">Directory holding the index to split.</param>
	    /// <param name="dir1">Target directory for documents matching the filter.</param>
	    /// <param name="dir2">Target directory for documents not matching the filter.</param>
	    /// <param name="docsInFirstIndex">Filter selecting the documents that go to <paramref name="dir1"/>.</param>
	    /// <param name="config1">Writer configuration used for <paramref name="dir1"/>.</param>
	    /// <param name="config2">
	    /// Writer configuration used for <paramref name="dir2"/>. NOTE(review): per the comment in
	    /// <see cref="Split"/>, a configuration cannot be reused, so this should be a different
	    /// instance than <paramref name="config1"/>.
	    /// </param>
	    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
	    public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Filter docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2)
	    {
	        // Validate eagerly: without these guards a null argument is only detected
	        // later, somewhere inside Split(), far away from the faulty call site.
	        if (input == null)
	        {
	            throw new ArgumentNullException(nameof(input));
	        }
	        if (dir1 == null)
	        {
	            throw new ArgumentNullException(nameof(dir1));
	        }
	        if (dir2 == null)
	        {
	            throw new ArgumentNullException(nameof(dir2));
	        }
	        if (docsInFirstIndex == null)
	        {
	            throw new ArgumentNullException(nameof(docsInFirstIndex));
	        }
	        if (config1 == null)
	        {
	            throw new ArgumentNullException(nameof(config1));
	        }
	        if (config2 == null)
	        {
	            throw new ArgumentNullException(nameof(config2));
	        }

	        this.input = input;
	        this.dir1 = dir1;
	        this.dir2 = dir2;
	        this.docsInFirstIndex = docsInFirstIndex;
	        this.config1 = config1;
	        this.config2 = config2;
	    }

	    /// <summary>
	    /// Split an index based on a given primary key term
	    /// and a 'middle' term. If the middle term is present, it's
	    /// sent to dir2.
	    /// </summary>
	    /// <param name="version">Version used to build the two default writer configurations.</param>
	    /// <param name="input">Directory holding the index to split.</param>
	    /// <param name="dir1">Target directory for documents whose key sorts before <paramref name="midTerm"/>.</param>
	    /// <param name="dir2">Target directory for all remaining documents.</param>
	    /// <param name="midTerm">Field and value marking the split point.</param>
	    /// <exception cref="ArgumentNullException">
	    /// <paramref name="midTerm"/>, <paramref name="input"/>, <paramref name="dir1"/> or
	    /// <paramref name="dir2"/> is <c>null</c>.
	    /// </exception>
	    public PKIndexSplitter(LuceneVersion version, Directory input, Directory dir1, Directory dir2, Term midTerm)
	        : this(version, input, dir1, dir2, NewMidTermFilter(midTerm))
	    {
	    }

	    /// <summary>
	    /// Split an index based on a given primary key term and a 'middle' term, using
	    /// caller-supplied writer configurations. If the middle term is present, it's
	    /// sent to <paramref name="dir2"/>.
	    /// </summary>
	    /// <param name="input">Directory holding the index to split.</param>
	    /// <param name="dir1">Target directory for documents whose key sorts before <paramref name="midTerm"/>.</param>
	    /// <param name="dir2">Target directory for all remaining documents.</param>
	    /// <param name="midTerm">Field and value marking the split point.</param>
	    /// <param name="config1">Writer configuration used for <paramref name="dir1"/>.</param>
	    /// <param name="config2">Writer configuration used for <paramref name="dir2"/>.</param>
	    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
	    public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2)
	        : this(input, dir1, dir2, NewMidTermFilter(midTerm), config1, config2)
	    {
	    }

	    /// <summary>
	    /// Performs the split: opens a reader on the input directory, writes the documents
	    /// matching the filter to the first target and the non-matching ones to the second,
	    /// and finally disposes the reader.
	    /// </summary>
	    public virtual void Split()
	    {
	        bool success = false;
	        DirectoryReader reader = DirectoryReader.Open(input);
	        try
	        {
	            // pass an individual config in here since one config can not be reused!
	            CreateIndex(config1, dir1, reader, docsInFirstIndex, false);
	            CreateIndex(config2, dir2, reader, docsInFirstIndex, true);
	            success = true;
	        }
	        finally
	        {
	            // On failure use the exception-tolerant dispose variant (presumably so that
	            // a secondary error during cleanup does not hide the original exception).
	            if (success)
	            {
	                IOUtils.Dispose(reader);
	            }
	            else
	            {
	                IOUtils.DisposeWhileHandlingException(reader);
	            }
	        }
	    }

	    /// <summary>
	    /// Writes one of the two target indexes: every leaf of <paramref name="reader"/> is
	    /// wrapped in a <see cref="DocumentFilteredAtomicIndexReader"/> and the wrapped
	    /// readers are added to a new <see cref="IndexWriter"/> opened on <paramref name="target"/>.
	    /// </summary>
	    /// <param name="config">Writer configuration for the target index.</param>
	    /// <param name="target">Directory the new index is written to.</param>
	    /// <param name="reader">Reader over the source index.</param>
	    /// <param name="preserveFilter">Filter deciding which documents are considered.</param>
	    /// <param name="negateFilter">
	    /// <c>false</c> to keep the documents matching <paramref name="preserveFilter"/>,
	    /// <c>true</c> to keep the ones that do not match.
	    /// </param>
	    private void CreateIndex(IndexWriterConfig config, Directory target, IndexReader reader, Filter preserveFilter, bool negateFilter)
	    {
	        bool success = false;
	        IndexWriter w = new IndexWriter(target, config);
	        try
	        {
	            IList<AtomicReaderContext> leaves = reader.Leaves;
	            IndexReader[] subReaders = new IndexReader[leaves.Count];
	            int i = 0;
	            foreach (AtomicReaderContext ctx in leaves)
	            {
	                subReaders[i++] = new DocumentFilteredAtomicIndexReader(ctx, preserveFilter, negateFilter);
	            }
	            w.AddIndexes(subReaders);
	            success = true;
	        }
	        finally
	        {
	            // Same dispose pattern as in Split(): tolerant cleanup only on the failure path.
	            if (success)
	            {
	                IOUtils.Dispose(w);
	            }
	            else
	            {
	                IOUtils.DisposeWhileHandlingException(w);
	            }
	        }
	    }

	    /// <summary>
	    /// Reader wrapper that reports only a filtered subset of the wrapped reader's
	    /// documents as live. The subset is computed once in the constructor.
	    /// </summary>
	    private sealed class DocumentFilteredAtomicIndexReader : FilterAtomicReader
	    {
	        private readonly IBits liveDocs;
	        private readonly int numDocs;

	        /// <summary>
	        /// Computes the live-document bit set for the leaf described by <paramref name="context"/>.
	        /// </summary>
	        /// <param name="context">Leaf context whose reader is wrapped.</param>
	        /// <param name="preserveFilter">Filter evaluated against the leaf.</param>
	        /// <param name="negateFilter">When <c>true</c>, the filter result is inverted.</param>
	        public DocumentFilteredAtomicIndexReader(AtomicReaderContext context, Filter preserveFilter, bool negateFilter)
	            : base(context.AtomicReader)
	        {
	            int maxDoc = m_input.MaxDoc;
	            FixedBitSet bits = new FixedBitSet(maxDoc);
	            // ignore livedocs here, as we filter them later:
	            DocIdSet docs = preserveFilter.GetDocIdSet(context, null);
	            // Both the doc id set and its iterator may be null; in that case no bit is set.
	            if (docs != null)
	            {
	                DocIdSetIterator it = docs.GetIterator();
	                if (it != null)
	                {
	                    bits.Or(it);
	                }
	            }
	            if (negateFilter)
	            {
	                bits.Flip(0, maxDoc);
	            }

	            // Remove documents that are already deleted in the wrapped reader.
	            if (m_input.HasDeletions)
	            {
	                IBits oldLiveDocs = m_input.LiveDocs;
	                if (Debugging.AssertsEnabled) Debugging.Assert(oldLiveDocs != null);
	                DocIdSetIterator it = bits.GetIterator();
	                // Presumably NextDoc() yields a sentinel >= maxDoc once exhausted, ending the loop.
	                for (int i = it.NextDoc(); i < maxDoc; i = it.NextDoc())
	                {
	                    if (!oldLiveDocs.Get(i))
	                    {
	                        // we can safely modify the current bit, as the iterator already stepped over it:
	                        bits.Clear(i);
	                    }
	                }
	            }

	            this.liveDocs = bits;
	            this.numDocs = bits.Cardinality();
	        }

	        /// <summary>Number of documents that passed the filter and are not deleted.</summary>
	        public override int NumDocs => numDocs;

	        /// <summary>Bit set with one set bit per document that passed the filter and is not deleted.</summary>
	        public override IBits LiveDocs => liveDocs;
	    }
	}
	}