/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
/**
* Split an index based on a {@link Query}.
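 *
 * <p>A minimal usage sketch (the {@code "id"} field name, the paths, and the
 * split point are illustrative, not part of this API):
 *
 * <pre class="prettyprint">
 * Directory input = FSDirectory.open(Paths.get("index"));
 * Directory dir1 = FSDirectory.open(Paths.get("split1"));
 * Directory dir2 = FSDirectory.open(Paths.get("split2"));
 * PKIndexSplitter splitter = new PKIndexSplitter(input, dir1, dir2, new Term("id", "m"));
 * splitter.split(); // ids sorting before "m" end up in dir1, the rest in dir2
 * </pre>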
*/
public class PKIndexSplitter {
private final Query docsInFirstIndex;
private final Directory input;
private final Directory dir1;
private final Directory dir2;
private final IndexWriterConfig config1;
private final IndexWriterConfig config2;
/**
* Split an index based on a {@link Query}. All documents that match the query
   * are sent to dir1, the remaining ones to dir2.
*/
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Query docsInFirstIndex) {
this(input, dir1, dir2, docsInFirstIndex, newDefaultConfig(), newDefaultConfig());
}
private static IndexWriterConfig newDefaultConfig() {
return new IndexWriterConfig(null).setOpenMode(OpenMode.CREATE);
}
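  /**
   * Split an index based on a {@link Query}, using a separate
   * {@link IndexWriterConfig} for each of the two target indexes.
   */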
public PKIndexSplitter(Directory input, Directory dir1,
Directory dir2, Query docsInFirstIndex, IndexWriterConfig config1, IndexWriterConfig config2) {
this.input = input;
this.dir1 = dir1;
this.dir2 = dir2;
this.docsInFirstIndex = docsInFirstIndex;
this.config1 = config1;
this.config2 = config2;
}
/**
   * Split an index around a 'middle' primary key term: all documents whose
   * key sorts before {@code midTerm} are sent to dir1; the remaining documents,
   * including {@code midTerm} itself if present, are sent to dir2.
*/
public PKIndexSplitter(Directory input, Directory dir1, Directory dir2, Term midTerm) {
this(input, dir1, dir2,
new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false));
}
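  /**
   * Split an index around a 'middle' primary key term, using a separate
   * {@link IndexWriterConfig} for each of the two target indexes.
   */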
public PKIndexSplitter(Directory input, Directory dir1,
Directory dir2, Term midTerm, IndexWriterConfig config1, IndexWriterConfig config2) {
this(input, dir1, dir2,
new TermRangeQuery(midTerm.field(), null, midTerm.bytes(), true, false), config1, config2);
}
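  /** Performs the split: reads the input index once and writes each matching half to its target directory. */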
public void split() throws IOException {
boolean success = false;
DirectoryReader reader = DirectoryReader.open(input);
try {
      // pass an individual config for each target index: an IndexWriterConfig cannot be reused across IndexWriter instances!
createIndex(config1, dir1, reader, docsInFirstIndex, false);
createIndex(config2, dir2, reader, docsInFirstIndex, true);
success = true;
} finally {
if (success) {
IOUtils.close(reader);
} else {
IOUtils.closeWhileHandlingException(reader);
}
}
}
private void createIndex(IndexWriterConfig config, Directory target, DirectoryReader reader, Query preserveFilter, boolean negateFilter) throws IOException {
boolean success = false;
final IndexWriter w = new IndexWriter(target, config);
try {
final IndexSearcher searcher = new IndexSearcher(reader);
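      // disable the query cache: the filter is evaluated exactly once per segment, so caching would only add overhead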
searcher.setQueryCache(null);
preserveFilter = searcher.rewrite(preserveFilter);
final Weight preserveWeight = searcher.createWeight(preserveFilter, ScoreMode.COMPLETE_NO_SCORES, 1);
final List<LeafReaderContext> leaves = reader.leaves();
final CodecReader[] subReaders = new CodecReader[leaves.size()];
int i = 0;
for (final LeafReaderContext ctx : leaves) {
subReaders[i++] = new DocumentFilteredLeafIndexReader(ctx, preserveWeight, negateFilter);
}
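      // addIndexes copies only documents that are live according to each wrapped reader,
      // so the synthetic live docs computed by DocumentFilteredLeafIndexReader perform the actual split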
w.addIndexes(subReaders);
success = true;
} finally {
if (success) {
w.close();
} else {
IOUtils.closeWhileHandlingException(w);
}
}
}
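  /**
   * A {@link FilterCodecReader} that replaces the wrapped reader's live docs with
   * the subset of documents selected by the preserve query (optionally negated).
   */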
private static class DocumentFilteredLeafIndexReader extends FilterCodecReader {
final Bits liveDocs;
final int numDocs;
public DocumentFilteredLeafIndexReader(LeafReaderContext context, Weight preserveWeight, boolean negateFilter) throws IOException {
      // the cast is safe: split() opens the input with DirectoryReader.open, so every leaf is a CodecReader.
super((CodecReader) context.reader());
final int maxDoc = in.maxDoc();
final FixedBitSet bits = new FixedBitSet(maxDoc);
      // ignore the old live docs here, as we apply them below:
      final Scorer preserveScorer = preserveWeight.scorer(context);
      if (preserveScorer != null) {
        bits.or(preserveScorer.iterator());
}
if (negateFilter) {
bits.flip(0, maxDoc);
}
if (in.hasDeletions()) {
final Bits oldLiveDocs = in.getLiveDocs();
assert oldLiveDocs != null;
        final DocIdSetIterator it = new BitSetIterator(bits, 0L); // the exact cost does not matter for this one-shot iteration
for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) {
if (!oldLiveDocs.get(i)) {
// we can safely modify the current bit, as the iterator already stepped over it:
bits.clear(i);
}
}
}
this.liveDocs = bits;
this.numDocs = bits.cardinality();
}
@Override
public int numDocs() {
return numDocs;
}
@Override
public Bits getLiveDocs() {
return liveDocs;
}
@Override
public CacheHelper getCoreCacheHelper() {
return in.getCoreCacheHelper();
}
@Override
public CacheHelper getReaderCacheHelper() {
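      // this reader's live docs differ from the wrapped reader's, so it must not expose a reader-level cache key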
return null;
}
}
}