lucene/test-framework/src/java/org/apache/lucene/index/PerThreadPKLookup.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.index;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;

 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;

 /**
  * Utility class to do efficient primary-key (only 1 doc contains the
  * given term) lookups by segment, re-using the enums.
  *
  * <p>This class is not thread safe, so it is the caller's job to create and use one
  * instance of this per thread.  Do not use this if a term may appear
  * in more than one document!  It will only return the first one it
  * finds.
  */
 public class PerThreadPKLookup {

   /** One terms enum per segment that has the id field; only the first {@link #numSegs} slots are filled. */
   protected final TermsEnum[] termsEnums;

   /** Postings enums, created on first use in {@link #lookup} and then handed back as the reuse argument. */
   protected final PostingsEnum[] postingsEnums;

   /** Live docs per segment as returned by the leaf reader; a {@code null} entry skips the liveness check. */
   protected final Bits[] liveDocs;

   /** {@code docBase} of each segment, added to the segment-local doc id on a hit. */
   protected final int[] docBases;

   /** Number of segments that actually have terms for the id field. */
   protected final int numSegs;

   /** True if any of the retained segments reported deletions. */
   protected final boolean hasDeletions;

   /**
    * Collects, for every leaf of {@code r} that has terms for {@code idFieldName},
    * a terms enum, the leaf's docBase and its live docs.
    *
    * @param r reader whose leaves are searched
    * @param idFieldName name of the primary-key field
    * @throws IOException if reading the terms of a leaf fails
    */
   public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException {

     // Copy before sorting so the reader's own leaves list is not touched.
     List<LeafReaderContext> leaves = new ArrayList<>(r.leaves());

     // Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
     Collections.sort(leaves, new Comparator<LeafReaderContext>() {
         @Override
         public int compare(LeafReaderContext c1, LeafReaderContext c2) {
           // Integer.compare rather than subtraction: same ordering, no overflow-prone arithmetic.
           return Integer.compare(c2.reader().numDocs(), c1.reader().numDocs());
         }
       });

     termsEnums = new TermsEnum[leaves.size()];
     postingsEnums = new PostingsEnum[leaves.size()];
     liveDocs = new Bits[leaves.size()];
     docBases = new int[leaves.size()];
     int numSegs = 0;
     boolean hasDeletions = false;
     for (LeafReaderContext leaf : leaves) {
       Terms terms = leaf.reader().terms(idFieldName);
       // Leaves without the id field are skipped, so the arrays are packed from index 0.
       if (terms != null) {
         termsEnums[numSegs] = terms.iterator();
         assert termsEnums[numSegs] != null;
         docBases[numSegs] = leaf.docBase;
         liveDocs[numSegs] = leaf.reader().getLiveDocs();
         hasDeletions |= leaf.reader().hasDeletions();
         numSegs++;
       }
     }
     this.numSegs = numSegs;
     this.hasDeletions = hasDeletions;
   }

   /**
    * Returns docID if found, else -1.
    *
    * <p>Segments are probed in the order fixed by the constructor; the returned id is
    * the segment-local doc id plus that segment's docBase.
    *
    * @param id primary-key term to look up
    * @return the doc id of the first live match, or -1 if there is none
    * @throws IOException if seeking the term or reading its postings fails
    */
   public int lookup(BytesRef id) throws IOException {
     for (int seg = 0; seg < numSegs; seg++) {
       if (termsEnums[seg].seekExact(id)) {
         // Only the doc id is needed, so request no extra postings features.
         postingsEnums[seg] = termsEnums[seg].postings(postingsEnums[seg], PostingsEnum.NONE);
         int docID = postingsEnums[seg].nextDoc();
         if (docID != PostingsEnum.NO_MORE_DOCS
             && (liveDocs[seg] == null || liveDocs[seg].get(docID))) {
           return docBases[seg] + docID;
         }
         // The term was present but yielded no live doc; only expected when deletions exist.
         assert hasDeletions;
       }
     }

     return -1;
   }

   // TODO: add reopen method to carry over re-used enums...?
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.index;

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.Comparator;
	import java.util.List;

	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;

	/** Utility class to do efficient primary-key (only 1 doc contains the
	* given term) lookups by segment, re-using the enums. This class is
	* not thread safe, so it is the caller's job to create and use one
	* instance of this per thread. Do not use this if a term may appear
	* in more than one document! It will only return the first one it
	* finds. */
	public class PerThreadPKLookup {

	protected final TermsEnum[] termsEnums;
	protected final PostingsEnum[] postingsEnums;
	protected final Bits[] liveDocs;
	protected final int[] docBases;
	protected final int numSegs;
	protected final boolean hasDeletions;

	public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException {

	List<LeafReaderContext> leaves = new ArrayList<>(r.leaves());

	// Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
	Collections.sort(leaves, new Comparator<LeafReaderContext>() {
	@Override
	public int compare(LeafReaderContext c1, LeafReaderContext c2) {
	return c2.reader().numDocs() - c1.reader().numDocs();
	}
	});

	termsEnums = new TermsEnum[leaves.size()];
	postingsEnums = new PostingsEnum[leaves.size()];
	liveDocs = new Bits[leaves.size()];
	docBases = new int[leaves.size()];
	int numSegs = 0;
	boolean hasDeletions = false;
	for(int i=0;i<leaves.size();i++) {
	Terms terms = leaves.get(i).reader().terms(idFieldName);
	if (terms != null) {
	termsEnums[numSegs] = terms.iterator();
	assert termsEnums[numSegs] != null;
	docBases[numSegs] = leaves.get(i).docBase;
	liveDocs[numSegs] = leaves.get(i).reader().getLiveDocs();
	hasDeletions \|= leaves.get(i).reader().hasDeletions();
	numSegs++;
	}
	}
	this.numSegs = numSegs;
	this.hasDeletions = hasDeletions;
	}

	/** Returns docID if found, else -1. */
	public int lookup(BytesRef id) throws IOException {
	for(int seg=0;seg<numSegs;seg++) {
	if (termsEnums[seg].seekExact(id)) {
	postingsEnums[seg] = termsEnums[seg].postings(postingsEnums[seg], 0);
	int docID = postingsEnums[seg].nextDoc();
	if (docID != PostingsEnum.NO_MORE_DOCS
	&& (liveDocs[seg] == null \|\| liveDocs[seg].get(docID))) {
	return docBases[seg] + docID;
	}
	assert hasDeletions;
	}
	}

	return -1;
	}

	// TODO: add reopen method to carry over re-used enums...?
	}