/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/** Utility class for efficient primary-key lookups (terms that occur in
 * exactly one document), checking segments one at a time and re-using the
 * per-segment enums across calls. This class is not thread safe, so it is
 * the caller's job to create and use one instance of this per thread. Do
 * not use this if a term may appear in more than one document: it will
 * only return the first match it finds.
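 *
 * <p>A minimal usage sketch ({@code reader} and the {@code "id"} field
 * name are illustrative; assumes every document carries a unique value in
 * that field):
 *
 * <pre>{@code
 * // One instance per thread; do not share across threads:
 * PerThreadPKLookup pkLookup = new PerThreadPKLookup(reader, "id");
 * int docID = pkLookup.lookup(new BytesRef("user-42"));
 * if (docID != -1) {
 *   // Found: docID is a top-level docID (segment docBase already added)
 * }
 * }</pre>
 */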
public class PerThreadPKLookup {
  protected final TermsEnum[] termsEnums;
  protected final PostingsEnum[] postingsEnums;
  protected final Bits[] liveDocs;
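  // Per-segment docBase, to map segment-local docIDs to top-level docIDs: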
  protected final int[] docBases;
  protected final int numSegs;
  protected final boolean hasDeletions;

  public PerThreadPKLookup(IndexReader r, String idFieldName) throws IOException {
    List<LeafReaderContext> leaves = new ArrayList<>(r.leaves());

    // Larger segments are more likely to have the id, so we sort largest to smallest by numDocs:
    Collections.sort(leaves, new Comparator<LeafReaderContext>() {
      @Override
      public int compare(LeafReaderContext c1, LeafReaderContext c2) {
        return c2.reader().numDocs() - c1.reader().numDocs();
      }
    });
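
    // Parallel arrays indexed by segment ordinal; only the first numSegs
    // entries end up being used: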
    termsEnums = new TermsEnum[leaves.size()];
    postingsEnums = new PostingsEnum[leaves.size()];
    liveDocs = new Bits[leaves.size()];
    docBases = new int[leaves.size()];
    int numSegs = 0;
    boolean hasDeletions = false;
    for (int i = 0; i < leaves.size(); i++) {
      Terms terms = leaves.get(i).reader().terms(idFieldName);
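      // Skip segments that have no postings for the id field: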
      if (terms != null) {
        termsEnums[numSegs] = terms.iterator();
        assert termsEnums[numSegs] != null;
        docBases[numSegs] = leaves.get(i).docBase;
        liveDocs[numSegs] = leaves.get(i).reader().getLiveDocs();
        hasDeletions |= leaves.get(i).reader().hasDeletions();
        numSegs++;
      }
    }
    this.numSegs = numSegs;
    this.hasDeletions = hasDeletions;
  }

  /** Returns the top-level docID if the id is found, else -1. */
  public int lookup(BytesRef id) throws IOException {
    for (int seg = 0; seg < numSegs; seg++) {
      if (termsEnums[seg].seekExact(id)) {
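        // Flags = 0 (no freqs/positions/offsets/payloads): we only need the docID: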
        postingsEnums[seg] = termsEnums[seg].postings(postingsEnums[seg], 0);
        int docID = postingsEnums[seg].nextDoc();
        if (docID != PostingsEnum.NO_MORE_DOCS
            && (liveDocs[seg] == null || liveDocs[seg].get(docID))) {
          return docBases[seg] + docID;
        }
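        // The term exists, so the only way to get here is if its single doc
        // was deleted; a live copy may still exist in a later segment: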
        assert hasDeletions;
      }
    }
    return -1;
  }

  // TODO: add reopen method to carry over re-used enums...?
}