| Index: modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java |
| =================================================================== |
| --- modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java (revision 1203233) |
| +++ modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java (working copy) |
| @@ -51,7 +51,18 @@ |
| |
| public void reset() throws IOException { |
| // no one should call us for deleted docs? |
| - docs = terms == null ? null : terms.docs(null, indexedBytes, null); |
| + |
| + if (terms != null) { |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (termsEnum.seekExact(indexedBytes, false)) { |
| + docs = termsEnum.docs(null, null); |
| + } else { |
| + docs = null; |
| + } |
| + } else { |
| + docs = null; |
| + } |
| + |
| if (docs == null) { |
| docs = new DocsEnum() { |
| @Override |
| Index: modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java |
| =================================================================== |
| --- modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java (revision 1203233) |
| +++ modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java (working copy) |
| @@ -59,7 +59,17 @@ |
| |
| public void reset() throws IOException { |
| // no one should call us for deleted docs? |
| - docs = terms==null ? null : terms.docs(null, indexedBytes, null); |
| + if (terms != null) { |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (termsEnum.seekExact(indexedBytes, false)) { |
| + docs = termsEnum.docs(null, null); |
| + } else { |
| + docs = null; |
| + } |
| + } else { |
| + docs = null; |
| + } |
| + |
| if (docs == null) { |
| docs = new DocsEnum() { |
| @Override |
| Index: lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java |
| =================================================================== |
| --- lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 1203233) |
| +++ lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy) |
| @@ -199,7 +199,7 @@ |
| writer.close(); |
| SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random)); |
| |
| - DocsAndPositionsEnum termPositions = reader.fields().terms("f1").docsAndPositions(reader.getLiveDocs(), new BytesRef("a"), null); |
| + DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a")); |
| assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); |
| int freq = termPositions.freq(); |
| assertEquals(3, freq); |
| @@ -243,18 +243,18 @@ |
| writer.close(); |
| SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random)); |
| |
| - DocsAndPositionsEnum termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term1"), null); |
| + DocsAndPositionsEnum termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term1")); |
| assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); |
| assertEquals(1, termPositions.freq()); |
| assertEquals(0, termPositions.nextPosition()); |
| |
| - termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term2"), null); |
| + termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term2")); |
| assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); |
| assertEquals(2, termPositions.freq()); |
| assertEquals(1, termPositions.nextPosition()); |
| assertEquals(3, termPositions.nextPosition()); |
| |
| - termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term3"), null); |
| + termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term3")); |
| assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); |
| assertEquals(1, termPositions.freq()); |
| assertEquals(2, termPositions.nextPosition()); |
| Index: lucene/src/test/org/apache/lucene/index/TestIndexReader.java |
| =================================================================== |
| --- lucene/src/test/org/apache/lucene/index/TestIndexReader.java (revision 1203233) |
| +++ lucene/src/test/org/apache/lucene/index/TestIndexReader.java (working copy) |
| @@ -1340,13 +1340,12 @@ |
| writer.addDocument(d); |
| IndexReader r = writer.getReader(); |
| writer.close(); |
| - Terms terms = MultiFields.getTerms(r, "f"); |
| try { |
| // Make sure codec impls totalTermFreq (eg PreFlex doesn't) |
| - Assume.assumeTrue(terms.totalTermFreq(new BytesRef("b")) != -1); |
| - assertEquals(1, terms.totalTermFreq(new BytesRef("b"))); |
| - assertEquals(2, terms.totalTermFreq(new BytesRef("a"))); |
| - assertEquals(1, terms.totalTermFreq(new BytesRef("b"))); |
| + Assume.assumeTrue(MultiFields.totalTermFreq(r, "f", new BytesRef("b")) != -1); |
| + assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b"))); |
| + assertEquals(2, MultiFields.totalTermFreq(r, "f", new BytesRef("a"))); |
| + assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b"))); |
| } finally { |
| r.close(); |
| dir.close(); |
| Index: lucene/src/test/org/apache/lucene/index/TestOmitTf.java |
| =================================================================== |
| --- lucene/src/test/org/apache/lucene/index/TestOmitTf.java (revision 1203233) |
| +++ lucene/src/test/org/apache/lucene/index/TestOmitTf.java (working copy) |
| @@ -447,7 +447,7 @@ |
| IndexReader ir = iw.getReader(); |
| iw.close(); |
| Terms terms = MultiFields.getTerms(ir, "foo"); |
| - assertEquals(-1, terms.totalTermFreq(new BytesRef("bar"))); |
| + assertEquals(-1, MultiFields.totalTermFreq(ir, "foo", new BytesRef("bar"))); |
| assertEquals(-1, terms.getSumTotalTermFreq()); |
| ir.close(); |
| dir.close(); |
| Index: lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java |
| =================================================================== |
| --- lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java (revision 1203233) |
| +++ lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java (working copy) |
| @@ -349,7 +349,9 @@ |
| assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| |
| // now reuse and check again |
| - disi = r.terms("foo").docs(null, new BytesRef("bar"), disi); |
| + TermsEnum te = r.terms("foo").iterator(null); |
| + assertTrue(te.seekExact(new BytesRef("bar"), true)); |
| + disi = te.docs(null, disi); |
| docid = disi.docID(); |
| assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| @@ -372,7 +374,9 @@ |
| assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| |
| // now reuse and check again |
| - disi = r.terms("foo").docsAndPositions(null, new BytesRef("bar"), disi); |
| + TermsEnum te = r.terms("foo").iterator(null); |
| + assertTrue(te.seekExact(new BytesRef("bar"), true)); |
| + disi = te.docsAndPositions(null, disi); |
| docid = disi.docID(); |
| assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); |
| assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); |
| Index: lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java |
| =================================================================== |
| --- lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (revision 1203233) |
| +++ lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java (working copy) |
| @@ -342,6 +342,7 @@ |
| return; |
| } |
| Terms terms2 = fields.terms(idField); |
| + TermsEnum termsEnum2 = terms2.iterator(null); |
| |
| DocsEnum termDocs1 = null; |
| DocsEnum termDocs2 = null; |
| @@ -354,7 +355,11 @@ |
| } |
| |
| termDocs1 = termsEnum.docs(liveDocs1, termDocs1); |
| - termDocs2 = terms2.docs(liveDocs2, term, termDocs2); |
| + if (termsEnum2.seekExact(term, false)) { |
| + termDocs2 = termsEnum2.docs(liveDocs2, termDocs2); |
| + } else { |
| + termDocs2 = null; |
| + } |
| |
| if (termDocs1.nextDoc() == DocsEnum.NO_MORE_DOCS) { |
| // This doc is deleted and wasn't replaced |
| @@ -397,11 +402,11 @@ |
| System.out.println(" " + field + ":"); |
| Terms terms3 = fieldsEnum.terms(); |
| assertNotNull(terms3); |
| - TermsEnum termsEnum2 = terms3.iterator(null); |
| + TermsEnum termsEnum3 = terms3.iterator(null); |
| BytesRef term2; |
| - while((term2 = termsEnum2.next()) != null) { |
| - System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq()); |
| - dpEnum = termsEnum2.docsAndPositions(null, dpEnum); |
| + while((term2 = termsEnum3.next()) != null) { |
| + System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq()); |
| + dpEnum = termsEnum3.docsAndPositions(null, dpEnum); |
| if (dpEnum != null) { |
| assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); |
| final int freq = dpEnum.freq(); |
| @@ -410,7 +415,7 @@ |
| System.out.println(" pos=" + dpEnum.nextPosition()); |
| } |
| } else { |
| - dEnum = termsEnum2.docs(null, dEnum); |
| + dEnum = termsEnum3.docs(null, dEnum); |
| assertNotNull(dEnum); |
| assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); |
| final int freq = dEnum.freq(); |
| @@ -431,11 +436,11 @@ |
| System.out.println(" " + field + ":"); |
| Terms terms3 = fieldsEnum.terms(); |
| assertNotNull(terms3); |
| - TermsEnum termsEnum2 = terms3.iterator(null); |
| + TermsEnum termsEnum3 = terms3.iterator(null); |
| BytesRef term2; |
| - while((term2 = termsEnum2.next()) != null) { |
| - System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq()); |
| - dpEnum = termsEnum2.docsAndPositions(null, dpEnum); |
| + while((term2 = termsEnum3.next()) != null) { |
| + System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq()); |
| + dpEnum = termsEnum3.docsAndPositions(null, dpEnum); |
| if (dpEnum != null) { |
| assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); |
| final int freq = dpEnum.freq(); |
| @@ -444,7 +449,7 @@ |
| System.out.println(" pos=" + dpEnum.nextPosition()); |
| } |
| } else { |
| - dEnum = termsEnum2.docs(null, dEnum); |
| + dEnum = termsEnum3.docs(null, dEnum); |
| assertNotNull(dEnum); |
| assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); |
| final int freq = dEnum.freq(); |
| @@ -467,7 +472,7 @@ |
| |
| String field1=null, field2=null; |
| TermsEnum termsEnum1 = null; |
| - TermsEnum termsEnum2 = null; |
| + termsEnum2 = null; |
| DocsEnum docs1=null, docs2=null; |
| |
| // pack both doc and freq into single element for easy sorting |
| Index: lucene/src/test/org/apache/lucene/index/TestMultiFields.java |
| =================================================================== |
| --- lucene/src/test/org/apache/lucene/index/TestMultiFields.java (revision 1203233) |
| +++ lucene/src/test/org/apache/lucene/index/TestMultiFields.java (working copy) |
| @@ -113,7 +113,6 @@ |
| for(int delDoc : deleted) { |
| assertFalse(liveDocs.get(delDoc)); |
| } |
| - Terms terms2 = MultiFields.getTerms(reader, "field"); |
| |
| for(int i=0;i<100;i++) { |
| BytesRef term = terms.get(random.nextInt(terms.size())); |
| @@ -121,7 +120,7 @@ |
| System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term); |
| } |
| |
| - DocsEnum docsEnum = terms2.docs(liveDocs, term, null); |
| + DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, "field", term); |
| assertNotNull(docsEnum); |
| |
| for(int docID : docs.get(term)) { |
| Index: lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java (working copy) |
| @@ -17,7 +17,6 @@ |
| * limitations under the License. |
| */ |
| |
| -import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexReader.AtomicReaderContext; |
| import org.apache.lucene.index.Fields; |
| import org.apache.lucene.index.Term; |
| @@ -26,7 +25,6 @@ |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.ReaderUtil; |
| import org.apache.lucene.util.TermContext; |
| import org.apache.lucene.util.ToStringUtils; |
| |
| @@ -99,7 +97,7 @@ |
| if (fields != null) { |
| final Terms terms = fields.terms(term.field()); |
| if (terms != null) { |
| - final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share! |
| + final TermsEnum termsEnum = terms.iterator(null); |
| if (termsEnum.seekExact(term.bytes(), true)) { |
| state = termsEnum.termState(); |
| } else { |
| @@ -119,7 +117,7 @@ |
| return TermSpans.EMPTY_TERM_SPANS; |
| } |
| |
| - final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum(); |
| + final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null); |
| termsEnum.seekExact(term.bytes(), state); |
| |
| final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null); |
| Index: lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (working copy) |
| @@ -20,20 +20,23 @@ |
| import java.io.IOException; |
| import java.util.*; |
| |
| -import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.index.DocsAndPositionsEnum; |
| +import org.apache.lucene.index.DocsEnum; |
| import org.apache.lucene.index.IndexReader.AtomicReaderContext; |
| import org.apache.lucene.index.IndexReader.ReaderContext; |
| +import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.DocsEnum; |
| -import org.apache.lucene.index.DocsAndPositionsEnum; |
| +import org.apache.lucene.index.TermState; |
| +import org.apache.lucene.index.Terms; |
| +import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; |
| import org.apache.lucene.search.similarities.Similarity; |
| -import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; |
| import org.apache.lucene.util.ArrayUtil; |
| +import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.PriorityQueue; |
| import org.apache.lucene.util.TermContext; |
| import org.apache.lucene.util.ToStringUtils; |
| -import org.apache.lucene.util.PriorityQueue; |
| -import org.apache.lucene.util.Bits; |
| |
| /** |
| * MultiPhraseQuery is a generalized version of PhraseQuery, with an added |
| @@ -134,6 +137,7 @@ |
| private class MultiPhraseWeight extends Weight { |
| private final Similarity similarity; |
| private final Similarity.Stats stats; |
| + private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>(); |
| |
| public MultiPhraseWeight(IndexSearcher searcher) |
| throws IOException { |
| @@ -144,7 +148,11 @@ |
| ArrayList<TermStatistics> allTermStats = new ArrayList<TermStatistics>(); |
| for(final Term[] terms: termArrays) { |
| for (Term term: terms) { |
| - TermContext termContext = TermContext.build(context, term, true); |
| + TermContext termContext = termContexts.get(term); |
| + if (termContext == null) { |
| + termContext = TermContext.build(context, term, true); |
| + termContexts.put(term, termContext); |
| + } |
| allTermStats.add(searcher.termStatistics(term, termContext)); |
| } |
| } |
| @@ -174,6 +182,14 @@ |
| |
| PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()]; |
| |
| + final Terms fieldTerms = reader.terms(field); |
| + if (fieldTerms == null) { |
| + return null; |
| + } |
| + |
| + // Reuse single TermsEnum below: |
| + final TermsEnum termsEnum = fieldTerms.iterator(null); |
| + |
| for (int pos=0; pos<postingsFreqs.length; pos++) { |
| Term[] terms = termArrays.get(pos); |
| |
| @@ -181,31 +197,43 @@ |
| int docFreq; |
| |
| if (terms.length > 1) { |
| - postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, reader, terms); |
| + postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum); |
| |
| // coarse -- this overcounts since a given doc can |
| - // have more than one terms: |
| + // have more than one term: |
| docFreq = 0; |
| for(int termIdx=0;termIdx<terms.length;termIdx++) { |
| - docFreq += reader.docFreq(terms[termIdx]); |
| + final Term term = terms[termIdx]; |
| + TermState termState = termContexts.get(term).get(context.ord); |
| + if (termState == null) { |
| + // Term not in reader |
| + continue; |
| + } |
| + termsEnum.seekExact(term.bytes(), termState); |
| + docFreq += termsEnum.docFreq(); |
| } |
| + |
| + if (docFreq == 0) { |
| + // None of the terms are in this reader |
| + return null; |
| + } |
| } else { |
| final Term term = terms[0]; |
| - postingsEnum = reader.termPositionsEnum(liveDocs, |
| - term.field(), |
| - term.bytes()); |
| + TermState termState = termContexts.get(term).get(context.ord); |
| + if (termState == null) { |
| + // Term not in reader |
| + return null; |
| + } |
| + termsEnum.seekExact(term.bytes(), termState); |
| + postingsEnum = termsEnum.docsAndPositions(liveDocs, null); |
| |
| if (postingsEnum == null) { |
| - if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) { |
| - // term does exist, but has no positions |
| - throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); |
| - } else { |
| - // term does not exist |
| - return null; |
| - } |
| + // term does exist, but has no positions |
| + assert termsEnum.docs(liveDocs, null) != null: "termstate found but no term exists in reader"; |
| + throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); |
| } |
| |
| - docFreq = reader.docFreq(term.field(), term.bytes()); |
| + docFreq = termsEnum.docFreq(); |
| } |
| |
| postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]); |
| @@ -437,20 +465,22 @@ |
| private DocsQueue _queue; |
| private IntQueue _posList; |
| |
| - public UnionDocsAndPositionsEnum(Bits liveDocs, IndexReader indexReader, Term[] terms) throws IOException { |
| + public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException { |
| List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>(); |
| for (int i = 0; i < terms.length; i++) { |
| - DocsAndPositionsEnum postings = indexReader.termPositionsEnum(liveDocs, |
| - terms[i].field(), |
| - terms[i].bytes()); |
| - if (postings != null) { |
| - docsEnums.add(postings); |
| - } else { |
| - if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) { |
| - // term does exist, but has no positions |
| - throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")"); |
| - } |
| + final Term term = terms[i]; |
| + TermState termState = termContexts.get(term).get(context.ord); |
| + if (termState == null) { |
| + // Term doesn't exist in reader |
| + continue; |
| } |
| + termsEnum.seekExact(term.bytes(), termState); |
| + DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null); |
| + if (postings == null) { |
| + // term does exist, but has no positions |
| + throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); |
| + } |
| + docsEnums.add(postings); |
| } |
| |
| _queue = new DocsQueue(docsEnums); |
| Index: lucene/src/java/org/apache/lucene/search/PhraseQuery.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/search/PhraseQuery.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/search/PhraseQuery.java (working copy) |
| @@ -18,24 +18,24 @@ |
| */ |
| |
| import java.io.IOException; |
| +import java.util.ArrayList; |
| import java.util.Set; |
| -import java.util.ArrayList; |
| |
| +import org.apache.lucene.index.DocsAndPositionsEnum; |
| import org.apache.lucene.index.IndexReader.AtomicReaderContext; |
| import org.apache.lucene.index.IndexReader.ReaderContext; |
| +import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.Term; |
| -import org.apache.lucene.index.DocsAndPositionsEnum; |
| -import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.TermState; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| +import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; |
| import org.apache.lucene.search.similarities.Similarity; |
| -import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; |
| +import org.apache.lucene.util.ArrayUtil; |
| +import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.TermContext; |
| import org.apache.lucene.util.ToStringUtils; |
| -import org.apache.lucene.util.ArrayUtil; |
| -import org.apache.lucene.util.Bits; |
| |
| /** A Query that matches documents containing a particular sequence of terms. |
| * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>. |
| @@ -222,27 +222,32 @@ |
| final IndexReader reader = context.reader; |
| final Bits liveDocs = acceptDocs; |
| PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()]; |
| + |
| + final Terms fieldTerms = reader.terms(field); |
| + if (fieldTerms == null) { |
| + return null; |
| + } |
| + |
| + // Reuse single TermsEnum below: |
| + final TermsEnum te = fieldTerms.iterator(null); |
| + |
| for (int i = 0; i < terms.size(); i++) { |
| final Term t = terms.get(i); |
| final TermState state = states[i].get(context.ord); |
| if (state == null) { /* term doesnt exist in this segment */ |
| - assert termNotInReader(reader, field, t.bytes()) : "no termstate found but term exists in reader"; |
| + assert termNotInReader(reader, field, t.bytes()): "no termstate found but term exists in reader"; |
| return null; |
| } |
| - DocsAndPositionsEnum postingsEnum = reader.termPositionsEnum(liveDocs, |
| - t.field(), |
| - t.bytes(), |
| - state); |
| + te.seekExact(t.bytes(), state); |
| + DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null); |
| + |
| // PhraseQuery on a field that did not index |
| // positions. |
| if (postingsEnum == null) { |
| - assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader"; |
| + assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null: "termstate found but no term exists in reader"; |
| // term does exist, but has no positions |
| throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")"); |
| } |
| - // get the docFreq without seeking |
| - TermsEnum te = reader.fields().terms(field).getThreadTermsEnum(); |
| - te.seekExact(t.bytes(), state); |
| postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t); |
| } |
| |
| @@ -264,10 +269,9 @@ |
| } |
| } |
| |
| + // only called from assert |
| private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { |
| - // only called from assert |
| - final Terms terms = reader.terms(field); |
| - return terms == null || terms.docFreq(bytes) == 0; |
| + return reader.docFreq(field, bytes) == 0; |
| } |
| |
| @Override |
| Index: lucene/src/java/org/apache/lucene/search/TermQuery.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/search/TermQuery.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/search/TermQuery.java (working copy) |
| @@ -23,7 +23,6 @@ |
| import org.apache.lucene.index.DocsEnum; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.TermState; |
| -import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.index.IndexReader.AtomicReaderContext; |
| import org.apache.lucene.index.IndexReader.ReaderContext; |
| @@ -41,13 +40,13 @@ |
| */ |
| public class TermQuery extends Query { |
| private final Term term; |
| - private int docFreq; |
| - private transient TermContext perReaderTermState; |
| + private final int docFreq; |
| + private final TermContext perReaderTermState; |
| |
| final class TermWeight extends Weight { |
| private final Similarity similarity; |
| private final Similarity.Stats stats; |
| - private transient TermContext termStates; |
| + private final TermContext termStates; |
| |
| public TermWeight(IndexSearcher searcher, TermContext termStates) |
| throws IOException { |
| @@ -108,7 +107,7 @@ |
| return null; |
| } |
| //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null")); |
| - final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum(); |
| + final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null); |
| termsEnum.seekExact(term.bytes(), state); |
| return termsEnum; |
| } |
| @@ -116,8 +115,7 @@ |
| private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { |
| // only called from assert |
| //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString()); |
| - final Terms terms = reader.terms(field); |
| - return terms == null || terms.docFreq(bytes) == 0; |
| + return reader.docFreq(field, bytes) == 0; |
| } |
| |
| @Override |
| Index: lucene/src/java/org/apache/lucene/index/SegmentReader.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/index/SegmentReader.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/index/SegmentReader.java (working copy) |
| @@ -36,7 +36,6 @@ |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.util.BitVector; |
| import org.apache.lucene.util.Bits; |
| -import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.CloseableThreadLocal; |
| import org.apache.lucene.util.StringHelper; |
| |
| @@ -474,17 +473,6 @@ |
| } |
| |
| @Override |
| - public int docFreq(String field, BytesRef term) throws IOException { |
| - ensureOpen(); |
| - Terms terms = core.fields.terms(field); |
| - if (terms != null) { |
| - return terms.docFreq(term); |
| - } else { |
| - return 0; |
| - } |
| - } |
| - |
| - @Override |
| public int numDocs() { |
| // Don't call ensureOpen() here (it could affect performance) |
| if (liveDocs != null) { |
| Index: lucene/src/java/org/apache/lucene/index/Terms.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/index/Terms.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/index/Terms.java (working copy) |
| @@ -20,9 +20,7 @@ |
| import java.io.IOException; |
| import java.util.Comparator; |
| |
| -import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.util.CloseableThreadLocal; |
| import org.apache.lucene.util.automaton.CompiledAutomaton; |
| |
| /** |
| @@ -32,10 +30,6 @@ |
| |
| public abstract class Terms { |
| |
| - // Privately cache a TermsEnum per-thread for looking up |
| - // docFreq and getting a private DocsEnum |
| - private final CloseableThreadLocal<TermsEnum> threadEnums = new CloseableThreadLocal<TermsEnum>(); |
| - |
| /** Returns an iterator that will step through all |
| * terms. This method will not return null. If you have |
| * a previous TermsEnum, for example from a different |
| @@ -83,81 +77,6 @@ |
| * reuse it. */ |
| public abstract Comparator<BytesRef> getComparator() throws IOException; |
| |
| - /** Returns the number of documents containing the |
| - * specified term text. Returns 0 if the term does not |
| - * exist. */ |
| - public int docFreq(BytesRef text) throws IOException { |
| - final TermsEnum termsEnum = getThreadTermsEnum(); |
| - if (termsEnum.seekExact(text, true)) { |
| - return termsEnum.docFreq(); |
| - } else { |
| - return 0; |
| - } |
| - } |
| - |
| - /** Returns the total number of occurrences of this term |
| - * across all documents (the sum of the freq() for each |
| - * doc that has this term). This will be -1 if the |
| - * codec doesn't support this measure. Note that, like |
| - * other term measures, this measure does not take |
| - * deleted documents into account. */ |
| - public long totalTermFreq(BytesRef text) throws IOException { |
| - final TermsEnum termsEnum = getThreadTermsEnum(); |
| - if (termsEnum.seekExact(text, true)) { |
| - return termsEnum.totalTermFreq(); |
| - } else { |
| - return 0; |
| - } |
| - } |
| - |
| - /** Get {@link DocsEnum} for the specified term. This |
| - * method may return null if the term does not exist. */ |
| - public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException { |
| - final TermsEnum termsEnum = getThreadTermsEnum(); |
| - if (termsEnum.seekExact(text, true)) { |
| - return termsEnum.docs(liveDocs, reuse); |
| - } else { |
| - return null; |
| - } |
| - } |
| - |
| - /** Get {@link DocsEnum} for the specified term. This |
| - * method will may return null if the term does not |
| - * exists, or positions were not indexed. */ |
| - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException { |
| - final TermsEnum termsEnum = getThreadTermsEnum(); |
| - if (termsEnum.seekExact(text, true)) { |
| - return termsEnum.docsAndPositions(liveDocs, reuse); |
| - } else { |
| - return null; |
| - } |
| - } |
| - |
| - /** |
| - * Expert: Get {@link DocsEnum} for the specified {@link TermState}. |
| - * This method may return <code>null</code> if the term does not exist. |
| - * |
| - * @see TermsEnum#termState() |
| - * @see TermsEnum#seekExact(BytesRef, TermState) */ |
| - public DocsEnum docs(Bits liveDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException { |
| - final TermsEnum termsEnum = getThreadTermsEnum(); |
| - termsEnum.seekExact(term, termState); |
| - return termsEnum.docs(liveDocs, reuse); |
| - } |
| - |
| - /** |
| - * Get {@link DocsEnum} for the specified {@link TermState}. This |
| - * method will may return <code>null</code> if the term does not exists, or positions were |
| - * not indexed. |
| - * |
| - * @see TermsEnum#termState() |
| - * @see TermsEnum#seekExact(BytesRef, TermState) */ |
| - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException { |
| - final TermsEnum termsEnum = getThreadTermsEnum(); |
| - termsEnum.seekExact(term, termState); |
| - return termsEnum.docsAndPositions(liveDocs, reuse); |
| - } |
| - |
| /** Returns the number of terms for this field, or -1 if this |
| * measure isn't stored by the codec. Note that, just like |
| * other term measures, this measure does not take deleted |
| @@ -172,7 +91,7 @@ |
| * into account. */ |
| public abstract long getSumTotalTermFreq() throws IOException; |
| |
| - /** Returns the sum of {@link #docFreq(BytesRef)} for |
| + /** Returns the sum of {@link TermsEnum#docFreq()} for |
| * all terms in this field, or -1 if this measure isn't |
| * stored by the codec. Note that, just like other term |
| * measures, this measure does not take deleted documents |
| @@ -185,34 +104,6 @@ |
| * measures, this measure does not take deleted documents |
| * into account. */ |
| public abstract int getDocCount() throws IOException; |
| - |
| - /** |
| - * Returns a thread-private {@link TermsEnum} instance. Obtaining |
| - * {@link TermsEnum} from this method might be more efficient than using |
| - * {@link #iterator(TermsEnum)} directly since this method doesn't necessarily create a |
| - * new {@link TermsEnum} instance. |
| - * <p> |
| - * NOTE: {@link TermsEnum} instances obtained from this method must not be |
| - * shared across threads. The enum should only be used within a local context |
| - * where other threads can't access it. |
| - * |
| - * @return a thread-private {@link TermsEnum} instance |
| - * @throws IOException |
| - * if an IOException occurs |
| - * @lucene.internal |
| - */ |
| - public TermsEnum getThreadTermsEnum() throws IOException { |
| - TermsEnum termsEnum = threadEnums.get(); |
| - if (termsEnum == null) { |
| - termsEnum = iterator(null); |
| - threadEnums.set(termsEnum); |
| - } |
| - return termsEnum; |
| - } |
| |
| - // subclass must close when done: |
| - protected void close() { |
| - threadEnums.close(); |
| - } |
| public final static Terms[] EMPTY_ARRAY = new Terms[0]; |
| } |
| Index: lucene/src/java/org/apache/lucene/index/FilterIndexReader.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) |
| @@ -85,21 +85,6 @@ |
| } |
| |
| @Override |
| - public int docFreq(BytesRef text) throws IOException { |
| - return in.docFreq(text); |
| - } |
| - |
| - @Override |
| - public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException { |
| - return in.docs(liveDocs, text, reuse); |
| - } |
| - |
| - @Override |
| - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException { |
| - return in.docsAndPositions(liveDocs, text, reuse); |
| - } |
| - |
| - @Override |
| public long getUniqueTermCount() throws IOException { |
| return in.getUniqueTermCount(); |
| } |
| Index: lucene/src/java/org/apache/lucene/index/IndexReader.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/index/IndexReader.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/index/IndexReader.java (working copy) |
| @@ -991,7 +991,12 @@ |
| if (terms == null) { |
| return 0; |
| } |
| - return terms.docFreq(term); |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (termsEnum.seekExact(term, true)) { |
| + return termsEnum.docFreq(); |
| + } else { |
| + return 0; |
| + } |
| } |
| |
| /** Returns the number of documents containing the term |
| @@ -1008,7 +1013,12 @@ |
| if (terms == null) { |
| return 0; |
| } |
| - return terms.totalTermFreq(term); |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (termsEnum.seekExact(term, true)) { |
| + return termsEnum.totalTermFreq(); |
| + } else { |
| + return 0; |
| + } |
| } |
| |
| /** This may return null if the field does not exist.*/ |
| @@ -1027,15 +1037,16 @@ |
| assert field != null; |
| assert term != null; |
| final Fields fields = fields(); |
| - if (fields == null) { |
| - return null; |
| + if (fields != null) { |
| + final Terms terms = fields.terms(field); |
| + if (terms != null) { |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (termsEnum.seekExact(term, true)) { |
| + return termsEnum.docs(liveDocs, null); |
| + } |
| + } |
| } |
| - final Terms terms = fields.terms(field); |
| - if (terms != null) { |
| - return terms.docs(liveDocs, term, null); |
| - } else { |
| - return null; |
| - } |
| + return null; |
| } |
| |
| /** Returns {@link DocsAndPositionsEnum} for the specified |
| @@ -1046,15 +1057,16 @@ |
| assert field != null; |
| assert term != null; |
| final Fields fields = fields(); |
| - if (fields == null) { |
| - return null; |
| + if (fields != null) { |
| + final Terms terms = fields.terms(field); |
| + if (terms != null) { |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (termsEnum.seekExact(term, true)) { |
| + return termsEnum.docsAndPositions(liveDocs, null); |
| + } |
| + } |
| } |
| - final Terms terms = fields.terms(field); |
| - if (terms != null) { |
| - return terms.docsAndPositions(liveDocs, term, null); |
| - } else { |
| - return null; |
| - } |
| + return null; |
| } |
| |
| /** |
| @@ -1066,15 +1078,15 @@ |
| assert state != null; |
| assert field != null; |
| final Fields fields = fields(); |
| - if (fields == null) { |
| - return null; |
| + if (fields != null) { |
| + final Terms terms = fields.terms(field); |
| + if (terms != null) { |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + termsEnum.seekExact(term, state); |
| + return termsEnum.docs(liveDocs, null); |
| + } |
| } |
| - final Terms terms = fields.terms(field); |
| - if (terms != null) { |
| - return terms.docs(liveDocs, term, state, null); |
| - } else { |
| - return null; |
| - } |
| + return null; |
| } |
| |
| /** |
| @@ -1086,15 +1098,15 @@ |
| assert state != null; |
| assert field != null; |
| final Fields fields = fields(); |
| - if (fields == null) { |
| - return null; |
| + if (fields != null) { |
| + final Terms terms = fields.terms(field); |
| + if (terms != null) { |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + termsEnum.seekExact(term, state); |
| + return termsEnum.docsAndPositions(liveDocs, null); |
| + } |
| } |
| - final Terms terms = fields.terms(field); |
| - if (terms != null) { |
| - return terms.docsAndPositions(liveDocs, term, state, null); |
| - } else { |
| - return null; |
| - } |
| + return null; |
| } |
| |
| |
| Index: lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (working copy) |
| @@ -17,7 +17,6 @@ |
| * limitations under the License. |
| */ |
| |
| -import java.io.Closeable; |
| import java.io.IOException; |
| import java.util.Collection; |
| import java.util.Comparator; |
| @@ -181,14 +180,8 @@ |
| } |
| } |
| } finally { |
| - try { |
| - if (postingsReader != null) { |
| - postingsReader.close(); |
| - } |
| - } finally { |
| - for(FieldReader field : fields.values()) { |
| - field.close(); |
| - } |
| + if (postingsReader != null) { |
| + postingsReader.close(); |
| } |
| } |
| } |
| @@ -238,7 +231,7 @@ |
| } |
| } |
| |
| - private class FieldReader extends Terms implements Closeable { |
| + private class FieldReader extends Terms { |
| final long numTerms; |
| final FieldInfo fieldInfo; |
| final long termsStartPointer; |
| @@ -262,11 +255,6 @@ |
| } |
| |
| @Override |
| - public void close() { |
| - super.close(); |
| - } |
| - |
| - @Override |
| public TermsEnum iterator(TermsEnum reuse) throws IOException { |
| return new SegmentTermsEnum(); |
| } |
| Index: lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java (working copy) |
| @@ -18,7 +18,6 @@ |
| */ |
| |
| import java.io.ByteArrayOutputStream; |
| -import java.io.Closeable; |
| import java.io.IOException; |
| import java.io.PrintStream; |
| import java.util.Collection; |
| @@ -194,9 +193,6 @@ |
| try { |
| IOUtils.close(in, postingsReader); |
| } finally { |
| - for(FieldReader field : fields.values()) { |
| - field.close(); |
| - } |
| // Clear so refs to terms index is GCable even if |
| // app hangs onto us: |
| fields.clear(); |
| @@ -392,7 +388,7 @@ |
| final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton(); |
| final BytesRef NO_OUTPUT = fstOutputs.getNoOutput(); |
| |
| - public final class FieldReader extends Terms implements Closeable { |
| + public final class FieldReader extends Terms { |
| final long numTerms; |
| final FieldInfo fieldInfo; |
| final long sumTotalTermFreq; |
| @@ -451,11 +447,6 @@ |
| } |
| |
| @Override |
| - public void close() { |
| - super.close(); |
| - } |
| - |
| - @Override |
| public TermsEnum iterator(TermsEnum reuse) throws IOException { |
| return new SegmentTermsEnum(); |
| } |
| @@ -744,7 +735,7 @@ |
| } |
| } |
| |
| - private final BytesRef savedStartTerm; |
| + private BytesRef savedStartTerm; |
| |
| // TODO: in some cases we can filter by length? eg |
| // regexp foo*bar must be at least length 6 bytes |
| @@ -784,7 +775,7 @@ |
| f.load(rootCode); |
| |
| // for assert: |
| - savedStartTerm = startTerm == null ? null : new BytesRef(startTerm); |
| + assert setSavedStartTerm(startTerm); |
| |
| currentFrame = f; |
| if (startTerm != null) { |
| @@ -792,6 +783,12 @@ |
| } |
| } |
| |
| +    // Invoked only from inside an assert; always returns true so that assert passes:
| +    private boolean setSavedStartTerm(BytesRef startTerm) {
| +      savedStartTerm = (startTerm != null) ? new BytesRef(startTerm) : null;
| +      return true;
| +    }
| + |
| @Override |
| public TermState termState() throws IOException { |
| currentFrame.decodeMetaData(); |
| @@ -1163,7 +1160,7 @@ |
| |
| // Iterates through terms in this field |
| private final class SegmentTermsEnum extends TermsEnum { |
| - private final IndexInput in; |
| + private IndexInput in; |
| |
| private Frame[] stack; |
| private final Frame staticFrame; |
| @@ -1182,29 +1179,21 @@ |
| |
| final BytesRef term = new BytesRef(); |
| |
| - @SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[5]; |
| + @SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[1]; |
| |
| public SegmentTermsEnum() throws IOException { |
| //if (DEBUG) System.out.println("BTTR.init seg=" + segment); |
| - in = (IndexInput) BlockTreeTermsReader.this.in.clone(); |
| - stack = new Frame[5]; |
| - for(int stackOrd=0;stackOrd<stack.length;stackOrd++) { |
| - stack[stackOrd] = new Frame(stackOrd); |
| - } |
| + stack = new Frame[0]; |
| + |
| // Used to hold seek by TermState, or cached seek |
| staticFrame = new Frame(-1); |
| |
| + // Init w/ root block; don't use index since it may |
| + // not (and need not) have been loaded |
| for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) { |
| arcs[arcIdx] = new FST.Arc<BytesRef>(); |
| } |
| |
| - // Init w/ root block; don't use index since it may |
| - // not (and need not) have been loaded |
| - //final FST.Arc<BytesRef> arc = index.getFirstArc(arcs[0]); |
| - |
| - // Empty string prefix must have an output in the index! |
| - //assert arc.isFinal(); |
| - |
| currentFrame = staticFrame; |
| final FST.Arc<BytesRef> arc; |
| if (index != null) { |
| @@ -1214,8 +1203,9 @@ |
| } else { |
| arc = null; |
| } |
| - currentFrame = pushFrame(arc, rootCode, 0); |
| - currentFrame.loadBlock(); |
| + currentFrame = staticFrame; |
| + //currentFrame = pushFrame(arc, rootCode, 0); |
| + //currentFrame.loadBlock(); |
| validIndexPrefix = 0; |
| // if (DEBUG) { |
| // System.out.println("init frame state " + currentFrame.ord); |
| @@ -1226,6 +1216,12 @@ |
| // computeBlockStats().print(System.out); |
| } |
| |
| +    private void initIndexInput() {
| +      if (in == null) {  // clone the shared terms-dict input only on first use
| +        in = (IndexInput) BlockTreeTermsReader.this.in.clone();
| +      }
| +    }
| + |
| /** Runs next() through the entire terms dict, |
| * computing aggregate statistics. */ |
| public Stats computeBlockStats() throws IOException { |
| @@ -1975,6 +1971,20 @@ |
| @Override |
| public BytesRef next() throws IOException { |
| |
| + if (in == null) { |
| + // Fresh TermsEnum; seek to first term: |
| + final FST.Arc<BytesRef> arc; |
| + if (index != null) { |
| + arc = index.getFirstArc(arcs[0]); |
| + // Empty string prefix must have an output in the index! |
| + assert arc.isFinal(); |
| + } else { |
| + arc = null; |
| + } |
| + currentFrame = pushFrame(arc, rootCode, 0); |
| + currentFrame.loadBlock(); |
| + } |
| + |
| targetBeforeCurrentLength = currentFrame.ord; |
| |
| assert !eof; |
| @@ -2242,6 +2252,11 @@ |
| use. */ |
| void loadBlock() throws IOException { |
| |
| + // Clone the IndexInput lazily, so that consumers |
| + // that just pull a TermsEnum to |
| + // seekExact(TermState) don't pay this cost: |
| + initIndexInput(); |
| + |
| if (nextEnt != -1) { |
| // Already loaded |
| return; |
| Index: lucene/src/java/org/apache/lucene/index/MultiFields.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/index/MultiFields.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/index/MultiFields.java (working copy) |
| @@ -156,10 +156,12 @@ |
| assert term != null; |
| final Terms terms = getTerms(r, field); |
| if (terms != null) { |
| - return terms.docs(liveDocs, term, null); |
| - } else { |
| - return null; |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (termsEnum.seekExact(term, true)) { |
| + return termsEnum.docs(liveDocs, null); |
| + } |
| } |
| + return null; |
| } |
| |
| /** Returns {@link DocsAndPositionsEnum} for the specified |
| @@ -170,10 +172,12 @@ |
| assert term != null; |
| final Terms terms = getTerms(r, field); |
| if (terms != null) { |
| - return terms.docsAndPositions(liveDocs, term, null); |
| - } else { |
| - return null; |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (termsEnum.seekExact(term, true)) { |
| + return termsEnum.docsAndPositions(liveDocs, null); |
| + } |
| } |
| + return null; |
| } |
| |
| public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) { |
| @@ -233,6 +237,17 @@ |
| return result; |
| } |
| |
| +  /** Returns {@link TermsEnum#totalTermFreq()} for the given term of
| +   *  the field, or 0 if the field or the term does not exist. */
| +  public static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException {
| +    final Terms fieldTerms = getTerms(r, field);
| +    if (fieldTerms == null) {
| +      return 0;
| +    }
| +    final TermsEnum te = fieldTerms.iterator(null);
| +    return te.seekExact(text, true) ? te.totalTermFreq() : 0;
| +  }
| + |
| @Override |
| public int getUniqueFieldCount() { |
| return terms.size(); |
| Index: lucene/src/java/org/apache/lucene/util/TermContext.java |
| =================================================================== |
| --- lucene/src/java/org/apache/lucene/util/TermContext.java (revision 1203233) |
| +++ lucene/src/java/org/apache/lucene/util/TermContext.java (working copy) |
| @@ -46,7 +46,6 @@ |
| |
| //public static boolean DEBUG = BlockTreeTermsWriter.DEBUG; |
| |
| - |
| /** |
| * Creates an empty {@link TermContext} from a {@link ReaderContext} |
| */ |
| @@ -94,7 +93,7 @@ |
| if (fields != null) { |
| final Terms terms = fields.terms(field); |
| if (terms != null) { |
| - final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share! |
| + final TermsEnum termsEnum = terms.iterator(null); |
| if (termsEnum.seekExact(bytes, cache)) { |
| final TermState termState = termsEnum.termState(); |
| //if (DEBUG) System.out.println(" found"); |
| Index: lucene/contrib/misc/src/java/org/apache/lucene/misc/GetTermInfo.java |
| =================================================================== |
| --- lucene/contrib/misc/src/java/org/apache/lucene/misc/GetTermInfo.java (revision 1203233) |
| +++ lucene/contrib/misc/src/java/org/apache/lucene/misc/GetTermInfo.java (working copy) |
| @@ -21,9 +21,7 @@ |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| import org.apache.lucene.util.BytesRef; |
| -import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.MultiFields; |
| |
| /* |
| * Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term. |
| @@ -50,10 +48,9 @@ |
| |
| public static void getTermInfo(Directory dir, String field, BytesRef termtext) throws Exception { |
| IndexReader reader = IndexReader.open(dir); |
| - Terms terms =MultiFields.getTerms(reader, field); |
| long totalTF = HighFreqTerms.getTotalTermFreq(reader, field, termtext); |
| System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n", |
| - field, termtext.utf8ToString(), totalTF, terms.docFreq(termtext)); |
| + field, termtext.utf8ToString(), totalTF, reader.docFreq(field, termtext)); |
| } |
| |
| private static void usage() { |
| Index: solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java |
| =================================================================== |
| --- solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java (revision 1203233) |
| +++ solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java (working copy) |
| @@ -723,8 +723,11 @@ |
| Terms terms = fields.terms(t.field()); |
| if (terms == null) return -1; |
| BytesRef termBytes = t.bytes(); |
| - DocsEnum docs = terms.docs(MultiFields.getLiveDocs(r), termBytes, null); |
| - if (docs == null) return -1; |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (!termsEnum.seekExact(termBytes, false)) { |
| + return -1; |
| + } |
| + DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null); |
| int id = docs.nextDoc(); |
| if (id != DocIdSetIterator.NO_MORE_DOCS) { |
| int next = docs.nextDoc(); |
| Index: solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java |
| =================================================================== |
| --- solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java (revision 1203233) |
| +++ solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java (working copy) |
| @@ -555,7 +555,11 @@ |
| Terms terms = fields.terms(t.field()); |
| if (terms == null) return -1; |
| BytesRef termBytes = t.bytes(); |
| - DocsEnum docs = terms.docs(MultiFields.getLiveDocs(reader), termBytes, null); |
| + final TermsEnum termsEnum = terms.iterator(null); |
| + if (!termsEnum.seekExact(termBytes, false)) { |
| + return -1; |
| + } |
| + DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null); |
| if (docs == null) return -1; |
| int id = docs.nextDoc(); |
| return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id; |
| @@ -947,7 +951,13 @@ |
| BytesRef termBytes = t.bytes(); |
| |
| Bits liveDocs = reader.getLiveDocs(); |
| - DocsEnum docsEnum = terms==null ? null : terms.docs(liveDocs, termBytes, null); |
| +      DocsEnum docsEnum = null;
| +      if (terms != null) {
| +        final TermsEnum termsEnum = terms.iterator(null);
| +        if (termsEnum.seekExact(termBytes, false)) {
| +          docsEnum = termsEnum.docs(liveDocs, null);  // reuse the liveDocs local fetched from reader above
| +        }
| +      }
| |
| if (docsEnum != null) { |
| DocsEnum.BulkReadResult readResult = docsEnum.getBulkResult(); |