| Index: CHANGES.txt |
| =================================================================== |
| --- CHANGES.txt (revision 737499) |
| +++ CHANGES.txt (working copy) |
| @@ -145,6 +145,13 @@ |
| 4. LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length |
| is small compared to minSimilarity. (Timo Nentwig, Mark Miller) |
| |
| + 5. LUCENE-1316: MatchAllDocsQuery now avoids the synchronized |
| + IndexReader.isDeleted() call per document, by directly accessing |
| + the underlying deleteDocs BitVector. This improves performance |
| + with non-readOnly readers, especially in a multi-threaded |
| + environment. (Todd Feak, Yonik Seeley, Jason Rutherglen via Mike |
| + McCandless) |
| + |
| Documentation |
| |
| Build |
| Index: src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java |
| =================================================================== |
| --- src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java (revision 737499) |
| +++ src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java (working copy) |
| @@ -36,6 +36,7 @@ |
| public void testQuery() throws IOException { |
| RAMDirectory dir = new RAMDirectory(); |
| IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| + iw.setMaxBufferedDocs(2); // force multi-segment |
| addDoc("one", iw); |
| addDoc("two", iw); |
| addDoc("three four", iw); |
| Index: src/test/org/apache/lucene/index/TestMultiSegmentReader.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestMultiSegmentReader.java (revision 737499) |
| +++ src/test/org/apache/lucene/index/TestMultiSegmentReader.java (working copy) |
| @@ -149,6 +149,19 @@ |
| mr.close(); |
| } |
| |
| + public void testAllTermDocs() throws IOException { |
| + IndexReader reader = openReader(); |
| + int NUM_DOCS = 2; |
| + TermDocs td = reader.termDocs(null); |
| + for(int i=0;i<NUM_DOCS;i++) { |
| + assertTrue(td.next()); |
| + assertEquals(i, td.doc()); |
| + assertEquals(1, td.freq()); |
| + } |
| + td.close(); |
| + reader.close(); |
| + } |
| + |
| private void addDoc(RAMDirectory ramDir1, String s, boolean create) throws IOException { |
| IndexWriter iw = new IndexWriter(ramDir1, new StandardAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED); |
| Document doc = new Document(); |
| Index: src/test/org/apache/lucene/index/TestFilterIndexReader.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestFilterIndexReader.java (revision 737499) |
| +++ src/test/org/apache/lucene/index/TestFilterIndexReader.java (working copy) |
| @@ -125,6 +125,15 @@ |
| assertTrue((positions.doc() % 2) == 1); |
| } |
| |
| + int NUM_DOCS = 3; |
| + |
| + TermDocs td = reader.termDocs(null); |
| + for(int i=0;i<NUM_DOCS;i++) { |
| + assertTrue(td.next()); |
| + assertEquals(i, td.doc()); |
| + assertEquals(1, td.freq()); |
| + } |
| + td.close(); |
| reader.close(); |
| directory.close(); |
| } |
| Index: src/test/org/apache/lucene/index/TestParallelReader.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestParallelReader.java (revision 737499) |
| +++ src/test/org/apache/lucene/index/TestParallelReader.java (working copy) |
| @@ -123,7 +123,7 @@ |
| |
| public void testIsCurrent() throws IOException { |
| Directory dir1 = getDir1(); |
| - Directory dir2 = getDir1(); |
| + Directory dir2 = getDir2(); |
| ParallelReader pr = new ParallelReader(); |
| pr.add(IndexReader.open(dir1)); |
| pr.add(IndexReader.open(dir2)); |
| @@ -147,7 +147,7 @@ |
| |
| public void testIsOptimized() throws IOException { |
| Directory dir1 = getDir1(); |
| - Directory dir2 = getDir1(); |
| + Directory dir2 = getDir2(); |
| |
| // add another document to ensure that the indexes are not optimized |
| IndexWriter modifier = new IndexWriter(dir1, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED); |
| @@ -194,6 +194,25 @@ |
| |
| } |
| |
| + public void testAllTermDocs() throws IOException { |
| + Directory dir1 = getDir1(); |
| + Directory dir2 = getDir2(); |
| + ParallelReader pr = new ParallelReader(); |
| + pr.add(IndexReader.open(dir1)); |
| + pr.add(IndexReader.open(dir2)); |
| + int NUM_DOCS = 2; |
| + TermDocs td = pr.termDocs(null); |
| + for(int i=0;i<NUM_DOCS;i++) { |
| + assertTrue(td.next()); |
| + assertEquals(i, td.doc()); |
| + assertEquals(1, td.freq()); |
| + } |
| + td.close(); |
| + pr.close(); |
| + dir1.close(); |
| + dir2.close(); |
| + } |
| + |
| |
| private void queryTest(Query query) throws IOException { |
| ScoreDoc[] parallelHits = parallel.search(query, null, 1000).scoreDocs; |
| Index: src/java/org/apache/lucene/search/MatchAllDocsQuery.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/MatchAllDocsQuery.java (revision 737499) |
| +++ src/java/org/apache/lucene/search/MatchAllDocsQuery.java (working copy) |
| @@ -18,15 +18,11 @@ |
| */ |
| |
| import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.search.Explanation; |
| -import org.apache.lucene.search.Query; |
| -import org.apache.lucene.search.Scorer; |
| -import org.apache.lucene.search.Searcher; |
| -import org.apache.lucene.search.Similarity; |
| -import org.apache.lucene.search.Weight; |
| +import org.apache.lucene.index.TermDocs; |
| import org.apache.lucene.util.ToStringUtils; |
| |
| import java.util.Set; |
| +import java.io.IOException; |
| |
| /** |
| * A query that matches all documents. |
| @@ -38,17 +34,13 @@ |
| } |
| |
| private class MatchAllScorer extends Scorer { |
| - |
| - final IndexReader reader; |
| - int id; |
| - final int maxId; |
| + final TermDocs termDocs; |
| final float score; |
| |
| - MatchAllScorer(IndexReader reader, Similarity similarity, Weight w) { |
| + MatchAllScorer(IndexReader reader, Similarity similarity, Weight w) throws IOException |
| + { |
| super(similarity); |
| - this.reader = reader; |
| - id = -1; |
| - maxId = reader.maxDoc() - 1; |
| + this.termDocs = reader.termDocs(null); |
| score = w.getValue(); |
| } |
| |
| @@ -57,26 +49,19 @@ |
| } |
| |
| public int doc() { |
| - return id; |
| + return termDocs.doc(); |
| } |
| |
| - public boolean next() { |
| - while (id < maxId) { |
| - id++; |
| - if (!reader.isDeleted(id)) { |
| - return true; |
| - } |
| - } |
| - return false; |
| + public boolean next() throws IOException { |
| + return termDocs.next(); |
| } |
| |
| public float score() { |
| return score; |
| } |
| |
| - public boolean skipTo(int target) { |
| - id = target - 1; |
| - return next(); |
| + public boolean skipTo(int target) throws IOException { |
| + return termDocs.skipTo(target); |
| } |
| |
| } |
| @@ -112,7 +97,7 @@ |
| queryWeight *= this.queryNorm; |
| } |
| |
| - public Scorer scorer(IndexReader reader) { |
| + public Scorer scorer(IndexReader reader) throws IOException { |
| return new MatchAllScorer(reader, similarity, this); |
| } |
| |
| Index: src/java/org/apache/lucene/index/ParallelReader.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/ParallelReader.java (revision 737499) |
| +++ src/java/org/apache/lucene/index/ParallelReader.java (working copy) |
| @@ -523,7 +523,12 @@ |
| protected TermDocs termDocs; |
| |
| public ParallelTermDocs() {} |
| - public ParallelTermDocs(Term term) throws IOException { seek(term); } |
| + public ParallelTermDocs(Term term) throws IOException { |
| + if (term == null) |
| + termDocs = readers.isEmpty() ? null : ((IndexReader)readers.get(0)).termDocs(null); |
| + else |
| + seek(term); |
| + } |
| |
| public int doc() { return termDocs.doc(); } |
| public int freq() { return termDocs.freq(); } |
| Index: src/java/org/apache/lucene/index/SegmentReader.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/SegmentReader.java (revision 737499) |
| +++ src/java/org/apache/lucene/index/SegmentReader.java (working copy) |
| @@ -724,6 +724,14 @@ |
| return (deletedDocs != null && deletedDocs.get(n)); |
| } |
| |
| + public TermDocs termDocs(Term term) throws IOException { |
| + if (term == null) { |
| + return new AllTermDocs(this); |
| + } else { |
| + return super.termDocs(term); |
| + } |
| + } |
| + |
| public TermDocs termDocs() throws IOException { |
| ensureOpen(); |
| return new SegmentTermDocs(this); |
| Index: src/java/org/apache/lucene/index/SegmentTermDocs.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 737499) |
| +++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy) |
| @@ -46,7 +46,9 @@ |
| protected SegmentTermDocs(SegmentReader parent) { |
| this.parent = parent; |
| this.freqStream = (IndexInput) parent.freqStream.clone(); |
| - this.deletedDocs = parent.deletedDocs; |
| + synchronized (parent) { |
| + this.deletedDocs = parent.deletedDocs; |
| + } |
| this.skipInterval = parent.tis.getSkipInterval(); |
| this.maxSkipLevels = parent.tis.getMaxSkipLevels(); |
| } |
| Index: src/java/org/apache/lucene/index/AllTermDocs.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/AllTermDocs.java (revision 0) |
| +++ src/java/org/apache/lucene/index/AllTermDocs.java (revision 0) |
| @@ -0,0 +1,86 @@ |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +package org.apache.lucene.index; |
| + |
| +import org.apache.lucene.util.BitVector; |
| +import java.io.IOException; |
| + |
| +class AllTermDocs implements TermDocs { |
| + protected BitVector deletedDocs; |
| + protected int maxDoc; |
| + protected int doc = -1; |
| + |
| + protected AllTermDocs(SegmentReader parent) { |
| + synchronized (parent) { |
| + this.deletedDocs = parent.deletedDocs; |
| + } |
| + this.maxDoc = parent.maxDoc(); |
| + } |
| + |
| + public void seek(Term term) throws IOException { |
| + if (term==null) { |
| + doc = -1; |
| + } else { |
| + throw new UnsupportedOperationException(); |
| + } |
| + } |
| + |
| + public void seek(TermEnum termEnum) throws IOException { |
| + throw new UnsupportedOperationException(); |
| + } |
| + |
| + public int doc() { |
| + return doc; |
| + } |
| + |
| + public int freq() { |
| + return 1; |
| + } |
| + |
| + public boolean next() throws IOException { |
| + return skipTo(doc+1); |
| + } |
| + |
| + public int read(int[] docs, int[] freqs) throws IOException { |
| + final int length = docs.length; |
| + int i = 0; |
| + while (i < length && doc < maxDoc) { |
| + if (deletedDocs == null || !deletedDocs.get(doc)) { |
| + docs[i] = doc; |
| + freqs[i] = 1; |
| + ++i; |
| + } |
| + doc++; |
| + } |
| + return i; |
| + } |
| + |
| + public boolean skipTo(int target) throws IOException { |
| + doc = target; |
| + while (doc < maxDoc) { |
| + if (deletedDocs == null || !deletedDocs.get(doc)) { |
| + return true; |
| + } |
| + doc++; |
| + } |
| + return false; |
| + } |
| + |
| + public void close() throws IOException { |
| + } |
| +} |
| |
| Property changes on: src/java/org/apache/lucene/index/AllTermDocs.java |
| ___________________________________________________________________ |
| Added: svn:eol-style |
| + native |
| |
| Index: src/java/org/apache/lucene/index/FilterIndexReader.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/FilterIndexReader.java (revision 737499) |
| +++ src/java/org/apache/lucene/index/FilterIndexReader.java (working copy) |
| @@ -198,6 +198,11 @@ |
| return in.termDocs(); |
| } |
| |
| + public TermDocs termDocs(Term term) throws IOException { |
| + ensureOpen(); |
| + return in.termDocs(term); |
| + } |
| + |
| public TermPositions termPositions() throws IOException { |
| ensureOpen(); |
| return in.termPositions(); |
| Index: src/java/org/apache/lucene/index/IndexReader.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/IndexReader.java (revision 737499) |
| +++ src/java/org/apache/lucene/index/IndexReader.java (working copy) |
| @@ -796,7 +796,9 @@ |
| |
| /** Returns an enumeration of all the documents which contain |
| * <code>term</code>. For each document, the document number, the frequency of |
| - * the term in that document is also provided, for use in search scoring. |
| + * the term in that document is also provided, for use in |
| + * search scoring. If term is null, then all non-deleted |
| + * docs are returned with freq=1. |
| * Thus, this method implements the mapping: |
| * <p><ul> |
| * Term => <docNum, freq><sup>*</sup> |
| Index: src/java/org/apache/lucene/index/MultiSegmentReader.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/MultiSegmentReader.java (revision 737499) |
| +++ src/java/org/apache/lucene/index/MultiSegmentReader.java (working copy) |
| @@ -19,7 +19,6 @@ |
| |
| import java.io.IOException; |
| import java.util.Collection; |
| -import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| @@ -531,7 +530,7 @@ |
| |
| readerTermDocs = new TermDocs[r.length]; |
| } |
| - |
| + |
| public int doc() { |
| return base + current.doc(); |
| } |
| @@ -601,8 +600,6 @@ |
| } |
| |
| private TermDocs termDocs(int i) throws IOException { |
| - if (term == null) |
| - return null; |
| TermDocs result = readerTermDocs[i]; |
| if (result == null) |
| result = readerTermDocs[i] = termDocs(readers[i]); |
| @@ -612,7 +609,7 @@ |
| |
| protected TermDocs termDocs(IndexReader reader) |
| throws IOException { |
| - return reader.termDocs(); |
| + return term==null ? reader.termDocs(null) : reader.termDocs(); |
| } |
| |
| public void close() throws IOException { |
| Index: contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java |
| =================================================================== |
| --- contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (revision 737499) |
| +++ contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (working copy) |
| @@ -51,6 +51,7 @@ |
| import org.apache.lucene.search.Searcher; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMDirectory; |
| +import org.apache.lucene.index.TermDocs; |
| |
| /** |
| Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour, |
| @@ -282,7 +283,9 @@ |
| // new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, true, stopWords), |
| // new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS), |
| }; |
| - |
| + |
| + boolean first = true; |
| + |
| for (int iter=0; iter < iters; iter++) { |
| System.out.println("\n########### iteration=" + iter); |
| long start = System.currentTimeMillis(); |
| @@ -306,6 +309,18 @@ |
| boolean measureIndexing = false; // toggle this to measure query performance |
| MemoryIndex memind = null; |
| if (useMemIndex && !measureIndexing) memind = createMemoryIndex(doc); |
| + |
| + if (first) { |
| + IndexSearcher s = memind.createSearcher(); |
| + TermDocs td = s.getIndexReader().termDocs(null); |
| + assertTrue(td.next()); |
| + assertEquals(0, td.doc()); |
| + assertEquals(1, td.freq()); |
| + td.close(); |
| + s.close(); |
| + first = false; |
| + } |
| + |
| RAMDirectory ramind = null; |
| if (useRAMIndex && !measureIndexing) ramind = createRAMIndex(doc); |
| |
| Index: contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java |
| =================================================================== |
| --- contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 737499) |
| +++ contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy) |
| @@ -31,7 +31,6 @@ |
| import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.document.Document; |
| -import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldSelector; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.Term; |
| @@ -847,13 +846,19 @@ |
| private boolean hasNext; |
| private int cursor = 0; |
| private ArrayIntList current; |
| + private Term term; |
| |
| public void seek(Term term) { |
| + this.term = term; |
| if (DEBUG) System.err.println(".seek: " + term); |
| - Info info = getInfo(term.field()); |
| - current = info == null ? null : info.getPositions(term.text()); |
| - hasNext = (current != null); |
| - cursor = 0; |
| + if (term == null) { |
| + hasNext = true; // term==null means match all docs |
| + } else { |
| + Info info = getInfo(term.field()); |
| + current = info == null ? null : info.getPositions(term.text()); |
| + hasNext = (current != null); |
| + cursor = 0; |
| + } |
| } |
| |
| public void seek(TermEnum termEnum) { |
| @@ -867,7 +872,7 @@ |
| } |
| |
| public int freq() { |
| - int freq = current != null ? numPositions(current) : 0; |
| + int freq = current != null ? numPositions(current) : (term == null ? 1 : 0); |
| if (DEBUG) System.err.println(".freq: " + freq); |
| return freq; |
| } |