blob: 2c697fc8a0074cacd488b99c9a60256eaf301859 [file] [log] [blame]
Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 737499)
+++ CHANGES.txt (working copy)
@@ -145,6 +145,13 @@
4. LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length
is small compared to minSimilarity. (Timo Nentwig, Mark Miller)
+ 5. LUCENE-1316: MatchAllDocsQuery now avoids the synchronized
+ IndexReader.isDeleted() call per document, by directly accessing
the underlying deletedDocs BitVector. This improves performance
+ with non-readOnly readers, especially in a multi-threaded
+ environment. (Todd Feak, Yonik Seeley, Jason Rutherglen via Mike
+ McCandless)
+
Documentation
Build
Index: src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java
===================================================================
--- src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java (revision 737499)
+++ src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java (working copy)
@@ -36,6 +36,7 @@
public void testQuery() throws IOException {
RAMDirectory dir = new RAMDirectory();
IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+ iw.setMaxBufferedDocs(2); // force multi-segment
addDoc("one", iw);
addDoc("two", iw);
addDoc("three four", iw);
Index: src/test/org/apache/lucene/index/TestMultiSegmentReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestMultiSegmentReader.java (revision 737499)
+++ src/test/org/apache/lucene/index/TestMultiSegmentReader.java (working copy)
@@ -149,6 +149,19 @@
mr.close();
}
+ public void testAllTermDocs() throws IOException { // termDocs(null): first docs come back in order, freq 1
+ final int expectedDocs = 2;
+ IndexReader reader = openReader();
+ TermDocs allDocs = reader.termDocs(null);
+ for (int expectedDoc = 0; expectedDoc < expectedDocs; expectedDoc++) {
+ assertTrue(allDocs.next());
+ assertEquals(expectedDoc, allDocs.doc());
+ assertEquals(1, allDocs.freq());
+ }
+ allDocs.close();
+ reader.close();
+ }
+
private void addDoc(RAMDirectory ramDir1, String s, boolean create) throws IOException {
IndexWriter iw = new IndexWriter(ramDir1, new StandardAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
Index: src/test/org/apache/lucene/index/TestFilterIndexReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestFilterIndexReader.java (revision 737499)
+++ src/test/org/apache/lucene/index/TestFilterIndexReader.java (working copy)
@@ -125,6 +125,15 @@
assertTrue((positions.doc() % 2) == 1);
}
+ int NUM_DOCS = 3;
+
+ TermDocs td = reader.termDocs(null);
+ for(int i=0;i<NUM_DOCS;i++) {
+ assertTrue(td.next());
+ assertEquals(i, td.doc());
+ assertEquals(1, td.freq());
+ }
+ td.close();
reader.close();
directory.close();
}
Index: src/test/org/apache/lucene/index/TestParallelReader.java
===================================================================
--- src/test/org/apache/lucene/index/TestParallelReader.java (revision 737499)
+++ src/test/org/apache/lucene/index/TestParallelReader.java (working copy)
@@ -123,7 +123,7 @@
public void testIsCurrent() throws IOException {
Directory dir1 = getDir1();
- Directory dir2 = getDir1();
+ Directory dir2 = getDir2();
ParallelReader pr = new ParallelReader();
pr.add(IndexReader.open(dir1));
pr.add(IndexReader.open(dir2));
@@ -147,7 +147,7 @@
public void testIsOptimized() throws IOException {
Directory dir1 = getDir1();
- Directory dir2 = getDir1();
+ Directory dir2 = getDir2();
// add another document to ensure that the indexes are not optimized
IndexWriter modifier = new IndexWriter(dir1, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
@@ -194,6 +194,25 @@
}
+ public void testAllTermDocs() throws IOException { // termDocs(null) on a ParallelReader: docs in order, freq 1
+ final int expectedDocs = 2;
+ Directory firstDir = getDir1();
+ Directory secondDir = getDir2();
+ ParallelReader parallelReader = new ParallelReader();
+ parallelReader.add(IndexReader.open(firstDir));
+ parallelReader.add(IndexReader.open(secondDir));
+ TermDocs allDocs = parallelReader.termDocs(null);
+ for (int expectedDoc = 0; expectedDoc < expectedDocs; expectedDoc++) {
+ assertTrue(allDocs.next());
+ assertEquals(expectedDoc, allDocs.doc());
+ assertEquals(1, allDocs.freq());
+ }
+ allDocs.close();
+ parallelReader.close();
+ firstDir.close();
+ secondDir.close();
+ }
+
private void queryTest(Query query) throws IOException {
ScoreDoc[] parallelHits = parallel.search(query, null, 1000).scoreDocs;
Index: src/java/org/apache/lucene/search/MatchAllDocsQuery.java
===================================================================
--- src/java/org/apache/lucene/search/MatchAllDocsQuery.java (revision 737499)
+++ src/java/org/apache/lucene/search/MatchAllDocsQuery.java (working copy)
@@ -18,15 +18,11 @@
*/
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Searcher;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.Weight;
+import org.apache.lucene.index.TermDocs;
import org.apache.lucene.util.ToStringUtils;
import java.util.Set;
+import java.io.IOException;
/**
* A query that matches all documents.
@@ -38,17 +34,13 @@
}
private class MatchAllScorer extends Scorer {
-
- final IndexReader reader;
- int id;
- final int maxId;
+ final TermDocs termDocs;
final float score;
- MatchAllScorer(IndexReader reader, Similarity similarity, Weight w) {
+ MatchAllScorer(IndexReader reader, Similarity similarity, Weight w) throws IOException
+ {
super(similarity);
- this.reader = reader;
- id = -1;
- maxId = reader.maxDoc() - 1;
+ this.termDocs = reader.termDocs(null);
score = w.getValue();
}
@@ -57,26 +49,19 @@
}
public int doc() {
- return id;
+ return termDocs.doc();
}
- public boolean next() {
- while (id < maxId) {
- id++;
- if (!reader.isDeleted(id)) {
- return true;
- }
- }
- return false;
+ public boolean next() throws IOException {
+ return termDocs.next();
}
public float score() {
return score;
}
- public boolean skipTo(int target) {
- id = target - 1;
- return next();
+ public boolean skipTo(int target) throws IOException {
+ return termDocs.skipTo(target);
}
}
@@ -112,7 +97,7 @@
queryWeight *= this.queryNorm;
}
- public Scorer scorer(IndexReader reader) {
+ public Scorer scorer(IndexReader reader) throws IOException {
return new MatchAllScorer(reader, similarity, this);
}
Index: src/java/org/apache/lucene/index/ParallelReader.java
===================================================================
--- src/java/org/apache/lucene/index/ParallelReader.java (revision 737499)
+++ src/java/org/apache/lucene/index/ParallelReader.java (working copy)
@@ -523,7 +523,12 @@
protected TermDocs termDocs;
public ParallelTermDocs() {}
- public ParallelTermDocs(Term term) throws IOException { seek(term); }
+ public ParallelTermDocs(Term term) throws IOException { // null term: use the first sub-reader's termDocs(null)
+ if (term != null)
+ seek(term);
+ else if (!readers.isEmpty()) // with no sub-readers, termDocs keeps its default null
+ termDocs = ((IndexReader)readers.get(0)).termDocs(null);
+ }
public int doc() { return termDocs.doc(); }
public int freq() { return termDocs.freq(); }
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentReader.java (revision 737499)
+++ src/java/org/apache/lucene/index/SegmentReader.java (working copy)
@@ -724,6 +724,14 @@
return (deletedDocs != null && deletedDocs.get(n));
}
+ public TermDocs termDocs(Term term) throws IOException { // null term: enumerate all non-deleted docs
+ if (term != null) {
+ return super.termDocs(term);
+ }
+ ensureOpen(); // same check termDocs() makes before creating its enumerator
+ return new AllTermDocs(this);
+ }
+
public TermDocs termDocs() throws IOException {
ensureOpen();
return new SegmentTermDocs(this);
Index: src/java/org/apache/lucene/index/SegmentTermDocs.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentTermDocs.java (revision 737499)
+++ src/java/org/apache/lucene/index/SegmentTermDocs.java (working copy)
@@ -46,7 +46,9 @@
protected SegmentTermDocs(SegmentReader parent) {
this.parent = parent;
this.freqStream = (IndexInput) parent.freqStream.clone();
- this.deletedDocs = parent.deletedDocs;
+ synchronized (parent) {
+ this.deletedDocs = parent.deletedDocs;
+ }
this.skipInterval = parent.tis.getSkipInterval();
this.maxSkipLevels = parent.tis.getMaxSkipLevels();
}
Index: src/java/org/apache/lucene/index/AllTermDocs.java
===================================================================
--- src/java/org/apache/lucene/index/AllTermDocs.java (revision 0)
+++ src/java/org/apache/lucene/index/AllTermDocs.java (revision 0)
@@ -0,0 +1,125 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import org.apache.lucene.util.BitVector;
+import java.io.IOException;
+
+/**
+ * Enumerates every non-deleted document of a single segment, reporting a
+ * frequency of 1 for each. {@link SegmentReader#termDocs(Term)} returns an
+ * instance of this class when it is given a <code>null</code> term.
+ */
+class AllTermDocs implements TermDocs {
+  /** Deleted-document bits; null means no document is treated as deleted. */
+  protected BitVector deletedDocs;
+  /** Exclusive upper bound of the document numbers that are scanned. */
+  protected int maxDoc;
+  /** Current document, or -1 while the enumeration is unpositioned. */
+  protected int doc = -1;
+
+  protected AllTermDocs(SegmentReader parent) {
+    // Read the parent's deletedDocs reference once, while holding the
+    // parent's monitor; iteration then works on this local reference.
+    synchronized (parent) {
+      this.deletedDocs = parent.deletedDocs;
+    }
+    this.maxDoc = parent.maxDoc();
+  }
+
+  /**
+   * Rewinds to the unpositioned state. Only a <code>null</code> term is
+   * accepted.
+   *
+   * @throws UnsupportedOperationException if term is not null
+   */
+  public void seek(Term term) throws IOException {
+    if (term==null) {
+      doc = -1;
+    } else {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /** Not supported; always throws UnsupportedOperationException. */
+  public void seek(TermEnum termEnum) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Returns the current document number (-1 before the first advance). */
+  public int doc() {
+    return doc;
+  }
+
+  /** Always 1. */
+  public int freq() {
+    return 1;
+  }
+
+  /** Moves to the next non-deleted document after the current one. */
+  public boolean next() throws IOException {
+    return skipTo(doc+1);
+  }
+
+  /**
+   * Stores up to docs.length non-deleted documents that follow the current
+   * one into docs, sets the matching freqs entries to 1, and leaves the
+   * enumeration positioned on the last document stored.
+   *
+   * @return the number of entries stored
+   */
+  public int read(int[] docs, int[] freqs) throws IOException {
+    final int length = docs.length;
+    int count = 0;
+    // doc is the document last handed out (-1 if none yet), so resume right
+    // after it. Scanning from doc itself would test and emit document -1 on
+    // a fresh enumeration and would repeat the current document after next().
+    int candidate = doc < maxDoc ? doc + 1 : maxDoc;
+    while (count < length && candidate < maxDoc) {
+      if (deletedDocs == null || !deletedDocs.get(candidate)) {
+        docs[count] = candidate;
+        freqs[count] = 1;
+        ++count;
+        // Remember the last stored doc so a later next() does not skip one.
+        doc = candidate;
+      }
+      candidate++;
+    }
+    return count;
+  }
+
+  /**
+   * Positions on the first non-deleted document numbered target or higher.
+   *
+   * @return false if no such document exists below maxDoc
+   */
+  public boolean skipTo(int target) throws IOException {
+    doc = target;
+    while (doc < maxDoc) {
+      if (deletedDocs == null || !deletedDocs.get(doc)) {
+        return true;
+      }
+      doc++;
+    }
+    return false;
+  }
+
+  /** No resources are held, so there is nothing to release. */
+  public void close() throws IOException {
+  }
+}
Property changes on: src/java/org/apache/lucene/index/AllTermDocs.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: src/java/org/apache/lucene/index/FilterIndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/FilterIndexReader.java (revision 737499)
+++ src/java/org/apache/lucene/index/FilterIndexReader.java (working copy)
@@ -198,6 +198,11 @@
return in.termDocs();
}
+ public TermDocs termDocs(Term term) throws IOException { // delegating override, like termDocs() above
+ ensureOpen();
+ return in.termDocs(term); // term is forwarded unchanged to the wrapped reader
+ }
+
public TermPositions termPositions() throws IOException {
ensureOpen();
return in.termPositions();
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
--- src/java/org/apache/lucene/index/IndexReader.java (revision 737499)
+++ src/java/org/apache/lucene/index/IndexReader.java (working copy)
@@ -796,7 +796,9 @@
/** Returns an enumeration of all the documents which contain
* <code>term</code>. For each document, the document number, the frequency of
- * the term in that document is also provided, for use in search scoring.
+ * the term in that document is also provided, for use in
+ * search scoring. If term is null, then all non-deleted
+ * docs are returned with freq=1.
* Thus, this method implements the mapping:
* <p><ul>
* Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
Index: src/java/org/apache/lucene/index/MultiSegmentReader.java
===================================================================
--- src/java/org/apache/lucene/index/MultiSegmentReader.java (revision 737499)
+++ src/java/org/apache/lucene/index/MultiSegmentReader.java (working copy)
@@ -19,7 +19,6 @@
import java.io.IOException;
import java.util.Collection;
-import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@@ -531,7 +530,7 @@
readerTermDocs = new TermDocs[r.length];
}
-
+
public int doc() {
return base + current.doc();
}
@@ -601,8 +600,6 @@
}
private TermDocs termDocs(int i) throws IOException {
- if (term == null)
- return null;
TermDocs result = readerTermDocs[i];
if (result == null)
result = readerTermDocs[i] = termDocs(readers[i]);
@@ -612,7 +609,7 @@
protected TermDocs termDocs(IndexReader reader)
throws IOException {
- return reader.termDocs();
+ return term==null ? reader.termDocs(null) : reader.termDocs();
}
public void close() throws IOException {
Index: contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
===================================================================
--- contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (revision 737499)
+++ contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (working copy)
@@ -51,6 +51,7 @@
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.index.TermDocs;
/**
Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
@@ -282,7 +283,9 @@
// new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, true, stopWords),
// new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS),
};
-
+
+ boolean first = true;
+
for (int iter=0; iter < iters; iter++) {
System.out.println("\n########### iteration=" + iter);
long start = System.currentTimeMillis();
@@ -306,6 +309,18 @@
boolean measureIndexing = false; // toggle this to measure query performance
MemoryIndex memind = null;
if (useMemIndex && !measureIndexing) memind = createMemoryIndex(doc);
+
+ if (first) {
+ IndexSearcher s = memind.createSearcher();
+ TermDocs td = s.getIndexReader().termDocs(null);
+ assertTrue(td.next());
+ assertEquals(0, td.doc());
+ assertEquals(1, td.freq());
+ td.close();
+ s.close();
+ first = false;
+ }
+
RAMDirectory ramind = null;
if (useRAMIndex && !measureIndexing) ramind = createRAMIndex(doc);
Index: contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
===================================================================
--- contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (revision 737499)
+++ contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (working copy)
@@ -31,7 +31,6 @@
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -847,13 +846,19 @@
private boolean hasNext;
private int cursor = 0;
private ArrayIntList current;
+ private Term term;
public void seek(Term term) {
+ this.term = term;
if (DEBUG) System.err.println(".seek: " + term);
- Info info = getInfo(term.field());
- current = info == null ? null : info.getPositions(term.text());
- hasNext = (current != null);
- cursor = 0;
+ // term==null means match all docs; otherwise the term's postings are
+ // looked up. current and cursor are reset on every seek, the null case
+ // included, so that postings left over from an earlier seek on this
+ // enumerator cannot be reported by freq() for the new term.
+ Info info = (term == null) ? null : getInfo(term.field());
+ current = (info == null) ? null : info.getPositions(term.text());
+ hasNext = (term == null) || (current != null);
+ cursor = 0;
}
public void seek(TermEnum termEnum) {
@@ -867,7 +872,7 @@
}
public int freq() {
- int freq = current != null ? numPositions(current) : 0;
+ int freq = current != null ? numPositions(current) : (term == null ? 1 : 0);
if (DEBUG) System.err.println(".freq: " + freq);
return freq;
}