Index: CHANGES.txt
===================================================================
--- CHANGES.txt (revision 814707)
+++ CHANGES.txt (working copy)
@@ -434,6 +434,14 @@
NativeFSLockFactory, we strongly recommend not to mix deprecated
and new API. (Uwe Schindler, Mike McCandless)
+ * LUCENE-1911: Added a new method isCacheable() to DocIdSet. This method
+ should return true if the underlying implementation does not use disk
+ I/O and is fast enough to be directly cached by CachingWrapperFilter.
+ OpenBitSet, SortedVIntList, and DocIdBitSet are such candidates.
+ The default implementation of the abstract DocIdSet class returns false.
+ In this case, CachingWrapperFilter copies the DocIdSetIterator into an
+ OpenBitSet for caching. (Uwe Schindler, Thomas Becker)
+
Bug fixes
* LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals()
Index: src/java/org/apache/lucene/search/CachingWrapperFilter.java
===================================================================
--- src/java/org/apache/lucene/search/CachingWrapperFilter.java (revision 814707)
+++ src/java/org/apache/lucene/search/CachingWrapperFilter.java (working copy)
@@ -19,6 +19,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.DocIdBitSet;
+import org.apache.lucene.util.OpenBitSetDISI;
import java.util.BitSet;
import java.util.WeakHashMap;
import java.util.Map;
@@ -75,10 +76,20 @@
/** Provide the DocIdSet to be cached, using the DocIdSet provided
* by the wrapped Filter.
- * This implementation returns the given DocIdSet.
+ * <p>This implementation returns the given {@link DocIdSet} if {@link DocIdSet#isCacheable}
+ * returns <code>true</code>; otherwise it copies the {@link DocIdSetIterator} into
+ * an {@link OpenBitSetDISI}.
*/
- protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) {
- return docIdSet;
+ protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException {
+ if (docIdSet.isCacheable()) {
+ return docIdSet;
+ } else {
+ final DocIdSetIterator it = docIdSet.iterator();
+ // null is allowed to be returned by iterator(),
+ // in this case we wrap with the empty set,
+ // which is cacheable.
+ return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.maxDoc());
+ }
}
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
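For illustration, here is a minimal usage sketch (not part of the patch) of the caching path above: QueryWrapperFilter's DocIdSet reports isCacheable() == false, so CachingWrapperFilter copies its iterator into an OpenBitSetDISI on the first call and serves the cached bit set for subsequent calls on the same reader. The class name, field name, and term value below are hypothetical.

    // Sketch only: assumes an already-open IndexReader; "field"/"value" are placeholders.
    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.CachingWrapperFilter;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.QueryWrapperFilter;
    import org.apache.lucene.search.TermQuery;

    public class CachingWrapperFilterSketch {
      public static DocIdSet cachedDocs(IndexReader reader) throws IOException {
        // The wrapped filter's DocIdSet is not cacheable (it pulls postings from disk),
        // so docIdSetToCache() copies it into an OpenBitSetDISI and caches that instead.
        QueryWrapperFilter uncached = new QueryWrapperFilter(new TermQuery(new Term("field", "value")));
        CachingWrapperFilter cacher = new CachingWrapperFilter(uncached);
        DocIdSet cached = cacher.getDocIdSet(reader);
        // cached.isCacheable() is now true, regardless of the wrapped filter.
        return cached;
      }
    }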
Index: src/java/org/apache/lucene/search/DocIdSet.java
===================================================================
--- src/java/org/apache/lucene/search/DocIdSet.java (revision 814707)
+++ src/java/org/apache/lucene/search/DocIdSet.java (working copy)
@@ -37,6 +37,10 @@
public DocIdSetIterator iterator() {
return iterator;
}
+
+ public boolean isCacheable() {
+ return true;
+ }
};
/** Provides a {@link DocIdSetIterator} to access the set.
@@ -44,4 +48,15 @@
* <code>{@linkplain #EMPTY_DOCIDSET}.iterator()</code> if there
* are no docs that match. */
public abstract DocIdSetIterator iterator() throws IOException;
+
+ /**
+ * This method is a hint for {@link CachingWrapperFilter} as to whether this <code>DocIdSet</code>
+ * should be cached without copying it into a BitSet. The default is to return
+ * <code>false</code>. If you have your own <code>DocIdSet</code> implementation
+ * that iterates very efficiently without doing any disk I/O,
+ * override this method and return <code>true</code>.
+ */
+ public boolean isCacheable() {
+ return false;
+ }
}
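For the new isCacheable() hint above, a minimal sketch (not part of the patch) of a custom DocIdSet that opts into direct caching; the class name and constructor argument are hypothetical, and the implementation simply delegates to a SortedVIntList held in memory.

    // Sketch only: a RAM-backed DocIdSet; "sortedDocIds" must be sorted ascending.
    import java.io.IOException;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.SortedVIntList;

    public class InMemoryDocIdSet extends DocIdSet {
      private final SortedVIntList docs;

      public InMemoryDocIdSet(int[] sortedDocIds) {
        this.docs = new SortedVIntList(sortedDocIds);
      }

      public DocIdSetIterator iterator() throws IOException {
        return docs.iterator();
      }

      /** No disk I/O and cheap iteration, so CachingWrapperFilter may cache this set as-is. */
      public boolean isCacheable() {
        return true;
      }
    }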
Index: src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
===================================================================
--- src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (revision 814707)
+++ src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (working copy)
@@ -476,6 +476,11 @@
/** this method checks, if a doc is a hit, should throw AIOBE, when position invalid */
abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException;
+
+ /** this DocIdSet is cacheable if it works solely with the FieldCache and does not use TermDocs */
+ public boolean isCacheable() {
+ return !(mayUseTermDocs && reader.hasDeletions());
+ }
public DocIdSetIterator iterator() throws IOException {
// Synchronization needed because deleted docs BitVector
@@ -484,7 +489,7 @@
// and the index has deletions
final TermDocs termDocs;
synchronized(reader) {
- termDocs = (mayUseTermDocs && reader.hasDeletions()) ? reader.termDocs(null) : null;
+ termDocs = isCacheable() ? null : reader.termDocs(null);
}
if (termDocs != null) {
// a DocIdSetIterator using TermDocs to iterate valid docIds
Index: src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
===================================================================
--- src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (revision 814707)
+++ src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (working copy)
@@ -130,6 +130,11 @@
return new FieldCacheTermsFilterDocIdSetIterator();
}
+ /** This DocIdSet implementation is cacheable. */
+ public boolean isCacheable() {
+ return true;
+ }
+
protected class FieldCacheTermsFilterDocIdSetIterator extends DocIdSetIterator {
private int doc = -1;
Index: src/java/org/apache/lucene/search/FilteredDocIdSet.java
===================================================================
--- src/java/org/apache/lucene/search/FilteredDocIdSet.java (revision 814707)
+++ src/java/org/apache/lucene/search/FilteredDocIdSet.java (working copy)
@@ -49,6 +49,11 @@
_innerSet = innerSet;
}
+ /** This DocIdSet implementation is cacheable if the inner set is cacheable. */
+ public boolean isCacheable() {
+ return _innerSet.isCacheable();
+ }
+
/**
* Validation method to determine whether a docid should be in the result set.
* @param docid docid to be tested
Index: src/java/org/apache/lucene/search/QueryWrapperFilter.java
===================================================================
--- src/java/org/apache/lucene/search/QueryWrapperFilter.java (revision 814707)
+++ src/java/org/apache/lucene/search/QueryWrapperFilter.java (working copy)
@@ -74,6 +74,7 @@
public DocIdSetIterator iterator() throws IOException {
return weight.scorer(reader, true, false);
}
+ public boolean isCacheable() { return false; }
};
}
Index: src/java/org/apache/lucene/util/DocIdBitSet.java
===================================================================
--- src/java/org/apache/lucene/util/DocIdBitSet.java (revision 814707)
+++ src/java/org/apache/lucene/util/DocIdBitSet.java (working copy)
@@ -34,6 +34,11 @@
public DocIdSetIterator iterator() {
return new DocIdBitSetIterator(bitSet);
}
+
+ /** This DocIdSet implementation is cacheable. */
+ public boolean isCacheable() {
+ return true;
+ }
/**
* Returns the underlying BitSet.
Index: src/java/org/apache/lucene/util/OpenBitSet.java
===================================================================
--- src/java/org/apache/lucene/util/OpenBitSet.java (revision 814707)
+++ src/java/org/apache/lucene/util/OpenBitSet.java (working copy)
@@ -116,6 +116,11 @@
return new OpenBitSetIterator(bits, wlen);
}
+ /** This DocIdSet implementation is cacheable. */
+ public boolean isCacheable() {
+ return true;
+ }
+
/** Returns the current capacity in bits (1 greater than the index of the last bit) */
public long capacity() { return bits.length << 6; }
Index: src/java/org/apache/lucene/util/SortedVIntList.java
===================================================================
--- src/java/org/apache/lucene/util/SortedVIntList.java (revision 814707)
+++ src/java/org/apache/lucene/util/SortedVIntList.java (working copy)
@@ -180,6 +180,11 @@
return bytes.length;
}
+ /** This DocIdSet implementation is cacheable. */
+ public boolean isCacheable() {
+ return true;
+ }
+
/**
* @return An iterator over the sorted integers.
*/
Index: src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
===================================================================
--- src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (revision 814707)
+++ src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (working copy)
@@ -18,12 +18,18 @@
*/
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import java.io.IOException;
+import java.util.BitSet;
+
public class TestCachingWrapperFilter extends LuceneTestCase {
public void testCachingWorks() throws Exception {
Directory dir = new RAMDirectory();
@@ -50,4 +56,47 @@
reader.close();
}
+
+ private static void assertDocIdSetCacheable(IndexReader reader, Filter filter, boolean shouldCacheable) throws IOException {
+ final CachingWrapperFilter cacher = new CachingWrapperFilter(filter);
+ final DocIdSet originalSet = filter.getDocIdSet(reader);
+ final DocIdSet cachedSet = cacher.getDocIdSet(reader);
+ assertTrue(cachedSet.isCacheable());
+ assertEquals(shouldCacheable, originalSet.isCacheable());
+ //System.out.println("Original: "+originalSet.getClass().getName()+" -- cached: "+cachedSet.getClass().getName());
+ if (originalSet.isCacheable()) {
+ assertEquals("Cached DocIdSet must be of same class like uncached, if cacheable", originalSet.getClass(), cachedSet.getClass());
+ } else {
+ assertTrue("Cached DocIdSet must be an OpenBitSet if the original one was not cacheable", cachedSet instanceof OpenBitSetDISI);
+ }
+ }
+
+ public void testIsCacheAble() throws Exception {
+ Directory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+ writer.close();
+
+ IndexReader reader = IndexReader.open(dir);
+
+ // not cacheable:
+ assertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test","value"))), false);
+ // returns default empty docidset, always cacheable:
+ assertDocIdSetCacheable(reader, NumericRangeFilter.newIntRange("test", new Integer(10000), new Integer(-10000), true, true), true);
+ // is cacheable:
+ assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", new Integer(10), new Integer(20), true, true), true);
+ // an OpenBitSet filter is always cacheable
+ assertDocIdSetCacheable(reader, new Filter() {
+ public DocIdSet getDocIdSet(IndexReader reader) {
+ return new OpenBitSet();
+ }
+ }, true);
+ // a deprecated filter is always cacheable
+ assertDocIdSetCacheable(reader, new Filter() {
+ public BitSet bits(IndexReader reader) {
+ return new BitSet();
+ }
+ }, true);
+
+ reader.close();
+ }
}
Index: src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java
===================================================================
--- src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java (revision 814707)
+++ src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java (working copy)
@@ -66,8 +66,9 @@
Query q = new TermQuery(new Term("body","body"));
// test id, bounded on both ends
-
- result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,T), numDocs).scoreDocs;
+ FieldCacheRangeFilter fcrf;
+ result = search.search(q,fcrf = FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,T), numDocs).scoreDocs;
+ assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,F), numDocs).scoreDocs;
@@ -212,8 +213,9 @@
Query q = new TermQuery(new Term("body","body"));
// test id, bounded on both ends
-
- result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+ FieldCacheRangeFilter fcrf;
+ result = search.search(q,fcrf=FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+ assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs;
@@ -303,7 +305,9 @@
// test id, bounded on both ends
- result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+ FieldCacheRangeFilter fcrf;
+ result = search.search(q,fcrf=FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+ assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs;
@@ -393,7 +397,9 @@
// test id, bounded on both ends
- result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+ FieldCacheRangeFilter fcrf;
+ result = search.search(q,fcrf=FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs;
+ assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
assertEquals("find all", numDocs, result.length);
result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs;
@@ -523,4 +529,49 @@
assertEquals("infinity special case", 0, result.length);
}
+ // test using a sparse index (with deleted docs). The DocIdSet should not be cacheable, as it uses TermDocs if the range contains 0
+ public void testSparseIndex() throws IOException {
+ RAMDirectory dir = new RAMDirectory();
+ IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED);
+
+ for (int d = -20; d <= 20; d++) {
+ Document doc = new Document();
+ doc.add(new Field("id",Integer.toString(d), Field.Store.NO, Field.Index.NOT_ANALYZED));
+ doc.add(new Field("body","body", Field.Store.NO, Field.Index.NOT_ANALYZED));
+ writer.addDocument(doc);
+ }
+
+ writer.optimize();
+ writer.deleteDocuments(new Term("id","0"));
+ writer.close();
+
+ IndexReader reader = IndexReader.open(dir);
+ IndexSearcher search = new IndexSearcher(reader);
+ assertTrue(reader.hasDeletions());
+
+ ScoreDoc[] result;
+ FieldCacheRangeFilter fcrf;
+ Query q = new TermQuery(new Term("body","body"));
+
+ result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) -20),new Byte((byte) 20),T,T), 100).scoreDocs;
+ assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+ assertEquals("find all", 40, result.length);
+
+ result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) 0),new Byte((byte) 20),T,T), 100).scoreDocs;
+ assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+ assertEquals("find all", 20, result.length);
+
+ result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) -20),new Byte((byte) 0),T,T), 100).scoreDocs;
+ assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+ assertEquals("find all", 20, result.length);
+
+ result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) 10),new Byte((byte) 20),T,T), 100).scoreDocs;
+ assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+ assertEquals("find all", 11, result.length);
+
+ result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) -20),new Byte((byte) -10),T,T), 100).scoreDocs;
+ assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable());
+ assertEquals("find all", 11, result.length);
+ }
+
}
Index: src/test/org/apache/lucene/search/TestQueryWrapperFilter.java
===================================================================
--- src/test/org/apache/lucene/search/TestQueryWrapperFilter.java (revision 814707)
+++ src/test/org/apache/lucene/search/TestQueryWrapperFilter.java (working copy)
@@ -48,6 +48,8 @@
IndexSearcher searcher = new IndexSearcher(dir, true);
TopDocs hits = searcher.search(new MatchAllDocsQuery(), qwf, 10);
assertEquals(1, hits.totalHits);
+ hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
+ assertEquals(1, hits.totalHits);
// should not throw exception with complex primitive query
BooleanQuery booleanQuery = new BooleanQuery();
@@ -58,6 +60,8 @@
hits = searcher.search(new MatchAllDocsQuery(), qwf, 10);
assertEquals(1, hits.totalHits);
+ hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
+ assertEquals(1, hits.totalHits);
// should not throw exception with non primitive Query (doesn't implement
// Query#createWeight)
@@ -65,6 +69,15 @@
hits = searcher.search(new MatchAllDocsQuery(), qwf, 10);
assertEquals(1, hits.totalHits);
+ hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
+ assertEquals(1, hits.totalHits);
+ // test a query with no hits
+ termQuery = new TermQuery(new Term("field", "not_exist"));
+ qwf = new QueryWrapperFilter(termQuery);
+ hits = searcher.search(new MatchAllDocsQuery(), qwf, 10);
+ assertEquals(0, hits.totalHits);
+ hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10);
+ assertEquals(0, hits.totalHits);
}
}