| Index: CHANGES.txt
|
| ===================================================================
|
| --- CHANGES.txt (revision 814707)
|
| +++ CHANGES.txt (working copy)
|
| @@ -434,6 +434,14 @@
|
| NativeFSLockFactory, we strongly recommend not to mix deprecated |
| and new API. (Uwe Schindler, Mike McCandless) |
| |
| + * LUCENE-1911: Added a new method isCacheable() to DocIdSet. This method |
| + should return true if the underlying implementation does not use disk |
| + I/O and is fast enough to be directly cached by CachingWrapperFilter. |
| + OpenBitSet, SortedVIntList, and DocIdBitSet are such candidates. |
| + The default implementation of the abstract DocIdSet class returns false. |
| + In this case, CachingWrapperFilter copies the DocIdSetIterator into an |
| + OpenBitSet for caching. (Uwe Schindler, Thomas Becker) |
| + |
| Bug fixes |
| |
| * LUCENE-1415: MultiPhraseQuery has incorrect hashCode() and equals() |
| Index: src/java/org/apache/lucene/search/CachingWrapperFilter.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/search/CachingWrapperFilter.java (revision 814707)
|
| +++ src/java/org/apache/lucene/search/CachingWrapperFilter.java (working copy)
|
| @@ -19,6 +19,7 @@
|
| |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.util.DocIdBitSet; |
| +import org.apache.lucene.util.OpenBitSetDISI; |
| import java.util.BitSet; |
| import java.util.WeakHashMap; |
| import java.util.Map; |
| @@ -75,10 +76,20 @@
|
| |
| /** Provide the DocIdSet to be cached, using the DocIdSet provided |
| * by the wrapped Filter. |
| - * This implementation returns the given DocIdSet. |
| + * <p>This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable} |
| + * returns <code>true</code>, else it copies the {@link DocIdSetIterator} into |
| + * an {@link OpenBitSetDISI}. |
| */ |
| - protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) { |
| - return docIdSet; |
| + protected DocIdSet docIdSetToCache(DocIdSet docIdSet, IndexReader reader) throws IOException { |
| + if (docIdSet.isCacheable()) { |
| + return docIdSet; |
| + } else { |
| + final DocIdSetIterator it = docIdSet.iterator(); |
| + // null is allowed to be returned by iterator(), |
| + // in this case we wrap with the empty set, |
| + // which is cacheable. |
| + return (it == null) ? DocIdSet.EMPTY_DOCIDSET : new OpenBitSetDISI(it, reader.maxDoc()); |
| + } |
| } |
| |
| public DocIdSet getDocIdSet(IndexReader reader) throws IOException { |
| Index: src/java/org/apache/lucene/search/DocIdSet.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/search/DocIdSet.java (revision 814707)
|
| +++ src/java/org/apache/lucene/search/DocIdSet.java (working copy)
|
| @@ -37,6 +37,10 @@
|
| public DocIdSetIterator iterator() { |
| return iterator; |
| } |
| + |
| + public boolean isCacheable() { |
| + return true; |
| + } |
| }; |
| |
| /** Provides a {@link DocIdSetIterator} to access the set. |
| @@ -44,4 +48,15 @@
|
| * <code>{@linkplain #EMPTY_DOCIDSET}.iterator()</code> if there |
| * are no docs that match. */ |
| public abstract DocIdSetIterator iterator() throws IOException; |
| + |
| + /** |
| + * This method is a hint for {@link CachingWrapperFilter}, whether this <code>DocIdSet</code> |
| + * should be cached without copying it into a BitSet. The default is to return |
| + * <code>false</code>. If you have your own <code>DocIdSet</code> implementation |
| + * that iterates very efficiently and quickly without doing disk I/O, |
| + * override this method and return <code>true</code>. |
| + */ |
| + public boolean isCacheable() { |
| + return false; |
| + } |
| } |
| Index: src/java/org/apache/lucene/search/FieldCacheRangeFilter.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (revision 814707)
|
| +++ src/java/org/apache/lucene/search/FieldCacheRangeFilter.java (working copy)
|
| @@ -476,6 +476,11 @@
|
| |
| /** this method checks, if a doc is a hit, should throw AIOBE, when position invalid */ |
| abstract boolean matchDoc(int doc) throws ArrayIndexOutOfBoundsException; |
| + |
| + /** this DocIdSet is cacheable if it works solely with the FieldCache and does not use TermDocs */ |
| + public boolean isCacheable() { |
| + return !(mayUseTermDocs && reader.hasDeletions()); |
| + } |
| |
| public DocIdSetIterator iterator() throws IOException { |
| // Synchronization needed because deleted docs BitVector |
| @@ -484,7 +489,7 @@
|
| // and the index has deletions |
| final TermDocs termDocs; |
| synchronized(reader) { |
| - termDocs = (mayUseTermDocs && reader.hasDeletions()) ? reader.termDocs(null) : null; |
| + termDocs = isCacheable() ? null : reader.termDocs(null); |
| } |
| if (termDocs != null) { |
| // a DocIdSetIterator using TermDocs to iterate valid docIds |
| Index: src/java/org/apache/lucene/search/FieldCacheTermsFilter.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (revision 814707)
|
| +++ src/java/org/apache/lucene/search/FieldCacheTermsFilter.java (working copy)
|
| @@ -130,6 +130,11 @@
|
| return new FieldCacheTermsFilterDocIdSetIterator(); |
| } |
| |
| + /** This DocIdSet implementation is cacheable. */ |
| + public boolean isCacheable() { |
| + return true; |
| + } |
| + |
| protected class FieldCacheTermsFilterDocIdSetIterator extends DocIdSetIterator { |
| private int doc = -1; |
| |
| Index: src/java/org/apache/lucene/search/FilteredDocIdSet.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/search/FilteredDocIdSet.java (revision 814707)
|
| +++ src/java/org/apache/lucene/search/FilteredDocIdSet.java (working copy)
|
| @@ -49,6 +49,11 @@
|
| _innerSet = innerSet; |
| } |
| |
| + /** This DocIdSet implementation is cacheable if the inner set is cacheable. */ |
| + public boolean isCacheable() { |
| + return _innerSet.isCacheable(); |
| + } |
| + |
| /** |
| * Validation method to determine whether a docid should be in the result set. |
| * @param docid docid to be tested |
| Index: src/java/org/apache/lucene/search/QueryWrapperFilter.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/search/QueryWrapperFilter.java (revision 814707)
|
| +++ src/java/org/apache/lucene/search/QueryWrapperFilter.java (working copy)
|
| @@ -74,6 +74,7 @@
|
| public DocIdSetIterator iterator() throws IOException { |
| return weight.scorer(reader, true, false); |
| } |
| + public boolean isCacheable() { return false; } |
| }; |
| } |
| |
| Index: src/java/org/apache/lucene/util/DocIdBitSet.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/util/DocIdBitSet.java (revision 814707)
|
| +++ src/java/org/apache/lucene/util/DocIdBitSet.java (working copy)
|
| @@ -34,6 +34,11 @@
|
| public DocIdSetIterator iterator() { |
| return new DocIdBitSetIterator(bitSet); |
| } |
| + |
| + /** This DocIdSet implementation is cacheable. */ |
| + public boolean isCacheable() { |
| + return true; |
| + } |
| |
| /** |
| * Returns the underlying BitSet. |
| Index: src/java/org/apache/lucene/util/OpenBitSet.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/util/OpenBitSet.java (revision 814707)
|
| +++ src/java/org/apache/lucene/util/OpenBitSet.java (working copy)
|
| @@ -116,6 +116,11 @@
|
| return new OpenBitSetIterator(bits, wlen); |
| } |
| |
| + /** This DocIdSet implementation is cacheable. */ |
| + public boolean isCacheable() { |
| + return true; |
| + } |
| + |
| /** Returns the current capacity in bits (1 greater than the index of the last bit) */ |
| public long capacity() { return bits.length << 6; } |
| |
| Index: src/java/org/apache/lucene/util/SortedVIntList.java
|
| ===================================================================
|
| --- src/java/org/apache/lucene/util/SortedVIntList.java (revision 814707)
|
| +++ src/java/org/apache/lucene/util/SortedVIntList.java (working copy)
|
| @@ -180,6 +180,11 @@
|
| return bytes.length; |
| } |
| |
| + /** This DocIdSet implementation is cacheable. */ |
| + public boolean isCacheable() { |
| + return true; |
| + } |
| + |
| /** |
| * @return An iterator over the sorted integers. |
| */ |
| Index: src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
|
| ===================================================================
|
| --- src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (revision 814707)
|
| +++ src/test/org/apache/lucene/search/TestCachingWrapperFilter.java (working copy)
|
| @@ -18,12 +18,18 @@
|
| */ |
| |
| import org.apache.lucene.util.LuceneTestCase; |
| +import org.apache.lucene.util.OpenBitSet; |
| +import org.apache.lucene.util.OpenBitSetDISI; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| +import org.apache.lucene.index.Term; |
| import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| |
| +import java.io.IOException; |
| +import java.util.BitSet; |
| + |
| public class TestCachingWrapperFilter extends LuceneTestCase { |
| public void testCachingWorks() throws Exception { |
| Directory dir = new RAMDirectory(); |
| @@ -50,4 +56,47 @@
|
| |
| reader.close(); |
| } |
| + |
| + private static void assertDocIdSetCacheable(IndexReader reader, Filter filter, boolean shouldCacheable) throws IOException { |
| + final CachingWrapperFilter cacher = new CachingWrapperFilter(filter); |
| + final DocIdSet originalSet = filter.getDocIdSet(reader); |
| + final DocIdSet cachedSet = cacher.getDocIdSet(reader); |
| + assertTrue(cachedSet.isCacheable()); |
| + assertEquals(shouldCacheable, originalSet.isCacheable()); |
| + //System.out.println("Original: "+originalSet.getClass().getName()+" -- cached: "+cachedSet.getClass().getName()); |
| + if (originalSet.isCacheable()) { |
| + assertEquals("Cached DocIdSet must be of same class like uncached, if cacheable", originalSet.getClass(), cachedSet.getClass()); |
| + } else { |
| + assertTrue("Cached DocIdSet must be an OpenBitSet if the original one was not cacheable", cachedSet instanceof OpenBitSetDISI); |
| + } |
| + } |
| + |
| + public void testIsCacheAble() throws Exception { |
| + Directory dir = new RAMDirectory(); |
| + IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| + writer.close(); |
| + |
| + IndexReader reader = IndexReader.open(dir); |
| + |
| + // not cacheable: |
| + assertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test","value"))), false); |
| + // returns default empty docidset, always cacheable: |
| + assertDocIdSetCacheable(reader, NumericRangeFilter.newIntRange("test", new Integer(10000), new Integer(-10000), true, true), true); |
| + // is cacheable: |
| + assertDocIdSetCacheable(reader, FieldCacheRangeFilter.newIntRange("test", new Integer(10), new Integer(20), true, true), true); |
| + // a openbitset filter is always cacheable |
| + assertDocIdSetCacheable(reader, new Filter() { |
| + public DocIdSet getDocIdSet(IndexReader reader) { |
| + return new OpenBitSet(); |
| + } |
| + }, true); |
| + // a deprecated filter is always cacheable |
| + assertDocIdSetCacheable(reader, new Filter() { |
| + public BitSet bits(IndexReader reader) { |
| + return new BitSet(); |
| + } |
| + }, true); |
| + |
| + reader.close(); |
| + } |
| } |
| Index: src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java
|
| ===================================================================
|
| --- src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java (revision 814707)
|
| +++ src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java (working copy)
|
| @@ -66,8 +66,9 @@
|
| Query q = new TermQuery(new Term("body","body")); |
| |
| // test id, bounded on both ends |
| - |
| - result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,T), numDocs).scoreDocs; |
| + FieldCacheRangeFilter fcrf; |
| + result = search.search(q,fcrf = FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,T), numDocs).scoreDocs; |
| + assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); |
| assertEquals("find all", numDocs, result.length); |
| |
| result = search.search(q,FieldCacheRangeFilter.newStringRange("id",minIP,maxIP,T,F), numDocs).scoreDocs; |
| @@ -212,8 +213,9 @@
|
| Query q = new TermQuery(new Term("body","body")); |
| |
| // test id, bounded on both ends |
| - |
| - result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs; |
| + FieldCacheRangeFilter fcrf; |
| + result = search.search(q,fcrf=FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs; |
| + assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); |
| assertEquals("find all", numDocs, result.length); |
| |
| result = search.search(q,FieldCacheRangeFilter.newShortRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs; |
| @@ -303,7 +305,9 @@
|
| |
| // test id, bounded on both ends |
| |
| - result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs; |
| + FieldCacheRangeFilter fcrf; |
| + result = search.search(q,fcrf=FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs; |
| + assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); |
| assertEquals("find all", numDocs, result.length); |
| |
| result = search.search(q,FieldCacheRangeFilter.newIntRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs; |
| @@ -393,7 +397,9 @@
|
| |
| // test id, bounded on both ends |
| |
| - result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs; |
| + FieldCacheRangeFilter fcrf; |
| + result = search.search(q,fcrf=FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,T), numDocs).scoreDocs; |
| + assertTrue(fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); |
| assertEquals("find all", numDocs, result.length); |
| |
| result = search.search(q,FieldCacheRangeFilter.newLongRange("id",minIdO,maxIdO,T,F), numDocs).scoreDocs; |
| @@ -523,4 +529,49 @@
|
| assertEquals("infinity special case", 0, result.length); |
| } |
| |
| + // test using a sparse index (with deleted docs). The DocIdSet should not be cacheable, as it uses TermDocs if the range contains 0 |
| + public void testSparseIndex() throws IOException { |
| + RAMDirectory dir = new RAMDirectory(); |
| + IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), T, IndexWriter.MaxFieldLength.LIMITED); |
| + |
| + for (int d = -20; d <= 20; d++) { |
| + Document doc = new Document(); |
| + doc.add(new Field("id",Integer.toString(d), Field.Store.NO, Field.Index.NOT_ANALYZED)); |
| + doc.add(new Field("body","body", Field.Store.NO, Field.Index.NOT_ANALYZED)); |
| + writer.addDocument(doc); |
| + } |
| + |
| + writer.optimize(); |
| + writer.deleteDocuments(new Term("id","0")); |
| + writer.close(); |
| + |
| + IndexReader reader = IndexReader.open(dir); |
| + IndexSearcher search = new IndexSearcher(reader); |
| + assertTrue(reader.hasDeletions()); |
| + |
| + ScoreDoc[] result; |
| + FieldCacheRangeFilter fcrf; |
| + Query q = new TermQuery(new Term("body","body")); |
| + |
| + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) -20),new Byte((byte) 20),T,T), 100).scoreDocs; |
| + assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); |
| + assertEquals("find all", 40, result.length); |
| + |
| + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) 0),new Byte((byte) 20),T,T), 100).scoreDocs; |
| + assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); |
| + assertEquals("find all", 20, result.length); |
| + |
| + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) -20),new Byte((byte) 0),T,T), 100).scoreDocs; |
| + assertFalse("DocIdSet must be not cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); |
| + assertEquals("find all", 20, result.length); |
| + |
| + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) 10),new Byte((byte) 20),T,T), 100).scoreDocs; |
| + assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); |
| + assertEquals("find all", 11, result.length); |
| + |
| + result = search.search(q,fcrf=FieldCacheRangeFilter.newByteRange("id",new Byte((byte) -20),new Byte((byte) -10),T,T), 100).scoreDocs; |
| + assertTrue("DocIdSet must be cacheable", fcrf.getDocIdSet(reader.getSequentialSubReaders()[0]).isCacheable()); |
| + assertEquals("find all", 11, result.length); |
| + } |
| + |
| } |
| Index: src/test/org/apache/lucene/search/TestQueryWrapperFilter.java
|
| ===================================================================
|
| --- src/test/org/apache/lucene/search/TestQueryWrapperFilter.java (revision 814707)
|
| +++ src/test/org/apache/lucene/search/TestQueryWrapperFilter.java (working copy)
|
| @@ -48,6 +48,8 @@
|
| IndexSearcher searcher = new IndexSearcher(dir, true); |
| TopDocs hits = searcher.search(new MatchAllDocsQuery(), qwf, 10); |
| assertEquals(1, hits.totalHits); |
| + hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10); |
| + assertEquals(1, hits.totalHits); |
| |
| // should not throw exception with complex primitive query |
| BooleanQuery booleanQuery = new BooleanQuery(); |
| @@ -58,6 +60,8 @@
|
| |
| hits = searcher.search(new MatchAllDocsQuery(), qwf, 10); |
| assertEquals(1, hits.totalHits); |
| + hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10); |
| + assertEquals(1, hits.totalHits); |
| |
| // should not throw exception with non primitive Query (doesn't implement |
| // Query#createWeight) |
| @@ -65,6 +69,15 @@
|
| |
| hits = searcher.search(new MatchAllDocsQuery(), qwf, 10); |
| assertEquals(1, hits.totalHits); |
| + hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10); |
| + assertEquals(1, hits.totalHits); |
| |
| + // test a query with no hits |
| + termQuery = new TermQuery(new Term("field", "not_exist")); |
| + qwf = new QueryWrapperFilter(termQuery); |
| + hits = searcher.search(new MatchAllDocsQuery(), qwf, 10); |
| + assertEquals(0, hits.totalHits); |
| + hits = searcher.search(new MatchAllDocsQuery(), new CachingWrapperFilter(qwf), 10); |
| + assertEquals(0, hits.totalHits); |
| } |
| } |