| Index: src/java/org/apache/lucene/search/Filter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/Filter.java (revision 887181) |
| +++ src/java/org/apache/lucene/search/Filter.java (working copy) |
| @@ -22,12 +22,28 @@ |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.util.DocIdBitSet; |
| |
| -/** Abstract base class providing a mechanism to use a subset of an index |
| +/** |
| + * Abstract base class providing a mechanism to use a subset of an index |
| * for restriction or permission of index search results. |
| * <p> |
| */ |
| public abstract class Filter implements java.io.Serializable { |
| + |
| /** |
| + * Creates a {@link DocIdSet} that provides the documents which should be |
| + * permitted or prohibited in search results. <b>NOTE:</b> null can be |
| + * returned if no documents will be accepted by this Filter. |
| + * <p> |
| + * Note: This method might be called more than once during a search if the |
| + * index has more than one segment. In such a case the {@link DocIdSet} |
| + * must be relative to the document base of the given reader. The |
| + * segment readers are passed in increasing document base order. |
| + * |
| + * @param reader an {@link IndexReader} instance opened on the index currently |
| + * searched on. Note that it is likely that the provided reader does not |
| + * represent the whole underlying index, i.e. if the index has more than |
| + * one segment, the given reader only represents a single segment. |
| + * |
| * @return a DocIdSet that provides the documents which should be permitted or |
| * prohibited in search results. <b>NOTE:</b> null can be returned if |
| * no documents will be accepted by this Filter. |
| Index: src/test/org/apache/lucene/search/TestFilteredSearch.java |
| =================================================================== |
| --- src/test/org/apache/lucene/search/TestFilteredSearch.java (revision 887181) |
| +++ src/test/org/apache/lucene/search/TestFilteredSearch.java (working copy) |
| @@ -24,9 +24,12 @@ |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| +import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.Term; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.LockObtainFailedException; |
| import org.apache.lucene.store.RAMDirectory; |
| import org.apache.lucene.util.OpenBitSet; |
| |
| @@ -42,19 +45,32 @@ |
| |
| private static final String FIELD = "category"; |
| |
| - public void testFilteredSearch() { |
| + public void testFilteredSearch() throws CorruptIndexException, LockObtainFailedException, IOException { |
| + boolean enforceSingleSegment = true; // first pass: searchFiltered calls writer.optimize() before closing the writer |
| RAMDirectory directory = new RAMDirectory(); |
| int[] filterBits = {1, 36}; |
| - Filter filter = new SimpleDocIdSetFilter(filterBits); |
| - |
| + SimpleDocIdSetFilter filter = new SimpleDocIdSetFilter(filterBits); |
| + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| + searchFiltered(writer, directory, filter, enforceSingleSegment); |
| + // second pass: run the same test without optimizing, so the index has more than one segment |
| + enforceSingleSegment = false; |
| + // reset - the filter is stateful: it keeps its cursor and docBase between getDocIdSet calls |
| + filter.reset(); |
| + writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| + // searchFiltered indexes 60 docs; flushing every 10 buffered docs should create 6 segments |
| + writer.setMaxBufferedDocs(10); |
| + searchFiltered(writer, directory, filter, enforceSingleSegment); |
| + } |
| |
| + public void searchFiltered(IndexWriter writer, Directory directory, Filter filter, boolean optimize) { |
| try { |
| - IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| for (int i = 0; i < 60; i++) {//Simple docs |
| Document doc = new Document(); |
| doc.add(new Field(FIELD, Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); |
| writer.addDocument(doc); |
| } |
| + if(optimize) |
| + writer.optimize(); |
| writer.close(); |
| |
| BooleanQuery booleanQuery = new BooleanQuery(); |
| @@ -69,25 +85,34 @@ |
| catch (IOException e) { |
| fail(e.getMessage()); |
| } |
| - |
| + |
| } |
| - |
| - |
| + |
| public static final class SimpleDocIdSetFilter extends Filter { |
| - private OpenBitSet bits; |
| - |
| + private int docBase; // global doc id at which the next reader passed to getDocIdSet starts |
| + private final int[] docs; // global doc ids to accept; consumed in order, so presumably sorted ascending |
| + private int index; // cursor into docs, carried over between getDocIdSet calls |
| public SimpleDocIdSetFilter(int[] docs) { |
| - bits = new OpenBitSet(); |
| - for(int i = 0; i < docs.length; i++){ |
| - bits.set(docs[i]); |
| - } |
| - |
| + this.docs = docs; |
| } |
| - |
| @Override |
| public DocIdSet getDocIdSet(IndexReader reader) { |
| - return bits; |
| + final OpenBitSet set = new OpenBitSet(); |
| + final int limit = docBase+reader.maxDoc(); // exclusive bound: first global doc id past this reader |
| + for (;index < docs.length; index++) { |
| + final int docId = docs[index]; |
| + if(docId >= limit) // docId == limit already belongs to the next reader, so leave it for the next call |
| + break; |
| + set.set(docId-docBase); // translate the global doc id into a reader-relative one |
| + } |
| + docBase = limit; |
| + return set.isEmpty()?null:set; |
| } |
| + /** Rewinds the cursor and the document base so the filter can be reused for another search. */ |
| + public void reset(){ |
| + index = 0; |
| + docBase = 0; |
| + } |
| } |
| |
| } |