| Index: D:/Programme/eclipse/workspace/lucene/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java |
| =================================================================== |
| --- D:/Programme/eclipse/workspace/lucene/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java (revision 164962) |
| +++ D:/Programme/eclipse/workspace/lucene/src/test/org/apache/lucene/search/TestMultiSearcherRanking.java (working copy) |
| @@ -24,81 +24,123 @@ |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.index.IndexWriter; |
| -import org.apache.lucene.index.Term; |
| +import org.apache.lucene.queryParser.ParseException; |
| +import org.apache.lucene.queryParser.QueryParser; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMDirectory; |
| |
| /** |
| * Tests {@link MultiSearcher} ranking, i.e. makes sure this bug is fixed: |
| * http://issues.apache.org/bugzilla/show_bug.cgi?id=31841 |
| - * |
| + * |
| * @version $Id: TestMultiSearcher.java 150492 2004-09-06 22:01:49Z dnaber $ |
| */ |
| -public class TestMultiSearcherRanking extends TestCase |
| -{ |
| +public class TestMultiSearcherRanking extends TestCase { |
| + private final boolean verbose = false; // set to true to output hits |
| + private final String FIELD_NAME = "body"; |
| |
| - private final Query query = new TermQuery(new Term("body", "three")); |
| - |
| - public void testMultiSearcherRanking() throws IOException { |
| - Hits multiSearcherHits = multi(); |
| - Hits singleSearcherHits = single(); |
| + private Searcher multiSearcher; |
| + private Searcher singleSearcher; |
| + |
| + public void testOneTermQuery() throws IOException, ParseException { |
| + checkQuery("three"); |
| + } |
| + |
| + public void testTwoTermQuery() throws IOException, ParseException { |
| + checkQuery("three foo"); |
| + } |
| + |
| + public void testPrefixQuery() throws IOException, ParseException { |
| + checkQuery("multi*"); |
| + } |
| + |
| + public void testFuzzyQuery() throws IOException, ParseException { |
| + checkQuery("multiThree~"); |
| + } |
| + |
| + public void testRangeQuery() throws IOException, ParseException { |
| + checkQuery("{multiA TO multiP}"); |
| + } |
| + |
| + /** |
| + * checks if a query yields the same result when executed on |
| + * a single IndexSearcher containing all documents and on a |
| + * MultiSearcher aggregating sub-searchers |
| + * @param queryStr the query to check. |
| + * @throws IOException |
| + * @throws ParseException |
| + */ |
| + private void checkQuery(String queryStr) throws IOException, ParseException { |
| + // check result hit ranking |
| + if(verbose) System.out.println("Query: " + queryStr); |
| + Query query = QueryParser.parse(queryStr, FIELD_NAME, |
| + new StandardAnalyzer()); |
| + Hits multiSearcherHits = multiSearcher.search(query); |
| + Hits singleSearcherHits = singleSearcher.search(query); |
| assertEquals(multiSearcherHits.length(), singleSearcherHits.length()); |
| - for(int i = 0; i < multiSearcherHits.length(); i++) { |
| - assertEquals(multiSearcherHits.score(i), singleSearcherHits.score(i), 0.001f); |
| + for (int i = 0; i < multiSearcherHits.length(); i++) { |
| Document docMulti = multiSearcherHits.doc(i); |
| Document docSingle = singleSearcherHits.doc(i); |
| - assertEquals(docMulti.get("body"), docSingle.get("body")); |
| + if(verbose) System.out.println("Multi: " + docMulti.get(FIELD_NAME) + " score=" |
| + + multiSearcherHits.score(i)); |
| + if(verbose) System.out.println("Single: " + docSingle.get(FIELD_NAME) + " score=" |
| + + singleSearcherHits.score(i)); |
| + assertEquals(multiSearcherHits.score(i), singleSearcherHits.score(i), |
| + 0.001f); |
| + assertEquals(docMulti.get(FIELD_NAME), docSingle.get(FIELD_NAME)); |
| } |
| + if(verbose) System.out.println(); |
| + |
| + // check query rewriting |
| + Query sq = singleSearcher.rewrite(query); |
| + Query mq = multiSearcher.rewrite(query); |
| + assertEquals(mq, sq); |
| } |
| |
| - // Collection 1+2 searched with MultiSearcher: |
| - private Hits multi() throws IOException { |
| - Directory d1 = new RAMDirectory(); |
| - IndexWriter iw = new IndexWriter(d1, new StandardAnalyzer(), true); |
| - addCollection1(iw); |
| - iw.close(); |
| + /** |
| + * initializes multiSearcher and singleSearcher with the same document set |
| + */ |
| + protected void setUp() throws Exception { |
| +    // create MultiSearcher from two separate searchers |
| + Directory d1 = new RAMDirectory(); |
| + IndexWriter iw1 = new IndexWriter(d1, new StandardAnalyzer(), true); |
| + addCollection1(iw1); |
| + iw1.close(); |
| |
| Directory d2 = new RAMDirectory(); |
| - iw = new IndexWriter(d2, new StandardAnalyzer(), true); |
| - addCollection2(iw); |
| - iw.close(); |
| - |
| + IndexWriter iw2 = new IndexWriter(d2, new StandardAnalyzer(), true); |
| + addCollection2(iw2); |
| + iw2.close(); |
| + |
| Searchable[] s = new Searchable[2]; |
| s[0] = new IndexSearcher(d1); |
| s[1] = new IndexSearcher(d2); |
| - MultiSearcher ms = new MultiSearcher(s); |
| - Hits hits = ms.search(query); |
| - return hits; |
| - } |
| - |
| - // Collection 1+2 indexed together: |
| - private Hits single() throws IOException { |
| + multiSearcher = new MultiSearcher(s); |
| + |
| + // create IndexSearcher which contains all documents |
| Directory d = new RAMDirectory(); |
| IndexWriter iw = new IndexWriter(d, new StandardAnalyzer(), true); |
| addCollection1(iw); |
| addCollection2(iw); |
| iw.close(); |
| - IndexSearcher is = new IndexSearcher(d); |
| - Hits hits = is.search(query); |
| - return hits; |
| + singleSearcher = new IndexSearcher(d); |
| } |
| |
| private void addCollection1(IndexWriter iw) throws IOException { |
| add("one blah three", iw); |
| - add("one foo three", iw); |
| - add("one foobar three", iw); |
| + add("one foo three multiOne", iw); |
| + add("one foobar three multiThree", iw); |
| } |
| - |
| + |
| private void addCollection2(IndexWriter iw) throws IOException { |
| add("two blah three", iw); |
| - add("two foo xxx", iw); |
| - add("two foobar xxx", iw); |
| + add("two foo xxx multiTwo", iw); |
| + add("two foobar xxx multiThreee", iw); |
| } |
| |
| private void add(String value, IndexWriter iw) throws IOException { |
| Document d = new Document(); |
| - d.add(new Field("body", value, Field.Store.YES, Field.Index.TOKENIZED)); |
| + d.add(new Field(FIELD_NAME, value, Field.Store.YES, Field.Index.TOKENIZED)); |
| iw.addDocument(d); |
| } |
| - |
| } |
| Index: D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java |
| =================================================================== |
| --- D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (revision 164962) |
| +++ D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java (working copy) |
| @@ -67,11 +67,6 @@ |
| return query; |
| } |
| |
| - public Query combine(Query[] queries) { |
| - return Query.mergeBooleanQueries(queries); |
| - } |
| - |
| - |
| /** Prints a user-readable version of this query. */ |
| public String toString(String field) { |
| StringBuffer buffer = new StringBuffer(); |
| Index: D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/RangeQuery.java |
| =================================================================== |
| --- D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/RangeQuery.java (revision 164962) |
| +++ D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/RangeQuery.java (working copy) |
| @@ -104,10 +104,6 @@ |
| return query; |
| } |
| |
| - public Query combine(Query[] queries) { |
| - return Query.mergeBooleanQueries(queries); |
| - } |
| - |
| /** Returns the field name for this query */ |
| public String getField() { |
| return (lowerTerm != null ? lowerTerm.field() : upperTerm.field()); |
| Index: D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/PrefixQuery.java |
| =================================================================== |
| --- D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/PrefixQuery.java (revision 164962) |
| +++ D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/PrefixQuery.java (working copy) |
| @@ -59,10 +59,6 @@ |
| return query; |
| } |
| |
| - public Query combine(Query[] queries) { |
| - return Query.mergeBooleanQueries(queries); |
| - } |
| - |
| /** Prints a user-readable version of this query. */ |
| public String toString(String field) { |
| StringBuffer buffer = new StringBuffer(); |
| Index: D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/BooleanQuery.java |
| =================================================================== |
| --- D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (revision 164962) |
| +++ D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/BooleanQuery.java (working copy) |
| @@ -20,7 +20,6 @@ |
| import java.util.Iterator; |
| import java.util.Set; |
| import java.util.Vector; |
| -import java.util.Arrays; |
| |
| import org.apache.lucene.index.IndexReader; |
| |
| @@ -151,7 +150,7 @@ |
| |
| /** Returns the set of clauses in this query. */ |
| public BooleanClause[] getClauses() { |
| - return (BooleanClause[])clauses.toArray(new BooleanClause[0]); |
| + return (BooleanClause[])clauses.toArray(new BooleanClause[clauses.size()]); |
| } |
| |
| private class BooleanWeight implements Weight { |
| @@ -371,11 +370,6 @@ |
| } |
| } |
| |
| - // inherit javadoc |
| - public Query combine(Query[] queries) { |
| - return Query.mergeBooleanQueries(queries); |
| - } |
| - |
| public Object clone() { |
| BooleanQuery clone = (BooleanQuery)super.clone(); |
| clone.clauses = (Vector)this.clauses.clone(); |
| @@ -422,7 +416,8 @@ |
| return false; |
| BooleanQuery other = (BooleanQuery)o; |
| return (this.getBoost() == other.getBoost()) |
| - && this.clauses.equals(other.clauses); |
| + && this.clauses.size() == other.clauses.size() |
| + && this.clauses.containsAll(other.clauses); |
| } |
| |
| /** Returns a hash code value for this object.*/ |
| Index: D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/Query.java |
| =================================================================== |
| --- D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/Query.java (revision 164962) |
| +++ D:/Programme/eclipse/workspace/lucene/src/java/org/apache/lucene/search/Query.java (working copy) |
| @@ -101,21 +101,52 @@ |
| public Query rewrite(IndexReader reader) throws IOException { |
| return this; |
| } |
| - |
| + |
| /** Expert: called when re-writing queries under MultiSearcher. |
| - * |
| - * <p>Only implemented by derived queries, with no |
| - * {@link #createWeight(Searcher)} implementatation. |
| - */ |
| + * |
| + * Create a single query suitable for use by all subsearchers (in 1-1 |
| + * correspondence with queries). This is an optimization of the OR of |
| + * all queries. We handle the common optimization cases of equal |
| + * queries and overlapping clauses of boolean OR queries (as generated |
| + * by MultiTermQuery.rewrite() and RangeQuery.rewrite()). |
| + * Be careful overriding this method as queries[0] determines which |
| + * method will be called and is not necessarily of the same type as |
| + * the other queries. |
| + */ |
| public Query combine(Query[] queries) { |
| - for (int i = 0; i < queries.length; i++) { |
| - if (!this.equals(queries[i])) { |
| - throw new IllegalArgumentException(); |
| - } |
| + HashSet uniques = new HashSet(); |
| + for (int i = 0; i < queries.length; i++) { |
| + Query query = queries[i]; |
| + BooleanClause[] clauses = null; |
| + // check if we can split the query into clauses |
| + boolean splittable = (query instanceof BooleanQuery); |
| + if(splittable){ |
| + BooleanQuery bq = (BooleanQuery) query; |
| + splittable = bq.isCoordDisabled(); |
| + clauses = bq.getClauses(); |
| + for (int j = 0; splittable && j < clauses.length; j++) { |
| + splittable = (clauses[j].getOccur() == BooleanClause.Occur.SHOULD); |
| } |
| - return this; |
| + } |
| + if(splittable){ |
| + for (int j = 0; j < clauses.length; j++) { |
| + uniques.add(clauses[j].getQuery()); |
| + } |
| + } else { |
| + uniques.add(query); |
| + } |
| + } |
| + // optimization: if we have just one query, just return it |
| + if(uniques.size() == 1){ |
| + return (Query)uniques.iterator().next(); |
| + } |
| + Iterator it = uniques.iterator(); |
| + BooleanQuery result = new BooleanQuery(true); |
| + while (it.hasNext()) |
| + result.add((Query) it.next(), BooleanClause.Occur.SHOULD); |
| + return result; |
| } |
| - |
| + |
| /** |
| * Expert: adds all terms occuring in this query to the terms set |
| */ |
| @@ -124,31 +155,6 @@ |
| throw new UnsupportedOperationException(); |
| } |
| |
| - |
| - /** Expert: merges the clauses of a set of BooleanQuery's into a single |
| - * BooleanQuery. |
| - * |
| - *<p>A utility for use by {@link #combine(Query[])} implementations. |
| - */ |
| - public static Query mergeBooleanQueries(Query[] queries) { |
| - HashSet allClauses = new HashSet(); |
| - for (int i = 0; i < queries.length; i++) { |
| - BooleanClause[] clauses = ((BooleanQuery)queries[i]).getClauses(); |
| - for (int j = 0; j < clauses.length; j++) { |
| - allClauses.add(clauses[j]); |
| - } |
| - } |
| - |
| - boolean coordDisabled = |
| - queries.length==0? false : ((BooleanQuery)queries[0]).isCoordDisabled(); |
| - BooleanQuery result = new BooleanQuery(coordDisabled); |
| - Iterator i = allClauses.iterator(); |
| - while (i.hasNext()) { |
| - result.add((BooleanClause)i.next()); |
| - } |
| - return result; |
| - } |
| - |
| /** Expert: Returns the Similarity implementation to be used for this query. |
| * Subclasses may override this method to specify their own Similarity |
| * implementation, perhaps one that delegates through that of the Searcher. |
| |