| Index: src/java/org/apache/lucene/search/Explanation.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/Explanation.java (revision 807361) |
| +++ src/java/org/apache/lucene/search/Explanation.java (working copy) |
| @@ -17,6 +17,7 @@ |
| * limitations under the License. |
| */ |
| |
| +import java.io.Serializable; |
| import java.util.ArrayList; |
| |
| /** Expert: Describes the score computation for document and query. */ |
| @@ -124,4 +125,25 @@ |
| |
| return buffer.toString(); |
| } |
| + |
| + /** |
| + * Small Util class used to pass both an idf factor as well as an |
| + * explanation for that factor. |
| + * |
| + * This class will likely be held on a {@link Weight}, so be aware |
| + * before storing any large or un-serializable fields. |
| + * |
| + */ |
| + public static abstract class IDFExplanation implements Serializable { |
| + /** |
| + * @return the idf factor |
| + */ |
| + public abstract float getIdf(); |
| + /** |
| + * This should be calculated lazily if possible. |
| + * |
| + * @return the explanation for the idf factor. |
| + */ |
| + public abstract String explain(); |
| + } |
| } |
| Index: src/java/org/apache/lucene/search/PhraseQuery.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/PhraseQuery.java (revision 807361) |
| +++ src/java/org/apache/lucene/search/PhraseQuery.java (working copy) |
| @@ -24,6 +24,7 @@ |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermPositions; |
| import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.search.Explanation.IDFExplanation; |
| import org.apache.lucene.util.ToStringUtils; |
| |
| /** A Query that matches documents containing a particular sequence of terms. |
| @@ -112,12 +113,14 @@ |
| private float idf; |
| private float queryNorm; |
| private float queryWeight; |
| + private IDFExplanation idfExp; |
| |
| public PhraseWeight(Searcher searcher) |
| throws IOException { |
| this.similarity = getSimilarity(searcher); |
| |
| - idf = similarity.idf(terms, searcher); |
| + idfExp = similarity.idfExplain(terms, searcher); |
| + idf = idfExp.getIdf(); |
| } |
| |
| public String toString() { return "weight(" + PhraseQuery.this + ")"; } |
| @@ -167,24 +170,20 @@ |
| StringBuffer docFreqs = new StringBuffer(); |
| StringBuffer query = new StringBuffer(); |
| query.append('\"'); |
| + docFreqs.append(idfExp.explain()); |
| for (int i = 0; i < terms.size(); i++) { |
| if (i != 0) { |
| - docFreqs.append(" "); |
| query.append(" "); |
| } |
| |
| Term term = (Term)terms.get(i); |
| |
| - docFreqs.append(term.text()); |
| - docFreqs.append("="); |
| - docFreqs.append(reader.docFreq(term)); |
| - |
| query.append(term.text()); |
| } |
| query.append('\"'); |
| |
| Explanation idfExpl = |
| - new Explanation(idf, "idf(" + field + ": " + docFreqs + ")"); |
| + new Explanation(idf, "idf(" + field + ":" + docFreqs + ")"); |
| |
| // explain query weight |
| Explanation queryExpl = new Explanation(); |
| Index: src/java/org/apache/lucene/search/Similarity.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/Similarity.java (revision 807361) |
| +++ src/java/org/apache/lucene/search/Similarity.java (working copy) |
| @@ -17,13 +17,16 @@ |
| * limitations under the License. |
| */ |
| |
| + |
| import org.apache.lucene.index.FieldInvertState; |
| import org.apache.lucene.index.Term; |
| +import org.apache.lucene.search.Explanation.IDFExplanation; |
| import org.apache.lucene.util.SmallFloat; |
| |
| import java.io.IOException; |
| import java.io.Serializable; |
| import java.util.Collection; |
| +import java.util.IdentityHashMap; |
| import java.util.Iterator; |
| |
| /** Expert: Scoring API. |
| @@ -287,8 +290,6 @@ |
| * @see Searcher#setSimilarity(Similarity) |
| */ |
| public abstract class Similarity implements Serializable { |
| - /** The Similarity implementation used by default. */ |
| - private static Similarity defaultImpl = new DefaultSimilarity(); |
| |
| public static final int NO_DOC_ID_PROVIDED = -1; |
| |
| @@ -478,10 +479,62 @@ |
| * @param term the term in question |
| * @param searcher the document collection being searched |
| * @return a score factor for the term |
| + * @deprecated see {@link #idfExplain(Term, Searcher)} |
| */ |
| public float idf(Term term, Searcher searcher) throws IOException { |
| return idf(searcher.docFreq(term), searcher.maxDoc()); |
| } |
| + |
| + /** |
| + * Computes a score factor for a simple term and returns an explanation |
| + * for that score factor. |
| + * |
| + * <p> |
| + * The default implementation uses: |
| + * |
| + * <pre> |
| + * idf(searcher.docFreq(term), searcher.maxDoc()); |
| + * </pre> |
| + * |
| + * Note that {@link Searcher#maxDoc()} is used instead of |
| + * {@link org.apache.lucene.index.IndexReader#numDocs()} because it is |
| + * proportional to {@link Searcher#docFreq(Term)} , i.e., when one is |
| + * inaccurate, so is the other, and in the same direction. |
| + * |
| + * @param term the term in question |
| + * @param searcher the document collection being searched |
| + * @return an IDFExplain object that includes both an idf score factor |
| + and an explanation for the term. |
| + * @throws IOException |
| + */ |
| + public IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException { |
| + if(supportedMethods.overridesTermIDF) { |
| + final float idf = idf(term, searcher); |
| + return new IDFExplanation() { |
| + //@Override |
| + public float getIdf() { |
| + return idf; |
| + } |
| + //@Override |
| + public String explain() { |
| + return "Inexplicable"; |
| + } |
| + }; |
| + } |
| + final int df = searcher.docFreq(term); |
| + final int max = searcher.maxDoc(); |
| + final float idf = idf(df, max); |
| + return new IDFExplanation() { |
| + //@Override |
| + public String explain() { |
| + return "idf(docFreq=" + df + |
| + ", maxDocs=" + max + ")"; |
| + } |
| + //@Override |
| + public float getIdf() { |
| + return idf; |
| + }}; |
| + } |
| |
| /** Computes a score factor for a phrase. |
| * |
| @@ -490,7 +543,8 @@ |
| * |
| * @param terms the terms in the phrase |
| * @param searcher the document collection being searched |
| - * @return a score factor for the phrase |
| + * @return |
| + * @deprecated see {@link #idfExplain(Collection, Searcher)} |
| */ |
| public float idf(Collection terms, Searcher searcher) throws IOException { |
| float idf = 0.0f; |
| @@ -500,6 +554,60 @@ |
| } |
| return idf; |
| } |
| + |
| + /** |
| + * Computes a score factor for a phrase. |
| + * |
| + * <p> |
| + * The default implementation sums the idf factor for |
| + * each term in the phrase. |
| + * |
| + * @param terms the terms in the phrase |
| + * @param searcher the document collection being searched |
| + * @return an IDFExplain object that includes both an idf |
| + * score factor for the phrase and an explanation |
| + * for each term. |
| + * @throws IOException |
| + */ |
| + public IDFExplanation idfExplain(Collection terms, Searcher searcher) throws IOException { |
| + if(supportedMethods.overridesCollectionIDF) { |
| + final float idf = idf(terms, searcher); |
| + return new IDFExplanation() { |
| + //@Override |
| + public float getIdf() { |
| + return idf; |
| + } |
| + //@Override |
| + public String explain() { |
| + return "Inexplicable"; |
| + } |
| + }; |
| + } |
| + final int max = searcher.maxDoc(); |
| + float idf = 0.0f; |
| + final StringBuffer exp = new StringBuffer(); |
| + Iterator i = terms.iterator(); |
| + while (i.hasNext()) { |
| + Term term = (Term)i.next(); |
| + final int df = searcher.docFreq(term); |
| + idf += idf(df, max); |
| + exp.append(" "); |
| + exp.append(term.text()); |
| + exp.append("="); |
| + exp.append(df); |
| + } |
| + final float fIdf = idf; |
| + return new IDFExplanation() { |
| + //@Override |
| + public float getIdf() { |
| + return fIdf; |
| + } |
| + //@Override |
| + public String explain() { |
| + return exp.toString(); |
| + } |
| + }; |
| + } |
| |
| /** Computes a score factor based on a term's document frequency (the number |
| * of documents which contain the term). This value is multiplied by the |
| @@ -577,5 +685,52 @@ |
| //TODO: When removing the deprecated scorePayload above, set this to return 1 |
| return scorePayload(fieldName, payload, offset, length); |
| } |
| + |
| + /** @deprecated Remove this when old API is removed! */ |
| + private final MethodSupport supportedMethods = getSupportedMethods(this.getClass()); |
| + |
| + /** @deprecated Remove this when old API is removed! */ |
| + private static final class MethodSupport implements Serializable { |
| + final boolean overridesCollectionIDF, overridesTermIDF; |
| + |
| + MethodSupport(Class clazz) { |
| + overridesCollectionIDF = isMethodOverridden(clazz, "idf", C_IDF_METHOD_PARAMS); |
| + overridesTermIDF = isMethodOverridden(clazz, "idf", T_IDF_METHOD_PARAMS); |
| + } |
| + |
| + private static boolean isMethodOverridden(Class clazz, String name, Class[] params) { |
| + try { |
| + return clazz.getMethod(name, params).getDeclaringClass() != Similarity.class; |
| + } catch (NoSuchMethodException e) { |
| + // should not happen |
| + throw new RuntimeException(e); |
| + } |
| + } |
| + /** @deprecated Remove this when old API is removed! */ |
| + private static final Class[] T_IDF_METHOD_PARAMS = new Class[]{Term.class, Searcher.class}; |
| + |
| + /** @deprecated Remove this when old API is removed! */ |
| + private static final Class[] C_IDF_METHOD_PARAMS = new Class[]{Collection.class, Searcher.class}; |
| + } |
| + |
| + /** @deprecated Remove this when old API is removed! */ |
| + private static final IdentityHashMap/*<Class<? extends Similarity>,MethodSupport>*/ knownMethodSupport = new IdentityHashMap(); |
| + |
| + /** @deprecated Remove this when old API is removed! */ |
| + private static MethodSupport getSupportedMethods(Class clazz) { |
| + MethodSupport supportedMethods; |
| + synchronized(knownMethodSupport) { |
| + supportedMethods = (MethodSupport) knownMethodSupport.get(clazz); |
| + if (supportedMethods == null) { |
| + knownMethodSupport.put(clazz, supportedMethods = new MethodSupport(clazz)); |
| + } |
| + } |
| + return supportedMethods; |
| + } |
| + |
| + /** The Similarity implementation used by default. |
| + * TODO: move back to top when old API is removed! |
| + **/ |
| + private static Similarity defaultImpl = new DefaultSimilarity(); |
| |
| } |
| Index: src/java/org/apache/lucene/search/TermQuery.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/TermQuery.java (revision 807361) |
| +++ src/java/org/apache/lucene/search/TermQuery.java (working copy) |
| @@ -23,6 +23,7 @@ |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermDocs; |
| import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.search.Explanation.IDFExplanation; |
| import org.apache.lucene.util.ToStringUtils; |
| |
| /** A Query that matches documents containing a term. |
| @@ -37,11 +38,13 @@ |
| private float idf; |
| private float queryNorm; |
| private float queryWeight; |
| + private IDFExplanation idfExp; |
| |
| public TermWeight(Searcher searcher) |
| throws IOException { |
| this.similarity = getSimilarity(searcher); |
| - idf = similarity.idf(term, searcher); // compute idf |
| + idfExp = similarity.idfExplain(term, searcher); |
| + idf = idfExp.getIdf(); |
| } |
| |
| public String toString() { return "weight(" + TermQuery.this + ")"; } |
| @@ -75,8 +78,7 @@ |
| ComplexExplanation result = new ComplexExplanation(); |
| result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); |
| |
| - Explanation expl = new Explanation(idf, "idf(docFreq=" + reader.docFreq(term) + |
| - ", maxDocs=" + reader.maxDoc() + ")"); |
| + Explanation expl = new Explanation(idf, idfExp.explain()); |
| |
| // explain query weight |
| Explanation queryExpl = new Explanation(); |
| Index: src/java/org/apache/lucene/search/spans/SpanWeight.java |
| =================================================================== |
| --- src/java/org/apache/lucene/search/spans/SpanWeight.java (revision 807361) |
| +++ src/java/org/apache/lucene/search/spans/SpanWeight.java (working copy) |
| @@ -18,12 +18,11 @@ |
| */ |
| |
| import org.apache.lucene.index.IndexReader; |
| -import org.apache.lucene.index.Term; |
| import org.apache.lucene.search.*; |
| +import org.apache.lucene.search.Explanation.IDFExplanation; |
| |
| import java.io.IOException; |
| import java.util.HashSet; |
| -import java.util.Iterator; |
| import java.util.Set; |
| |
| /** |
| @@ -38,6 +37,7 @@ |
| |
| protected Set terms; |
| protected SpanQuery query; |
| + private IDFExplanation idfExp; |
| |
| public SpanWeight(SpanQuery query, Searcher searcher) |
| throws IOException { |
| @@ -45,8 +45,8 @@ |
| this.query = query; |
| terms=new HashSet(); |
| query.extractTerms(terms); |
| - |
| - idf = this.query.getSimilarity(searcher).idf(terms, searcher); |
| + idfExp = similarity.idfExplain(terms, searcher); |
| + idf = idfExp.getIdf(); |
| } |
| |
| public Query getQuery() { return query; } |
| @@ -75,21 +75,8 @@ |
| result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); |
| String field = ((SpanQuery)getQuery()).getField(); |
| |
| - StringBuffer docFreqs = new StringBuffer(); |
| - Iterator i = terms.iterator(); |
| - while (i.hasNext()) { |
| - Term term = (Term)i.next(); |
| - docFreqs.append(term.text()); |
| - docFreqs.append("="); |
| - docFreqs.append(reader.docFreq(term)); |
| - |
| - if (i.hasNext()) { |
| - docFreqs.append(" "); |
| - } |
| - } |
| - |
| Explanation idfExpl = |
| - new Explanation(idf, "idf(" + field + ": " + docFreqs + ")"); |
| + new Explanation(idf, "idf(" + field + ": " + idfExp.explain() + ")"); |
| |
| // explain query weight |
| Explanation queryExpl = new Explanation(); |
| Index: src/test/org/apache/lucene/search/TestSimpleExplanations.java |
| =================================================================== |
| --- src/test/org/apache/lucene/search/TestSimpleExplanations.java (revision 807361) |
| +++ src/test/org/apache/lucene/search/TestSimpleExplanations.java (working copy) |
| @@ -17,6 +17,19 @@ |
| * limitations under the License. |
| */ |
| |
| +import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.index.IndexWriter; |
| +import org.apache.lucene.index.Term; |
| +import org.apache.lucene.queryParser.QueryParser; |
| +import org.apache.lucene.search.spans.SpanNearQuery; |
| +import org.apache.lucene.search.spans.SpanQuery; |
| +import org.apache.lucene.search.spans.SpanTermQuery; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.MockRAMDirectory; |
| + |
| + |
| /** |
| * TestExplanations subclass focusing on basic query types |
| */ |
| @@ -291,4 +304,67 @@ |
| } |
| |
| |
| + public void testTermQueryMultiSearcherExplain() throws Exception { |
| + // creating two directories for indices |
| + Directory indexStoreA = new MockRAMDirectory(); |
| + Directory indexStoreB = new MockRAMDirectory(); |
| + |
| + Document lDoc = new Document(); |
| + lDoc.add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED)); |
| + Document lDoc2 = new Document(); |
| + lDoc2.add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED)); |
| + Document lDoc3 = new Document(); |
| + lDoc3.add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED)); |
| + |
| + IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| + IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); |
| + |
| + writerA.addDocument(lDoc); |
| + writerA.addDocument(lDoc2); |
| + writerA.optimize(); |
| + writerA.close(); |
| + |
| + writerB.addDocument(lDoc3); |
| + writerB.close(); |
| + |
| + QueryParser parser = new QueryParser("fulltext", new StandardAnalyzer()); |
| + Query query = parser.parse("handle:1"); |
| + |
| + Searcher[] searchers = new Searcher[2]; |
| + searchers[0] = new IndexSearcher(indexStoreB); |
| + searchers[1] = new IndexSearcher(indexStoreA); |
| + Searcher mSearcher = new MultiSearcher(searchers); |
| + ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs; |
| + |
| + assertEquals(3, hits.length); |
| + |
| + Explanation explain = mSearcher.explain(query, hits[0].doc); |
| + String exp = explain.toString(0); |
| + assertTrue(exp, exp.indexOf("maxDocs=3") > -1); |
| + assertTrue(exp, exp.indexOf("docFreq=3") > -1); |
| + |
| + query = parser.parse("handle:\"1 2\""); |
| + hits = mSearcher.search(query, null, 1000).scoreDocs; |
| + |
| + assertEquals(3, hits.length); |
| + |
| + explain = mSearcher.explain(query, hits[0].doc); |
| + exp = explain.toString(0); |
| + assertTrue(exp, exp.indexOf("1=3") > -1); |
| + assertTrue(exp, exp.indexOf("2=3") > -1); |
| + |
| + query = new SpanNearQuery(new SpanQuery[] { |
| + new SpanTermQuery(new Term("handle", "1")), |
| + new SpanTermQuery(new Term("handle", "2")) }, 0, true); |
| + hits = mSearcher.search(query, null, 1000).scoreDocs; |
| + |
| + assertEquals(3, hits.length); |
| + |
| + explain = mSearcher.explain(query, hits[0].doc); |
| + exp = explain.toString(0); |
| + assertTrue(exp, exp.indexOf("1=3") > -1); |
| + assertTrue(exp, exp.indexOf("2=3") > -1); |
| + mSearcher.close(); |
| + } |
| + |
| } |
| |