Index: src/java/org/apache/lucene/search/Explanation.java
===================================================================
--- src/java/org/apache/lucene/search/Explanation.java (revision 807361)
+++ src/java/org/apache/lucene/search/Explanation.java (working copy)
@@ -17,6 +17,7 @@
* limitations under the License.
*/
+import java.io.Serializable;
import java.util.ArrayList;
/** Expert: Describes the score computation for document and query. */
@@ -124,4 +125,25 @@
return buffer.toString();
}
+
+ /**
+ * Small utility class used to pass both an idf factor as well as an
+ * explanation for that factor.
+ *
+ * This class will likely be held on a {@link Weight}, so be careful
+ * about storing any large or un-serializable fields.
+ *
+ */
+ public static abstract class IDFExplanation implements Serializable {
+ /**
+ * @return the idf factor
+ */
+ public abstract float getIdf();
+ /**
+ * This should be calculated lazily if possible.
+ *
+ * @return the explanation for the idf factor.
+ */
+ public abstract String explain();
+ }
}
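For context, a minimal sketch of how calling code might construct an IDFExplanation by hand; the helper name constantIdf and the explanation string are illustrative only, not part of this patch:

    import org.apache.lucene.search.Explanation.IDFExplanation;

    public class IDFExplanationSketch {
      /** Wraps a precomputed idf factor in an IDFExplanation. */
      public static IDFExplanation constantIdf(final float idf) {
        return new IDFExplanation() {
          public float getIdf() {
            return idf;                               // the precomputed factor
          }
          public String explain() {
            // the string is built only when an Explanation is actually requested
            return "idf(constant=" + idf + ")";
          }
        };
      }
    }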
Index: src/java/org/apache/lucene/search/PhraseQuery.java
===================================================================
--- src/java/org/apache/lucene/search/PhraseQuery.java (revision 807361)
+++ src/java/org/apache/lucene/search/PhraseQuery.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing a particular sequence of terms.
@@ -112,12 +113,14 @@
private float idf;
private float queryNorm;
private float queryWeight;
+ private IDFExplanation idfExp;
public PhraseWeight(Searcher searcher)
throws IOException {
this.similarity = getSimilarity(searcher);
- idf = similarity.idf(terms, searcher);
+ idfExp = similarity.idfExplain(terms, searcher);
+ idf = idfExp.getIdf();
}
public String toString() { return "weight(" + PhraseQuery.this + ")"; }
@@ -167,24 +170,20 @@
StringBuffer docFreqs = new StringBuffer();
StringBuffer query = new StringBuffer();
query.append('\"');
+ docFreqs.append(idfExp.explain());
for (int i = 0; i < terms.size(); i++) {
if (i != 0) {
- docFreqs.append(" ");
query.append(" ");
}
Term term = (Term)terms.get(i);
- docFreqs.append(term.text());
- docFreqs.append("=");
- docFreqs.append(reader.docFreq(term));
-
query.append(term.text());
}
query.append('\"');
Explanation idfExpl =
- new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");
+ new Explanation(idf, "idf(" + field + ":" + docFreqs + ")");
// explain query weight
Explanation queryExpl = new Explanation();
Index: src/java/org/apache/lucene/search/Similarity.java
===================================================================
--- src/java/org/apache/lucene/search/Similarity.java (revision 807361)
+++ src/java/org/apache/lucene/search/Similarity.java (working copy)
@@ -17,13 +17,16 @@
* limitations under the License.
*/
+
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.SmallFloat;
import java.io.IOException;
import java.io.Serializable;
import java.util.Collection;
+import java.util.IdentityHashMap;
import java.util.Iterator;
/** Expert: Scoring API.
@@ -287,8 +290,6 @@
* @see Searcher#setSimilarity(Similarity)
*/
public abstract class Similarity implements Serializable {
- /** The Similarity implementation used by default. */
- private static Similarity defaultImpl = new DefaultSimilarity();
public static final int NO_DOC_ID_PROVIDED = -1;
@@ -478,10 +479,62 @@
* @param term the term in question
* @param searcher the document collection being searched
* @return a score factor for the term
+ * @deprecated see {@link #idfExplain(Term, Searcher)}
*/
public float idf(Term term, Searcher searcher) throws IOException {
return idf(searcher.docFreq(term), searcher.maxDoc());
}
+
+ /**
+ * Computes a score factor for a simple term and returns an explanation
+ * for that score factor.
+ *
+ * <p>
+ * The default implementation uses:
+ *
+ * <pre>
+ * idf(searcher.docFreq(term), searcher.maxDoc());
+ * </pre>
+ *
+ * Note that {@link Searcher#maxDoc()} is used instead of
+ * {@link org.apache.lucene.index.IndexReader#numDocs()} because it is
+ * proportional to {@link Searcher#docFreq(Term)}, i.e., when one is
+ * inaccurate, so is the other, and in the same direction.
+ *
+ * @param term the term in question
+ * @param searcher the document collection being searched
+ * @return an IDFExplanation object that includes both an idf score factor
+ *         and an explanation for the term.
+ * @throws IOException
+ */
+ public IDFExplanation idfExplain(final Term term, final Searcher searcher) throws IOException {
+ if(supportedMethods.overridesTermIDF) {
+ final float idf = idf(term, searcher);
+ return new IDFExplanation() {
+ //@Override
+ public float getIdf() {
+ return idf;
+ }
+ //@Override
+ public String explain() {
+ return "Inexplicable";
+ }
+ };
+ }
+ final int df = searcher.docFreq(term);
+ final int max = searcher.maxDoc();
+ final float idf = idf(df, max);
+ return new IDFExplanation() {
+ //@Override
+ public String explain() {
+ return "idf(docFreq=" + df +
+ ", maxDocs=" + max + ")";
+ }
+ //@Override
+ public float getIdf() {
+ return idf;
+ }};
+ }
/** Computes a score factor for a phrase.
*
@@ -490,7 +543,8 @@
*
* @param terms the terms in the phrase
* @param searcher the document collection being searched
* @return a score factor for the phrase
+ * @deprecated see {@link #idfExplain(Collection, Searcher)}
*/
public float idf(Collection terms, Searcher searcher) throws IOException {
float idf = 0.0f;
@@ -500,6 +554,60 @@
}
return idf;
}
+
+ /**
+ * Computes a score factor for a phrase.
+ *
+ * <p>
+ * The default implementation sums the idf factor for
+ * each term in the phrase.
+ *
+ * @param terms the terms in the phrase
+ * @param searcher the document collection being searched
+ * @return an IDFExplanation object that includes both an idf
+ * score factor for the phrase and an explanation
+ * for each term.
+ * @throws IOException
+ */
+ public IDFExplanation idfExplain(Collection terms, Searcher searcher) throws IOException {
+ if(supportedMethods.overridesCollectionIDF) {
+ final float idf = idf(terms, searcher);
+ return new IDFExplanation() {
+ //@Override
+ public float getIdf() {
+ return idf;
+ }
+ //@Override
+ public String explain() {
+ return "Inexplicable";
+ }
+ };
+ }
+ final int max = searcher.maxDoc();
+ float idf = 0.0f;
+ final StringBuffer exp = new StringBuffer();
+ Iterator i = terms.iterator();
+ while (i.hasNext()) {
+ Term term = (Term)i.next();
+ final int df = searcher.docFreq(term);
+ idf += idf(df, max);
+ exp.append(" ");
+ exp.append(term.text());
+ exp.append("=");
+ exp.append(df);
+ }
+ final float fIdf = idf;
+ return new IDFExplanation() {
+ //@Override
+ public float getIdf() {
+ return fIdf;
+ }
+ //@Override
+ public String explain() {
+ return exp.toString();
+ }
+ };
+ }
/** Computes a score factor based on a term's document frequency (the number
* of documents which contain the term). This value is multiplied by the
@@ -577,5 +685,52 @@
//TODO: When removing the deprecated scorePayload above, set this to return 1
return scorePayload(fieldName, payload, offset, length);
}
+
+ /** @deprecated Remove this when old API is removed! */
+ private final MethodSupport supportedMethods = getSupportedMethods(this.getClass());
+
+ /** @deprecated Remove this when old API is removed! */
+ private static final class MethodSupport implements Serializable {
+ final boolean overridesCollectionIDF, overridesTermIDF;
+
+ MethodSupport(Class clazz) {
+ overridesCollectionIDF = isMethodOverridden(clazz, "idf", C_IDF_METHOD_PARAMS);
+ overridesTermIDF = isMethodOverridden(clazz, "idf", T_IDF_METHOD_PARAMS);
+ }
+
+ private static boolean isMethodOverridden(Class clazz, String name, Class[] params) {
+ try {
+ return clazz.getMethod(name, params).getDeclaringClass() != Similarity.class;
+ } catch (NoSuchMethodException e) {
+ // should not happen
+ throw new RuntimeException(e);
+ }
+ }
+ /** @deprecated Remove this when old API is removed! */
+ private static final Class[] T_IDF_METHOD_PARAMS = new Class[]{Term.class, Searcher.class};
+
+ /** @deprecated Remove this when old API is removed! */
+ private static final Class[] C_IDF_METHOD_PARAMS = new Class[]{Collection.class, Searcher.class};
+ }
+
+ /** @deprecated Remove this when old API is removed! */
+ private static final IdentityHashMap/*<Class<? extends Similarity>,MethodSupport>*/ knownMethodSupport = new IdentityHashMap();
+
+ /** @deprecated Remove this when old API is removed! */
+ private static MethodSupport getSupportedMethods(Class clazz) {
+ MethodSupport supportedMethods;
+ synchronized(knownMethodSupport) {
+ supportedMethods = (MethodSupport) knownMethodSupport.get(clazz);
+ if (supportedMethods == null) {
+ knownMethodSupport.put(clazz, supportedMethods = new MethodSupport(clazz));
+ }
+ }
+ return supportedMethods;
+ }
+
+ /** The Similarity implementation used by default.
+ * TODO: move back to top when old API is removed!
+ */
+ private static Similarity defaultImpl = new DefaultSimilarity();
}
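As a usage sketch only (the class name VerboseSimilarity and the explanation wording are made up, not part of this patch), a custom Similarity can override the new idfExplain(Term, Searcher) hook directly rather than relying on the reflection-based fallback above:

    import java.io.IOException;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.DefaultSimilarity;
    import org.apache.lucene.search.Explanation.IDFExplanation;
    import org.apache.lucene.search.Searcher;

    public class VerboseSimilarity extends DefaultSimilarity {
      public IDFExplanation idfExplain(final Term term, final Searcher searcher)
          throws IOException {
        final int df = searcher.docFreq(term);
        final int max = searcher.maxDoc();
        final float idf = idf(df, max);
        return new IDFExplanation() {
          public float getIdf() { return idf; }
          public String explain() {
            // include the term text in addition to the default docFreq/maxDocs detail
            return "idf(term=" + term.text() + ", docFreq=" + df
                + ", maxDocs=" + max + ")";
          }
        };
      }
    }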
Index: src/java/org/apache/lucene/search/TermQuery.java
===================================================================
--- src/java/org/apache/lucene/search/TermQuery.java (revision 807361)
+++ src/java/org/apache/lucene/search/TermQuery.java (working copy)
@@ -23,6 +23,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing a term.
@@ -37,11 +38,13 @@
private float idf;
private float queryNorm;
private float queryWeight;
+ private IDFExplanation idfExp;
public TermWeight(Searcher searcher)
throws IOException {
this.similarity = getSimilarity(searcher);
- idf = similarity.idf(term, searcher); // compute idf
+ idfExp = similarity.idfExplain(term, searcher);
+ idf = idfExp.getIdf();
}
public String toString() { return "weight(" + TermQuery.this + ")"; }
@@ -75,8 +78,7 @@
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
- Explanation expl = new Explanation(idf, "idf(docFreq=" + reader.docFreq(term) +
- ", maxDocs=" + reader.maxDoc() + ")");
+ Explanation expl = new Explanation(idf, idfExp.explain());
// explain query weight
Explanation queryExpl = new Explanation();
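Nothing changes for callers: the richer idf detail simply shows up in the Explanation returned by Searcher#explain(Query, int). A minimal usage sketch, assuming an existing index in a Directory; the field name "handle" is illustrative:

    import java.io.IOException;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Explanation;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.store.Directory;

    public class ExplainUsageSketch {
      /** Prints the explanation of the top hit for handle:1. */
      public static void printTopExplanation(Directory dir) throws IOException {
        IndexSearcher searcher = new IndexSearcher(dir);
        Query query = new TermQuery(new Term("handle", "1"));
        ScoreDoc[] hits = searcher.search(query, null, 10).scoreDocs;
        if (hits.length > 0) {
          // the "idf(docFreq=..., maxDocs=...)" line now comes from IDFExplanation.explain()
          Explanation explanation = searcher.explain(query, hits[0].doc);
          System.out.println(explanation.toString());
        }
        searcher.close();
      }
    }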
Index: src/java/org/apache/lucene/search/spans/SpanWeight.java
===================================================================
--- src/java/org/apache/lucene/search/spans/SpanWeight.java (revision 807361)
+++ src/java/org/apache/lucene/search/spans/SpanWeight.java (working copy)
@@ -18,12 +18,11 @@
*/
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
+import org.apache.lucene.search.Explanation.IDFExplanation;
import java.io.IOException;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.Set;
/**
@@ -38,6 +37,7 @@
protected Set terms;
protected SpanQuery query;
+ private IDFExplanation idfExp;
public SpanWeight(SpanQuery query, Searcher searcher)
throws IOException {
@@ -45,8 +45,8 @@
this.query = query;
terms=new HashSet();
query.extractTerms(terms);
-
- idf = this.query.getSimilarity(searcher).idf(terms, searcher);
+ idfExp = similarity.idfExplain(terms, searcher);
+ idf = idfExp.getIdf();
}
public Query getQuery() { return query; }
@@ -75,21 +75,8 @@
result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
String field = ((SpanQuery)getQuery()).getField();
- StringBuffer docFreqs = new StringBuffer();
- Iterator i = terms.iterator();
- while (i.hasNext()) {
- Term term = (Term)i.next();
- docFreqs.append(term.text());
- docFreqs.append("=");
- docFreqs.append(reader.docFreq(term));
-
- if (i.hasNext()) {
- docFreqs.append(" ");
- }
- }
-
Explanation idfExpl =
- new Explanation(idf, "idf(" + field + ": " + docFreqs + ")");
+ new Explanation(idf, "idf(" + field + ": " + idfExp.explain() + ")");
// explain query weight
Explanation queryExpl = new Explanation();
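The reflection-based MethodSupport check also covers the collection form used here: a legacy Similarity that only overrides the deprecated idf(Collection, Searcher) keeps its score factor, while the explanation degrades to the "Inexplicable" placeholder. A hypothetical legacy subclass for illustration (not part of this patch):

    import java.io.IOException;
    import java.util.Collection;
    import org.apache.lucene.search.DefaultSimilarity;
    import org.apache.lucene.search.Searcher;

    public class LegacySimilarity extends DefaultSimilarity {
      /** Old-style override; idfExplain(Collection, Searcher) detects it via reflection. */
      public float idf(Collection terms, Searcher searcher) throws IOException {
        return 2.0f * super.idf(terms, searcher);  // arbitrary adjustment for illustration
      }
    }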
Index: src/test/org/apache/lucene/search/TestSimpleExplanations.java
===================================================================
--- src/test/org/apache/lucene/search/TestSimpleExplanations.java (revision 807361)
+++ src/test/org/apache/lucene/search/TestSimpleExplanations.java (working copy)
@@ -17,6 +17,19 @@
* limitations under the License.
*/
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MockRAMDirectory;
+
+
/**
* TestExplanations subclass focusing on basic query types
*/
@@ -291,4 +304,67 @@
}
+ public void testTermQueryMultiSearcherExplain() throws Exception {
+ // creating two directories for indices
+ Directory indexStoreA = new MockRAMDirectory();
+ Directory indexStoreB = new MockRAMDirectory();
+
+ Document lDoc = new Document();
+ lDoc.add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));
+ Document lDoc2 = new Document();
+ lDoc2.add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));
+ Document lDoc3 = new Document();
+ lDoc3.add(new Field("handle", "1 2", Field.Store.YES, Field.Index.ANALYZED));
+
+ IndexWriter writerA = new IndexWriter(indexStoreA, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+ IndexWriter writerB = new IndexWriter(indexStoreB, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+
+ writerA.addDocument(lDoc);
+ writerA.addDocument(lDoc2);
+ writerA.optimize();
+ writerA.close();
+
+ writerB.addDocument(lDoc3);
+ writerB.close();
+
+ QueryParser parser = new QueryParser("fulltext", new StandardAnalyzer());
+ Query query = parser.parse("handle:1");
+
+ Searcher[] searchers = new Searcher[2];
+ searchers[0] = new IndexSearcher(indexStoreB);
+ searchers[1] = new IndexSearcher(indexStoreA);
+ Searcher mSearcher = new MultiSearcher(searchers);
+ ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs;
+
+ assertEquals(3, hits.length);
+
+ Explanation explain = mSearcher.explain(query, hits[0].doc);
+ String exp = explain.toString(0);
+ assertTrue(exp, exp.indexOf("maxDocs=3") > -1);
+ assertTrue(exp, exp.indexOf("docFreq=3") > -1);
+
+ query = parser.parse("handle:\"1 2\"");
+ hits = mSearcher.search(query, null, 1000).scoreDocs;
+
+ assertEquals(3, hits.length);
+
+ explain = mSearcher.explain(query, hits[0].doc);
+ exp = explain.toString(0);
+ assertTrue(exp, exp.indexOf("1=3") > -1);
+ assertTrue(exp, exp.indexOf("2=3") > -1);
+
+ query = new SpanNearQuery(new SpanQuery[] {
+ new SpanTermQuery(new Term("handle", "1")),
+ new SpanTermQuery(new Term("handle", "2")) }, 0, true);
+ hits = mSearcher.search(query, null, 1000).scoreDocs;
+
+ assertEquals(3, hits.length);
+
+ explain = mSearcher.explain(query, hits[0].doc);
+ exp = explain.toString(0);
+ assertTrue(exp, exp.indexOf("1=3") > -1);
+ assertTrue(exp, exp.indexOf("2=3") > -1);
+ mSearcher.close();
+ }
+
}