| Index: contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java |
| =================================================================== |
| --- contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java (revision 0) |
| +++ contrib/queries/src/test/org/apache/lucene/search/similar/TestMoreLikeThis.java (revision 0) |
| @@ -0,0 +1,142 @@ |
| +package org.apache.lucene.search.similar; |
| + |
| +import java.io.IOException; |
| +import java.io.StringReader; |
| +import java.util.HashMap; |
| +import java.util.List; |
| +import java.util.Map; |
| + |
| +import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.index.IndexReader; |
| +import org.apache.lucene.index.IndexWriter; |
| +import org.apache.lucene.index.IndexWriter.MaxFieldLength; |
| +import org.apache.lucene.search.BooleanClause; |
| +import org.apache.lucene.search.BooleanQuery; |
| +import org.apache.lucene.search.IndexSearcher; |
| +import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.store.RAMDirectory; |
| +import org.apache.lucene.util.LuceneTestCase; |
| + |
| +/** |
| + * Tests for {@link MoreLikeThis}; currently covers the boost factor that is |
| + * applied to the term queries it generates. |
| + */ |
| +public class TestMoreLikeThis extends LuceneTestCase { |
| + /** Name of the only indexed field; every MoreLikeThis here is limited to it. */ |
| + private static final String TEXT_FIELD = "text"; |
| + /** Text the like-query is built from. */ |
| + private static final String LIKE_TEXT = "lucene release"; |
| + /** Tolerance used when comparing float boosts. */ |
| + private static final float BOOST_DELTA = 0.0001f; |
| + |
| + private RAMDirectory directory; |
| + private IndexReader reader; |
| + private IndexSearcher searcher; |
| + |
| + /** |
| + * Builds a two-document in-memory index and opens a read-only reader and a |
| + * searcher on it. |
| + */ |
| + protected void setUp() throws Exception { |
| + // Chain to the base class so its per-test set-up is not skipped. |
| + super.setUp(); |
| + directory = new RAMDirectory(); |
| + IndexWriter writer = new IndexWriter(directory, new StandardAnalyzer(), |
| + true, MaxFieldLength.UNLIMITED); |
| + |
| + // Add series of docs with specific information for MoreLikeThis |
| + addDoc(writer, "lucene"); |
| + addDoc(writer, "lucene release"); |
| + |
| + writer.close(); |
| + reader = IndexReader.open(directory, true); |
| + searcher = new IndexSearcher(reader); |
| + } |
| + |
| + /** |
| + * Closes the searcher, the reader and the directory, then lets the base |
| + * class run its own tear-down. |
| + */ |
| + protected void tearDown() throws Exception { |
| + // Close the searcher before the reader it was built on. |
| + searcher.close(); |
| + reader.close(); |
| + directory.close(); |
| + super.tearDown(); |
| + } |
| + |
| + /** Indexes one document whose text field holds the given, analyzed and stored, text. */ |
| + private void addDoc(IndexWriter writer, String text) throws IOException { |
| + Document doc = new Document(); |
| + doc.add(new Field(TEXT_FIELD, text, Field.Store.YES, Field.Index.ANALYZED)); |
| + writer.addDocument(doc); |
| + } |
| + |
| + /** |
| + * Verifies that each term boost produced with a boost factor set equals the |
| + * boost produced without one, multiplied by that factor. |
| + */ |
| + public void testBoostFactor() throws Exception { |
| + Map originalValues = getOriginalValues(); |
| + |
| + MoreLikeThis mlt = newMoreLikeThis(); |
| + // every term boost is expected to be multiplied by this number |
| + float boostFactor = 5; |
| + mlt.setBoostFactor(boostFactor); |
| + |
| + BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(LIKE_TEXT)); |
| + List clauses = query.clauses(); |
| + |
| + assertEquals("Expected " + originalValues.size() + " clauses.", |
| + originalValues.size(), clauses.size()); |
| + |
| + for (int i = 0; i < clauses.size(); i++) { |
| + BooleanClause clause = (BooleanClause) clauses.get(i); |
| + TermQuery tq = (TermQuery) clause.getQuery(); |
| + Float termBoost = (Float) originalValues.get(tq.getTerm().text()); |
| + assertNotNull("Expected term " + tq.getTerm().text(), termBoost); |
| + |
| + float totalBoost = termBoost.floatValue() * boostFactor; |
| + // Compare with a tolerance rather than ==: both sides come out of float |
| + // arithmetic and are not guaranteed to round identically. |
| + assertEquals("Expected boost of " + totalBoost + " for term '" |
| + + tq.getTerm().text() + "' got " + tq.getBoost(), |
| + totalBoost, tq.getBoost(), BOOST_DELTA); |
| + } |
| + } |
| + |
| + /** |
| + * Collects the boost of each query term when no boost factor has been set. |
| + * |
| + * @return map from term text (String) to that term's boost (Float) |
| + */ |
| + private Map getOriginalValues() throws IOException { |
| + Map originalValues = new HashMap(); |
| + MoreLikeThis mlt = newMoreLikeThis(); |
| + BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(LIKE_TEXT)); |
| + List clauses = query.clauses(); |
| + |
| + for (int i = 0; i < clauses.size(); i++) { |
| + BooleanClause clause = (BooleanClause) clauses.get(i); |
| + TermQuery tq = (TermQuery) clause.getQuery(); |
| + originalValues.put(tq.getTerm().text(), new Float(tq.getBoost())); |
| + } |
| + return originalValues; |
| + } |
| + |
| + /** |
| + * Creates a MoreLikeThis over the test index with the document-frequency, |
| + * term-frequency and word-length minimums set to 1 and boosting enabled. |
| + */ |
| + private MoreLikeThis newMoreLikeThis() { |
| + MoreLikeThis mlt = new MoreLikeThis(reader); |
| + mlt.setMinDocFreq(1); |
| + mlt.setMinTermFreq(1); |
| + mlt.setMinWordLen(1); |
| + mlt.setFieldNames(new String[] { TEXT_FIELD }); |
| + mlt.setBoost(true); |
| + return mlt; |
| + } |
| +} |
| Index: contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java |
| =================================================================== |
| --- contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (revision 781362) |
| +++ contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java (working copy) |
| @@ -282,6 +282,27 @@ |
| private final IndexReader ir; |
| |
| /** |
| + * Factor each term query's boost is multiplied by; the default of 1 leaves boosts unchanged. |
| + */ |
| + private float boostFactor = 1.0f; |
| + |
| + /** |
| + * Gets the factor that term boosts are currently multiplied by. |
| + * @return the current boost factor |
| + */ |
| + public float getBoostFactor() { |
| + return this.boostFactor; |
| + } |
| + |
| + /** |
| + * Sets the factor that term boosts are multiplied by. |
| + * @param boostFactor the new boost factor; stored as given, without validation |
| + */ |
| + public void setBoostFactor(float boostFactor) { |
| + this.boostFactor = boostFactor; |
| + } |
| + |
| + /** |
| * Constructor requiring an IndexReader. |
| */ |
| public MoreLikeThis(IndexReader ir) { |
| @@ -586,7 +607,7 @@ |
| } |
| float myScore = ((Float) ar[2]).floatValue(); |
| |
| - tq.setBoost(myScore / bestScore); |
| + tq.setBoost(boostFactor * (myScore / bestScore)); // normalise against bestScore first, then scale by the factor |
| } |
| |
| try { |