lucene/core/src/test/org/apache/lucene/search/TestDocValuesScoring.java - lucene-solr - Git at Google

 package org.apache.lucene.search;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatDocValuesField;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;

 /**
  * Tests the use of indexdocvalues in scoring.
  *
  * In the example, a docvalues field is used as a per-document boost (separate from the norm)
  * @lucene.experimental
  */
 public class TestDocValuesScoring extends LuceneTestCase {
   private static final float SCORE_EPSILON = 0.001f; /* for comparing floats */

   public void testSimple() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
     Field field = newTextField("foo", "", Field.Store.NO);
     doc.add(field);
     Field dvField = new FloatDocValuesField("foo_boost", 0.0F);
     doc.add(dvField);
     Field field2 = newTextField("bar", "", Field.Store.NO);
     doc.add(field2);

     field.setStringValue("quick brown fox");
     field2.setStringValue("quick brown fox");
     dvField.setFloatValue(2f); // boost x2
     iw.addDocument(doc);
     field.setStringValue("jumps over lazy brown dog");
     field2.setStringValue("jumps over lazy brown dog");
     dvField.setFloatValue(4f); // boost x4
     iw.addDocument(doc);
     IndexReader ir = iw.getReader();
     iw.shutdown();

     // no boosting
     IndexSearcher searcher1 = newSearcher(ir, false);
     final Similarity base = searcher1.getSimilarity();
     // boosting
     IndexSearcher searcher2 = newSearcher(ir, false);
     searcher2.setSimilarity(new PerFieldSimilarityWrapper() {
       final Similarity fooSim = new BoostingSimilarity(base, "foo_boost");

       @Override
       public Similarity get(String field) {
         return "foo".equals(field) ? fooSim : base;
       }

       @Override
       public float coord(int overlap, int maxOverlap) {
         return base.coord(overlap, maxOverlap);
       }

       @Override
       public float queryNorm(float sumOfSquaredWeights) {
         return base.queryNorm(sumOfSquaredWeights);
       }
     });

     // in this case, we searched on field "foo". first document should have 2x the score.
     TermQuery tq = new TermQuery(new Term("foo", "quick"));
     QueryUtils.check(random(), tq, searcher1);
     QueryUtils.check(random(), tq, searcher2);

     TopDocs noboost = searcher1.search(tq, 10);
     TopDocs boost = searcher2.search(tq, 10);
     assertEquals(1, noboost.totalHits);
     assertEquals(1, boost.totalHits);

     //System.out.println(searcher2.explain(tq, boost.scoreDocs[0].doc));
     assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score*2f, SCORE_EPSILON);

     // this query matches only the second document, which should have 4x the score.
     tq = new TermQuery(new Term("foo", "jumps"));
     QueryUtils.check(random(), tq, searcher1);
     QueryUtils.check(random(), tq, searcher2);

     noboost = searcher1.search(tq, 10);
     boost = searcher2.search(tq, 10);
     assertEquals(1, noboost.totalHits);
     assertEquals(1, boost.totalHits);

     assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score*4f, SCORE_EPSILON);

     // search on on field bar just for kicks, nothing should happen, since we setup
     // our sim provider to only use foo_boost for field foo.
     tq = new TermQuery(new Term("bar", "quick"));
     QueryUtils.check(random(), tq, searcher1);
     QueryUtils.check(random(), tq, searcher2);

     noboost = searcher1.search(tq, 10);
     boost = searcher2.search(tq, 10);
     assertEquals(1, noboost.totalHits);
     assertEquals(1, boost.totalHits);

     assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score, SCORE_EPSILON);

     ir.close();
     dir.close();
   }

   /**
    * Similarity that wraps another similarity and boosts the final score
    * according to whats in a docvalues field.
    *
    * @lucene.experimental
    */
   static class BoostingSimilarity extends Similarity {
     private final Similarity sim;
     private final String boostField;

     public BoostingSimilarity(Similarity sim, String boostField) {
       this.sim = sim;
       this.boostField = boostField;
     }

     @Override
     public long computeNorm(FieldInvertState state) {
       return sim.computeNorm(state);
     }

     @Override
     public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
       return sim.computeWeight(queryBoost, collectionStats, termStats);
     }

     @Override
     public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
       final SimScorer sub = sim.simScorer(stats, context);
       final FieldCache.Floats values = FieldCache.DEFAULT.getFloats(context.reader(), boostField, false);

       return new SimScorer() {
         @Override
         public float score(int doc, float freq) {
           return values.get(doc) * sub.score(doc, freq);
         }

         @Override
         public float computeSlopFactor(int distance) {
           return sub.computeSlopFactor(distance);
         }

         @Override
         public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
           return sub.computePayloadFactor(doc, start, end, payload);
         }

         @Override
         public Explanation explain(int doc, Explanation freq) {
           Explanation boostExplanation = new Explanation(values.get(doc), "indexDocValue(" + boostField + ")");
           Explanation simExplanation = sub.explain(doc, freq);
           Explanation expl = new Explanation(boostExplanation.getValue() * simExplanation.getValue(), "product of:");
           expl.addDetail(boostExplanation);
           expl.addDetail(simExplanation);
           return expl;
         }
       };
     }
   }
 }
	package org.apache.lucene.search;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;

	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.document.FloatDocValuesField;
	import org.apache.lucene.index.AtomicReaderContext;
	import org.apache.lucene.index.FieldInvertState;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.RandomIndexWriter;
	import org.apache.lucene.index.Term;
	import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
	import org.apache.lucene.search.similarities.Similarity;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.LuceneTestCase;

	/**
	* Tests the use of indexdocvalues in scoring.
	*
	* In the example, a docvalues field is used as a per-document boost (separate from the norm)
	* @lucene.experimental
	*/
	public class TestDocValuesScoring extends LuceneTestCase {
	private static final float SCORE_EPSILON = 0.001f; /* for comparing floats */

	public void testSimple() throws Exception {
	Directory dir = newDirectory();
	RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
	Document doc = new Document();
	Field field = newTextField("foo", "", Field.Store.NO);
	doc.add(field);
	Field dvField = new FloatDocValuesField("foo_boost", 0.0F);
	doc.add(dvField);
	Field field2 = newTextField("bar", "", Field.Store.NO);
	doc.add(field2);

	field.setStringValue("quick brown fox");
	field2.setStringValue("quick brown fox");
	dvField.setFloatValue(2f); // boost x2
	iw.addDocument(doc);
	field.setStringValue("jumps over lazy brown dog");
	field2.setStringValue("jumps over lazy brown dog");
	dvField.setFloatValue(4f); // boost x4
	iw.addDocument(doc);
	IndexReader ir = iw.getReader();
	iw.shutdown();

	// no boosting
	IndexSearcher searcher1 = newSearcher(ir, false);
	final Similarity base = searcher1.getSimilarity();
	// boosting
	IndexSearcher searcher2 = newSearcher(ir, false);
	searcher2.setSimilarity(new PerFieldSimilarityWrapper() {
	final Similarity fooSim = new BoostingSimilarity(base, "foo_boost");

	@Override
	public Similarity get(String field) {
	return "foo".equals(field) ? fooSim : base;
	}

	@Override
	public float coord(int overlap, int maxOverlap) {
	return base.coord(overlap, maxOverlap);
	}

	@Override
	public float queryNorm(float sumOfSquaredWeights) {
	return base.queryNorm(sumOfSquaredWeights);
	}
	});

	// in this case, we searched on field "foo". first document should have 2x the score.
	TermQuery tq = new TermQuery(new Term("foo", "quick"));
	QueryUtils.check(random(), tq, searcher1);
	QueryUtils.check(random(), tq, searcher2);

	TopDocs noboost = searcher1.search(tq, 10);
	TopDocs boost = searcher2.search(tq, 10);
	assertEquals(1, noboost.totalHits);
	assertEquals(1, boost.totalHits);

	//System.out.println(searcher2.explain(tq, boost.scoreDocs[0].doc));
	assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score*2f, SCORE_EPSILON);

	// this query matches only the second document, which should have 4x the score.
	tq = new TermQuery(new Term("foo", "jumps"));
	QueryUtils.check(random(), tq, searcher1);
	QueryUtils.check(random(), tq, searcher2);

	noboost = searcher1.search(tq, 10);
	boost = searcher2.search(tq, 10);
	assertEquals(1, noboost.totalHits);
	assertEquals(1, boost.totalHits);

	assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score*4f, SCORE_EPSILON);

	// search on on field bar just for kicks, nothing should happen, since we setup
	// our sim provider to only use foo_boost for field foo.
	tq = new TermQuery(new Term("bar", "quick"));
	QueryUtils.check(random(), tq, searcher1);
	QueryUtils.check(random(), tq, searcher2);

	noboost = searcher1.search(tq, 10);
	boost = searcher2.search(tq, 10);
	assertEquals(1, noboost.totalHits);
	assertEquals(1, boost.totalHits);

	assertEquals(boost.scoreDocs[0].score, noboost.scoreDocs[0].score, SCORE_EPSILON);

	ir.close();
	dir.close();
	}

	/**
	* Similarity that wraps another similarity and boosts the final score
	* according to whats in a docvalues field.
	*
	* @lucene.experimental
	*/
	static class BoostingSimilarity extends Similarity {
	private final Similarity sim;
	private final String boostField;

	public BoostingSimilarity(Similarity sim, String boostField) {
	this.sim = sim;
	this.boostField = boostField;
	}

	@Override
	public long computeNorm(FieldInvertState state) {
	return sim.computeNorm(state);
	}

	@Override
	public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
	return sim.computeWeight(queryBoost, collectionStats, termStats);
	}

	@Override
	public SimScorer simScorer(SimWeight stats, AtomicReaderContext context) throws IOException {
	final SimScorer sub = sim.simScorer(stats, context);
	final FieldCache.Floats values = FieldCache.DEFAULT.getFloats(context.reader(), boostField, false);

	return new SimScorer() {
	@Override
	public float score(int doc, float freq) {
	return values.get(doc) * sub.score(doc, freq);
	}

	@Override
	public float computeSlopFactor(int distance) {
	return sub.computeSlopFactor(distance);
	}

	@Override
	public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
	return sub.computePayloadFactor(doc, start, end, payload);
	}

	@Override
	public Explanation explain(int doc, Explanation freq) {
	Explanation boostExplanation = new Explanation(values.get(doc), "indexDocValue(" + boostField + ")");
	Explanation simExplanation = sub.explain(doc, freq);
	Explanation expl = new Explanation(boostExplanation.getValue() * simExplanation.getValue(), "product of:");
	expl.addDetail(boostExplanation);
	expl.addDetail(simExplanation);
	return expl;
	}
	};
	}
	}
	}