lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.search.highlight;

 import java.util.HashMap;
 import java.util.HashSet;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Query;

 /**
  * {@link Scorer} implementation which scores text fragments by the number of
  * unique query terms found. This class uses the {@link QueryTermExtractor}
  * class to process determine the query terms and their boosts to be used.
  */
 // TODO: provide option to boost score of fragments near beginning of document
 // based on fragment.getFragNum()
 public class QueryTermScorer implements Scorer {

   TextFragment currentTextFragment = null;
   HashSet<String> uniqueTermsInFragment;

   float totalScore = 0;
   float maxTermWeight = 0;
   private HashMap<String,WeightedTerm> termsToFind;

   private CharTermAttribute termAtt;

   /**
    *
    * @param query a Lucene query (ideally rewritten using query.rewrite before
    *        being passed to this class and the searcher)
    */
   public QueryTermScorer(Query query) {
     this(QueryTermExtractor.getTerms(query));
   }

   /**
    *
    * @param query a Lucene query (ideally rewritten using query.rewrite before
    *        being passed to this class and the searcher)
    * @param fieldName the Field name which is used to match Query terms
    */
   public QueryTermScorer(Query query, String fieldName) {
     this(QueryTermExtractor.getTerms(query, false, fieldName));
   }

   /**
    *
    * @param query a Lucene query (ideally rewritten using query.rewrite before
    *        being passed to this class and the searcher)
    * @param reader used to compute IDF which can be used to a) score selected
    *        fragments better b) use graded highlights eg set font color
    *        intensity
    * @param fieldName the field on which Inverse Document Frequency (IDF)
    *        calculations are based
    */
   public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
     this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
   }

   public QueryTermScorer(WeightedTerm[] weightedTerms) {
     termsToFind = new HashMap<>();
     for (int i = 0; i < weightedTerms.length; i++) {
       WeightedTerm existingTerm = termsToFind
           .get(weightedTerms[i].term);
       if ((existingTerm == null)
           || (existingTerm.weight < weightedTerms[i].weight)) {
         // if a term is defined more than once, always use the highest scoring
         // weight
         termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
         maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
       }
     }
   }

   /* (non-Javadoc)
    * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
    */
   @Override
   public TokenStream init(TokenStream tokenStream) {
     termAtt = tokenStream.addAttribute(CharTermAttribute.class);
     return null;
   }

   /*
    * (non-Javadoc)
    *
    * @see
    * org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
    * .lucene.search.highlight.TextFragment)
    */
   @Override
   public void startFragment(TextFragment newFragment) {
     uniqueTermsInFragment = new HashSet<>();
     currentTextFragment = newFragment;
     totalScore = 0;

   }


   /* (non-Javadoc)
    * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
    */
   @Override
   public float getTokenScore() {
     String termText = termAtt.toString();

     WeightedTerm queryTerm = termsToFind.get(termText);
     if (queryTerm == null) {
       // not a query term - return
       return 0;
     }
     // found a query term - is it unique in this doc?
     if (!uniqueTermsInFragment.contains(termText)) {
       totalScore += queryTerm.getWeight();
       uniqueTermsInFragment.add(termText);
     }
     return queryTerm.getWeight();
   }


   /* (non-Javadoc)
    * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
    */
   @Override
   public float getFragmentScore() {
     return totalScore;
   }

   /*
    * (non-Javadoc)
    *
    * @see
    * org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
    */
   public void allFragmentsProcessed() {
     // this class has no special operations to perform at end of processing
   }

   /**
    *
    * @return The highest weighted term (useful for passing to GradientFormatter
    *         to set top end of coloring scale.
    */
   public float getMaxTermWeight() {
     return maxTermWeight;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.search.highlight;

	import java.util.HashMap;
	import java.util.HashSet;

	import org.apache.lucene.analysis.TokenStream;
	import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.search.Query;

	/**
	* {@link Scorer} implementation which scores text fragments by the number of
	* unique query terms found. This class uses the {@link QueryTermExtractor}
	* class to process determine the query terms and their boosts to be used.
	*/
	// TODO: provide option to boost score of fragments near beginning of document
	// based on fragment.getFragNum()
	public class QueryTermScorer implements Scorer {

	TextFragment currentTextFragment = null;
	HashSet<String> uniqueTermsInFragment;

	float totalScore = 0;
	float maxTermWeight = 0;
	private HashMap<String,WeightedTerm> termsToFind;

	private CharTermAttribute termAtt;

	/**
	*
	* @param query a Lucene query (ideally rewritten using query.rewrite before
	* being passed to this class and the searcher)
	*/
	public QueryTermScorer(Query query) {
	this(QueryTermExtractor.getTerms(query));
	}

	/**
	*
	* @param query a Lucene query (ideally rewritten using query.rewrite before
	* being passed to this class and the searcher)
	* @param fieldName the Field name which is used to match Query terms
	*/
	public QueryTermScorer(Query query, String fieldName) {
	this(QueryTermExtractor.getTerms(query, false, fieldName));
	}

	/**
	*
	* @param query a Lucene query (ideally rewritten using query.rewrite before
	* being passed to this class and the searcher)
	* @param reader used to compute IDF which can be used to a) score selected
	* fragments better b) use graded highlights eg set font color
	* intensity
	* @param fieldName the field on which Inverse Document Frequency (IDF)
	* calculations are based
	*/
	public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
	this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
	}

	public QueryTermScorer(WeightedTerm[] weightedTerms) {
	termsToFind = new HashMap<>();
	for (int i = 0; i < weightedTerms.length; i++) {
	WeightedTerm existingTerm = termsToFind
	.get(weightedTerms[i].term);
	if ((existingTerm == null)
	\|\| (existingTerm.weight < weightedTerms[i].weight)) {
	// if a term is defined more than once, always use the highest scoring
	// weight
	termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
	maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
	}
	}
	}

	/* (non-Javadoc)
	* @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
	*/
	@Override
	public TokenStream init(TokenStream tokenStream) {
	termAtt = tokenStream.addAttribute(CharTermAttribute.class);
	return null;
	}

	/*
	* (non-Javadoc)
	*
	* @see
	* org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
	* .lucene.search.highlight.TextFragment)
	*/
	@Override
	public void startFragment(TextFragment newFragment) {
	uniqueTermsInFragment = new HashSet<>();
	currentTextFragment = newFragment;
	totalScore = 0;

	}


	/* (non-Javadoc)
	* @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
	*/
	@Override
	public float getTokenScore() {
	String termText = termAtt.toString();

	WeightedTerm queryTerm = termsToFind.get(termText);
	if (queryTerm == null) {
	// not a query term - return
	return 0;
	}
	// found a query term - is it unique in this doc?
	if (!uniqueTermsInFragment.contains(termText)) {
	totalScore += queryTerm.getWeight();
	uniqueTermsInFragment.add(termText);
	}
	return queryTerm.getWeight();
	}


	/* (non-Javadoc)
	* @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
	*/
	@Override
	public float getFragmentScore() {
	return totalScore;
	}

	/*
	* (non-Javadoc)
	*
	* @see
	* org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
	*/
	public void allFragmentsProcessed() {
	// this class has no special operations to perform at end of processing
	}

	/**
	*
	* @return The highest weighted term (useful for passing to GradientFormatter
	* to set top end of coloring scale.
	*/
	public float getMaxTermWeight() {
	return maxTermWeight;
	}
	}