/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.benchmark.byTask.tasks;
import java.text.BreakIterator;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.apache.lucene.search.vectorhighlight.BoundaryScanner;
import org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner;
import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter;
import org.apache.lucene.search.vectorhighlight.FieldQuery;
import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder;
import org.apache.lucene.search.vectorhighlight.WeightedFragListBuilder;
import org.apache.lucene.util.ArrayUtil;
/**
* Search and Traverse and Retrieve docs task. Highlight the fields in the retrieved documents.
*
* <p>Note: This task reuses the reader if it is already open.
* Otherwise a reader is opened at start and closed at the end.
* </p>
*
* <p>Takes optional multivalued, comma separated param string as: type[&lt;enum&gt;],maxFrags[&lt;int&gt;],fields[name1;name2;...]</p>
* <ul>
* <li>type - the highlighter implementation: NONE, SH_A, SH_V, FVH_V, UH, UH_A, UH_V, UH_P, or UH_PV (see setup())</li>
* <li>maxFrags - the maximum number of highlighted fragments (aka passages) to return per field</li>
* <li>fields - the fields to highlight. If not specified, all fields will be highlighted (or at least attempted)</li>
* </ul>
* Example:
* <pre>"SearchHlgtSameRdr" SearchTravRetHighlight(type[UH],maxFrags[3],fields[body]) &gt; : 1000
* </pre>
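*
* <p>For example, to exercise the FastVectorHighlighter over two fields (the task name and
* repetition count here are illustrative):</p>
* <pre>"SearchHlgtFVH" SearchTravRetHighlight(type[FVH_V],maxFrags[2],fields[body;title]) &gt; : 500
* </pre>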
*
* Documents must be stored for this task to work. Additionally, term vectors (with positions and
* offsets) can be used, and offsets in postings are another option, depending on the highlighter type.
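*
* <p>A minimal sketch of the index-side .alg properties this implies (standard byTask config
* keys; the values are illustrative, so verify against your content source):</p>
* <pre>doc.stored=true
* doc.term.vector=true
* doc.term.vector.positions=true
* doc.term.vector.offsets=true
* highlighter=UH
* highlighter.maxDocCharsToAnalyze=10000
* </pre>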
*
* <p>Other side effects: counts an additional 1 (record) for each traversed hit,
* 1 more for each retrieved (non-null) document, and 1 for each fragment returned.</p>
*/
public class SearchTravRetHighlightTask extends SearchTravTask {
private int maxDocCharsToAnalyze; // max leading content chars to highlight
private int maxFrags = 1; // aka passages
private Set<String> hlFields = Collections.singleton("body");
private String type;
private HLImpl hlImpl;
private Analyzer analyzer;
public SearchTravRetHighlightTask(PerfRunData runData) {
super(runData);
}
@Override
public void setParams(String params) {
// can't call super because super doesn't understand our params syntax
this.params = params;
// TODO consider instead using data.getConfig().get("highlighter.*")?
String[] splits = params.split(",");
for (String split : splits) {
if (split.startsWith("type[") == true) {
type = split.substring("type[".length(), split.length() - 1);
} else if (split.startsWith("maxFrags[") == true) {
maxFrags = (int) Float.parseFloat(split.substring("maxFrags[".length(), split.length() - 1));
} else if (split.startsWith("fields[") == true) {
String fieldNames = split.substring("fields[".length(), split.length() - 1);
String[] fieldSplits = fieldNames.split(";");
hlFields = new HashSet<>(Arrays.asList(fieldSplits));
}
}
}
@Override
public void setup() throws Exception {
super.setup();
// check that docs were stored; highlighting requires the original field text
PerfRunData data = getRunData();
if (!data.getConfig().get("doc.stored", false)) {
throw new Exception("doc.stored must be set to true");
}
maxDocCharsToAnalyze = data.getConfig().get("highlighter.maxDocCharsToAnalyze", Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
analyzer = data.getAnalyzer();
String type = this.type;
if (type == null) {
type = data.getConfig().get("highlighter", null);
}
if (type == null) {
throw new Exception("No highlighter type specified; set the type[...] param or the 'highlighter' config property (try 'UH')");
}
switch (type) {
case "NONE": hlImpl = new NoHLImpl(); break;
case "SH_A": hlImpl = new StandardHLImpl(false); break;
case "SH_V": hlImpl = new StandardHLImpl(true); break;
case "FVH_V": hlImpl = new FastVectorHLImpl(); break;
case "UH": hlImpl = new UnifiedHLImpl(null); break;
case "UH_A": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.ANALYSIS); break;
case "UH_V": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.TERM_VECTORS); break;
case "UH_P": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS); break;
case "UH_PV": hlImpl = new UnifiedHLImpl(UnifiedHighlighter.OffsetSource.POSTINGS_WITH_TERM_VECTORS); break;
default: throw new Exception("unrecognized highlighter type: " + type + " (try 'UH')");
}
}
// here is where we intercept ReadTask's logic to do the highlighting, and nothing else (no retrieval of all field vals)
@Override
protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
hlImpl.withTopDocs(searcher, q, hits);
// note: it'd be nice if we knew the sum kilobytes of text across these hits so we could return that. It'd be a more
// useful number to gauge the amount of work. But given "average" document sizes and lots of queries, returning the
// number of docs is reasonable.
return hits.scoreDocs.length; // always return # scored docs.
}
private interface HLImpl {
void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception;
}
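// sink for highlight results; volatile so the JIT can't eliminate the benchmarked work as dead code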
private volatile int preventOptimizeAway = 0;
private class StandardHLImpl implements HLImpl {
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<em>", "</em>");
DefaultEncoder encoder = new DefaultEncoder();
Highlighter highlighter = new Highlighter(formatter, encoder, null);
boolean termVecs;
StandardHLImpl(boolean termVecs) {
// the encoder was already supplied via the field initializer above; no need to set it again
highlighter.setMaxDocCharsToAnalyze(maxDocCharsToAnalyze);
this.termVecs = termVecs;
}
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
IndexReader reader = searcher.getIndexReader();
highlighter.setFragmentScorer(new QueryScorer(q));
// highlighter.setTextFragmenter(); unfortunately no sentence mechanism, not even regex. Default here is trivial
for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
Document document = reader.document(scoreDoc.doc, hlFields);
Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
for (IndexableField indexableField : document) {
TokenStream tokenStream;
if (termVecs) {
tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields,
indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
} else {
tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue());
}
// will close TokenStream:
String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
preventOptimizeAway = fragments.length;
}
}
}
}
private class FastVectorHLImpl implements HLImpl {
int fragSize = 100;
WeightedFragListBuilder fragListBuilder = new WeightedFragListBuilder();
BoundaryScanner bs = new BreakIteratorBoundaryScanner(BreakIterator.getSentenceInstance(Locale.ENGLISH));
ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder(bs);
String[] preTags = {"<em>"};
String[] postTags = {"</em>"};
Encoder encoder = new DefaultEncoder(); // alternatively new SimpleHTMLEncoder() to escape HTML in the content
FastVectorHighlighter highlighter = new FastVectorHighlighter(
true, // phraseHighlight
false); // requireFieldMatch -- not pertinent to our benchmark
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
IndexReader reader = searcher.getIndexReader();
final FieldQuery fq = highlighter.getFieldQuery(q, reader);
for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
for (String hlField : hlFields) {
String[] fragments = highlighter.getBestFragments(fq, reader, scoreDoc.doc, hlField, fragSize, maxFrags,
fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
preventOptimizeAway = fragments.length;
}
}
}
}
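// visit hits in docid order: sequential access to stored fields and term vectors is cheaper than random access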
private ScoreDoc[] docIdOrder(ScoreDoc[] scoreDocs) {
ScoreDoc[] clone = new ScoreDoc[scoreDocs.length];
System.arraycopy(scoreDocs, 0, clone, 0, scoreDocs.length);
ArrayUtil.introSort(clone, (a, b) -> Integer.compare(a.doc, b.doc));
return clone;
}
private class UnifiedHLImpl implements HLImpl {
UnifiedHighlighter highlighter;
IndexSearcher lastSearcher;
UnifiedHighlighter.OffsetSource offsetSource; // null means auto select
String[] fields = hlFields.toArray(new String[hlFields.size()]);
int[] maxPassages;
UnifiedHLImpl(final UnifiedHighlighter.OffsetSource offsetSource) {
this.offsetSource = offsetSource;
maxPassages = new int[hlFields.size()];
Arrays.fill(maxPassages, maxFrags);
}
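// the UnifiedHighlighter is constructed around one searcher; rebuild it if the searcher changed (e.g. reader reopened)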
private void reset(IndexSearcher searcher) {
if (lastSearcher == searcher) {
return;
}
lastSearcher = searcher;
highlighter = new UnifiedHighlighter(searcher, analyzer) {
@Override
protected OffsetSource getOffsetSource(String field) {
return offsetSource != null ? offsetSource : super.getOffsetSource(field);
}
};
highlighter.setBreakIterator(() -> BreakIterator.getSentenceInstance(Locale.ENGLISH));
highlighter.setMaxLength(maxDocCharsToAnalyze);
highlighter.setHighlightPhrasesStrictly(true);
highlighter.setHandleMultiTermQuery(true);
}
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
reset(searcher);
Map<String, String[]> result = highlighter.highlightFields(fields, q, hits, maxPassages);
preventOptimizeAway = result.size();
}
}
private class NoHLImpl implements HLImpl {
@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
// just retrieve the HL fields; no highlighting is performed
for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
// count 2 when the doc has at least one stored field, else 1, so the retrieval isn't optimized away
preventOptimizeAway += searcher.doc(scoreDoc.doc, hlFields).iterator().hasNext() ? 2 : 1;
}
}
}
}