src/contrib/Highlighter/WeightedSpanTermExtractor.cs - lucenenet - Git at Google

 /*
  *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  *
 */

 using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
 using System.Text;
 using Lucene.Net.Analysis;
 using Lucene.Net.Index;
 using Lucene.Net.Index.Memory;
 using Lucene.Net.Search.Spans;
 using Lucene.Net.Store;
 using Lucene.Net.Support;
 using Lucene.Net.Util;

 namespace Lucene.Net.Search.Highlight
 {
     /// <summary>
     /// Class used to extract <see cref="WeightedSpanTerm"/>s from a <see cref="Query"/> based on whether
     /// <see cref="Term"/>s from the <see cref="Query"/> are contained in a supplied <see cref="Analysis.TokenStream"/>.
     /// </summary>
     public class WeightedSpanTermExtractor
     {
         private String fieldName;
         private TokenStream tokenStream;
         private IDictionary<String, IndexReader> readers = new HashMap<String, IndexReader>(10);
         private String defaultField;
         private bool expandMultiTermQuery;
         private bool cachedTokenStream;
         private bool wrapToCaching = true;

         /// <summary>
         /// Creates an extractor without a default field.
         /// </summary>
         public WeightedSpanTermExtractor()
             : this((String) null)
         {
         }

         /// <summary>
         /// Creates an extractor that also accepts terms belonging to <paramref name="defaultField"/>.
         /// </summary>
         /// <param name="defaultField">Field name accepted in addition to the requested field;
         /// a <c>null</c> value leaves the default field unset.</param>
         public WeightedSpanTermExtractor(String defaultField)
         {
             // The name is stored interned; null simply keeps the member at its initial null value.
             this.defaultField = defaultField == null ? null : StringHelper.Intern(defaultField);
         }

         /// <summary>
         /// Closes every reader created by <see cref="GetReaderForField"/> and empties the reader cache.
         /// </summary>
         /// <remarks>
         /// Closing is best effort: an <see cref="IOException"/> raised by one reader is ignored so that
         /// the remaining readers are still closed. The cache is cleared afterwards so that a later
         /// extraction on this instance builds fresh readers instead of being handed closed ones
         /// that were built from an earlier token stream.
         /// </remarks>
         private void CloseReaders()
         {
             foreach (IndexReader reader in readers.Values)
             {
                 try
                 {
                     reader.Close();
                 }
                 catch (IOException)
                 {
                     // Deliberately ignored: failing to close one reader must not stop the cleanup.
                 }
             }

             // Forget the closed readers so a reused extractor never returns them again.
             readers.Clear();
         }

         /// <summary>
         /// Walks <paramref name="query"/> and records a <see cref="WeightedSpanTerm"/> in
         /// <paramref name="terms"/> for the terms of every supported query type.
         /// </summary>
         /// <remarks>
         /// Boolean, filtered and disjunction-max queries are unwrapped recursively; phrase and
         /// multi-phrase queries are turned into a <see cref="SpanNearQuery"/>; multi-term queries
         /// are only expanded when <c>expandMultiTermQuery</c> is set. Any other query type is ignored.
         /// </remarks>
         /// <param name="query">Query to extract Terms from</param>
         /// <param name="terms">Map to place created WeightedSpanTerms in</param>
         private void Extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
         {
             BooleanQuery booleanQuery = query as BooleanQuery;
             if (booleanQuery != null)
             {
                 // Prohibited clauses are skipped; every other clause is processed recursively.
                 foreach (BooleanClause clause in booleanQuery.GetClauses())
                 {
                     if (clause.IsProhibited)
                     {
                         continue;
                     }
                     Extract(clause.Query, terms);
                 }
                 return;
             }

             PhraseQuery phraseQuery = query as PhraseQuery;
             if (phraseQuery != null)
             {
                 Term[] phraseTerms = phraseQuery.GetTerms();
                 SpanQuery[] termClauses = new SpanQuery[phraseTerms.Length];
                 for (int t = 0; t < phraseTerms.Length; t++)
                 {
                     termClauses[t] = new SpanTermQuery(phraseTerms[t]);
                 }

                 int phraseSlop = phraseQuery.Slop;
                 int[] phrasePositions = phraseQuery.GetPositions();

                 // add largest position increment to slop (only when it exceeds one)
                 int widestGap = 0;
                 for (int p = 1; p < phrasePositions.Length; p++)
                 {
                     widestGap = Math.Max(widestGap, phrasePositions[p] - phrasePositions[p - 1]);
                 }
                 if (widestGap > 1)
                 {
                     phraseSlop += widestGap;
                 }

                 // The span query is ordered only when no slop is allowed.
                 SpanNearQuery phraseSpans = new SpanNearQuery(termClauses, phraseSlop, phraseSlop == 0);
                 phraseSpans.Boost = query.Boost;
                 ExtractWeightedSpanTerms(terms, phraseSpans);
                 return;
             }

             if (query is TermQuery)
             {
                 ExtractWeightedTerms(terms, query);
                 return;
             }

             SpanQuery spanQuery = query as SpanQuery;
             if (spanQuery != null)
             {
                 ExtractWeightedSpanTerms(terms, spanQuery);
                 return;
             }

             FilteredQuery filteredQuery = query as FilteredQuery;
             if (filteredQuery != null)
             {
                 Extract(filteredQuery.Query, terms);
                 return;
             }

             DisjunctionMaxQuery disjunctionMax = query as DisjunctionMaxQuery;
             if (disjunctionMax != null)
             {
                 foreach (var disjunct in disjunctionMax)
                 {
                     Extract(disjunct, terms);
                 }
                 return;
             }

             MultiTermQuery multiTermQuery = query as MultiTermQuery;
             if (multiTermQuery != null && expandMultiTermQuery)
             {
                 if (multiTermQuery.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
                 {
                     // Switch the rewrite method on a clone so the caller's query is left untouched.
                     multiTermQuery = (MultiTermQuery) multiTermQuery.Clone();
                     multiTermQuery.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
                 }

                 // Rewriting against the fake reader is only used to learn which field the query targets.
                 FakeReader fieldProbe = new FakeReader();
                 MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fieldProbe, multiTermQuery);
                 if (fieldProbe.Field != null)
                 {
                     IndexReader fieldReader = GetReaderForField(fieldProbe.Field);
                     Extract(multiTermQuery.Rewrite(fieldReader), terms);
                 }
                 return;
             }

             MultiPhraseQuery multiPhrase = query as MultiPhraseQuery;
             if (multiPhrase == null)
             {
                 // Unsupported query type: nothing to extract.
                 return;
             }

             IList<Term[]> termArrays = multiPhrase.GetTermArrays();
             int[] positions = multiPhrase.GetPositions();
             if (positions.Length == 0)
             {
                 return;
             }

             int highestPosition = positions[0];
             for (int i = 1; i < positions.Length; i++)
             {
                 highestPosition = Math.Max(highestPosition, positions[i]);
             }

             // One bucket per position; positions that no term array occupies stay null.
             var spansByPosition = new List<SpanQuery>[highestPosition + 1];
             int occupiedPositions = 0;
             for (int i = 0; i < termArrays.Count; i++)
             {
                 Term[] alternatives = termArrays[i];
                 int slot = positions[i];
                 if (spansByPosition[slot] == null)
                 {
                     spansByPosition[slot] = new List<SpanQuery>(alternatives.Length);
                     occupiedPositions++;
                 }
                 foreach (Term alternative in alternatives)
                 {
                     spansByPosition[slot].Add(new SpanTermQuery(alternative));
                 }
             }

             // Terms sharing a position become one SpanOrQuery; unoccupied positions are counted as gaps.
             SpanQuery[] positionClauses = new SpanQuery[occupiedPositions];
             int nextClause = 0;
             int gaps = 0;
             foreach (List<SpanQuery> bucket in spansByPosition)
             {
                 if (bucket == null)
                 {
                     gaps++;
                 }
                 else
                 {
                     positionClauses[nextClause++] = new SpanOrQuery(bucket.ToArray());
                 }
             }

             int slop = multiPhrase.Slop;
             SpanNearQuery multiPhraseSpans = new SpanNearQuery(positionClauses, slop + gaps, slop == 0);
             multiPhraseSpans.Boost = query.Boost;
             ExtractWeightedSpanTerms(terms, multiPhraseSpans);
         }

         /// <summary>
         /// Evaluates <paramref name="spanQuery"/> against the per-field readers and, when it produces
         /// at least one span, records its terms in <paramref name="terms"/> together with the span positions.
         /// </summary>
         /// <param name="terms">Map to place created WeightedSpanTerms in</param>
         /// <param name="spanQuery">SpanQuery to extract Terms from</param>
         private void ExtractWeightedSpanTerms(IDictionary<String, WeightedSpanTerm> terms, SpanQuery spanQuery)
         {
             // Fields to search: the requested field, or every field the query refers to when none was requested.
             HashSet<String> targetFields = new HashSet<String>();
             if (fieldName != null)
             {
                 targetFields.Add(fieldName);
             }
             else
             {
                 CollectSpanQueryFields(spanQuery, targetFields);
             }
             // To support the use of the default field name
             if (defaultField != null)
             {
                 targetFields.Add(defaultField);
             }

             bool rewriteNeeded = MustRewriteQuery(spanQuery);
             IDictionary<String, SpanQuery> rewrittenByField = new HashMap<String, SpanQuery>();
             var queryTerms = Support.Compatibility.SetFactory.CreateHashSet<Term>();

             if (!rewriteNeeded)
             {
                 spanQuery.ExtractTerms(queryTerms);
             }
             else
             {
                 // Rewrite once per field and remember the result for the span evaluation below.
                 foreach (String field in targetFields)
                 {
                     SpanQuery rewritten = (SpanQuery) spanQuery.Rewrite(GetReaderForField(field));
                     rewrittenByField[field] = rewritten;
                     rewritten.ExtractTerms(queryTerms);
                 }
             }

             List<PositionSpan> matches = new List<PositionSpan>();
             foreach (String field in targetFields)
             {
                 IndexReader fieldReader = GetReaderForField(field);
                 SpanQuery effectiveQuery = rewriteNeeded ? rewrittenByField[field] : spanQuery;
                 Spans.Spans hits = effectiveQuery.GetSpans(fieldReader);

                 // collect span positions (the end is stored as End() - 1)
                 while (hits.Next())
                 {
                     matches.Add(new PositionSpan(hits.Start(), hits.End() - 1));
                 }
             }

             if (matches.Count == 0)
             {
                 // no spans found
                 return;
             }

             foreach (Term term in queryTerms)
             {
                 if (!FieldNameComparator(term.Field))
                 {
                     continue;
                 }

                 WeightedSpanTerm existing = terms[term.Text];
                 if (existing != null)
                 {
                     // Term already known: just add the new spans (the list is non-empty at this point).
                     existing.AddPositionSpans(matches);
                     continue;
                 }

                 WeightedSpanTerm created = new WeightedSpanTerm(spanQuery.Boost, term.Text);
                 created.AddPositionSpans(matches);
                 created.SetPositionSensitive(true);
                 terms[term.Text] = created;
             }
         }

         /// <summary>
         /// Adds a <see cref="WeightedSpanTerm"/>, weighted with the boost of <paramref name="query"/>, for
         /// every term of the query whose field is accepted by <see cref="FieldNameComparator"/>.
         /// </summary>
         /// <param name="terms">Map to place created WeightedSpanTerms in, keyed by term text</param>
         /// <param name="query">Query to extract Terms from</param>
         private void ExtractWeightedTerms(IDictionary<String, WeightedSpanTerm> terms, Query query)
         {
             var extracted = Support.Compatibility.SetFactory.CreateHashSet<Term>();
             query.ExtractTerms(extracted);

             foreach (Term candidate in extracted)
             {
                 if (!FieldNameComparator(candidate.Field))
                 {
                     continue;
                 }

                 // An existing entry for the same text is replaced.
                 terms[candidate.Text] = new WeightedSpanTerm(query.Boost, candidate.Text);
             }
         }

         /// <summary>
         /// Necessary to implement matches for queries against <c>defaultField</c>
         /// </summary>
         /// <param name="fieldNameToCheck">Field name of the term under consideration</param>
         /// <returns><c>true</c> when no field restriction is set, or when the name equals the
         /// requested field or the default field</returns>
         private bool FieldNameComparator(String fieldNameToCheck)
         {
             if (fieldName == null)
             {
                 // No restriction: every field is accepted.
                 return true;
             }

             return fieldNameToCheck == fieldName || fieldNameToCheck == defaultField;
         }

         /// <summary>
         /// Returns the cached reader for <paramref name="field"/>, creating it on first use by adding
         /// the current token stream to a new <see cref="MemoryIndex"/> under that field name.
         /// </summary>
         /// <param name="field">Field name the token stream is indexed under</param>
         /// <returns>Reader over the in-memory index for the field</returns>
         private IndexReader GetReaderForField(String field)
         {
             bool needsCachingWrapper =
                 wrapToCaching && !cachedTokenStream && !(tokenStream is CachingTokenFilter);
             if (needsCachingWrapper)
             {
                 tokenStream = new CachingTokenFilter(tokenStream);
                 cachedTokenStream = true;
             }

             IndexReader cached = readers[field];
             if (cached != null)
             {
                 return cached;
             }

             MemoryIndex memoryIndex = new MemoryIndex();
             memoryIndex.AddField(field, tokenStream);
             // Reset the stream after it has been consumed by the index.
             tokenStream.Reset();

             IndexReader created = memoryIndex.CreateSearcher().IndexReader;
             readers[field] = created;
             return created;
         }

         /// <summary>
         /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c> and <c>TokenStream</c>,
         /// without restricting the terms to a field.
         /// </summary>
         /// <param name="query">query that caused hit</param>
         /// <param name="tokenStream">TokenStream of text to be highlighted</param>
         /// <returns>Map containing WeightedSpanTerms</returns>
         public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream)
         {
             // A null field name means "no field restriction" in the three-argument overload.
             const String anyField = null;
             return GetWeightedSpanTerms(query, tokenStream, anyField);
         }


         /// <summary>
         /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c> and <c>TokenStream</c>.
         /// The readers created during extraction are closed before the method returns.
         /// </summary>
         /// <param name="query">query that caused hit</param>
         /// <param name="tokenStream">tokenStream of text to be highlighted</param>
         /// <param name="fieldName">restricts Term's used based on field name; <c>null</c> for no restriction</param>
         /// <returns>Map containing WeightedSpanTerms</returns>
         public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream,
                                                                           String fieldName)
         {
             this.fieldName = fieldName == null ? null : StringHelper.Intern(fieldName);
             this.tokenStream = tokenStream;

             IDictionary<String, WeightedSpanTerm> result = new PositionCheckingMap<String>();
             try
             {
                 Extract(query, result);
             }
             finally
             {
                 CloseReaders();
             }

             return result;
         }

         /// <summary>
         /// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c> and <c>TokenStream</c>. Uses a supplied
         /// <c>IndexReader</c> to properly Weight terms (for gradient highlighting).
         /// </summary>
         /// <remarks>
         /// The readers created during extraction are closed before the method returns, including
         /// when extraction or scoring throws.
         /// </remarks>
         /// <param name="query">Query that caused hit</param>
         /// <param name="tokenStream">Tokenstream of text to be highlighted</param>
         /// <param name="fieldName">restricts Term's used based on field name</param>
         /// <param name="reader">to use for scoring</param>
         /// <returns>Map of WeightedSpanTerms with quasi tf/idf scores</returns>
         public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(Query query, TokenStream tokenStream,
                                                                                     String fieldName, IndexReader reader)
         {
             if (fieldName != null)
             {
                 this.fieldName = StringHelper.Intern(fieldName);
             }
             else
             {
                 this.fieldName = null;
             }
             this.tokenStream = tokenStream;

             IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();

             try
             {
                 // Extraction creates the per-field readers, so it has to run inside the try block
                 // for them to be closed when it (or the scoring below) fails.
                 Extract(query, terms);

                 int totalNumDocs = reader.NumDocs();

                 foreach (var wt in terms.Keys)
                 {
                     WeightedSpanTerm weightedSpanTerm = terms[wt];
                     int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.Term));
                     // docFreq counts deletes
                     if (totalNumDocs < docFreq)
                     {
                         docFreq = totalNumDocs;
                     }
                     // IDF algorithm taken from DefaultSimilarity class
                     float idf = (float) (Math.Log((float) totalNumDocs/(double) (docFreq + 1)) + 1.0);
                     weightedSpanTerm.Weight *= idf;
                 }
             }
             finally
             {
                 CloseReaders();
             }

             return terms;
         }

         /// <summary>
         /// Adds to <paramref name="fieldNames"/> the field of every leaf query reachable from
         /// <paramref name="spanQuery"/>. Masking, first, near, not (include side only) and or
         /// queries are descended into; any other span query contributes its own field.
         /// </summary>
         /// <param name="spanQuery">Span query to inspect</param>
         /// <param name="fieldNames">Set receiving the field names</param>
         private void CollectSpanQueryFields(SpanQuery spanQuery, HashSet<String> fieldNames)
         {
             FieldMaskingSpanQuery masking = spanQuery as FieldMaskingSpanQuery;
             if (masking != null)
             {
                 CollectSpanQueryFields(masking.MaskedQuery, fieldNames);
                 return;
             }

             SpanFirstQuery first = spanQuery as SpanFirstQuery;
             if (first != null)
             {
                 CollectSpanQueryFields(first.Match, fieldNames);
                 return;
             }

             SpanNearQuery near = spanQuery as SpanNearQuery;
             if (near != null)
             {
                 foreach (SpanQuery child in near.GetClauses())
                 {
                     CollectSpanQueryFields(child, fieldNames);
                 }
                 return;
             }

             SpanNotQuery not = spanQuery as SpanNotQuery;
             if (not != null)
             {
                 CollectSpanQueryFields(not.Include, fieldNames);
                 return;
             }

             SpanOrQuery or = spanQuery as SpanOrQuery;
             if (or != null)
             {
                 foreach (SpanQuery child in or.GetClauses())
                 {
                     CollectSpanQueryFields(child, fieldNames);
                 }
                 return;
             }

             // Leaf query: record its own field.
             fieldNames.Add(spanQuery.Field);
         }

         /// <summary>
         /// Decides whether <paramref name="spanQuery"/> has to be rewritten before its terms and spans
         /// can be read. Always <c>false</c> when multi-term expansion is disabled; otherwise
         /// <c>true</c> as soon as the query tree contains a span query that is neither one of the
         /// known composite types nor a <see cref="SpanTermQuery"/>.
         /// </summary>
         /// <param name="spanQuery">Span query to inspect</param>
         /// <returns><c>true</c> if the query must be rewritten</returns>
         private bool MustRewriteQuery(SpanQuery spanQuery)
         {
             if (!expandMultiTermQuery)
             {
                 return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery.
             }

             FieldMaskingSpanQuery masking = spanQuery as FieldMaskingSpanQuery;
             if (masking != null)
             {
                 return MustRewriteQuery(masking.MaskedQuery);
             }

             SpanFirstQuery first = spanQuery as SpanFirstQuery;
             if (first != null)
             {
                 return MustRewriteQuery(first.Match);
             }

             SpanNearQuery near = spanQuery as SpanNearQuery;
             if (near != null)
             {
                 return near.GetClauses().Any(child => MustRewriteQuery(child));
             }

             SpanNotQuery not = spanQuery as SpanNotQuery;
             if (not != null)
             {
                 return MustRewriteQuery(not.Include) || MustRewriteQuery(not.Exclude);
             }

             SpanOrQuery or = spanQuery as SpanOrQuery;
             if (or != null)
             {
                 return or.GetClauses().Any(child => MustRewriteQuery(child));
             }

             // Plain term queries need no rewrite; every other (unknown) span query does.
             return !(spanQuery is SpanTermQuery);
         }


         /// <summary>
         /// This class makes sure that if both position sensitive and insensitive
         /// versions of the same term are added, the position insensitive one wins.
         /// </summary>
         /// <typeparam name="K">Type of the map keys</typeparam>
         private class PositionCheckingMap<K> : HashMap<K, WeightedSpanTerm>
         {
             /// <summary>Creates an empty map.</summary>
             public PositionCheckingMap()
             {
             }

             /// <summary>Creates a map holding the entries of <paramref name="m"/>.</summary>
             /// <param name="m">Entries to add through <see cref="PutAll"/></param>
             public PositionCheckingMap(IEnumerable<KeyValuePair<K, WeightedSpanTerm>> m)
             {
                 PutAll(m);
             }

             /// <summary>Adds every entry of <paramref name="m"/> through <see cref="Add"/>.</summary>
             /// <param name="m">Entries to add</param>
             public void PutAll(IEnumerable<KeyValuePair<K, WeightedSpanTerm>> m)
             {
                 foreach (var entry in m)
                 {
                     Add(entry.Key, entry.Value);
                 }
             }

             /// <summary>
             /// Stores <paramref name="value"/> under <paramref name="key"/>. When the entry it replaces
             /// was position insensitive, the new value is made position insensitive as well.
             /// </summary>
             /// <param name="key">Key of the entry</param>
             /// <param name="value">Term to store</param>
             public override void Add(K key, WeightedSpanTerm value)
             {
                 // The previous entry has to be read BEFORE the new one is stored; reading it afterwards
                 // would return `value` itself and the check below could never change anything.
                 // NOTE(review): relies on the base indexer returning null for a missing key, as the
                 // rest of this file already does - confirm against HashMap.
                 WeightedSpanTerm prev = this[key];

                 base.Add(key, value);

                 if (prev == null || value == null)
                 {
                     return;
                 }

                 if (!prev.IsPositionSensitive())
                 {
                     value.SetPositionSensitive(false);
                 }
             }

         }

         /// <summary>
         /// Gets or sets whether multi-term queries are rewritten so that their terms can be extracted.
         /// </summary>
         public bool ExpandMultiTermQuery
         {
             get { return expandMultiTermQuery; }
             set { expandMultiTermQuery = value; }
         }

         /// <summary>
         /// Gets whether this extractor has wrapped the supplied token stream in a <see cref="CachingTokenFilter"/>.
         /// </summary>
         public bool IsCachedTokenStream
         {
             get
             {
                 return cachedTokenStream;
             }
         }

         /// <summary>
         /// Gets the token stream currently held by the extractor; this is the
         /// <see cref="CachingTokenFilter"/> wrapper when the extractor created one.
         /// </summary>
         public TokenStream TokenStream
         {
             get
             {
                 return tokenStream;
             }
         }


         /// <summary>
         /// By default, <see cref="Analysis.TokenStream"/>s that are not of the type
         /// <see cref="CachingTokenFilter"/> are wrapped in a <see cref="CachingTokenFilter"/> before
         /// they are indexed and reset. If you are already using a different caching
         /// <see cref="Analysis.TokenStream"/> impl and you don't want it to be wrapped, set this to
         /// false.
         /// </summary>
         /// <param name="wrap"><c>true</c> to wrap non-caching token streams, <c>false</c> to use them as supplied</param>
         public void SetWrapIfNotCachingTokenFilter(bool wrap)
         {
             wrapToCaching = wrap;
         }

         /// <summary>
         /// A fake IndexReader class to extract the field from a MultiTermQuery
         /// </summary>
         protected internal sealed class FakeReader : FilterIndexReader
         {
             // Reader over an empty MemoryIndex, shared by all instances as the wrapped reader.
             private static IndexReader EMPTY_MEMORY_INDEX_READER = new MemoryIndex().CreateSearcher().IndexReader;

             /// <summary>
             /// Field of the first non-null term passed to <see cref="Terms"/>; <c>null</c> until then.
             /// </summary>
             public String Field { get; private set; }

             protected internal FakeReader()
                 : base(EMPTY_MEMORY_INDEX_READER)
             {
             }

             /// <summary>
             /// Remembers the field of the first term requested, then delegates to the wrapped reader.
             /// </summary>
             public override TermEnum Terms(Term t)
             {
                 // only set first fieldname, maybe use a Set?
                 bool firstTermSeen = t != null && Field == null;
                 if (firstTermSeen)
                 {
                     Field = t.Field;
                 }

                 return base.Terms(t);
             }
         }
     }
 }
	/*
	*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*
	*/

	using System;
	using System.Collections.Generic;
	using System.IO;
	using System.Linq;
	using System.Text;
	using Lucene.Net.Analysis;
	using Lucene.Net.Index;
	using Lucene.Net.Index.Memory;
	using Lucene.Net.Search.Spans;
	using Lucene.Net.Store;
	using Lucene.Net.Support;
	using Lucene.Net.Util;

	namespace Lucene.Net.Search.Highlight
	{
	/// <summary>
	/// Class used to extract <see cref="WeightedSpanTerm"/>s from a <see cref="Query"/> based on whether
	/// <see cref="Term"/>s from the <see cref="Query"/> are contained in a supplied <see cref="Analysis.TokenStream"/>.
	/// </summary>
	public class WeightedSpanTermExtractor
	{
	private String fieldName;
	private TokenStream tokenStream;
	private IDictionary<String, IndexReader> readers = new HashMap<String, IndexReader>(10);
	private String defaultField;
	private bool expandMultiTermQuery;
	private bool cachedTokenStream;
	private bool wrapToCaching = true;

	/// <summary>
	/// Creates an extractor without a default field.
	/// </summary>
	public WeightedSpanTermExtractor()
	    : this((String) null)
	{
	}

	/// <summary>
	/// Creates an extractor that also accepts terms belonging to <paramref name="defaultField"/>.
	/// </summary>
	/// <param name="defaultField">Field name accepted in addition to the requested field;
	/// a <c>null</c> value leaves the default field unset.</param>
	public WeightedSpanTermExtractor(String defaultField)
	{
	    // The name is stored interned; null simply keeps the member at its initial null value.
	    this.defaultField = defaultField == null ? null : StringHelper.Intern(defaultField);
	}

	/// <summary>
	/// Closes every cached per-field reader and empties the reader cache.
	/// </summary>
	/// <remarks>
	/// Closing is best effort: an <see cref="IOException"/> raised by one reader is ignored so that
	/// the remaining readers are still closed. The cache is cleared afterwards so that a later
	/// extraction on this instance builds fresh readers instead of being handed closed ones.
	/// </remarks>
	private void CloseReaders()
	{
	    foreach (IndexReader reader in readers.Values)
	    {
	        try
	        {
	            reader.Close();
	        }
	        catch (IOException)
	        {
	            // Deliberately ignored: failing to close one reader must not stop the cleanup.
	        }
	    }

	    // Forget the closed readers so a reused extractor never returns them again.
	    readers.Clear();
	}

	/// <summary>
	/// Walks <paramref name="query"/> and records a <see cref="WeightedSpanTerm"/> in
	/// <paramref name="terms"/> for the terms of every supported query type.
	/// </summary>
	/// <remarks>
	/// Boolean, filtered and disjunction-max queries are unwrapped recursively; phrase and
	/// multi-phrase queries are turned into a <see cref="SpanNearQuery"/>; multi-term queries
	/// are only expanded when <c>expandMultiTermQuery</c> is set. Any other query type is ignored.
	/// </remarks>
	/// <param name="query">Query to extract Terms from</param>
	/// <param name="terms">Map to place created WeightedSpanTerms in</param>
	private void Extract(Query query, IDictionary<String, WeightedSpanTerm> terms)
	{
	    BooleanQuery booleanQuery = query as BooleanQuery;
	    if (booleanQuery != null)
	    {
	        // Prohibited clauses are skipped; every other clause is processed recursively.
	        foreach (BooleanClause clause in booleanQuery.GetClauses())
	        {
	            if (clause.IsProhibited)
	            {
	                continue;
	            }
	            Extract(clause.Query, terms);
	        }
	        return;
	    }

	    PhraseQuery phraseQuery = query as PhraseQuery;
	    if (phraseQuery != null)
	    {
	        Term[] phraseTerms = phraseQuery.GetTerms();
	        SpanQuery[] termClauses = new SpanQuery[phraseTerms.Length];
	        for (int t = 0; t < phraseTerms.Length; t++)
	        {
	            termClauses[t] = new SpanTermQuery(phraseTerms[t]);
	        }

	        int phraseSlop = phraseQuery.Slop;
	        int[] phrasePositions = phraseQuery.GetPositions();

	        // add largest position increment to slop (only when it exceeds one)
	        int widestGap = 0;
	        for (int p = 1; p < phrasePositions.Length; p++)
	        {
	            widestGap = Math.Max(widestGap, phrasePositions[p] - phrasePositions[p - 1]);
	        }
	        if (widestGap > 1)
	        {
	            phraseSlop += widestGap;
	        }

	        // The span query is ordered only when no slop is allowed.
	        SpanNearQuery phraseSpans = new SpanNearQuery(termClauses, phraseSlop, phraseSlop == 0);
	        phraseSpans.Boost = query.Boost;
	        ExtractWeightedSpanTerms(terms, phraseSpans);
	        return;
	    }

	    if (query is TermQuery)
	    {
	        ExtractWeightedTerms(terms, query);
	        return;
	    }

	    SpanQuery spanQuery = query as SpanQuery;
	    if (spanQuery != null)
	    {
	        ExtractWeightedSpanTerms(terms, spanQuery);
	        return;
	    }

	    FilteredQuery filteredQuery = query as FilteredQuery;
	    if (filteredQuery != null)
	    {
	        Extract(filteredQuery.Query, terms);
	        return;
	    }

	    DisjunctionMaxQuery disjunctionMax = query as DisjunctionMaxQuery;
	    if (disjunctionMax != null)
	    {
	        foreach (var disjunct in disjunctionMax)
	        {
	            Extract(disjunct, terms);
	        }
	        return;
	    }

	    MultiTermQuery multiTermQuery = query as MultiTermQuery;
	    if (multiTermQuery != null && expandMultiTermQuery)
	    {
	        if (multiTermQuery.RewriteMethod != MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE)
	        {
	            // Switch the rewrite method on a clone so the caller's query is left untouched.
	            multiTermQuery = (MultiTermQuery) multiTermQuery.Clone();
	            multiTermQuery.RewriteMethod = MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE;
	        }

	        // Rewriting against the fake reader is only used to learn which field the query targets.
	        FakeReader fieldProbe = new FakeReader();
	        MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE.Rewrite(fieldProbe, multiTermQuery);
	        if (fieldProbe.Field != null)
	        {
	            IndexReader fieldReader = GetReaderForField(fieldProbe.Field);
	            Extract(multiTermQuery.Rewrite(fieldReader), terms);
	        }
	        return;
	    }

	    MultiPhraseQuery multiPhrase = query as MultiPhraseQuery;
	    if (multiPhrase == null)
	    {
	        // Unsupported query type: nothing to extract.
	        return;
	    }

	    IList<Term[]> termArrays = multiPhrase.GetTermArrays();
	    int[] positions = multiPhrase.GetPositions();
	    if (positions.Length == 0)
	    {
	        return;
	    }

	    int highestPosition = positions[0];
	    for (int i = 1; i < positions.Length; i++)
	    {
	        highestPosition = Math.Max(highestPosition, positions[i]);
	    }

	    // One bucket per position; positions that no term array occupies stay null.
	    var spansByPosition = new List<SpanQuery>[highestPosition + 1];
	    int occupiedPositions = 0;
	    for (int i = 0; i < termArrays.Count; i++)
	    {
	        Term[] alternatives = termArrays[i];
	        int slot = positions[i];
	        if (spansByPosition[slot] == null)
	        {
	            spansByPosition[slot] = new List<SpanQuery>(alternatives.Length);
	            occupiedPositions++;
	        }
	        foreach (Term alternative in alternatives)
	        {
	            spansByPosition[slot].Add(new SpanTermQuery(alternative));
	        }
	    }

	    // Terms sharing a position become one SpanOrQuery; unoccupied positions are counted as gaps.
	    SpanQuery[] positionClauses = new SpanQuery[occupiedPositions];
	    int nextClause = 0;
	    int gaps = 0;
	    foreach (List<SpanQuery> bucket in spansByPosition)
	    {
	        if (bucket == null)
	        {
	            gaps++;
	        }
	        else
	        {
	            positionClauses[nextClause++] = new SpanOrQuery(bucket.ToArray());
	        }
	    }

	    int slop = multiPhrase.Slop;
	    SpanNearQuery multiPhraseSpans = new SpanNearQuery(positionClauses, slop + gaps, slop == 0);
	    multiPhraseSpans.Boost = query.Boost;
	    ExtractWeightedSpanTerms(terms, multiPhraseSpans);
	}

	/// <summary>
	/// Evaluates <paramref name="spanQuery"/> against the per-field readers and, when it produces
	/// at least one span, records its terms in <paramref name="terms"/> together with the span positions.
	/// </summary>
	/// <param name="terms">Map to place created WeightedSpanTerms in</param>
	/// <param name="spanQuery">SpanQuery to extract Terms from</param>
	private void ExtractWeightedSpanTerms(IDictionary<String, WeightedSpanTerm> terms, SpanQuery spanQuery)
	{
	    // Fields to search: the requested field, or every field the query refers to when none was requested.
	    HashSet<String> targetFields = new HashSet<String>();
	    if (fieldName != null)
	    {
	        targetFields.Add(fieldName);
	    }
	    else
	    {
	        CollectSpanQueryFields(spanQuery, targetFields);
	    }
	    // To support the use of the default field name
	    if (defaultField != null)
	    {
	        targetFields.Add(defaultField);
	    }

	    bool rewriteNeeded = MustRewriteQuery(spanQuery);
	    IDictionary<String, SpanQuery> rewrittenByField = new HashMap<String, SpanQuery>();
	    var queryTerms = Support.Compatibility.SetFactory.CreateHashSet<Term>();

	    if (!rewriteNeeded)
	    {
	        spanQuery.ExtractTerms(queryTerms);
	    }
	    else
	    {
	        // Rewrite once per field and remember the result for the span evaluation below.
	        foreach (String field in targetFields)
	        {
	            SpanQuery rewritten = (SpanQuery) spanQuery.Rewrite(GetReaderForField(field));
	            rewrittenByField[field] = rewritten;
	            rewritten.ExtractTerms(queryTerms);
	        }
	    }

	    List<PositionSpan> matches = new List<PositionSpan>();
	    foreach (String field in targetFields)
	    {
	        IndexReader fieldReader = GetReaderForField(field);
	        SpanQuery effectiveQuery = rewriteNeeded ? rewrittenByField[field] : spanQuery;
	        Spans.Spans hits = effectiveQuery.GetSpans(fieldReader);

	        // collect span positions (the end is stored as End() - 1)
	        while (hits.Next())
	        {
	            matches.Add(new PositionSpan(hits.Start(), hits.End() - 1));
	        }
	    }

	    if (matches.Count == 0)
	    {
	        // no spans found
	        return;
	    }

	    foreach (Term term in queryTerms)
	    {
	        if (!FieldNameComparator(term.Field))
	        {
	            continue;
	        }

	        WeightedSpanTerm existing = terms[term.Text];
	        if (existing != null)
	        {
	            // Term already known: just add the new spans (the list is non-empty at this point).
	            existing.AddPositionSpans(matches);
	            continue;
	        }

	        WeightedSpanTerm created = new WeightedSpanTerm(spanQuery.Boost, term.Text);
	        created.AddPositionSpans(matches);
	        created.SetPositionSensitive(true);
	        terms[term.Text] = created;
	    }
	}

	/// <summary>
	/// Stores a new <see cref="WeightedSpanTerm"/> in <paramref name="terms"/> for every term of
	/// <paramref name="query"/> whose field is accepted by <c>FieldNameComparator</c>. No position spans
	/// are attached here; each entry carries only the query boost and the term text.
	/// </summary>
	/// <param name="terms">Map, keyed by term text, that receives the WeightedSpanTerms</param>
	/// <param name="query">Query to extract Terms from</param>
	private void ExtractWeightedTerms(IDictionary<String, WeightedSpanTerm> terms, Query query)
	{
	    var extracted = Support.Compatibility.SetFactory.CreateHashSet<Term>();
	    query.ExtractTerms(extracted);

	    foreach (Term candidate in extracted)
	    {
	        if (!FieldNameComparator(candidate.Field))
	        {
	            // Term belongs to a field we were not asked to highlight.
	            continue;
	        }

	        String text = candidate.Text;
	        terms[text] = new WeightedSpanTerm(query.Boost, text);
	    }
	}

	/// <summary>
	/// Necessary to implement matches for queries against <c>defaultField</c>.
	/// </summary>
	/// <param name="fieldNameToCheck">Field name of the term under consideration</param>
	/// <returns>
	/// <c>true</c> when no field restriction is set (<c>fieldName</c> is null), or when
	/// <paramref name="fieldNameToCheck"/> equals either <c>fieldName</c> or <c>defaultField</c>.
	/// </returns>
	private bool FieldNameComparator(String fieldNameToCheck)
	{
	    // String == in C# compares by value, so interning is not needed for these checks to be correct.
	    return fieldName == null
	           || fieldNameToCheck == fieldName
	           || fieldNameToCheck == defaultField;
	}

	/// <summary>
	/// Returns the reader cached in <c>readers</c> for <paramref name="field"/>, building it on first use
	/// by indexing the current token stream into a fresh <see cref="MemoryIndex"/> under that field name.
	/// </summary>
	/// <param name="field">Field name the token stream is indexed under</param>
	/// <returns>The cached or newly created reader for the field</returns>
	private IndexReader GetReaderForField(String field)
	{
	    // Wrap the stream at most once, and only when wrapping is enabled and it is not already a caching filter.
	    bool needsCachingWrapper = wrapToCaching
	                               && !cachedTokenStream
	                               && !(tokenStream is CachingTokenFilter);
	    if (needsCachingWrapper)
	    {
	        tokenStream = new CachingTokenFilter(tokenStream);
	        cachedTokenStream = true;
	    }

	    // Relies on the readers map returning null for a field that has not been seen yet.
	    IndexReader cached = readers[field];
	    if (cached != null)
	    {
	        return cached;
	    }

	    MemoryIndex memoryIndex = new MemoryIndex();
	    memoryIndex.AddField(field, tokenStream);
	    // Reset after indexing so the stream can be consumed again (e.g. for another field).
	    tokenStream.Reset();

	    IndexReader created = memoryIndex.CreateSearcher().IndexReader;
	    readers[field] = created;
	    return created;
	}

	/// <summary>
	/// Builds the Map of <c>WeightedSpanTerms</c> for a <c>Query</c> and <c>TokenStream</c> without
	/// restricting the terms to a particular field (delegates to the three-argument overload with a
	/// null field name).
	/// </summary>
	/// <param name="query">query that caused hit</param>
	/// <param name="tokenStream">TokenStream of text to be highlighted</param>
	/// <returns>Map containing WeightedSpanTerms</returns>
	public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream)
	{
	    String unrestrictedField = null;
	    return GetWeightedSpanTerms(query, tokenStream, unrestrictedField);
	}


	/// <summary>
	/// Builds the Map of <c>WeightedSpanTerms</c> for a <c>Query</c> and <c>TokenStream</c>, optionally
	/// restricted to one field. The readers created during extraction are closed before returning,
	/// whether or not extraction succeeds.
	/// </summary>
	/// <param name="query">query that caused hit</param>
	/// <param name="tokenStream">tokenStream of text to be highlighted</param>
	/// <param name="fieldName">restricts Term's used based on field name; null means no restriction</param>
	/// <returns>Map containing WeightedSpanTerms</returns>
	public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTerms(Query query, TokenStream tokenStream,
	                                                                  String fieldName)
	{
	    // The field name is stored interned; a null name clears any previous restriction.
	    this.fieldName = fieldName == null ? null : StringHelper.Intern(fieldName);
	    this.tokenStream = tokenStream;

	    IDictionary<String, WeightedSpanTerm> result = new PositionCheckingMap<String>();
	    try
	    {
	        Extract(query, result);
	    }
	    finally
	    {
	        CloseReaders();
	    }

	    return result;
	}

	/// <summary>
	/// Creates a Map of <c>WeightedSpanTerms</c> from the given <c>Query</c> and <c>TokenStream</c>. Uses a supplied
	/// <c>IndexReader</c> to properly Weight terms (for gradient highlighting): each extracted term's weight is
	/// multiplied by an idf factor computed from the reader's document count and the term's document frequency.
	/// The readers created during extraction are closed before returning, even when extraction or scoring throws.
	/// </summary>
	/// <param name="query">Query that caused hit</param>
	/// <param name="tokenStream">Tokenstream of text to be highlighted</param>
	/// <param name="fieldName">restricts Term's used based on field name</param>
	/// <param name="reader">to use for scoring</param>
	/// <returns>Map of WeightedSpanTerms with quasi tf/idf scores</returns>
	public IDictionary<String, WeightedSpanTerm> GetWeightedSpanTermsWithScores(Query query, TokenStream tokenStream,
	                                                                            String fieldName, IndexReader reader)
	{
	    if (fieldName != null)
	    {
	        this.fieldName = StringHelper.Intern(fieldName);
	    }
	    else
	    {
	        this.fieldName = null;
	    }
	    this.tokenStream = tokenStream;

	    IDictionary<String, WeightedSpanTerm> terms = new PositionCheckingMap<String>();

	    try
	    {
	        // Extraction happens inside the try so that readers opened by it are closed
	        // by the finally block even if extraction (or the scoring below) fails.
	        Extract(query, terms);

	        int totalNumDocs = reader.NumDocs();

	        foreach (var wt in terms.Keys)
	        {
	            WeightedSpanTerm weightedSpanTerm = terms[wt];
	            int docFreq = reader.DocFreq(new Term(fieldName, weightedSpanTerm.Term));
	            // docFreq counts deletes
	            if (totalNumDocs < docFreq)
	            {
	                docFreq = totalNumDocs;
	            }
	            // IDF algorithm taken from DefaultSimilarity class
	            float idf = (float) (Math.Log((float) totalNumDocs/(double) (docFreq + 1)) + 1.0);
	            weightedSpanTerm.Weight *= idf;
	        }
	    }
	    finally
	    {
	        CloseReaders();
	    }

	    return terms;
	}

	/// <summary>
	/// Recursively walks <paramref name="spanQuery"/> and adds the field of every leaf query it reaches to
	/// <paramref name="fieldNames"/>. Composite queries are unwrapped: a masking query through its masked
	/// query, a first query through its match, near/or queries through all their clauses, and a not query
	/// through its include side only.
	/// </summary>
	/// <param name="spanQuery">Query to inspect</param>
	/// <param name="fieldNames">Set that receives the field names found</param>
	private void CollectSpanQueryFields(SpanQuery spanQuery, HashSet<String> fieldNames)
	{
	    if (spanQuery is FieldMaskingSpanQuery)
	    {
	        SpanQuery masked = ((FieldMaskingSpanQuery) spanQuery).MaskedQuery;
	        CollectSpanQueryFields(masked, fieldNames);
	        return;
	    }

	    if (spanQuery is SpanFirstQuery)
	    {
	        SpanQuery match = ((SpanFirstQuery) spanQuery).Match;
	        CollectSpanQueryFields(match, fieldNames);
	        return;
	    }

	    if (spanQuery is SpanNearQuery)
	    {
	        var nearClauses = ((SpanNearQuery) spanQuery).GetClauses();
	        foreach (SpanQuery nearClause in nearClauses)
	        {
	            CollectSpanQueryFields(nearClause, fieldNames);
	        }
	        return;
	    }

	    if (spanQuery is SpanNotQuery)
	    {
	        // Only the include side is inspected; the exclude side is not consulted for field names.
	        SpanQuery include = ((SpanNotQuery) spanQuery).Include;
	        CollectSpanQueryFields(include, fieldNames);
	        return;
	    }

	    if (spanQuery is SpanOrQuery)
	    {
	        var orClauses = ((SpanOrQuery) spanQuery).GetClauses();
	        foreach (SpanQuery orClause in orClauses)
	        {
	            CollectSpanQueryFields(orClause, fieldNames);
	        }
	        return;
	    }

	    // Any other span query is treated as a leaf.
	    fieldNames.Add(spanQuery.Field);
	}

	/// <summary>
	/// Decides whether <paramref name="spanQuery"/> has to be rewritten before its terms and spans can be
	/// extracted. Composite queries are inspected recursively; a <see cref="SpanTermQuery"/> never needs a
	/// rewrite, and any span query type not handled explicitly here is assumed to need one.
	/// </summary>
	/// <param name="spanQuery">Query to inspect</param>
	/// <returns>
	/// <c>false</c> whenever multi-term expansion is disabled; otherwise <c>true</c> if the query or any
	/// inspected sub-query requires rewriting.
	/// </returns>
	private bool MustRewriteQuery(SpanQuery spanQuery)
	{
	    if (!expandMultiTermQuery)
	    {
	        // Will throw UnsupportedOperationException in case of a SpanRegexQuery.
	        // NOTE(review): exception name carried over from the Java original; the .NET equivalent
	        // raised downstream is not visible from here.
	        return false;
	    }

	    if (spanQuery is FieldMaskingSpanQuery)
	    {
	        return MustRewriteQuery(((FieldMaskingSpanQuery) spanQuery).MaskedQuery);
	    }

	    if (spanQuery is SpanFirstQuery)
	    {
	        return MustRewriteQuery(((SpanFirstQuery) spanQuery).Match);
	    }

	    if (spanQuery is SpanNearQuery)
	    {
	        foreach (SpanQuery clause in ((SpanNearQuery) spanQuery).GetClauses())
	        {
	            if (MustRewriteQuery(clause))
	            {
	                return true;
	            }
	        }
	        return false;
	    }

	    if (spanQuery is SpanNotQuery)
	    {
	        // Unlike field collection, both the include and the exclude side are checked here.
	        SpanNotQuery spanNotQuery = (SpanNotQuery) spanQuery;
	        return MustRewriteQuery(spanNotQuery.Include) || MustRewriteQuery(spanNotQuery.Exclude);
	    }

	    if (spanQuery is SpanOrQuery)
	    {
	        foreach (SpanQuery clause in ((SpanOrQuery) spanQuery).GetClauses())
	        {
	            if (MustRewriteQuery(clause))
	            {
	                return true;
	            }
	        }
	        return false;
	    }

	    if (spanQuery is SpanTermQuery)
	    {
	        return false;
	    }

	    // Unknown span query type: rewrite to be safe.
	    return true;
	}


	/// <summary>
	/// This class makes sure that if both position sensitive and insensitive
	/// versions of the same term are added, the position insensitive one wins.
	/// </summary>
	/// <typeparam name="K">Type of the map keys</typeparam>
	private class PositionCheckingMap<K> : HashMap<K, WeightedSpanTerm>
	{
	    /// <summary>Creates an empty map.</summary>
	    public PositionCheckingMap()
	    {
	    }

	    /// <summary>Creates a map and adds every entry of <paramref name="m"/> through <see cref="Add"/>.</summary>
	    /// <param name="m">Entries to copy into the new map</param>
	    public PositionCheckingMap(IEnumerable<KeyValuePair<K, WeightedSpanTerm>> m)
	    {
	        PutAll(m);
	    }

	    /// <summary>
	    /// Adds every entry of <paramref name="m"/> through <see cref="Add"/>, so the position check is
	    /// applied to each of them.
	    /// </summary>
	    /// <param name="m">Entries to add</param>
	    public void PutAll(IEnumerable<KeyValuePair<K, WeightedSpanTerm>> m)
	    {
	        foreach (var entry in m)
	        {
	            Add(entry.Key, entry.Value);
	        }
	    }

	    /// <summary>
	    /// Stores <paramref name="value"/> under <paramref name="key"/>. If the entry being replaced was
	    /// position insensitive, the new value is made position insensitive as well.
	    /// </summary>
	    /// <param name="key">Key to store under</param>
	    /// <param name="value">Term to store</param>
	    public override void Add(K key, WeightedSpanTerm value)
	    {
	        // The previous entry must be read BEFORE the base call overwrites it; reading it afterwards
	        // would just hand back the value that was stored a moment ago.
	        // Assumes the base indexer returns null for an absent key, as the null check below expects.
	        WeightedSpanTerm prev = this[key];
	        base.Add(key, value);

	        if (prev == null || value == null) return;

	        if (!prev.IsPositionSensitive())
	        {
	            value.SetPositionSensitive(false);
	        }
	    }
	}

	/// <summary>
	/// Gets or sets the flag consulted by <c>MustRewriteQuery</c>: while it is false, no span query is
	/// reported as needing a rewrite.
	/// </summary>
	public bool ExpandMultiTermQuery
	{
	    get { return this.expandMultiTermQuery; }
	    set { expandMultiTermQuery = value; }
	}

	/// <summary>
	/// Gets whether this extractor has wrapped the supplied token stream in a
	/// <see cref="CachingTokenFilter"/>.
	/// </summary>
	public bool IsCachedTokenStream
	{
	    get
	    {
	        return this.cachedTokenStream;
	    }
	}

	/// <summary>
	/// Gets the token stream currently held by this extractor. This may be a caching wrapper around the
	/// stream that was originally supplied (see <see cref="IsCachedTokenStream"/>).
	/// </summary>
	public TokenStream TokenStream
	{
	    get
	    {
	        return this.tokenStream;
	    }
	}


	/// <summary>
	/// By default, <see cref="Analysis.TokenStream"/>s that are not of the type
	/// <see cref="CachingTokenFilter"/> are wrapped in a <see cref="CachingTokenFilter"/> so that the
	/// stream can be reset and consumed again (it is indexed once per field). If you are already using a
	/// different caching <see cref="Analysis.TokenStream"/> impl and you don't want it to be wrapped,
	/// set this to false.
	/// </summary>
	/// <param name="wrap">true to wrap non-caching token streams (the default), false to leave them as supplied</param>
	public void SetWrapIfNotCachingTokenFilter(bool wrap)
	{
	    wrapToCaching = wrap;
	}

	/// <summary>
	/// A fake IndexReader class to extract the field from a MultiTermQuery. It wraps the reader of an
	/// empty <see cref="MemoryIndex"/> and records the field of the first non-null term passed to
	/// <see cref="Terms(Term)"/>.
	/// </summary>
	protected internal sealed class FakeReader : FilterIndexReader
	{
	    // Shared by all instances; readonly so the shared reader can never be swapped out after initialisation.
	    private static readonly IndexReader EMPTY_MEMORY_INDEX_READER = new MemoryIndex().CreateSearcher().IndexReader;

	    /// <summary>
	    /// Field of the first non-null term requested through <see cref="Terms(Term)"/>, or null if none
	    /// has been requested yet.
	    /// </summary>
	    public String Field { get; private set; }

	    /// <summary>Creates a fake reader backed by the shared empty memory-index reader.</summary>
	    protected internal FakeReader()
	        : base(EMPTY_MEMORY_INDEX_READER)
	    {
	    }

	    /// <summary>
	    /// Remembers the field of <paramref name="t"/> (first call with a non-null term only) and then
	    /// delegates to the wrapped reader.
	    /// </summary>
	    /// <param name="t">Term the enumeration is requested for</param>
	    /// <returns>The term enumeration produced by the wrapped reader</returns>
	    public override TermEnum Terms(Term t)
	    {
	        // only set first fieldname, maybe use a Set?
	        if (t != null && Field == null)
	        {
	            Field = t.Field;
	        }
	        return base.Terms(t);
	    }
	}
	}
	}