/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
using System;

using Document = Lucene.Net.Documents.Document;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using CorruptIndexException = Lucene.Net.Index.CorruptIndexException;
using Term = Lucene.Net.Index.Term;

namespace Lucene.Net.Search
{
    /// <summary>Implements search over a set of <code>Searchables</code>.
    /// 
    /// <p>Applications usually need only call the inherited {@link #Search(Query)}
    /// or {@link #Search(Query,Filter)} methods.
    /// </summary>
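    /// <example>
    /// A minimal usage sketch (hypothetical: assumes two existing index
    /// directories <c>dirA</c> and <c>dirB</c>):
    /// <code>
    /// Searchable[] subSearchers = new Searchable[] {
    ///     new IndexSearcher(dirA),  // first sub-index; its docs keep their ids
    ///     new IndexSearcher(dirB)   // second sub-index; its docs are offset
    /// };
    /// Searcher searcher = new MultiSearcher(subSearchers);
    /// Hits hits = searcher.Search(new TermQuery(new Term("contents", "lucene")));
    /// </code>
    /// </example>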
    public class MultiSearcher : Searcher
    {
        private class AnonymousClassHitCollector : HitCollector
        {
            public AnonymousClassHitCollector(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
            {
                InitBlock(results, start, enclosingInstance);
            }
            private void InitBlock(Lucene.Net.Search.HitCollector results, int start, MultiSearcher enclosingInstance)
            {
                this.results = results;
                this.start = start;
                this.enclosingInstance = enclosingInstance;
            }
            private Lucene.Net.Search.HitCollector results;
            private int start;
            private MultiSearcher enclosingInstance;
            public MultiSearcher Enclosing_Instance
            {
                get
                {
                    return enclosingInstance;
                }
            }
            public override void Collect(int doc, float score)
            {
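                // shift the sub-searcher-relative doc id into the
                // MultiSearcher's global doc id space before delegating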
                results.Collect(doc + start, score);
            }
        }
        /// <summary> Document Frequency cache acting as a Dummy-Searcher.
        /// This class is not a full-fledged Searcher; it only supports
        /// the methods necessary to initialize Weights.
        /// </summary>
        private class CachedDfSource : Searcher
        {
            private System.Collections.IDictionary dfMap; // Map from Terms to corresponding doc freqs
            private int maxDoc; // document count
            public CachedDfSource(System.Collections.IDictionary dfMap, int maxDoc, Similarity similarity)
            {
                this.dfMap = dfMap;
                this.maxDoc = maxDoc;
                SetSimilarity(similarity);
            }
            public override int DocFreq(Term term)
            {
                int df;
                try
                {
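                    // dfMap stores boxed System.Int32 values; a term that was
                    // never collected yields null here, and unboxing null
                    // throws the NullReferenceException handled below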
                    df = ((System.Int32) dfMap[term]);
                }
                catch (System.NullReferenceException)
                {
                    throw new System.ArgumentException("df for term " + term.Text() + " not available");
                }
                return df;
            }
            public override int[] DocFreqs(Term[] terms)
            {
                int[] result = new int[terms.Length];
                for (int i = 0; i < terms.Length; i++)
                {
                    result[i] = DocFreq(terms[i]);
                }
                return result;
            }
            public override int MaxDoc()
            {
                return maxDoc;
            }
            public override Query Rewrite(Query query)
            {
                // This is a bit of a hack. We know that a query which
                // creates a Weight based on this Dummy-Searcher is
                // always already rewritten (see CreateWeight()),
                // so we just return the unmodified query here.
                return query;
            }
            public override void Close()
            {
                throw new System.NotSupportedException();
            }
            public override Document Doc(int i)
            {
                throw new System.NotSupportedException();
            }
            public override Document Doc(int i, FieldSelector fieldSelector)
            {
                throw new System.NotSupportedException();
            }
            public override Explanation Explain(Weight weight, int doc)
            {
                throw new System.NotSupportedException();
            }
            public override void Search(Weight weight, Filter filter, HitCollector results)
            {
                throw new System.NotSupportedException();
            }
            public override TopDocs Search(Weight weight, Filter filter, int n)
            {
                throw new System.NotSupportedException();
            }
            public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
            {
                throw new System.NotSupportedException();
            }
        }

        private Lucene.Net.Search.Searchable[] searchables;
        private int[] starts;
        private int maxDoc = 0;

        /// <summary>Creates a searcher which searches <i>searchables</i>. </summary>
        public MultiSearcher(Lucene.Net.Search.Searchable[] searchables)
        {
            this.searchables = searchables;
            starts = new int[searchables.Length + 1]; // build starts array
            for (int i = 0; i < searchables.Length; i++)
            {
                starts[i] = maxDoc;
                maxDoc += searchables[i].MaxDoc(); // compute maxDocs
            }
            starts[searchables.Length] = maxDoc;
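            // e.g. (hypothetical sizes): sub-indexes of 100 and 50 docs
            // yield starts = {0, 100, 150} and maxDoc = 150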
        }

        /// <summary>Return the array of {@link Searchable}s this searches. </summary>
        public virtual Lucene.Net.Search.Searchable[] GetSearchables()
        {
            return searchables;
        }

        protected internal virtual int[] GetStarts()
        {
            return starts;
        }
        // inherit javadoc
        public override void Close()
        {
            for (int i = 0; i < searchables.Length; i++)
                searchables[i].Close();
        }

        public override int DocFreq(Term term)
        {
            int docFreq = 0;
            for (int i = 0; i < searchables.Length; i++)
                docFreq += searchables[i].DocFreq(term);
            return docFreq;
        }

        // inherit javadoc
        public override Document Doc(int n)
        {
            int i = SubSearcher(n); // find searcher index
            return searchables[i].Doc(n - starts[i]); // dispatch to searcher
        }

        // inherit javadoc
        public override Document Doc(int n, FieldSelector fieldSelector)
        {
            int i = SubSearcher(n); // find searcher index
            return searchables[i].Doc(n - starts[i], fieldSelector); // dispatch to searcher
        }
        /// <summary>Returns index of the searcher for document <code>n</code> in the array
        /// used to construct this searcher.
        /// </summary>
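        /// <example>With (hypothetical) starts = {0, 100, 150}, <c>SubSearcher(120)</c>
        /// returns 1: global doc 120 lives in the second sub-index, at local id 20.
        /// </example>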
        public virtual int SubSearcher(int n)
        {
            // Binary-search the starts array for the rightmost entry <= n;
            // that entry's index is the sub-searcher holding document n.
            // (System.Array.BinarySearch could be used instead.)
            int lo = 0;
            int hi = searchables.Length - 1;
            while (hi >= lo)
            {
                int mid = (lo + hi) >> 1;
                int midValue = starts[mid];
                if (n < midValue)
                    hi = mid - 1;
                else if (n > midValue)
                    lo = mid + 1;
                else
                {
                    // found a match
                    while (mid + 1 < searchables.Length && starts[mid + 1] == midValue)
                    {
                        mid++; // scan to last match
                    }
                    return mid;
                }
            }
            return hi;
        }
        /// <summary>Returns the document number of document <code>n</code> within its
        /// sub-index.
        /// </summary>
        public virtual int SubDoc(int n)
        {
            return n - starts[SubSearcher(n)];
        }

        public override int MaxDoc()
        {
            return maxDoc;
        }

        public override TopDocs Search(Weight weight, Filter filter, int nDocs)
        {
            HitQueue hq = new HitQueue(nDocs);
            int totalHits = 0;
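            // take the top nDocs from each sub-searcher, remap their doc ids
            // into the global space, and keep the overall best in one queue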
            for (int i = 0; i < searchables.Length; i++)
            {
                // search each searcher
                TopDocs docs = searchables[i].Search(weight, filter, nDocs);
                totalHits += docs.totalHits; // update totalHits
                ScoreDoc[] scoreDocs = docs.scoreDocs;
                for (int j = 0; j < scoreDocs.Length; j++)
                {
                    // merge scoreDocs into hq
                    ScoreDoc scoreDoc = scoreDocs[j];
                    scoreDoc.doc += starts[i]; // convert doc
                    if (!hq.Insert(scoreDoc))
                        break; // no more scores > minScore
                }
            }

            ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
            for (int i = hq.Size() - 1; i >= 0; i--)
                // put docs in array
                scoreDocs2[i] = (ScoreDoc) hq.Pop();

            float maxScore = (totalHits == 0) ? System.Single.NegativeInfinity : scoreDocs2[0].score;

            return new TopDocs(totalHits, scoreDocs2, maxScore);
        }
        public override TopFieldDocs Search(Weight weight, Filter filter, int n, Sort sort)
        {
            FieldDocSortedHitQueue hq = null;
            int totalHits = 0;
            float maxScore = System.Single.NegativeInfinity;
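            // the hit queue is created lazily inside the loop: the concrete
            // sort fields (e.g. auto-detected field types) are only known
            // after the first sub-searcher has returned its results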
            for (int i = 0; i < searchables.Length; i++)
            {
                // search each searcher
                TopFieldDocs docs = searchables[i].Search(weight, filter, n, sort);
                if (hq == null)
                    hq = new FieldDocSortedHitQueue(docs.fields, n);
                totalHits += docs.totalHits; // update totalHits
                maxScore = System.Math.Max(maxScore, docs.GetMaxScore());
                ScoreDoc[] scoreDocs = docs.scoreDocs;
                for (int j = 0; j < scoreDocs.Length; j++)
                {
                    // merge scoreDocs into hq
                    ScoreDoc scoreDoc = scoreDocs[j];
                    scoreDoc.doc += starts[i]; // convert doc
                    if (!hq.Insert(scoreDoc))
                        break; // no more scores > minScore
                }
            }

            ScoreDoc[] scoreDocs2 = new ScoreDoc[hq.Size()];
            for (int i = hq.Size() - 1; i >= 0; i--)
                // put docs in array
                scoreDocs2[i] = (ScoreDoc) hq.Pop();

            return new TopFieldDocs(totalHits, scoreDocs2, hq.GetFields(), maxScore);
        }
        // inherit javadoc
        public override void Search(Weight weight, Filter filter, HitCollector results)
        {
            for (int i = 0; i < searchables.Length; i++)
            {
                int start = starts[i];
                searchables[i].Search(weight, filter, new AnonymousClassHitCollector(results, start, this));
            }
        }

        public override Query Rewrite(Query original)
        {
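            // rewrite the query against each sub-searcher, then fold the
            // per-index rewrites back into a single query via Combine()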
            Query[] queries = new Query[searchables.Length];
            for (int i = 0; i < searchables.Length; i++)
            {
                queries[i] = searchables[i].Rewrite(original);
            }
            return queries[0].Combine(queries);
        }

        public override Explanation Explain(Weight weight, int doc)
        {
            int i = SubSearcher(doc); // find searcher index
            return searchables[i].Explain(weight, doc - starts[i]); // dispatch to searcher
        }
        /// <summary> Create weight in multiple index scenario.
        /// 
        /// Distributed query processing is done in the following steps:
        /// 1. rewrite query
        /// 2. extract necessary terms
        /// 3. collect dfs for these terms from the Searchables
        /// 4. create query weight using aggregate dfs.
        /// 5. distribute that weight to Searchables
        /// 6. merge results
        /// 
        /// Steps 1-4 are done here, 5 and 6 in the Search() methods.
        /// 
        /// </summary>
        /// <returns> aggregate Weight for the rewritten query
        /// </returns>
        protected internal override Weight CreateWeight(Query original)
        {
            // step 1: rewrite the query
            Query rewrittenQuery = Rewrite(original);

            // step 2: extract the terms (the Hashtable is used as a set)
            System.Collections.Hashtable terms = new System.Collections.Hashtable();
            rewrittenQuery.ExtractTerms(terms);

            // step 3: collect and aggregate doc freqs across all sub-searchers
            Term[] allTermsArray = new Term[terms.Count];
            int index = 0;
            System.Collections.IEnumerator e = terms.Keys.GetEnumerator();
            while (e.MoveNext())
                allTermsArray[index++] = e.Current as Term;
            int[] aggregatedDfs = new int[terms.Count];
            for (int i = 0; i < searchables.Length; i++)
            {
                int[] dfs = searchables[i].DocFreqs(allTermsArray);
                for (int j = 0; j < aggregatedDfs.Length; j++)
                {
                    aggregatedDfs[j] += dfs[j];
                }
            }
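            // e.g. (hypothetical counts): a term with df=1 in one sub-index
            // and df=99 in another aggregates to df=100, so every sub-searcher
            // computes the same idf and the merged scores stay comparable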
            System.Collections.Hashtable dfMap = new System.Collections.Hashtable();
            for (int i = 0; i < allTermsArray.Length; i++)
            {
                dfMap[allTermsArray[i]] = (System.Int32) aggregatedDfs[i];
            }

            // step 4: create the Weight against a dummy searcher serving the
            // aggregated dfs
            int numDocs = MaxDoc();
            CachedDfSource cacheSim = new CachedDfSource(dfMap, numDocs, GetSimilarity());

            return rewrittenQuery.Weight(cacheSim);
        }
    }
}