// src/Lucene.Net.Benchmark/Quality/QualityBenchmark.cs - lucenenet - Git at Google

 using Lucene.Net.Benchmarks.Quality.Utils;
 using Lucene.Net.Search;
 using System;
 using System.IO;

 namespace Lucene.Net.Benchmarks.Quality
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// Main entry point for running a quality benchmark.
     /// <para/>
     /// There are two main configurations for running a quality benchmark:
     /// <list type="bullet">
     ///     <item><description>Against existing judgements.</description></item>
     ///     <item><description>For submission (e.g. for a contest).</description></item>
     /// </list>
     /// The first configuration requires a non-null <see cref="IJudge"/>.
     /// The second configuration requires a non-null <see cref="Utils.SubmissionReport"/>.
     /// Both may be supplied in the same run; each query is then judged and reported.
     /// </summary>
     public class QualityBenchmark
     {
         /// <summary>Quality queries that this quality benchmark executes.</summary>
         protected QualityQuery[] m_qualityQueries;

         /// <summary>Parser for turning quality queries into Lucene queries.</summary>
         protected IQualityQueryParser m_qqParser;

         /// <summary>Index to be searched.</summary>
         protected IndexSearcher m_searcher;

         /// <summary>Index field from which the doc name of each search result is extracted; used for judging the results.</summary>
         protected string m_docNameField;

         /// <summary>Maximal number of queries that this quality benchmark runs. Default: <see cref="int.MaxValue"/>. Useful for debugging.</summary>
         private int maxQueries = int.MaxValue;

         /// <summary>Maximal number of results to collect for each query. Default: 1000.</summary>
         private int maxResults = 1000;

         /// <summary>
         /// Create a <see cref="QualityBenchmark"/>.
         /// </summary>
         /// <param name="qqs">Quality queries to run.</param>
         /// <param name="qqParser">Parser for turning quality queries into Lucene queries.</param>
         /// <param name="searcher">Index to be searched.</param>
         /// <param name="docNameField">
         /// Name of the field containing the document name.
         /// This allows the doc name to be extracted for search results,
         /// and is important for judging the results.
         /// </param>
         public QualityBenchmark(QualityQuery[] qqs, IQualityQueryParser qqParser,
             IndexSearcher searcher, string docNameField)
         {
             this.m_qualityQueries = qqs;
             this.m_qqParser = qqParser;
             this.m_searcher = searcher;
             this.m_docNameField = docNameField;
         }

         /// <summary>
         /// Run the quality benchmark.
         /// <para/>
         /// At most <see cref="MaxQueries"/> queries are executed, in array order, and at most
         /// <see cref="MaxResults"/> hits are collected for each of them.
         /// </summary>
         /// <param name="judge">
         /// The judge that can tell if a certain result doc is relevant for a certain quality query.
         /// If null, no judgements are made. Usually null for a submission run.
         /// </param>
         /// <param name="submitRep">Submission report is created (and flushed at the end of the run) if non-null.</param>
         /// <param name="qualityLog">If not null, quality run data is printed for each judged query.</param>
         /// <returns>
         /// One <see cref="QualityStats"/> slot per executed quality query.
         /// Slots are left null when <paramref name="judge"/> is null.
         /// </returns>
         /// <exception cref="Exception">If quality benchmark failed to run.</exception>
         public virtual QualityStats[] Execute(IJudge judge, SubmissionReport submitRep,
                                         TextWriter qualityLog)
         {
             int nQueries = Math.Min(maxQueries, m_qualityQueries.Length);
             QualityStats[] stats = new QualityStats[nQueries];
             for (int i = 0; i < nQueries; i++)
             {
                 QualityQuery qq = m_qualityQueries[i];
                 // generate query
                 Query q = m_qqParser.Parse(qq);
                 // search with this query; only the search call itself is timed
                 long t1 = J2N.Time.CurrentTimeMilliseconds();
                 TopDocs td = m_searcher.Search(q, null, maxResults);
                 long searchTime = J2N.Time.CurrentTimeMilliseconds() - t1;
                 // most likely we either submit or judge, but check both
                 if (judge != null)
                 {
                     stats[i] = AnalyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
                 }
                 if (submitRep != null)
                 {
                     submitRep.Report(qq, td, m_docNameField, m_searcher);
                 }
             }
             if (submitRep != null)
             {
                 submitRep.Flush();
             }
             return stats;
         }

         /// <summary>Analyze/judge results for a single quality query; optionally log them.</summary>
         /// <param name="qq">The quality query whose results are analyzed.</param>
         /// <param name="q">The Lucene query created from <paramref name="qq"/>; only used for logging.</param>
         /// <param name="td">Search results for <paramref name="q"/>.</param>
         /// <param name="judge">Judge consulted for the relevance of each result doc; must not be null.</param>
         /// <param name="logger">If not null, the query and its statistics are written to it.</param>
         /// <param name="searchTime">Time, in milliseconds, that the search took.</param>
         /// <returns>The statistics collected for this query.</returns>
         private QualityStats AnalyzeQueryResults(QualityQuery qq, Query q, TopDocs td, IJudge judge, TextWriter logger, long searchTime)
         {
             QualityStats stts = new QualityStats(judge.MaxRecall(qq), searchTime);
             ScoreDoc[] sd = td.ScoreDocs;
             // The clock starts before the extractor is built, so the time reported for the
             // first doc name also covers construction of the doc name extractor, just in case.
             long t1 = J2N.Time.CurrentTimeMilliseconds();
             DocNameExtractor xt = new DocNameExtractor(m_docNameField);
             for (int i = 0; i < sd.Length; i++)
             {
                 string docName = xt.DocName(m_searcher, sd[i].Doc);
                 long docNameExtractTime = J2N.Time.CurrentTimeMilliseconds() - t1;
                 bool isRelevant = judge.IsRelevant(docName, qq);
                 stts.AddResult(i + 1, isRelevant, docNameExtractTime); // ranks are 1-based
                 // Restart the clock only after judging and recording, so that the next
                 // measurement covers doc name extraction alone rather than also the
                 // time spent in the judge for the previous doc.
                 t1 = J2N.Time.CurrentTimeMilliseconds();
             }
             if (logger != null)
             {
                 logger.WriteLine(qq.QueryID + "  -  " + q);
                 stts.Log(qq.QueryID + " Stats:", 1, logger, "  ");
             }
             return stts;
         }

         /// <summary>
         /// The maximum number of quality queries to run. Useful for debugging.
         /// </summary>
         public virtual int MaxQueries
         {
             get => maxQueries;
             set => maxQueries = value;
         }

         /// <summary>
         /// The maximum number of results to collect for each quality query.
         /// </summary>
         public virtual int MaxResults
         {
             get => maxResults;
             set => maxResults = value;
         }
     }
 }
	using Lucene.Net.Benchmarks.Quality.Utils;
	using Lucene.Net.Search;
	using System;
	using System.IO;

	namespace Lucene.Net.Benchmarks.Quality
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/// <summary>
	/// Main entry point for running a quality benchmark.
	/// <para/>
	/// There are two main configurations for running a quality benchmark:
	/// <list type="bullet">
	///     <item><description>Against existing judgements.</description></item>
	///     <item><description>For submission (e.g. for a contest).</description></item>
	/// </list>
	/// The first configuration requires a non-null <see cref="IJudge"/>.
	/// The second configuration requires a non-null <see cref="Utils.SubmissionReport"/>.
	/// Both may be supplied in the same run; each query is then judged and reported.
	/// </summary>
	public class QualityBenchmark
	{
	    /// <summary>Quality queries that this quality benchmark executes.</summary>
	    protected QualityQuery[] m_qualityQueries;

	    /// <summary>Parser for turning quality queries into Lucene queries.</summary>
	    protected IQualityQueryParser m_qqParser;

	    /// <summary>Index to be searched.</summary>
	    protected IndexSearcher m_searcher;

	    /// <summary>Index field from which the doc name of each search result is extracted; used for judging the results.</summary>
	    protected string m_docNameField;

	    /// <summary>Maximal number of queries that this quality benchmark runs. Default: <see cref="int.MaxValue"/>. Useful for debugging.</summary>
	    private int maxQueries = int.MaxValue;

	    /// <summary>Maximal number of results to collect for each query. Default: 1000.</summary>
	    private int maxResults = 1000;

	    /// <summary>
	    /// Create a <see cref="QualityBenchmark"/>.
	    /// </summary>
	    /// <param name="qqs">Quality queries to run.</param>
	    /// <param name="qqParser">Parser for turning quality queries into Lucene queries.</param>
	    /// <param name="searcher">Index to be searched.</param>
	    /// <param name="docNameField">
	    /// Name of the field containing the document name.
	    /// This allows the doc name to be extracted for search results,
	    /// and is important for judging the results.
	    /// </param>
	    public QualityBenchmark(QualityQuery[] qqs, IQualityQueryParser qqParser,
	        IndexSearcher searcher, string docNameField)
	    {
	        this.m_qualityQueries = qqs;
	        this.m_qqParser = qqParser;
	        this.m_searcher = searcher;
	        this.m_docNameField = docNameField;
	    }

	    /// <summary>
	    /// Run the quality benchmark.
	    /// <para/>
	    /// At most <see cref="MaxQueries"/> queries are executed, in array order, and at most
	    /// <see cref="MaxResults"/> hits are collected for each of them.
	    /// </summary>
	    /// <param name="judge">
	    /// The judge that can tell if a certain result doc is relevant for a certain quality query.
	    /// If null, no judgements are made. Usually null for a submission run.
	    /// </param>
	    /// <param name="submitRep">Submission report is created (and flushed at the end of the run) if non-null.</param>
	    /// <param name="qualityLog">If not null, quality run data is printed for each judged query.</param>
	    /// <returns>
	    /// One <see cref="QualityStats"/> slot per executed quality query.
	    /// Slots are left null when <paramref name="judge"/> is null.
	    /// </returns>
	    /// <exception cref="Exception">If quality benchmark failed to run.</exception>
	    public virtual QualityStats[] Execute(IJudge judge, SubmissionReport submitRep,
	        TextWriter qualityLog)
	    {
	        int nQueries = Math.Min(maxQueries, m_qualityQueries.Length);
	        QualityStats[] stats = new QualityStats[nQueries];
	        for (int i = 0; i < nQueries; i++)
	        {
	            QualityQuery qq = m_qualityQueries[i];
	            // generate query
	            Query q = m_qqParser.Parse(qq);
	            // search with this query; only the search call itself is timed
	            long t1 = J2N.Time.CurrentTimeMilliseconds();
	            TopDocs td = m_searcher.Search(q, null, maxResults);
	            long searchTime = J2N.Time.CurrentTimeMilliseconds() - t1;
	            // most likely we either submit or judge, but check both
	            if (judge != null)
	            {
	                stats[i] = AnalyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
	            }
	            if (submitRep != null)
	            {
	                submitRep.Report(qq, td, m_docNameField, m_searcher);
	            }
	        }
	        if (submitRep != null)
	        {
	            submitRep.Flush();
	        }
	        return stats;
	    }

	    /// <summary>Analyze/judge results for a single quality query; optionally log them.</summary>
	    /// <param name="qq">The quality query whose results are analyzed.</param>
	    /// <param name="q">The Lucene query created from <paramref name="qq"/>; only used for logging.</param>
	    /// <param name="td">Search results for <paramref name="q"/>.</param>
	    /// <param name="judge">Judge consulted for the relevance of each result doc; must not be null.</param>
	    /// <param name="logger">If not null, the query and its statistics are written to it.</param>
	    /// <param name="searchTime">Time, in milliseconds, that the search took.</param>
	    /// <returns>The statistics collected for this query.</returns>
	    private QualityStats AnalyzeQueryResults(QualityQuery qq, Query q, TopDocs td, IJudge judge, TextWriter logger, long searchTime)
	    {
	        QualityStats stts = new QualityStats(judge.MaxRecall(qq), searchTime);
	        ScoreDoc[] sd = td.ScoreDocs;
	        // The clock starts before the extractor is built, so the time reported for the
	        // first doc name also covers construction of the doc name extractor, just in case.
	        long t1 = J2N.Time.CurrentTimeMilliseconds();
	        DocNameExtractor xt = new DocNameExtractor(m_docNameField);
	        for (int i = 0; i < sd.Length; i++)
	        {
	            string docName = xt.DocName(m_searcher, sd[i].Doc);
	            long docNameExtractTime = J2N.Time.CurrentTimeMilliseconds() - t1;
	            bool isRelevant = judge.IsRelevant(docName, qq);
	            stts.AddResult(i + 1, isRelevant, docNameExtractTime); // ranks are 1-based
	            // Restart the clock only after judging and recording, so that the next
	            // measurement covers doc name extraction alone rather than also the
	            // time spent in the judge for the previous doc.
	            t1 = J2N.Time.CurrentTimeMilliseconds();
	        }
	        if (logger != null)
	        {
	            logger.WriteLine(qq.QueryID + " - " + q);
	            stts.Log(qq.QueryID + " Stats:", 1, logger, " ");
	        }
	        return stts;
	    }

	    /// <summary>
	    /// The maximum number of quality queries to run. Useful for debugging.
	    /// </summary>
	    public virtual int MaxQueries
	    {
	        get => maxQueries;
	        set => maxQueries = value;
	    }

	    /// <summary>
	    /// The maximum number of results to collect for each quality query.
	    /// </summary>
	    public virtual int MaxResults
	    {
	        get => maxResults;
	        set => maxResults = value;
	    }
	}
	}