// blob: 1e95fa1a7d16ba3eb4312b2ad6e73c5a860ecd48
using Lucene.Net.Analysis;
using Lucene.Net.Benchmarks.ByTask.Feeds;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using JCG = J2N.Collections.Generic;
using Console = Lucene.Net.Support.SystemConsole;
namespace Lucene.Net.Benchmarks.ByTask.Tasks
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Read index (abstract) task.
/// Sub classes implement <see cref="WithSearch"/>, <see cref="WithWarm"/>,
/// <see cref="WithTraverse"/> and <see cref="WithRetrieve"/>.
/// </summary>
/// <remarks>
/// Note: All ReadTasks reuse the reader if it is already open.
/// Otherwise a reader is opened at start and closed at the end.
/// <para/>
/// The <c>search.num.hits</c> config parameter sets
/// the top number of hits to collect during searching. If
/// <c>print.hits.field</c> is set, then each hit is
/// printed along with the value of that field.
/// <para/>
/// Other side effects: none.
/// </remarks>
public abstract class ReadTask : PerfTask
{
    // Query source for the search phase; null when this task does not search.
    private readonly IQueryMaker queryMaker;

    /// <summary>
    /// Creates the task and, when <see cref="WithSearch"/> is <c>true</c>,
    /// obtains the query maker from <see cref="GetQueryMaker"/>.
    /// </summary>
    /// <param name="runData">The run data passed on to the base task.</param>
    public ReadTask(PerfRunData runData)
        : base(runData)
    {
        // NOTE: WithSearch and GetQueryMaker() are abstract members invoked from the
        // constructor; sub classes must not rely on their own constructor state there.
        queryMaker = WithSearch ? GetQueryMaker() : null;
    }

    /// <summary>
    /// Runs the enabled phases (warm, search, print hits, traverse/retrieve/highlight)
    /// against the shared searcher if one is available, or against a reader opened
    /// just for this call.
    /// </summary>
    /// <returns>
    /// A work counter: one per document loaded while warming, one for the search,
    /// one per traversed hit, one per retrieved document, plus whatever the
    /// highlighter reports for each highlighted field.
    /// </returns>
    public override int DoLogic()
    {
        // open reader or use existing one
        IndexSearcher searcher = RunData.GetIndexSearcher();
        IndexReader reader;
        bool closeSearcher;
        if (searcher == null)
        {
            // open our own reader
            Directory dir = RunData.Directory;
            reader = DirectoryReader.Open(dir);
            searcher = new IndexSearcher(reader);
            closeSearcher = true;
        }
        else
        {
            // use existing one; this passes +1 ref to us
            reader = searcher.IndexReader;
            closeSearcher = false;
        }

        try
        {
            int res = 0;

            // optionally warm and add num docs traversed to count
            if (WithWarm)
            {
                res += WarmReader(reader);
            }

            if (WithSearch)
            {
                res += SearchAndTraverse(searcher, reader);
            }

            return res;
        }
        finally
        {
            // Always give the reader back, even when a phase above throws;
            // otherwise the reader (or our +1 ref on the shared one) would leak.
            if (closeSearcher)
            {
                reader.Dispose();
            }
            else
            {
                // Release our +1 ref from above
                reader.DecRef();
            }
        }
    }

    /// <summary>
    /// Loads every live document of <paramref name="reader"/> once.
    /// </summary>
    /// <returns>The number of non-null documents that were loaded.</returns>
    private static int WarmReader(IndexReader reader)
    {
        int loaded = 0;
        IBits liveDocs = MultiFields.GetLiveDocs(reader);
        for (int m = 0; m < reader.MaxDoc; m++)
        {
            // a null liveDocs means every document is treated as live
            if (liveDocs == null || liveDocs.Get(m))
            {
                Document doc = reader.Document(m);
                if (doc != null)
                {
                    loaded++;
                }
            }
        }
        return loaded;
    }

    /// <summary>
    /// Runs one query and, depending on the task settings, prints and traverses its hits.
    /// </summary>
    /// <returns>1 for the search plus the work counted while traversing.</returns>
    private int SearchAndTraverse(IndexSearcher searcher, IndexReader reader)
    {
        int res = 1; // the search itself counts once
        Query q = queryMaker.MakeQuery();
        Sort sort = Sort;
        int hitsToCollect = NumHits;
        if (hitsToCollect <= 0)
        {
            // nothing to collect: the query is made but never executed
            return res;
        }

        TopDocs hits = null;
        if (WithCollector == false)
        {
            if (sort != null)
            {
                // TODO: instead of always passing false we
                // should detect based on the query; if we make
                // the IndexSearcher search methods that take
                // Weight public again, we can go back to
                // pulling the Weight ourselves:
                TopFieldCollector collector = TopFieldCollector.Create(sort, hitsToCollect,
                                                                       true, WithScore,
                                                                       WithMaxScore,
                                                                       false);
                searcher.Search(q, null, collector);
                hits = collector.GetTopDocs();
            }
            else
            {
                hits = searcher.Search(q, hitsToCollect);
            }
        }
        else
        {
            // Custom collector: results stay inside the collector, so no TopDocs
            // is available for printing or traversal below.
            ICollector collector = CreateCollector();
            searcher.Search(q, null, collector);
        }

        string printHitsField = RunData.Config.Get("print.hits.field", null);
        if (hits != null && printHitsField != null && printHitsField.Length > 0)
        {
            PrintHits(reader, hits, printHitsField);
        }

        // hits is null on the custom-collector path; skip traversal there
        // instead of failing with a NullReferenceException.
        if (WithTraverse && hits != null)
        {
            res += TraverseHits(reader, q, hits);
        }

        return res;
    }

    /// <summary>
    /// Writes the hit totals and, for each hit, its doc id, score and the value of
    /// <paramref name="printHitsField"/> to the console.
    /// </summary>
    private static void PrintHits(IndexReader reader, TopDocs hits, string printHitsField)
    {
        Console.WriteLine("totalHits = " + hits.TotalHits);
        Console.WriteLine("maxDoc() = " + reader.MaxDoc);
        Console.WriteLine("numDocs() = " + reader.NumDocs);
        for (int i = 0; i < hits.ScoreDocs.Length; i++)
        {
            int docID = hits.ScoreDocs[i].Doc;
            Document doc = reader.Document(docID);
            Console.WriteLine(" " + i + ": doc=" + docID + " score=" + hits.ScoreDocs[i].Score + " " + printHitsField + " =" + doc.Get(printHitsField));
        }
    }

    /// <summary>
    /// Walks the first <c>Math.Min(hits.ScoreDocs.Length, TraversalSize)</c> hits,
    /// optionally retrieving each document and highlighting the leading
    /// <see cref="NumToHighlight"/> of them.
    /// </summary>
    /// <returns>The work counted for traversal, retrieval and highlighting.</returns>
    private int TraverseHits(IndexReader reader, Query q, TopDocs hits)
    {
        int res = 0;
        ScoreDoc[] scoreDocs = hits.ScoreDocs;
        int traversalSize = Math.Min(scoreDocs.Length, TraversalSize);
        if (traversalSize <= 0)
        {
            return res;
        }

        bool retrieve = WithRetrieve;
        int numHighlight = Math.Min(NumToHighlight, scoreDocs.Length);
        Analyzer analyzer = RunData.Analyzer;
        BenchmarkHighlighter highlighter = null;
        if (numHighlight > 0)
        {
            highlighter = GetBenchmarkHighlighter(q);
        }

        for (int m = 0; m < traversalSize; m++)
        {
            int id = scoreDocs[m].Doc;
            res++;
            if (!retrieve)
            {
                continue;
            }

            Document document = RetrieveDoc(reader, id);
            res += document != null ? 1 : 0;

            // Highlight only when there is something to highlight with and on:
            // the base GetBenchmarkHighlighter returns null, and RetrieveDoc is
            // treated as possibly returning null just above.
            if (m < numHighlight && highlighter != null && document != null)
            {
                ICollection<string> fieldsToHighlight = GetFieldsToHighlight(document);
                foreach (string field in fieldsToHighlight)
                {
                    string text = document.Get(field);
                    res += highlighter.DoHighlight(reader, id, field, document, analyzer, text);
                }
            }
        }

        return res;
    }

    /// <summary>
    /// Creates the collector used when <see cref="WithCollector"/> is <c>true</c>.
    /// The base implementation returns a <see cref="TopScoreDocCollector"/> sized to
    /// <see cref="NumHits"/>.
    /// </summary>
    protected virtual ICollector CreateCollector()
    {
        return TopScoreDocCollector.Create(NumHits, true);
    }

    /// <summary>
    /// Loads the document with the given id from the reader. Sub classes may
    /// override this to change how documents are loaded.
    /// </summary>
    /// <param name="ir">The reader to load from.</param>
    /// <param name="id">The document id.</param>
    /// <returns>The loaded <see cref="Document"/>.</returns>
    protected virtual Document RetrieveDoc(IndexReader ir, int id)
    {
        return ir.Document(id);
    }

    /// <summary>
    /// Return query maker used for this task.
    /// </summary>
    public abstract IQueryMaker GetQueryMaker();

    /// <summary>
    /// Return <c>true</c> if search should be performed.
    /// </summary>
    public abstract bool WithSearch { get; }

    /// <summary>
    /// Return <c>true</c> if the search should run through the collector returned by
    /// <see cref="CreateCollector"/>. Defaults to <c>false</c>.
    /// </summary>
    public virtual bool WithCollector
    {
        get { return false; }
    }

    /// <summary>
    /// Return <c>true</c> if warming should be performed.
    /// </summary>
    public abstract bool WithWarm { get; }

    /// <summary>
    /// Return <c>true</c> if, with search, results should be traversed.
    /// </summary>
    public abstract bool WithTraverse { get; }

    /// <summary>
    /// Whether scores should be computed (only useful with
    /// field sort)
    /// </summary>
    public virtual bool WithScore
    {
        get { return true; }
    }

    /// <summary>
    /// Whether maxScores should be computed (only useful with
    /// field sort)
    /// </summary>
    public virtual bool WithMaxScore
    {
        get { return true; }
    }

    /// <summary>
    /// Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
    /// of hits that are traversed when <see cref="WithTraverse"/> is <c>true</c>. Must be greater than 0.
    /// <para/>
    /// Read task calculates the traversal as: <c>Math.Min(hits.Length, TraversalSize)</c>
    /// </summary>
    /// <remarks>
    /// Unless overridden, the return value is <see cref="int.MaxValue"/>.
    /// </remarks>
    public virtual int TraversalSize
    {
        get { return int.MaxValue; }
    }

    internal static readonly int DEFAULT_SEARCH_NUM_HITS = 10;

    // Value of search.num.hits, read from the config in Setup().
    private int numHits;

    /// <summary>
    /// Runs the base setup and reads the <c>search.num.hits</c> config value
    /// (default <see cref="DEFAULT_SEARCH_NUM_HITS"/>).
    /// </summary>
    public override void Setup()
    {
        base.Setup();
        numHits = RunData.Config.Get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
    }

    /// <summary>
    /// Specify the number of hits to retrieve. Tasks should override this if they want to restrict the number
    /// of hits that are collected during searching. Must be greater than 0.
    /// <para/>
    /// Returns 10 by default, or <c>search.num.hits</c> config if set.
    /// </summary>
    public virtual int NumHits
    {
        get { return numHits; }
    }

    /// <summary>
    /// Return <c>true</c> if, with search &amp; results traversing, docs should be retrieved.
    /// </summary>
    public abstract bool WithRetrieve { get; }

    /// <summary>
    /// The number of documents to highlight. 0 means no docs will be highlighted.
    /// </summary>
    public virtual int NumToHighlight
    {
        get { return 0; }
    }

    /// <summary>
    /// Return an appropriate highlighter to be used with
    /// highlighting tasks. The base implementation returns <c>null</c>,
    /// in which case no highlighting is performed.
    /// </summary>
    /// <param name="q">The query whose hits are about to be highlighted.</param>
    /// <returns>The highlighter to use, or <c>null</c> for none.</returns>
    protected virtual BenchmarkHighlighter GetBenchmarkHighlighter(Query q)
    {
        return null;
    }

    /// <summary>
    /// The sort to apply to search results, or <c>null</c> (the default) to search
    /// without a field sort.
    /// </summary>
    public virtual Sort Sort
    {
        get { return null; }
    }

    /// <summary>
    /// Define the fields to highlight. Base implementation returns all fields.
    /// </summary>
    /// <param name="document">The <see cref="Document"/>.</param>
    /// <returns>An <see cref="T:ICollection{string}"/> of <see cref="Field"/> names.</returns>
    protected virtual ICollection<string> GetFieldsToHighlight(Document document)
    {
        IList<IIndexableField> fields = document.Fields;
        ISet<string> result = new JCG.HashSet<string>(fields.Count);
        foreach (IIndexableField f in fields)
        {
            result.Add(f.Name);
        }
        return result;
    }
}
}