// blob: a191dd0be8820b8222c2b1b22f02dbe868ef8ca7 [file] [log] [blame]
using J2N.Text;
using Lucene.Net.Support;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
namespace Lucene.Net.Benchmarks.Quality.Trec
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Judge if a given document is relevant to a given quality query, based on the TREC format for judgements.
/// </summary>
public class TrecJudge : IJudge
{
    // Judgements keyed by query ID. An entry is created only when a query
    // has at least one document marked relevant.
    private readonly IDictionary<string, QRelJudgement> judgements;

    /// <summary>
    /// Constructor from a reader.
    /// </summary>
    /// <remarks>
    /// Expected input format:
    /// <code>
    /// qnum 0 doc-name is-relevant
    /// </code>
    /// Two sample lines:
    /// <code>
    /// 19 0 doc303 1
    /// 19 0 doc7295 0
    /// </code>
    /// Blank lines and lines starting with <c>#</c> are skipped. The second
    /// column is read but ignored, and any value other than <c>0</c> in the
    /// fourth column marks the document as relevant.
    /// The <paramref name="reader"/> is disposed before the constructor
    /// returns, whether or not reading succeeded.
    /// </remarks>
    /// <param name="reader">Where judgments are read from.</param>
    /// <exception cref="ArgumentNullException">If <paramref name="reader"/> is <c>null</c>.</exception>
    /// <exception cref="IOException">If there is a low-level I/O error.</exception>
    public TrecJudge(TextReader reader)
    {
        if (reader == null)
        {
            throw new ArgumentNullException("reader");
        }

        judgements = new Dictionary<string, QRelJudgement>();
        QRelJudgement curr = null;
        string zero = "0";
        string line;
        try
        {
            while (null != (line = reader.ReadLine()))
            {
                line = line.Trim();
                if (line.Length == 0 || '#' == line[0])
                {
                    continue;
                }

                StringTokenizer st = new StringTokenizer(line);
                st.MoveNext();
                string queryID = st.Current;
                st.MoveNext(); // second column is not used
                st.MoveNext();
                string docName = st.Current;
                st.MoveNext();
                bool relevant = !zero.Equals(st.Current, StringComparison.Ordinal);

                // A well-formed line has exactly four tokens, all consumed above,
                // so nothing may remain. The message argument is evaluated whenever
                // the assertion is compiled in, hence the MoveNext() guard before
                // reading the extra token.
                Debug.Assert(st.RemainingTokens == 0, "wrong format: " + line + " next: " + (st.MoveNext() ? st.Current : ""));

                if (relevant)
                { // only keep relevant docs
                    // Reuse the previous judgement when consecutive lines share a query ID;
                    // otherwise look it up, creating it on first sight.
                    if (curr == null || !curr.queryID.Equals(queryID, StringComparison.Ordinal))
                    {
                        if (!judgements.TryGetValue(queryID, out curr) || curr == null)
                        {
                            curr = new QRelJudgement(queryID);
                            judgements[queryID] = curr;
                        }
                    }
                    curr.AddRelevantDoc(docName);
                }
            }
        }
        finally
        {
            reader.Dispose();
        }
    }

    /// <summary>
    /// Checks whether <paramref name="docName"/> was recorded as relevant for the given query.
    /// </summary>
    /// <param name="docName">Name of the document to check.</param>
    /// <param name="query">Query whose <c>QueryID</c> selects the judgement.</param>
    /// <returns>
    /// <c>true</c> if a judgement exists for the query and lists the document;
    /// <c>false</c> otherwise, including when the query has no judgement at all.
    /// </returns>
    public virtual bool IsRelevant(string docName, QualityQuery query)
    {
        QRelJudgement qrj;
        judgements.TryGetValue(query.QueryID, out qrj);
        return qrj != null && qrj.IsRelevant(docName);
    }

    /// <summary>
    /// Single Judgement of a trec quality query: the set of document names
    /// recorded as relevant for one query ID.
    /// </summary>
    private sealed class QRelJudgement
    {
        internal string queryID;
        private readonly HashSet<string> relevantDocs;

        internal QRelJudgement(string queryID)
        {
            this.queryID = queryID;
            relevantDocs = new HashSet<string>();
        }

        /// <summary>Records a document as relevant; adding the same name twice has no further effect.</summary>
        public void AddRelevantDoc(string docName)
        {
            relevantDocs.Add(docName);
        }

        /// <summary>Returns <c>true</c> if the document name was previously added.</summary>
        internal bool IsRelevant(string docName)
        {
            return relevantDocs.Contains(docName);
        }

        /// <summary>Number of distinct relevant documents recorded for this query.</summary>
        public int MaxRecall
        {
            get { return relevantDocs.Count; }
        }
    }

    /// <summary>
    /// Cross-checks the given queries against the loaded judgements.
    /// </summary>
    /// <param name="qq">Queries to validate.</param>
    /// <param name="logger">Receives warning lines for each mismatch; may be <c>null</c> to suppress output.</param>
    /// <returns>
    /// <c>true</c> only if every query has a judgement and every judgement
    /// matches a query; <c>false</c> otherwise.
    /// </returns>
    public virtual bool ValidateData(QualityQuery[] qq, TextWriter logger)
    {
        // Start from a copy of all judgements and strike out each one matched by a query;
        // whatever is left at the end matched no query.
        IDictionary<string, QRelJudgement> missingQueries = new Dictionary<string, QRelJudgement>(judgements);
        IList<string> missingJudgements = new List<string>();
        for (int i = 0; i < qq.Length; i++)
        {
            string id = qq[i].QueryID;
            if (!missingQueries.Remove(id))
            {
                missingJudgements.Add(id);
            }
        }

        bool isValid = true;
        if (missingJudgements.Count > 0)
        {
            isValid = false;
            if (logger != null)
            {
                logger.WriteLine("WARNING: " + missingJudgements.Count + " queries have no judgments! - ");
                for (int i = 0; i < missingJudgements.Count; i++)
                {
                    logger.WriteLine(" " + missingJudgements[i]);
                }
            }
        }
        if (missingQueries.Count > 0)
        {
            isValid = false;
            if (logger != null)
            {
                logger.WriteLine("WARNING: " + missingQueries.Count + " judgments match no query! - ");
                foreach (string id in missingQueries.Keys)
                {
                    logger.WriteLine(" " + id);
                }
            }
        }
        return isValid;
    }

    /// <summary>
    /// Returns the number of documents recorded as relevant for the given query.
    /// </summary>
    /// <param name="query">Query whose <c>QueryID</c> selects the judgement.</param>
    /// <returns>The count of relevant documents, or <c>0</c> if the query has no judgement.</returns>
    public virtual int MaxRecall(QualityQuery query)
    {
        QRelJudgement qrj;
        if (judgements.TryGetValue(query.QueryID, out qrj) && qrj != null)
        {
            return qrj.MaxRecall;
        }
        return 0;
    }
}
}