blob: 21bc6f0ec1f246346158c7aeb4f027d82f64335a [file] [log] [blame]
using Lucene.Net.Documents;
using Lucene.Net.Documents.Extensions;
using Lucene.Net.Index;
using Lucene.Net.Search.Spell;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Search.Suggest
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// <para>
/// Dictionary with terms, weights, payload (optional) and contexts (optional)
/// information taken from stored/indexed fields in a Lucene index.
/// </para>
/// <b>NOTE:</b>
/// <list type="bullet">
/// <item><description>
/// The term and (optionally) payload fields have to be
/// stored
/// </description></item>
/// <item><description>
/// The weight field can be stored or can be a <see cref="NumericDocValues"/>.
/// If the weight field is not defined, the value of the weight is <c>0</c>
/// </description></item>
/// <item><description>
/// if any of the term or (optionally) payload fields supplied
/// do not have a value for a document, then the document is
/// skipped by the dictionary
/// </description></item>
/// </list>
/// </summary>
public class DocumentDictionary : IDictionary
{
/// <summary>
/// <see cref="IndexReader"/> to load documents from </summary>
protected readonly IndexReader m_reader;
/// <summary>
/// Field to read payload from </summary>
protected readonly string m_payloadField;
/// <summary>
/// Field to read contexts from </summary>
protected readonly string m_contextsField;
private readonly string field;
private readonly string weightField;
/// <summary>
/// Creates a new dictionary with the contents of the fields named <paramref name="field"/>
/// for the terms and <paramref name="weightField"/> for the weights that will be used for
/// the corresponding terms.
/// </summary>
public DocumentDictionary(IndexReader reader, string field, string weightField)
: this(reader, field, weightField, null)
{
}
/// <summary>
/// Creates a new dictionary with the contents of the fields named <paramref name="field"/>
/// for the terms, <paramref name="weightField"/> for the weights that will be used for the
/// the corresponding terms and <paramref name="payloadField"/> for the corresponding payloads
/// for the entry.
/// </summary>
public DocumentDictionary(IndexReader reader, string field, string weightField, string payloadField)
: this(reader, field, weightField, payloadField, null)
{
}
/// <summary>
/// Creates a new dictionary with the contents of the fields named <paramref name="field"/>
/// for the terms, <paramref name="weightField"/> for the weights that will be used for the
/// the corresponding terms, <paramref name="payloadField"/> for the corresponding payloads
/// for the entry and <paramref name="contextsField"/> for associated contexts.
/// </summary>
public DocumentDictionary(IndexReader reader, string field, string weightField, string payloadField, string contextsField)
{
this.m_reader = reader;
this.field = field;
this.weightField = weightField;
this.m_payloadField = payloadField;
this.m_contextsField = contextsField;
}
public virtual IInputIterator GetEntryIterator()
{
return new DocumentInputIterator(this, m_payloadField != null, m_contextsField != null);
}
/// <summary>
/// Implements <see cref="IInputIterator"/> from stored fields. </summary>
protected internal class DocumentInputIterator : IInputIterator
{
private readonly DocumentDictionary outerInstance;
private readonly int docCount;
private readonly ISet<string> relevantFields;
private readonly bool hasPayloads;
private readonly bool hasContexts;
private readonly IBits liveDocs;
private int currentDocId = -1;
private long currentWeight;
private BytesRef currentPayload;
private ISet<BytesRef> currentContexts;
private readonly NumericDocValues weightValues;
/// <summary>
/// Creates an iterator over term, weight and payload fields from the lucene
/// index. setting <see cref="HasPayloads"/> to false, implies an iterator
/// over only term and weight.
/// </summary>
public DocumentInputIterator(DocumentDictionary outerInstance, bool hasPayloads, bool hasContexts)
{
this.outerInstance = outerInstance;
this.hasPayloads = hasPayloads;
this.hasContexts = hasContexts;
docCount = outerInstance.m_reader.MaxDoc - 1;
weightValues = (outerInstance.weightField != null) ? MultiDocValues.GetNumericValues(outerInstance.m_reader, outerInstance.weightField) : null;
liveDocs = (outerInstance.m_reader.Leaves.Count > 0) ? MultiFields.GetLiveDocs(outerInstance.m_reader) : null;
relevantFields = GetRelevantFields(new string[] { outerInstance.field, outerInstance.weightField, outerInstance.m_payloadField, outerInstance.m_contextsField });
}
public virtual long Weight
{
get { return currentWeight; }
}
public virtual IComparer<BytesRef> Comparer
{
get
{
return null;
}
}
public virtual BytesRef Next()
{
while (currentDocId < docCount)
{
currentDocId++;
if (liveDocs != null && !liveDocs.Get(currentDocId))
{
continue;
}
Document doc = outerInstance.m_reader.Document(currentDocId, relevantFields);
BytesRef tempPayload = null;
BytesRef tempTerm = null;
ISet<BytesRef> tempContexts = new JCG.HashSet<BytesRef>();
if (hasPayloads)
{
IIndexableField payload = doc.GetField(outerInstance.m_payloadField);
if (payload == null || (payload.GetBinaryValue() == null && payload.GetStringValue() == null))
{
continue;
}
tempPayload = payload.GetBinaryValue() ?? new BytesRef(payload.GetStringValue());
}
if (hasContexts)
{
IIndexableField[] contextFields = doc.GetFields(outerInstance.m_contextsField);
foreach (IIndexableField contextField in contextFields)
{
if (contextField.GetBinaryValue() == null && contextField.GetStringValue() == null)
{
continue;
}
else
{
tempContexts.Add(contextField.GetBinaryValue() ?? new BytesRef(contextField.GetStringValue()));
}
}
}
IIndexableField fieldVal = doc.GetField(outerInstance.field);
if (fieldVal == null || (fieldVal.GetBinaryValue() == null && fieldVal.GetStringValue() == null))
{
continue;
}
tempTerm = (fieldVal.GetStringValue() != null) ? new BytesRef(fieldVal.GetStringValue()) : fieldVal.GetBinaryValue();
currentPayload = tempPayload;
currentContexts = tempContexts;
currentWeight = GetWeight(doc, currentDocId);
return tempTerm;
}
return null;
}
public virtual BytesRef Payload => currentPayload;
public virtual bool HasPayloads => hasPayloads;
/// <summary>
/// Returns the value of the <see cref="Weight"/> property for the current document.
/// Retrieves the value for the <see cref="Weight"/> property if its stored (using <paramref name="doc"/>)
/// or if its indexed as <see cref="NumericDocValues"/> (using <paramref name="docId"/>) for the document.
/// If no value is found, then the weight is 0.
/// </summary>
protected internal virtual long GetWeight(Document doc, int docId)
{
IIndexableField weight = doc.GetField(outerInstance.weightField);
if (weight != null) // found weight as stored
{
return weight.GetInt64ValueOrDefault();
} // found weight as NumericDocValue
else if (weightValues != null)
{
return weightValues.Get(docId);
} // fall back
else
{
return 0;
}
}
private ISet<string> GetRelevantFields(params string[] fields)
{
var relevantFields = new JCG.HashSet<string>();
foreach (string relevantField in fields)
{
if (relevantField != null)
{
relevantFields.Add(relevantField);
}
}
return relevantFields;
}
public virtual IEnumerable<BytesRef> Contexts
{
get
{
if (hasContexts)
{
return currentContexts;
}
return null;
}
}
public virtual bool HasContexts => hasContexts;
}
}
}