blob: b418d8eb8650e60f698d019fff00a6ab75f6bb92 [file] [log] [blame]
using J2N.Collections.Generic.Extensions;
using Lucene.Net.Diagnostics;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Collection of <see cref="Index.FieldInfo"/>s (accessible by number or by name).
/// <para/>
/// @lucene.experimental
/// </summary>
public class FieldInfos : IEnumerable<FieldInfo>
{
private readonly bool hasFreq;
private readonly bool hasProx;
private readonly bool hasPayloads;
private readonly bool hasOffsets;
private readonly bool hasVectors;
private readonly bool hasNorms;
private readonly bool hasDocValues;
private readonly IDictionary<int, FieldInfo> byNumber = new JCG.SortedDictionary<int, FieldInfo>();
private readonly IDictionary<string, FieldInfo> byName = new JCG.Dictionary<string, FieldInfo>();
private readonly ICollection<FieldInfo> values; // for an unmodifiable iterator
/// <summary>
/// Constructs a new <see cref="FieldInfos"/> from an array of <see cref="Index.FieldInfo"/> objects
/// </summary>
public FieldInfos(FieldInfo[] infos)
{
bool hasVectors = false;
bool hasProx = false;
bool hasPayloads = false;
bool hasOffsets = false;
bool hasFreq = false;
bool hasNorms = false;
bool hasDocValues = false;
foreach (FieldInfo info in infos)
{
if (info.Number < 0)
{
throw new ArgumentException("illegal field number: " + info.Number + " for field " + info.Name);
}
FieldInfo previous;
if (byNumber.TryGetValue(info.Number, out previous))
{
throw new ArgumentException("duplicate field numbers: " + previous.Name + " and " + info.Name + " have: " + info.Number);
}
byNumber[info.Number] = info;
if (byName.TryGetValue(info.Name, out previous))
{
throw new ArgumentException("duplicate field names: " + previous.Number + " and " + info.Number + " have: " + info.Name);
}
byName[info.Name] = info;
hasVectors |= info.HasVectors;
hasProx |= info.IsIndexed && info.IndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
hasFreq |= info.IsIndexed && info.IndexOptions != IndexOptions.DOCS_ONLY;
hasOffsets |= info.IsIndexed && info.IndexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
hasNorms |= info.HasNorms;
hasDocValues |= info.HasDocValues;
hasPayloads |= info.HasPayloads;
}
this.hasVectors = hasVectors;
this.hasProx = hasProx;
this.hasPayloads = hasPayloads;
this.hasOffsets = hasOffsets;
this.hasFreq = hasFreq;
this.hasNorms = hasNorms;
this.hasDocValues = hasDocValues;
this.values = byNumber.Values;
}
/// <summary>
/// Returns <c>true</c> if any fields have freqs </summary>
public virtual bool HasFreq => hasFreq;
/// <summary>
/// Returns <c>true</c> if any fields have positions </summary>
public virtual bool HasProx => hasProx;
/// <summary>
/// Returns <c>true</c> if any fields have payloads </summary>
public virtual bool HasPayloads => hasPayloads;
/// <summary>
/// Returns <c>true</c> if any fields have offsets </summary>
public virtual bool HasOffsets => hasOffsets;
/// <summary>
/// Returns <c>true</c> if any fields have vectors </summary>
public virtual bool HasVectors => hasVectors;
/// <summary>
/// Returns <c>true</c> if any fields have norms </summary>
public virtual bool HasNorms => hasNorms;
/// <summary>
/// Returns <c>true</c> if any fields have <see cref="DocValues"/> </summary>
public virtual bool HasDocValues => hasDocValues;
/// <summary>
/// Returns the number of fields.
/// <para/>
/// NOTE: This was size() in Lucene.
/// </summary>
public virtual int Count
{
get
{
if (Debugging.AssertsEnabled) Debugging.Assert(byNumber.Count == byName.Count);
return byNumber.Count;
}
}
/// <summary>
/// Returns an iterator over all the fieldinfo objects present,
/// ordered by ascending field number
/// </summary>
// TODO: what happens if in fact a different order is used?
public virtual IEnumerator<FieldInfo> GetEnumerator()
{
return values.GetEnumerator();
}
IEnumerator IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
/// <summary>
/// Return the <see cref="Index.FieldInfo"/> object referenced by the <paramref name="fieldName"/> </summary>
/// <returns> the <see cref="Index.FieldInfo"/> object or <c>null</c> when the given <paramref name="fieldName"/>
/// doesn't exist. </returns>
public virtual FieldInfo FieldInfo(string fieldName)
{
FieldInfo ret;
byName.TryGetValue(fieldName, out ret);
return ret;
}
/// <summary>
/// Return the <see cref="Index.FieldInfo"/> object referenced by the <paramref name="fieldNumber"/>. </summary>
/// <param name="fieldNumber"> field's number. </param>
/// <returns> the <see cref="Index.FieldInfo"/> object or null when the given <paramref name="fieldNumber"/>
/// doesn't exist. </returns>
/// <exception cref="ArgumentException"> if <paramref name="fieldNumber"/> is negative </exception>
public virtual FieldInfo FieldInfo(int fieldNumber)
{
if (fieldNumber < 0)
{
throw new ArgumentException("Illegal field number: " + fieldNumber);
}
Index.FieldInfo ret;
byNumber.TryGetValue(fieldNumber, out ret);
return ret;
}
internal sealed class FieldNumbers
{
private readonly IDictionary<int?, string> numberToName;
private readonly IDictionary<string, int?> nameToNumber;
// We use this to enforce that a given field never
// changes DV type, even across segments / IndexWriter
// sessions:
private readonly IDictionary<string, DocValuesType> docValuesType;
// TODO: we should similarly catch an attempt to turn
// norms back on after they were already ommitted; today
// we silently discard the norm but this is badly trappy
private int lowestUnassignedFieldNumber = -1;
internal FieldNumbers()
{
this.nameToNumber = new Dictionary<string, int?>();
this.numberToName = new Dictionary<int?, string>();
this.docValuesType = new Dictionary<string, DocValuesType>();
}
/// <summary>
/// Returns the global field number for the given field name. If the name
/// does not exist yet it tries to add it with the given preferred field
/// number assigned if possible otherwise the first unassigned field number
/// is used as the field number.
/// </summary>
internal int AddOrGet(string fieldName, int preferredFieldNumber, DocValuesType dvType)
{
lock (this)
{
if (dvType != DocValuesType.NONE)
{
DocValuesType currentDVType;
docValuesType.TryGetValue(fieldName, out currentDVType);
if (currentDVType == DocValuesType.NONE) // default value in .NET (value type 0)
{
docValuesType[fieldName] = dvType;
}
else if (currentDVType != DocValuesType.NONE && currentDVType != dvType)
{
throw new ArgumentException("cannot change DocValues type from " + currentDVType + " to " + dvType + " for field \"" + fieldName + "\"");
}
}
int? fieldNumber;
nameToNumber.TryGetValue(fieldName, out fieldNumber);
if (fieldNumber == null)
{
int? preferredBoxed = preferredFieldNumber;
if (preferredFieldNumber != -1 && !numberToName.ContainsKey(preferredBoxed))
{
// cool - we can use this number globally
fieldNumber = preferredBoxed;
}
else
{
// find a new FieldNumber
while (numberToName.ContainsKey(++lowestUnassignedFieldNumber))
{
// might not be up to date - lets do the work once needed
}
fieldNumber = lowestUnassignedFieldNumber;
}
numberToName[fieldNumber] = fieldName;
nameToNumber[fieldName] = fieldNumber;
}
return (int)fieldNumber;
}
}
// used by assert
internal bool ContainsConsistent(int? number, string name, DocValuesType dvType)
{
lock (this)
{
string numberToNameStr;
int? nameToNumberVal;
DocValuesType docValuesType_E;
numberToName.TryGetValue(number, out numberToNameStr);
nameToNumber.TryGetValue(name, out nameToNumberVal);
docValuesType.TryGetValue(name, out docValuesType_E);
return name.Equals(numberToNameStr, StringComparison.Ordinal)
&& number.Equals(nameToNumber[name]) &&
(dvType == DocValuesType.NONE || docValuesType_E == DocValuesType.NONE || dvType == docValuesType_E);
}
}
/// <summary>
/// Returns <c>true</c> if the <paramref name="fieldName"/> exists in the map and is of the
/// same <paramref name="dvType"/>.
/// </summary>
internal bool Contains(string fieldName, DocValuesType dvType)
{
lock (this)
{
// used by IndexWriter.updateNumericDocValue
if (!nameToNumber.ContainsKey(fieldName))
{
return false;
}
else
{
// only return true if the field has the same dvType as the requested one
DocValuesType dvCand;
docValuesType.TryGetValue(fieldName, out dvCand);
return dvType == dvCand;
}
}
}
internal void Clear()
{
lock (this)
{
numberToName.Clear();
nameToNumber.Clear();
docValuesType.Clear();
}
}
internal void SetDocValuesType(int number, string name, DocValuesType dvType)
{
lock (this)
{
if (Debugging.AssertsEnabled) Debugging.Assert(ContainsConsistent(number, name, dvType));
docValuesType[name] = dvType;
}
}
}
internal sealed class Builder
{
private readonly Dictionary<string, FieldInfo> byName = new Dictionary<string, FieldInfo>();
private readonly FieldNumbers globalFieldNumbers;
internal Builder()
: this(new FieldNumbers())
{
}
/// <summary>
/// Creates a new instance with the given <see cref="FieldNumbers"/>.
/// </summary>
internal Builder(FieldNumbers globalFieldNumbers)
{
if (Debugging.AssertsEnabled) Debugging.Assert(globalFieldNumbers != null);
this.globalFieldNumbers = globalFieldNumbers;
}
public void Add(FieldInfos other)
{
foreach (FieldInfo fieldInfo in other)
{
Add(fieldInfo);
}
}
/// <summary>
/// NOTE: this method does not carry over termVector
/// booleans nor docValuesType; the indexer chain
/// (TermVectorsConsumerPerField, DocFieldProcessor) must
/// set these fields when they succeed in consuming
/// the document
/// </summary>
public FieldInfo AddOrUpdate(string name, IIndexableFieldType fieldType)
{
// TODO: really, indexer shouldn't even call this
// method (it's only called from DocFieldProcessor);
// rather, each component in the chain should update
// what it "owns". EG fieldType.indexOptions() should
// be updated by maybe FreqProxTermsWriterPerField:
return AddOrUpdateInternal(name, -1, fieldType.IsIndexed, false, fieldType.OmitNorms, false, fieldType.IndexOptions, fieldType.DocValueType, DocValuesType.NONE);
}
private FieldInfo AddOrUpdateInternal(string name, int preferredFieldNumber, bool isIndexed, bool storeTermVector, bool omitNorms, bool storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normType)
{
// LUCENENET: Bypass FieldInfo method so we can access the quick boolean check
if (!TryGetFieldInfo(name, out FieldInfo fi) || fi is null)
{
// this field wasn't yet added to this in-RAM
// segment's FieldInfo, so now we get a global
// number for this field. If the field was seen
// before then we'll get the same name and number,
// else we'll allocate a new one:
int fieldNumber = globalFieldNumbers.AddOrGet(name, preferredFieldNumber, docValues);
fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType, null);
if (Debugging.AssertsEnabled)
{
Debugging.Assert(!byName.ContainsKey(fi.Name));
Debugging.Assert(globalFieldNumbers.ContainsConsistent(fi.Number, fi.Name, fi.DocValuesType));
}
byName[fi.Name] = fi;
}
else
{
fi.Update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions);
if (docValues != DocValuesType.NONE)
{
// only pay the synchronization cost if fi does not already have a DVType
bool updateGlobal = !fi.HasDocValues;
fi.DocValuesType = docValues; // this will also perform the consistency check.
if (updateGlobal)
{
// must also update docValuesType map so it's
// aware of this field's DocValueType
globalFieldNumbers.SetDocValuesType(fi.Number, name, docValues);
}
}
if (!fi.OmitsNorms && normType != DocValuesType.NONE)
{
fi.NormType = normType;
}
}
return fi;
}
public FieldInfo Add(FieldInfo fi)
{
// IMPORTANT - reuse the field number if possible for consistent field numbers across segments
return AddOrUpdateInternal(fi.Name, fi.Number, fi.IsIndexed, fi.HasVectors, fi.OmitsNorms, fi.HasPayloads, fi.IndexOptions, fi.DocValuesType, fi.NormType);
}
public bool TryGetFieldInfo(string fieldName, out FieldInfo ret) // LUCENENET specific - changed from FieldInfo to TryGetFieldInfo
{
return byName.TryGetValue(fieldName, out ret);
}
public FieldInfos Finish()
{
return new FieldInfos(byName.Values.ToArray());
}
}
}
}