using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Lucene.Net.Index
{
using ArrayUtil = Lucene.Net.Util.ArrayUtil;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Codec = Lucene.Net.Codecs.Codec;
using Counter = Lucene.Net.Util.Counter;
using FieldInfosWriter = Lucene.Net.Codecs.FieldInfosWriter;
using IOContext = Lucene.Net.Store.IOContext;
/// <summary>
/// This is a DocConsumer that gathers all fields under the
/// same name, and calls per-field consumers to process field
/// by field. This class doesn't do any "real" work
/// of its own: it just forwards the fields to a
/// DocFieldConsumer.
/// </summary>
internal sealed class DocFieldProcessor : DocConsumer
{
internal readonly DocFieldConsumer Consumer;
internal readonly StoredFieldsConsumer StoredConsumer;
internal readonly Codec Codec;
// Holds all fields seen in current doc
internal DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
internal int FieldCount;
// Hash table for all fields ever seen
internal DocFieldProcessorPerField[] FieldHash = new DocFieldProcessorPerField[2];
internal int HashMask = 1;
internal int TotalFieldCount;
internal int FieldGen;
internal readonly DocumentsWriterPerThread.DocState DocState;
internal readonly Counter BytesUsed;
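// The constructor wires in per-thread state (doc state, codec, memory
// counter) from the owning DocumentsWriterPerThread and the two
// downstream consumers this processor forwards fields to.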
public DocFieldProcessor(DocumentsWriterPerThread docWriter, DocFieldConsumer consumer, StoredFieldsConsumer storedConsumer)
{
this.DocState = docWriter.docState;
this.Codec = docWriter.Codec;
this.BytesUsed = docWriter.bytesUsed;
this.Consumer = consumer;
this.StoredConsumer = storedConsumer;
}
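// Flush for a segment: gather every per-field consumer keyed by field
// name, flush stored fields and the consumer chain, then write the
// FieldInfos last so consumers may still alter them (see comment below).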
public override void Flush(SegmentWriteState state)
{
IDictionary<string, DocFieldConsumerPerField> childFields = new Dictionary<string, DocFieldConsumerPerField>();
ICollection<DocFieldConsumerPerField> fields = Fields();
foreach (DocFieldConsumerPerField f in fields)
{
childFields[f.FieldInfo.Name] = f;
}
Debug.Assert(fields.Count == TotalFieldCount);
StoredConsumer.Flush(state);
Consumer.Flush(childFields, state);
// Important to save after asking consumer to flush so
// consumer can alter the FieldInfo* if necessary. EG,
// FreqProxTermsWriter does this with
// FieldInfo.storePayload.
FieldInfosWriter infosWriter = Codec.FieldInfosFormat().FieldInfosWriter;
infosWriter.Write(state.Directory, state.SegmentInfo.Name, "", state.FieldInfos, IOContext.DEFAULT);
}
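// Abort the segment: abort every per-field consumer in every hash chain,
// then the stored-fields consumer and the consumer chain. The first
// exception encountered is remembered and rethrown at the end so that
// all consumers still get a chance to clean up.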
public override void Abort()
{
Exception th = null;
foreach (DocFieldProcessorPerField field in FieldHash)
{
DocFieldProcessorPerField fieldNext = field;
while (fieldNext != null)
{
DocFieldProcessorPerField next = fieldNext.Next;
try
{
fieldNext.Abort();
}
catch (Exception t)
{
if (th == null)
{
th = t;
}
}
fieldNext = next;
}
}
try
{
StoredConsumer.Abort();
}
catch (Exception t)
{
if (th == null)
{
th = t;
}
}
try
{
Consumer.Abort();
}
catch (Exception t)
{
if (th == null)
{
th = t;
}
}
// If any error occurred, throw the first one.
if (th != null)
{
if (th is Exception)
{
throw (Exception)th;
}
// defensive code - we should not hit unchecked exceptions
throw new Exception(th.Message, th);
}
}
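// Collects the distinct per-field consumers seen so far by walking every
// chain of the field hash table.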
public ICollection<DocFieldConsumerPerField> Fields()
{
ICollection<DocFieldConsumerPerField> fields = new HashSet<DocFieldConsumerPerField>();
for (int i = 0; i < FieldHash.Length; i++)
{
DocFieldProcessorPerField field = FieldHash[i];
while (field != null)
{
fields.Add(field.Consumer);
field = field.Next;
}
}
Debug.Assert(fields.Count == TotalFieldCount);
return fields;
}
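// Doubles the field hash table and relinks every chained entry under the
// new mask; triggered once the distinct field count reaches half the
// table size.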
private void Rehash()
{
int newHashSize = (FieldHash.Length * 2);
Debug.Assert(newHashSize > FieldHash.Length);
DocFieldProcessorPerField[] newHashArray = new DocFieldProcessorPerField[newHashSize];
// Rehash
int newHashMask = newHashSize - 1;
for (int j = 0; j < FieldHash.Length; j++)
{
DocFieldProcessorPerField fp0 = FieldHash[j];
while (fp0 != null)
{
int hashPos2 = fp0.FieldInfo.Name.GetHashCode() & newHashMask;
DocFieldProcessorPerField nextFP0 = fp0.Next;
fp0.Next = newHashArray[hashPos2];
newHashArray[hashPos2] = fp0;
fp0 = nextFP0;
}
}
FieldHash = newHashArray;
HashMask = newHashMask;
}
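// Per-document entry point: for each field in the document, find or
// create its DocFieldProcessorPerField via the name-hashed chain table,
// buffer the field occurrence, and forward it to the stored-fields
// consumer. Afterwards the fields seen in this doc are sorted by name
// and handed to their consumers.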
public override void ProcessDocument(FieldInfos.Builder fieldInfos)
{
Consumer.StartDocument();
StoredConsumer.StartDocument();
FieldCount = 0;
int thisFieldGen = FieldGen++;
// Absorb any new fields first seen in this document.
// Also absorb any changes to fields we had already
// seen before (eg suddenly turning on norms or
// vectors, etc.):
foreach (IndexableField field in DocState.Doc)
{
string fieldName = field.Name();
// Make sure we have a PerField allocated
int hashPos = fieldName.GetHashCode() & HashMask;
DocFieldProcessorPerField fp = FieldHash[hashPos];
while (fp != null && !fp.FieldInfo.Name.Equals(fieldName))
{
fp = fp.Next;
}
if (fp == null)
{
// TODO FI: we need to genericize the "flags" that a
// field holds, and, how these flags are merged; it
// needs to be more "pluggable" such that if I want
// to have a new "thing" my Fields can do, I can
// easily add it
FieldInfo fi = fieldInfos.AddOrUpdate(fieldName, field.FieldType());
fp = new DocFieldProcessorPerField(this, fi);
fp.Next = FieldHash[hashPos];
FieldHash[hashPos] = fp;
TotalFieldCount++;
if (TotalFieldCount >= FieldHash.Length / 2)
{
Rehash();
}
}
else
{
// need to addOrUpdate so that FieldInfos can update globalFieldNumbers
// with the correct DocValue type (LUCENE-5192)
FieldInfo fi = fieldInfos.AddOrUpdate(fieldName, field.FieldType());
Debug.Assert(fi == fp.FieldInfo, "should only have updated an existing FieldInfo instance");
}
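// The per-document generation stamp detects the first occurrence of a
// field in this doc without having to reset per-field state up front.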
if (thisFieldGen != fp.LastGen)
{
// First time we're seeing this field for this doc
fp.FieldCount = 0;
if (FieldCount == fields.Length)
{
int newSize = fields.Length * 2;
DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
Array.Copy(fields, 0, newArray, 0, FieldCount);
fields = newArray;
}
fields[FieldCount++] = fp;
fp.LastGen = thisFieldGen;
}
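// Buffer this occurrence for inverted indexing and hand it to the
// stored-fields consumer immediately.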
fp.AddField(field);
StoredConsumer.AddField(DocState.DocID, field, fp.FieldInfo);
}
// If we are writing vectors then we must visit
// fields in sorted order so they are written in
// sorted order. TODO: we actually only need to
// sort the subset of fields that have vectors
// enabled; we could save [small amount of] CPU
// here.
ArrayUtil.IntroSort(fields, 0, FieldCount, fieldsComp);
for (int i = 0; i < FieldCount; i++)
{
DocFieldProcessorPerField perField = fields[i];
perField.Consumer.ProcessFields(perField.Fields, perField.FieldCount);
}
}
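// Orders per-field processors by field name so vectors (and other
// per-field output) are written in sorted field order.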
private static readonly IComparer<DocFieldProcessorPerField> fieldsComp = new ComparatorAnonymousInnerClassHelper();
private class ComparatorAnonymousInnerClassHelper : IComparer<DocFieldProcessorPerField>
{
public ComparatorAnonymousInnerClassHelper()
{
}
public virtual int Compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2)
{
return o1.FieldInfo.Name.CompareTo(o2.FieldInfo.Name);
}
}
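// Finish the document on both consumers; try/finally ensures the field
// consumer chain is finished even if the stored-fields consumer throws.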
public override void FinishDocument()
{
try
{
StoredConsumer.FinishDocument();
}
finally
{
Consumer.FinishDocument();
}
}
}
}