| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using System; |
| using Lucene.Net.Documents; |
| using Lucene.Net.Support; |
| using Document = Lucene.Net.Documents.Document; |
| using ArrayUtil = Lucene.Net.Util.ArrayUtil; |
| |
| namespace Lucene.Net.Index |
| { |
| |
| /// <summary> Gathers all Fieldables for a document under the same |
| /// name, updates FieldInfos, and calls per-field consumers |
| /// to process field by field. |
| /// |
| /// Currently, only a single thread visits the fields, |
| /// sequentially, for processing. |
| /// </summary> |
| |
| sealed class DocFieldProcessorPerThread:DocConsumerPerThread |
| { |
private void InitBlock()
{
    // Called once from the constructor (SharpenNet initializer pattern):
    // seeds the PerDoc free list with a single slot; GetPerDoc grows it on demand.
    docFreeList = new PerDoc[1];
}
| |
// NOTE(review): docBoost is never read or written within this class —
// presumably set/consumed elsewhere; confirm against callers before relying on it.
internal float docBoost;
// Monotonic per-document generation counter; bumped once per ProcessDocument
// call so each PerField can detect its first appearance in the current doc.
internal int fieldGen;
internal DocFieldProcessor docFieldProcessor;
// Shared FieldInfos, updated as new fields (or changed field flags) are absorbed.
internal FieldInfos fieldInfos;
// This thread's view of the downstream field consumer chain.
internal DocFieldConsumerPerThread consumer;

// Holds all fields seen in current doc
internal DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
internal int fieldCount;

// Hash table for all fields ever seen
internal DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
// Always fieldHash.Length - 1; the table size is kept a power of two (see Rehash).
internal int hashMask = 1;
internal int totalFieldCount;

// This thread's stored-fields writer.
internal StoredFieldsWriterPerThread fieldsWriter;

// Shared per-document state (doc, docID, infoStream) owned by DocumentsWriter.
internal DocumentsWriter.DocState docState;
| |
/// <summary>Creates the per-thread processor, borrowing the thread's DocState
/// and fanning out per-thread instances of the shared consumer chain and
/// stored-fields writer.</summary>
public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor)
{
    InitBlock();
    this.docState = threadState.docState;
    this.docFieldProcessor = docFieldProcessor;
    this.fieldInfos = docFieldProcessor.fieldInfos;
    // AddThread hands back this thread's private view of the shared writers.
    this.consumer = docFieldProcessor.consumer.AddThread(this);
    fieldsWriter = docFieldProcessor.fieldsWriter.AddThread(docState);
}
| |
/// <summary>Aborts every per-field consumer chained in the field hash table,
/// then aborts the stored-fields writer and the downstream consumer.</summary>
public override void Abort()
{
    foreach (DocFieldProcessorPerField bucket in fieldHash)
    {
        DocFieldProcessorPerField current = bucket;
        while (current != null)
        {
            // Capture the link first: Abort may leave the node unusable.
            DocFieldProcessorPerField following = current.next;
            current.Abort();
            current = following;
        }
    }
    fieldsWriter.Abort();
    consumer.Abort();
}
| |
/// <summary>Collects the consumer of every field ever seen by this thread
/// (one entry per distinct field, gathered from the hash-table chains).</summary>
public System.Collections.Generic.ICollection<DocFieldConsumerPerField> Fields()
{
    System.Collections.Generic.ICollection<DocFieldConsumerPerField> seen =
        new System.Collections.Generic.HashSet<DocFieldConsumerPerField>();
    foreach (DocFieldProcessorPerField bucket in fieldHash)
    {
        for (DocFieldProcessorPerField current = bucket; current != null; current = current.next)
        {
            seen.Add(current.consumer);
        }
    }
    // Sanity check: the chains and the running counter must agree.
    System.Diagnostics.Debug.Assert(seen.Count == totalFieldCount);
    return seen;
}
| |
| /// <summary>If there are fields we've seen but did not see again |
| /// in the last run, then free them up. |
| /// </summary> |
| |
/// <summary>If there are fields we've seen but did not see again
/// in the last run, then free them up: any entry whose lastGen is still -1
/// was not touched since the previous flush and is unlinked from the hash
/// table; surviving entries get lastGen reset for the next round.</summary>
internal void TrimFields(SegmentWriteState state)
{
    for (int bucket = 0; bucket < fieldHash.Length; bucket++)
    {
        DocFieldProcessorPerField current = fieldHash[bucket];
        DocFieldProcessorPerField previous = null;

        while (current != null)
        {
            DocFieldProcessorPerField following = current.next;

            if (current.lastGen != -1)
            {
                // Field was seen this round; reset its marker for the next sweep.
                current.lastGen = -1;
                previous = current;
            }
            else
            {
                // Not seen since the previous flush — unlink it from the chain.
                if (previous == null)
                    fieldHash[bucket] = following;
                else
                    previous.next = following;

                if (state.docWriter.infoStream != null)
                    state.docWriter.infoStream.WriteLine(" purge field=" + current.fieldInfo.name);

                totalFieldCount--;
            }

            current = following;
        }
    }
}
| |
/// <summary>Doubles the field hash table and re-buckets every chained entry
/// under the widened mask.  Entries are head-inserted into their destination
/// chains, which reverses chain order; chain order carries no meaning.</summary>
private void Rehash()
{
    int newSize = fieldHash.Length * 2;
    System.Diagnostics.Debug.Assert(newSize > fieldHash.Length);

    int newMask = newSize - 1;
    DocFieldProcessorPerField[] newTable = new DocFieldProcessorPerField[newSize];

    foreach (DocFieldProcessorPerField bucket in fieldHash)
    {
        DocFieldProcessorPerField current = bucket;
        while (current != null)
        {
            DocFieldProcessorPerField following = current.next;
            int slot = current.fieldInfo.name.GetHashCode() & newMask;
            current.next = newTable[slot];
            newTable[slot] = current;
            current = following;
        }
    }

    fieldHash = newTable;
    hashMask = newMask;
}
| |
/// <summary>Processes docState.doc: absorbs new fields (and changed flags on
/// known fields) into FieldInfos, groups the document's Fieldables by field
/// name, hands each group to its per-field consumer in sorted field-name
/// order, and feeds stored fields to the stored-fields writer.  Returns the
/// pending output as a DocWriter — the stored-fields half, the consumer half,
/// or a PerDoc combining both — or null if neither side produced output.</summary>
public override DocumentsWriter.DocWriter ProcessDocument()
{

    consumer.StartDocument();
    fieldsWriter.StartDocument();

    Document doc = docState.doc;

    System.Diagnostics.Debug.Assert(docFieldProcessor.docWriter.writer.TestPoint("DocumentsWriter.ThreadState.init start"));

    fieldCount = 0;

    // Bump the generation so we can tell, below, which fields are being
    // seen for the first time in THIS document.
    int thisFieldGen = fieldGen++;

    System.Collections.Generic.IList<IFieldable> docFields = doc.GetFields();
    int numDocFields = docFields.Count;

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):

    for (int i = 0; i < numDocFields; i++)
    {
        IFieldable field = docFields[i];
        System.String fieldName = field.Name;

        // Make sure we have a PerField allocated
        int hashPos = fieldName.GetHashCode() & hashMask;
        DocFieldProcessorPerField fp = fieldHash[hashPos];
        while (fp != null && !fp.fieldInfo.name.Equals(fieldName))
            fp = fp.next;

        if (fp == null)
        {

            // TODO FI: we need to genericize the "flags" that a
            // field holds, and, how these flags are merged; it
            // needs to be more "pluggable" such that if I want
            // to have a new "thing" my Fields can do, I can
            // easily add it
            FieldInfo fi = fieldInfos.Add(fieldName, field.IsIndexed, field.IsTermVectorStored,
                field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
                field.OmitNorms, false, field.OmitTermFreqAndPositions);

            // Head-insert the new PerField into its hash chain.
            fp = new DocFieldProcessorPerField(this, fi);
            fp.next = fieldHash[hashPos];
            fieldHash[hashPos] = fp;
            totalFieldCount++;

            // Keep the table's load factor under 50%.
            if (totalFieldCount >= fieldHash.Length / 2)
                Rehash();
        }
        else
        {
            // Known field: merge any flag changes into the shared FieldInfo.
            fp.fieldInfo.Update(field.IsIndexed, field.IsTermVectorStored,
                field.IsStorePositionWithTermVector, field.IsStoreOffsetWithTermVector,
                field.OmitNorms, false, field.OmitTermFreqAndPositions);
        }

        if (thisFieldGen != fp.lastGen)
        {

            // First time we're seeing this field for this doc
            fp.fieldCount = 0;

            if (fieldCount == fields.Length)
            {
                // Grow the list of distinct fields seen in this doc.
                int newSize = fields.Length * 2;
                DocFieldProcessorPerField[] newArray = new DocFieldProcessorPerField[newSize];
                Array.Copy(fields, 0, newArray, 0, fieldCount);
                fields = newArray;
            }

            fields[fieldCount++] = fp;
            fp.lastGen = thisFieldGen;
        }

        if (fp.fieldCount == fp.fields.Length)
        {
            // Same field name occurs multiple times in this doc: grow its buffer.
            IFieldable[] newArray = new IFieldable[fp.fields.Length * 2];
            Array.Copy(fp.fields, 0, newArray, 0, fp.fieldCount);
            fp.fields = newArray;
        }

        fp.fields[fp.fieldCount++] = field;
        if (field.IsStored)
        {
            fieldsWriter.AddField(field, fp.fieldInfo);
        }
    }

    // If we are writing vectors then we must visit
    // fields in sorted order so they are written in
    // sorted order.  TODO: we actually only need to
    // sort the subset of fields that have vectors
    // enabled; we could save [small amount of] CPU
    // here.
    QuickSort(fields, 0, fieldCount - 1);

    for (int i = 0; i < fieldCount; i++)
        fields[i].consumer.ProcessFields(fields[i].fields, fields[i].fieldCount);

    if (docState.maxTermPrefix != null && docState.infoStream != null)
    {
        docState.infoStream.WriteLine("WARNING: document contains at least one immense term (longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
        docState.maxTermPrefix = null;
    }

    // Return whichever side produced output; combine into a PerDoc when both did.
    DocumentsWriter.DocWriter one = fieldsWriter.FinishDocument();
    DocumentsWriter.DocWriter two = consumer.FinishDocument();
    if (one == null)
    {
        return two;
    }
    else if (two == null)
    {
        return one;
    }
    else
    {
        PerDoc both = GetPerDoc();
        both.docID = docState.docID;
        System.Diagnostics.Debug.Assert(one.docID == docState.docID);
        System.Diagnostics.Debug.Assert(two.docID == docState.docID);
        both.one = one;
        both.two = two;
        return both;
    }
}
| |
/// <summary>Sorts array[lo..hi] (inclusive) in ascending order of field name
/// using ordinal string comparison.  Median-of-three quicksort; called from
/// ProcessDocument so term-vector fields are visited in sorted order.</summary>
/// <param name="array">the per-field entries, sorted in place</param>
/// <param name="lo">index of the first element of the range</param>
/// <param name="hi">index of the last element of the range</param>
internal void QuickSort(DocFieldProcessorPerField[] array, int lo, int hi)
{
    // Zero- or one-element range: nothing to do.
    if (lo >= hi)
        return;
    else if (hi == 1 + lo)
    {
        // Exactly two elements: swap if out of order.
        if (String.CompareOrdinal(array[lo].fieldInfo.name, array[hi].fieldInfo.name) > 0)
        {
            DocFieldProcessorPerField tmp = array[lo];
            array[lo] = array[hi];
            array[hi] = tmp;
        }
        return;
    }

    // Overflow-safe midpoint.  The original Number.URShift(lo + hi, 1) can
    // overflow in lo + hi for very large indices (the classic midpoint bug);
    // lo + (hi - lo) / 2 yields the identical value without overflow.
    int mid = lo + ((hi - lo) / 2);

    // Median-of-three: order array[lo], array[mid], array[hi] so array[mid]
    // holds the median, which is then used as the partition pivot.
    if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
    {
        DocFieldProcessorPerField tmp = array[lo];
        array[lo] = array[mid];
        array[mid] = tmp;
    }

    if (String.CompareOrdinal(array[mid].fieldInfo.name, array[hi].fieldInfo.name) > 0)
    {
        DocFieldProcessorPerField tmp = array[mid];
        array[mid] = array[hi];
        array[hi] = tmp;

        if (String.CompareOrdinal(array[lo].fieldInfo.name, array[mid].fieldInfo.name) > 0)
        {
            DocFieldProcessorPerField tmp2 = array[lo];
            array[lo] = array[mid];
            array[mid] = tmp2;
        }
    }

    int left = lo + 1;
    int right = hi - 1;

    // After median-of-three, a three-element range is already sorted.
    if (left >= right)
        return;

    DocFieldProcessorPerField partition = array[mid];

    // Partition the interior around the pivot value.
    for (; ; )
    {
        while (String.CompareOrdinal(array[right].fieldInfo.name, partition.fieldInfo.name) > 0)
            --right;

        while (left < right && String.CompareOrdinal(array[left].fieldInfo.name, partition.fieldInfo.name) <= 0)
            ++left;

        if (left < right)
        {
            DocFieldProcessorPerField tmp = array[left];
            array[left] = array[right];
            array[right] = tmp;
            --right;
        }
        else
        {
            break;
        }
    }

    // Recurse into both halves (not a stable sort, which is fine here:
    // field names within a doc group are distinct).
    QuickSort(array, lo, left);
    QuickSort(array, left + 1, hi);
}
| |
// Free list of recycled PerDoc instances (filled by FreePerDoc, drained and
// grown by GetPerDoc); initialized to one slot in InitBlock.
internal PerDoc[] docFreeList;
// Number of live entries at the front of docFreeList.
internal int freeCount;
// Total PerDocs ever allocated; bounds the free-list capacity GetPerDoc reserves.
internal int allocCount;
| |
/// <summary>Hands out a PerDoc, preferring a recycled instance from the free
/// list; otherwise allocates a new one, pre-growing the free list so that
/// FreePerDoc can always recycle every outstanding instance.</summary>
internal PerDoc GetPerDoc()
{
    lock (this)
    {
        // Fast path: reuse a recycled instance when one is available.
        if (freeCount != 0)
        {
            return docFreeList[--freeCount];
        }

        allocCount++;
        if (allocCount > docFreeList.Length)
        {
            // Grow our free list up front to make sure we have
            // enough space to recycle all outstanding PerDoc
            // instances
            System.Diagnostics.Debug.Assert(allocCount == 1 + docFreeList.Length);
            docFreeList = new PerDoc[ArrayUtil.GetNextSize(allocCount)];
        }
        return new PerDoc(this);
    }
}
| |
/// <summary>Returns a PerDoc to the free list for reuse.  GetPerDoc pre-grows
/// docFreeList to cover all outstanding instances, so there is always room.</summary>
internal void FreePerDoc(PerDoc perDoc)
{
    lock (this)
    {
        System.Diagnostics.Debug.Assert(freeCount < docFreeList.Length);
        docFreeList[freeCount++] = perDoc;
    }
}
| |
/// <summary>Combines two pending DocWriter halves for one document — the
/// stored-fields output and the consumer output — so ProcessDocument can
/// return a single writer.  Instances are pooled by the enclosing thread.</summary>
internal class PerDoc : DocumentsWriter.DocWriter
{
    private DocFieldProcessorPerThread enclosingInstance;

    public PerDoc(DocFieldProcessorPerThread enclosingInstance)
    {
        this.enclosingInstance = enclosingInstance;
    }

    public DocFieldProcessorPerThread Enclosing_Instance
    {
        get { return enclosingInstance; }
    }

    // The two halves set by ProcessDocument; both are non-null when this
    // wrapper is used (a lone half is returned directly instead).
    internal DocumentsWriter.DocWriter one;
    internal DocumentsWriter.DocWriter two;

    public override long SizeInBytes()
    {
        return one.SizeInBytes() + two.SizeInBytes();
    }

    public override void Finish()
    {
        // Finish both halves even if the first throws, and always return
        // this instance to the owner's free list.
        try
        {
            try
            {
                one.Finish();
            }
            finally
            {
                two.Finish();
            }
        }
        finally
        {
            Enclosing_Instance.FreePerDoc(this);
        }
    }

    public override void Abort()
    {
        // Same guarantees as Finish: abort both halves, then recycle.
        try
        {
            try
            {
                one.Abort();
            }
            finally
            {
                two.Abort();
            }
        }
        finally
        {
            Enclosing_Instance.FreePerDoc(this);
        }
    }
}
| } |
| } |