/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| using System; | |
| using Directory = Lucene.Net.Store.Directory; | |
| using IndexOutput = Lucene.Net.Store.IndexOutput; | |
| using StringHelper = Lucene.Net.Util.StringHelper; | |
| namespace Lucene.Net.Index | |
| { | |
/// <summary> Writes the term vectors of a segment's documents to three outputs created in
/// the supplied directory: an index output (tvx), a documents output (tvd) and a
/// fields output (tvf). Each output starts with
/// <c>TermVectorsReader.FORMAT_VERSION</c>.
/// </summary>
public sealed class TermVectorsWriter
{
    // tvx: one long per document, holding that document's start position in tvd.
    // tvd: per document, the field count, the field numbers and the delta-encoded tvf pointers.
    // tvf: per field, the terms, frequencies and optional positions / offsets.
    private readonly IndexOutput tvx = null, tvd = null, tvf = null;
    private readonly FieldInfos fieldInfos;

    /// <summary> Creates the three term vector outputs for <paramref name="segment"/> and writes
    /// the format version to each of them.
    /// </summary>
    /// <param name="directory">directory in which the outputs are created
    /// </param>
    /// <param name="segment">segment name; the output names are this name plus "." plus the
    /// extension taken from <c>IndexFileNames</c>
    /// </param>
    /// <param name="fieldInfos">used to translate field names into field numbers
    /// </param>
    /// <throws> IOException </throws>
    public TermVectorsWriter(Directory directory, System.String segment, FieldInfos fieldInfos)
    {
        this.fieldInfos = fieldInfos;

        bool success = false;
        try
        {
            // Open files for TermVector storage
            tvx = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION);
            tvx.WriteInt(TermVectorsReader.FORMAT_VERSION);
            tvd = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION);
            tvd.WriteInt(TermVectorsReader.FORMAT_VERSION);
            tvf = directory.CreateOutput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION);
            tvf.WriteInt(TermVectorsReader.FORMAT_VERSION);
            success = true;
        }
        finally
        {
            if (!success)
            {
                // Construction failed part-way: release whatever was already opened.
                // Failures while closing are dropped on purpose so that the exception
                // which caused the construction to fail is the one the caller sees.
                CloseKeepingFirst(tvx, null);
                CloseKeepingFirst(tvd, null);
                CloseKeepingFirst(tvf, null);
            }
        }
    }

    /// <summary> Add a complete document specified by all its term vectors. If the document has
    /// no term vectors (<paramref name="vectors"/> is null), its tvx entry is still written
    /// and a field count of 0 is written to tvd.
    /// </summary>
    /// <param name="vectors">the term vectors of the document, one per field, or null
    /// </param>
    /// <throws> IOException </throws>
    public void AddAllDocVectors(TermFreqVector[] vectors)
    {
        // Every document gets an index entry: the position of its record in tvd.
        tvx.WriteLong(tvd.GetFilePointer());

        if (vectors == null)
        {
            tvd.WriteVInt(0);
            return;
        }

        int numFields = vectors.Length;
        tvd.WriteVInt(numFields);

        long[] fieldPointers = new long[numFields];
        for (int i = 0; i < numFields; i++)
        {
            // Remember where this field's data starts in tvf before writing it.
            fieldPointers[i] = tvf.GetFilePointer();

            // 1st pass: write field numbers to tvd
            int fieldNumber = fieldInfos.FieldNumber(vectors[i].GetField());
            tvd.WriteVInt(fieldNumber);

            WriteField(vectors[i]);
        }

        // 2nd pass: write field pointers to tvd, each as the difference to the previous one
        long lastFieldPointer = 0;
        for (int i = 0; i < numFields; i++)
        {
            long fieldPointer = fieldPointers[i];
            tvd.WriteVLong(fieldPointer - lastFieldPointer);
            lastFieldPointer = fieldPointer;
        }
    }

    /// <summary>Close all streams. An effort is made to close every stream even if closing
    /// an earlier one fails; the first IOException encountered is then reported, wrapped in
    /// a new IOException that carries its message and keeps it as the inner exception.
    /// </summary>
    /// <throws> IOException </throws>
    internal void Close()
    {
        System.IO.IOException keep = null;
        keep = CloseKeepingFirst(tvx, keep);
        keep = CloseKeepingFirst(tvd, keep);
        keep = CloseKeepingFirst(tvf, keep);

        if (keep != null)
        {
            // Wrap rather than rethrow the stored object: the original message, type and
            // stack trace all stay available through InnerException.
            throw new System.IO.IOException(keep.Message, keep);
        }
    }

    /// <summary>Closes <paramref name="output"/> if it is not null. Returns
    /// <paramref name="first"/> when it is already set, otherwise the IOException raised
    /// by this close (or null if the close succeeded).
    /// </summary>
    private static System.IO.IOException CloseKeepingFirst(IndexOutput output, System.IO.IOException first)
    {
        if (output != null)
        {
            try
            {
                output.Close();
            }
            catch (System.IO.IOException e)
            {
                if (first == null)
                {
                    first = e;
                }
            }
        }
        return first;
    }

    /// <summary>Writes one field's term vector to tvf: the term count, the flag bits, and
    /// for every term its prefix-compressed text, its frequency and (when flagged) its
    /// positions and offsets.
    /// </summary>
    private void WriteField(TermFreqVector vector)
    {
        int numTerms = vector.Size();
        tvf.WriteVInt(numTerms);

        TermPositionVector tpVector = null;
        bool storePositions = false;
        bool storeOffsets = false;
        byte bits = 0;
        if (vector is TermPositionVector)
        {
            // May have positions & offsets; the first term decides for the whole field.
            tpVector = (TermPositionVector) vector;
            storePositions = tpVector.Size() > 0 && tpVector.GetTermPositions(0) != null;
            storeOffsets = tpVector.Size() > 0 && tpVector.GetOffsets(0) != null;
            bits = (byte) ((storePositions ? TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR : (byte) 0) + (storeOffsets ? TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR : (byte) 0));
        }
        tvf.WriteVInt(bits);

        System.String[] terms = vector.GetTerms();
        int[] freqs = vector.GetTermFrequencies();

        System.String lastTermText = "";
        for (int j = 0; j < numTerms; j++)
        {
            System.String termText = terms[j];
            int start = StringHelper.StringDifference(lastTermText, termText);
            int length = termText.Length - start;
            tvf.WriteVInt(start); // write shared prefix length
            tvf.WriteVInt(length); // write delta length
            tvf.WriteChars(termText, start, length); // write delta chars
            lastTermText = termText;

            int termFreq = freqs[j];
            tvf.WriteVInt(termFreq);

            if (storePositions)
            {
                WritePositions(tpVector.GetTermPositions(j), termFreq);
            }

            if (storeOffsets)
            {
                WriteOffsets(tpVector.GetOffsets(j), termFreq);
            }
        }
    }

    /// <summary>Writes the positions of one term to tvf, each as the difference to the
    /// previous position (the first relative to 0).
    /// </summary>
    private void WritePositions(int[] positions, int termFreq)
    {
        if (positions == null)
            throw new System.SystemException("Trying to write positions that are null!");
        System.Diagnostics.Debug.Assert(positions.Length == termFreq);

        // use delta encoding for positions
        int lastPosition = 0;
        for (int k = 0; k < positions.Length; k++)
        {
            int position = positions[k];
            tvf.WriteVInt(position - lastPosition);
            lastPosition = position;
        }
    }

    /// <summary>Writes the offsets of one term to tvf: for each occurrence the start offset
    /// relative to the previous end offset (the first relative to 0), followed by the
    /// length (end offset minus start offset).
    /// </summary>
    private void WriteOffsets(TermVectorOffsetInfo[] offsets, int termFreq)
    {
        if (offsets == null)
            throw new System.SystemException("Trying to write offsets that are null!");
        System.Diagnostics.Debug.Assert(offsets.Length == termFreq);

        // use delta encoding for offsets
        int lastEndOffset = 0;
        for (int k = 0; k < offsets.Length; k++)
        {
            int startOffset = offsets[k].GetStartOffset();
            int endOffset = offsets[k].GetEndOffset();
            tvf.WriteVInt(startOffset - lastEndOffset);
            tvf.WriteVInt(endOffset - startOffset);
            lastEndOffset = endOffset;
        }
    }
}
| } |