| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using System; |
| using System.Collections.Generic; |
| using Directory = Lucene.Net.Store.Directory; |
| using IndexInput = Lucene.Net.Store.IndexInput; |
| using IndexOutput = Lucene.Net.Store.IndexOutput; |
| |
| namespace Lucene.Net.Index |
| { |
| |
| |
| /// <summary> Combines multiple files into a single compound file. |
| /// The file format:<br/> |
| /// <list type="bullet"> |
| /// <item>VInt fileCount</item> |
| /// <item>{Directory} |
| /// fileCount entries with the following structure:</item> |
| /// <list type="bullet"> |
| /// <item>long dataOffset</item> |
| /// <item>String fileName</item> |
| /// </list> |
| /// <item>{File Data} |
| /// fileCount entries with the raw data of the corresponding file</item> |
| /// </list> |
| /// |
| /// The fileCount integer indicates how many files are contained in this compound |
| /// file. The {directory} that follows has that many entries. Each directory entry |
| /// contains a long pointer to the start of this file's data section, and a String |
| /// with that file's name. |
| /// </summary> |
| public sealed class CompoundFileWriter : IDisposable |
| { |
| |
| private sealed class FileEntry |
| { |
| /// <summary>source file </summary> |
| internal System.String file; |
| |
| /// <summary>temporary holder for the start of directory entry for this file </summary> |
| internal long directoryOffset; |
| |
| /// <summary>temporary holder for the start of this file's data section </summary> |
| internal long dataOffset; |
| } |
| |
| |
| private readonly Directory directory; |
| private readonly String fileName; |
| private readonly HashSet<string> ids; |
| private readonly LinkedList<FileEntry> entries; |
| private bool merged = false; |
| private readonly SegmentMerger.CheckAbort checkAbort; |
| |
| /// <summary>Create the compound stream in the specified file. The file name is the |
| /// entire name (no extensions are added). |
| /// </summary> |
| /// <throws> NullPointerException if <c>dir</c> or <c>name</c> is null </throws> |
| public CompoundFileWriter(Directory dir, System.String name):this(dir, name, null) |
| { |
| } |
| |
| internal CompoundFileWriter(Directory dir, System.String name, SegmentMerger.CheckAbort checkAbort) |
| { |
| if (dir == null) |
| throw new ArgumentNullException("dir"); |
| if (name == null) |
| throw new ArgumentNullException("name"); |
| this.checkAbort = checkAbort; |
| directory = dir; |
| fileName = name; |
| ids = new HashSet<string>(); |
| entries = new LinkedList<FileEntry>(); |
| } |
| |
| /// <summary>Returns the directory of the compound file. </summary> |
| public Directory Directory |
| { |
| get { return directory; } |
| } |
| |
| /// <summary>Returns the name of the compound file. </summary> |
| public string Name |
| { |
| get { return fileName; } |
| } |
| |
| /// <summary>Add a source stream. <c>file</c> is the string by which the |
| /// sub-stream will be known in the compound stream. |
| /// |
| /// </summary> |
| /// <throws> IllegalStateException if this writer is closed </throws> |
| /// <throws> NullPointerException if <c>file</c> is null </throws> |
| /// <throws> IllegalArgumentException if a file with the same name </throws> |
| /// <summary> has been added already |
| /// </summary> |
| public void AddFile(String file) |
| { |
| if (merged) |
| throw new InvalidOperationException("Can't add extensions after merge has been called"); |
| |
| if (file == null) |
| throw new ArgumentNullException("file"); |
| |
| try |
| { |
| ids.Add(file); |
| } |
| catch (Exception) |
| { |
| throw new ArgumentException("File " + file + " already added"); |
| } |
| |
| var entry = new FileEntry {file = file}; |
| entries.AddLast(entry); |
| } |
| |
| [Obsolete("Use Dispose() instead")] |
| public void Close() |
| { |
| Dispose(); |
| } |
| |
| /// <summary>Merge files with the extensions added up to now. |
| /// All files with these extensions are combined sequentially into the |
| /// compound stream. After successful merge, the source files |
| /// are deleted. |
| /// </summary> |
| /// <throws> IllegalStateException if close() had been called before or </throws> |
| /// <summary> if no file has been added to this object |
| /// </summary> |
| public void Dispose() |
| { |
| // Extract into protected method if class ever becomes unsealed |
| |
| // TODO: Dispose shouldn't throw exceptions! |
| if (merged) |
| throw new SystemException("Merge already performed"); |
| |
| if ((entries.Count == 0)) |
| throw new SystemException("No entries to merge have been defined"); |
| |
| merged = true; |
| |
| // open the compound stream |
| IndexOutput os = null; |
| try |
| { |
| os = directory.CreateOutput(fileName); |
| |
| // Write the number of entries |
| os.WriteVInt(entries.Count); |
| |
| // Write the directory with all offsets at 0. |
| // Remember the positions of directory entries so that we can |
| // adjust the offsets later |
| long totalSize = 0; |
| foreach (FileEntry fe in entries) |
| { |
| fe.directoryOffset = os.FilePointer; |
| os.WriteLong(0); // for now |
| os.WriteString(fe.file); |
| totalSize += directory.FileLength(fe.file); |
| } |
| |
| // Pre-allocate size of file as optimization -- |
| // this can potentially help IO performance as |
| // we write the file and also later during |
| // searching. It also uncovers a disk-full |
| // situation earlier and hopefully without |
| // actually filling disk to 100%: |
| long finalLength = totalSize + os.FilePointer; |
| os.SetLength(finalLength); |
| |
| // Open the files and copy their data into the stream. |
| // Remember the locations of each file's data section. |
| var buffer = new byte[16384]; |
| foreach (FileEntry fe in entries) |
| { |
| fe.dataOffset = os.FilePointer; |
| CopyFile(fe, os, buffer); |
| } |
| |
| // Write the data offsets into the directory of the compound stream |
| foreach (FileEntry fe in entries) |
| { |
| os.Seek(fe.directoryOffset); |
| os.WriteLong(fe.dataOffset); |
| } |
| |
| System.Diagnostics.Debug.Assert(finalLength == os.Length); |
| |
| // Close the output stream. Set the os to null before trying to |
| // close so that if an exception occurs during the close, the |
| // finally clause below will not attempt to close the stream |
| // the second time. |
| IndexOutput tmp = os; |
| os = null; |
| tmp.Close(); |
| } |
| finally |
| { |
| if (os != null) |
| try |
| { |
| os.Close(); |
| } |
| catch (System.IO.IOException) |
| { |
| } |
| } |
| } |
| |
| |
| /// <summary>Copy the contents of the file with specified extension into the |
| /// provided output stream. Use the provided buffer for moving data |
| /// to reduce memory allocation. |
| /// </summary> |
| private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer) |
| { |
| IndexInput isRenamed = null; |
| try |
| { |
| long startPtr = os.FilePointer; |
| |
| isRenamed = directory.OpenInput(source.file); |
| long length = isRenamed.Length(); |
| long remainder = length; |
| int chunk = buffer.Length; |
| |
| while (remainder > 0) |
| { |
| var len = (int) Math.Min(chunk, remainder); |
| isRenamed.ReadBytes(buffer, 0, len, false); |
| os.WriteBytes(buffer, len); |
| remainder -= len; |
| if (checkAbort != null) |
| // Roughly every 2 MB we will check if |
| // it's time to abort |
| checkAbort.Work(80); |
| } |
| |
| // Verify that remainder is 0 |
| if (remainder != 0) |
| throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")"); |
| |
| // Verify that the output length diff is equal to original file |
| long endPtr = os.FilePointer; |
| long diff = endPtr - startPtr; |
| if (diff != length) |
| throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length); |
| } |
| finally |
| { |
| if (isRenamed != null) |
| isRenamed.Close(); |
| } |
| } |
| } |
| } |