| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.backward_codecs.lucene50; |
| |
| import java.io.IOException; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.CompoundDirectory; |
| import org.apache.lucene.codecs.CompoundFormat; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentInfo; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexOutput; |
| |
| /** |
| * Lucene 5.0 compound file format |
| * |
| * <p>Files: |
| * |
| * <ul> |
| * <li><code>.cfs</code>: An optional "virtual" file consisting of all the other index files for |
| * systems that frequently run out of file handles. |
| * <li><code>.cfe</code>: The "virtual" compound file's entry table holding all entries in the |
| * corresponding .cfs file. |
| * </ul> |
| * |
| * <p>Description: |
| * |
| * <ul> |
| * <li>Compound (.cfs) --> Header, FileData <sup>FileCount</sup>, Footer |
| * <li>Compound Entry Table (.cfe) --> Header, FileCount, <FileName, DataOffset, |
| * DataLength> <sup>FileCount</sup> |
| * <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader} |
| * <li>FileCount --> {@link DataOutput#writeVInt VInt} |
| * <li>DataOffset,DataLength,Checksum --> {@link DataOutput#writeLong UInt64} |
| * <li>FileName --> {@link DataOutput#writeString String} |
| * <li>FileData --> raw file data |
| * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter} |
| * </ul> |
| * |
| * <p>Notes: |
| * |
| * <ul> |
| * <li>FileCount indicates how many files are contained in this compound file. The entry table |
| * that follows has that many entries. |
| * <li>Each directory entry contains a long pointer to the start of this file's data section, the |
| * files length, and a String with that file's name. |
| * </ul> |
| */ |
| public final class Lucene50RWCompoundFormat extends CompoundFormat { |
| |
| /** Extension of compound file */ |
| static final String DATA_EXTENSION = "cfs"; |
| /** Extension of compound file entries */ |
| static final String ENTRIES_EXTENSION = "cfe"; |
| |
| static final String DATA_CODEC = "Lucene50CompoundData"; |
| static final String ENTRY_CODEC = "Lucene50CompoundEntries"; |
| static final int VERSION_START = 0; |
| static final int VERSION_CURRENT = VERSION_START; |
| |
| /** Sole constructor. */ |
| public Lucene50RWCompoundFormat() {} |
| |
| @Override |
| public CompoundDirectory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) |
| throws IOException { |
| return new Lucene50CompoundReader(dir, si, context); |
| } |
| |
| @Override |
| public void write(Directory dir, SegmentInfo si, IOContext context) throws IOException { |
| String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION); |
| String entriesFile = IndexFileNames.segmentFileName(si.name, "", ENTRIES_EXTENSION); |
| |
| try (IndexOutput data = dir.createOutput(dataFile, context); |
| IndexOutput entries = dir.createOutput(entriesFile, context)) { |
| CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), ""); |
| CodecUtil.writeIndexHeader(entries, ENTRY_CODEC, VERSION_CURRENT, si.getId(), ""); |
| |
| writeCompoundFile(entries, data, dir, si); |
| |
| CodecUtil.writeFooter(data); |
| CodecUtil.writeFooter(entries); |
| } |
| } |
| |
| private void writeCompoundFile( |
| IndexOutput entries, IndexOutput data, Directory dir, SegmentInfo si) throws IOException { |
| // write number of files |
| entries.writeVInt(si.files().size()); |
| for (String file : si.files()) { |
| // write bytes for file |
| long startOffset = data.getFilePointer(); |
| try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) { |
| |
| // just copies the index header, verifying that its id matches what we expect |
| CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId()); |
| |
| // copy all bytes except the footer |
| long numBytesToCopy = in.length() - CodecUtil.footerLength() - in.getFilePointer(); |
| data.copyBytes(in, numBytesToCopy); |
| |
| // verify footer (checksum) matches for the incoming file we are copying |
| long checksum = CodecUtil.checkFooter(in); |
| |
| // this is poached from CodecUtil.writeFooter, but we need to use our own checksum, not |
| // data.getChecksum(), but I think |
| // adding a public method to CodecUtil to do that is somewhat dangerous: |
| data.writeInt(CodecUtil.FOOTER_MAGIC); |
| data.writeInt(0); |
| data.writeLong(checksum); |
| } |
| long endOffset = data.getFilePointer(); |
| |
| long length = endOffset - startOffset; |
| |
| // write entry for file |
| entries.writeString(IndexFileNames.stripSegmentName(file)); |
| entries.writeLong(startOffset); |
| entries.writeLong(length); |
| } |
| } |
| } |