| package org.apache.lucene.index; |
| |
| /** |
| * Copyright 2004 The Apache Software Foundation |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| * use this file except in compliance with the License. You may obtain a copy of |
| * the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| * License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.IOException; |
| import java.util.Iterator; |
| import java.util.zip.Deflater; |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Fieldable; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.RAMOutputStream; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.store.IndexInput; |
| |
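| /** |
| * Writes stored document fields for a segment. Field data is appended |
| * to the fields file, while the index file records one long per |
| * document pointing at that document's entry in the fields file; both |
| * files begin with a 4-byte format version (FORMAT_CURRENT). |
| * |
| * A minimal usage sketch (illustrative names; error handling omitted): |
| * |
| * <pre> |
| * FieldsWriter writer = new FieldsWriter(dir, "_0", fieldInfos); |
| * try { |
| * writer.addDocument(doc); // once per document, in docID order |
| * } finally { |
| * writer.close(); |
| * } |
| * </pre> |
| */ |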
| final class FieldsWriter { |
| static final byte FIELD_IS_TOKENIZED = 0x1; |
| static final byte FIELD_IS_BINARY = 0x2; |
| static final byte FIELD_IS_COMPRESSED = 0x4; |
| |
| // Original format |
| static final int FORMAT = 0; |
| |
| // Changed strings to UTF8 |
| static final int FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1; |
| |
| // NOTE: if you introduce a new format, make it 1 higher |
| // than the current one, and always change this if you |
| // switch to a new format! |
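| // FORMAT_CURRENT is written as the first int of both the fields file |
| // and the index file (see the constructor), so readers can select the |
| // matching decoding logic. |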
| static final int FORMAT_CURRENT = FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; |
| |
| private FieldInfos fieldInfos; |
| |
| private IndexOutput fieldsStream; |
| |
| private IndexOutput indexStream; |
| |
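| // True when this writer opened the streams itself and must close them; |
| // false when the streams were supplied (and are owned) by the caller. |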
| private boolean doClose; |
| |
| FieldsWriter(Directory d, String segment, FieldInfos fn) throws IOException { |
| fieldInfos = fn; |
| |
| // Set before opening the files so that close(), called in the failure |
| // paths below, releases any stream that was already opened |
| doClose = true; |
| |
| boolean success = false; |
| final String fieldsName = segment + "." + IndexFileNames.FIELDS_EXTENSION; |
| try { |
| fieldsStream = d.createOutput(fieldsName); |
| fieldsStream.writeInt(FORMAT_CURRENT); |
| success = true; |
| } finally { |
| if (!success) { |
| try { |
| close(); |
| } catch (Throwable t) { |
| // Suppress so we keep throwing the original exception |
| } |
| try { |
| d.deleteFile(fieldsName); |
| } catch (Throwable t) { |
| // Suppress so we keep throwing the original exception |
| } |
| } |
| } |
| |
| success = false; |
| final String indexName = segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION; |
| try { |
| indexStream = d.createOutput(indexName); |
| indexStream.writeInt(FORMAT_CURRENT); |
| success = true; |
| } finally { |
| if (!success) { |
| try { |
| close(); |
| } catch (Throwable t) { |
| // Suppress so we keep throwing the original exception |
| } |
| try { |
| d.deleteFile(fieldsName); |
| } catch (Throwable t) { |
| // Suppress so we keep throwing the original exception |
| } |
| try { |
| d.deleteFile(indexName); |
| } catch (Throwable t) { |
| // Suppress so we keep throwing the original exception |
| } |
| } |
| } |
| } |
| |
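| /** Wraps streams opened elsewhere; the caller keeps ownership of fdx |
| * and fdt and is responsible for closing them. */ |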
| FieldsWriter(IndexOutput fdx, IndexOutput fdt, FieldInfos fn) { |
| fieldInfos = fn; |
| fieldsStream = fdt; |
| indexStream = fdx; |
| doClose = false; |
| } |
| |
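| /** Redirects subsequent document writes to a different fields stream. */ |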
| void setFieldsStream(IndexOutput stream) { |
| this.fieldsStream = stream; |
| } |
| |
| // Writes the contents of buffer into the fields stream |
| // and adds a new entry for this document into the index |
| // stream. This assumes the buffer was already written |
| // in the correct fields format. |
| void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException { |
| indexStream.writeLong(fieldsStream.getFilePointer()); |
| fieldsStream.writeVInt(numStoredFields); |
| buffer.writeTo(fieldsStream); |
| } |
| |
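| // Writes an index entry for the current fields-stream position with a |
| // stored-field count of zero: a placeholder for a document with no |
| // stored fields. |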
| void skipDocument() throws IOException { |
| indexStream.writeLong(fieldsStream.getFilePointer()); |
| fieldsStream.writeVInt(0); |
| } |
| |
| void flush() throws IOException { |
| indexStream.flush(); |
| fieldsStream.flush(); |
| } |
| |
| final void close() throws IOException { |
| if (doClose) { |
| |
| try { |
| if (fieldsStream != null) { |
| try { |
| fieldsStream.close(); |
| } finally { |
| fieldsStream = null; |
| } |
| } |
| } catch (IOException ioe) { |
| try { |
| if (indexStream != null) { |
| try { |
| indexStream.close(); |
| } finally { |
| indexStream = null; |
| } |
| } |
| } catch (IOException ioe2) { |
| // Ignore so we throw only first IOException hit |
| } |
| throw ioe; |
| } finally { |
| if (indexStream != null) { |
| try { |
| indexStream.close(); |
| } finally { |
| indexStream = null; |
| } |
| } |
| } |
| } |
| } |
| |
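| // Per-field record layout: the field number as a vInt, one byte of |
| // flags (tokenized/binary/compressed), then the value: a vInt length |
| // plus raw bytes for binary or compressed fields, or a string |
| // otherwise. |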
| final void writeField(FieldInfo fi, Fieldable field) throws IOException { |
| // If the field is an instance of FieldsReader.FieldForMerge, we're in |
| // merge mode and field.getBinaryValue() already returns the compressed |
| // value for a field with isCompressed()==true, so we disable |
| // compression in that case |
| boolean disableCompression = (field instanceof FieldsReader.FieldForMerge); |
| fieldsStream.writeVInt(fi.number); |
| byte bits = 0; |
| if (field.isTokenized()) |
| bits |= FieldsWriter.FIELD_IS_TOKENIZED; |
| if (field.isBinary()) |
| bits |= FieldsWriter.FIELD_IS_BINARY; |
| if (field.isCompressed()) |
| bits |= FieldsWriter.FIELD_IS_COMPRESSED; |
| |
| fieldsStream.writeByte(bits); |
| |
| if (field.isCompressed()) { |
| // compression is enabled for the current field |
| final byte[] data; |
| final int len; |
| final int offset; |
| if (disableCompression) { |
| // optimized case for merging, the data |
| // is already compressed |
| data = field.getBinaryValue(); |
| assert data != null; |
| len = field.getBinaryLength(); |
| offset = field.getBinaryOffset(); |
| } else { |
| // check if it is a binary field |
| if (field.isBinary()) { |
| data = compress(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength()); |
| } else { |
| byte[] x = field.stringValue().getBytes("UTF-8"); |
| data = compress(x, 0, x.length); |
| } |
| len = data.length; |
| offset = 0; |
| } |
| |
| fieldsStream.writeVInt(len); |
| fieldsStream.writeBytes(data, offset, len); |
| } else { |
| // compression is disabled for the current field |
| if (field.isBinary()) { |
| final byte[] data; |
| final int len; |
| final int offset; |
| data = field.getBinaryValue(); |
| len = field.getBinaryLength(); |
| offset = field.getBinaryOffset(); |
| |
| fieldsStream.writeVInt(len); |
| fieldsStream.writeBytes(data, offset, len); |
| } else { |
| fieldsStream.writeString(field.stringValue()); |
| } |
| } |
| } |
| |
| /** Bulk-copies a contiguous series of documents. The |
| * lengths array gives the length (in bytes) of each raw |
| * document, and stream is the source segment's fields |
| * stream from which all bytes are bulk-copied. */ |
| final void addRawDocuments(IndexInput stream, int[] lengths, int numDocs) throws IOException { |
| long position = fieldsStream.getFilePointer(); |
| long start = position; |
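| // Each document's index pointer is derived from the running sum of |
| // lengths; the bytes themselves are copied in one bulk transfer below. |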
| for (int i = 0; i < numDocs; i++) { |
| indexStream.writeLong(position); |
| position += lengths[i]; |
| } |
| fieldsStream.copyBytes(stream, position-start); |
| assert fieldsStream.getFilePointer() == position; |
| } |
| |
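| // Two passes over the document's fields: the stored-field count must |
| // precede the field data, so count first, then write. |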
| final void addDocument(Document doc) throws IOException { |
| indexStream.writeLong(fieldsStream.getFilePointer()); |
| |
| int storedCount = 0; |
| Iterator fieldIterator = doc.getFields().iterator(); |
| while (fieldIterator.hasNext()) { |
| Fieldable field = (Fieldable) fieldIterator.next(); |
| if (field.isStored()) |
| storedCount++; |
| } |
| fieldsStream.writeVInt(storedCount); |
| |
| fieldIterator = doc.getFields().iterator(); |
| while (fieldIterator.hasNext()) { |
| Fieldable field = (Fieldable) fieldIterator.next(); |
| if (field.isStored()) |
| writeField(fieldInfos.fieldInfo(field.name()), field); |
| } |
| } |
| |
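| /** Deflates input[offset..offset+length) at BEST_COMPRESSION and |
| * returns the compressed bytes. */ |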
| private final byte[] compress (byte[] input, int offset, int length) { |
| // Create the compressor with highest level of compression |
| Deflater compressor = new Deflater(); |
| compressor.setLevel(Deflater.BEST_COMPRESSION); |
| |
| // Give the compressor the data to compress |
| compressor.setInput(input, offset, length); |
| compressor.finish(); |
| |
| /* |
| * Create an expandable byte array to hold the compressed data. |
| * You cannot use an array that's the same size as the original because |
| * there is no guarantee that the compressed data will be smaller than |
| * the uncompressed data. |
| */ |
| ByteArrayOutputStream bos = new ByteArrayOutputStream(length); |
| |
| try { |
| // Compress the data |
| byte[] buf = new byte[1024]; |
| while (!compressor.finished()) { |
| int count = compressor.deflate(buf); |
| bos.write(buf, 0, count); |
| } |
| |
| } finally { |
| compressor.end(); |
| } |
| |
| // Get the compressed data |
| return bos.toByteArray(); |
| } |
| } |