| package org.apache.lucene.index.values; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
// Base classes for the specific Bytes Reader/Writer implementations are defined below.
| import java.io.IOException; |
| import java.util.Collection; |
| import java.util.Comparator; |
| import java.util.concurrent.atomic.AtomicLong; |
| |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.values.IndexDocValues.SortedSource; |
| import org.apache.lucene.index.values.IndexDocValues.Source; |
| import org.apache.lucene.index.values.IndexDocValues.SourceEnum; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.AttributeSource; |
| import org.apache.lucene.util.ByteBlockPool; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.CodecUtil; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.PagedBytes; |
| |
| /** |
| * Provides concrete Writer/Reader implementations for <tt>byte[]</tt> value per |
| * document. There are 6 package-private default implementations of this, for |
| * all combinations of {@link Mode#DEREF}/{@link Mode#STRAIGHT}/ |
| * {@link Mode#SORTED} x fixed-length/variable-length. |
| * |
| * <p> |
| * NOTE: Currently the total amount of byte[] data stored (across a single |
| * segment) cannot exceed 2GB. |
| * </p> |
| * <p> |
 * NOTE: Each byte[] must be &lt;= 32768 bytes in length
| * </p> |
| * |
| * @lucene.experimental |
| */ |
| public final class Bytes { |
| // TODO - add bulk copy where possible |
| private Bytes() { /* don't instantiate! */ |
| } |
| |
  /**
   * Defines the {@link Writer}'s store mode. The writer will either store the
   * bytes sequentially ({@link #STRAIGHT}), dereferenced ({@link #DEREF}) or
   * sorted ({@link #SORTED}).
   *
   * @lucene.experimental
   */
  public static enum Mode {
    /**
     * Mode for sequentially stored bytes: one value per document, in docID
     * order, with no sharing of equal values.
     */
    STRAIGHT,
    /**
     * Mode for dereferenced stored bytes: equal values are stored once and
     * referenced per document.
     */
    DEREF,
    /**
     * Mode for sorted stored bytes: values are stored in sorted (ord) order
     * and referenced per document.
     */
    SORTED
  };
| |
| /** |
| * Creates a new <tt>byte[]</tt> {@link Writer} instances for the given |
| * directory. |
| * |
| * @param dir |
| * the directory to write the values to |
| * @param id |
| * the id used to create a unique file name. Usually composed out of |
| * the segment name and a unique id per segment. |
| * @param mode |
| * the writers store mode |
| * @param comp |
| * a {@link BytesRef} comparator - only used with {@link Mode#SORTED} |
| * @param fixedSize |
| * <code>true</code> if all bytes subsequently passed to the |
| * {@link Writer} will have the same length |
| * @param bytesUsed |
| * an {@link AtomicLong} instance to track the used bytes within the |
| * {@link Writer}. A call to {@link Writer#finish(int)} will release |
| * all internally used resources and frees the memeory tracking |
| * reference. |
| * @return a new {@link Writer} instance |
| * @throws IOException |
| * if the files for the writer can not be created. |
| */ |
| public static Writer getWriter(Directory dir, String id, Mode mode, |
| Comparator<BytesRef> comp, boolean fixedSize, AtomicLong bytesUsed) |
| throws IOException { |
| // TODO -- i shouldn't have to specify fixed? can |
| // track itself & do the write thing at write time? |
| if (comp == null) { |
| comp = BytesRef.getUTF8SortedAsUnicodeComparator(); |
| } |
| |
| if (fixedSize) { |
| if (mode == Mode.STRAIGHT) { |
| return new FixedStraightBytesImpl.Writer(dir, id); |
| } else if (mode == Mode.DEREF) { |
| return new FixedDerefBytesImpl.Writer(dir, id, bytesUsed); |
| } else if (mode == Mode.SORTED) { |
| return new FixedSortedBytesImpl.Writer(dir, id, comp, bytesUsed); |
| } |
| } else { |
| if (mode == Mode.STRAIGHT) { |
| return new VarStraightBytesImpl.Writer(dir, id, bytesUsed); |
| } else if (mode == Mode.DEREF) { |
| return new VarDerefBytesImpl.Writer(dir, id, bytesUsed); |
| } else if (mode == Mode.SORTED) { |
| return new VarSortedBytesImpl.Writer(dir, id, comp, bytesUsed); |
| } |
| } |
| |
| throw new IllegalArgumentException(""); |
| } |
| |
| /** |
| * Creates a new {@link IndexDocValues} instance that provides either memory |
| * resident or iterative access to a per-document stored <tt>byte[]</tt> |
| * value. The returned {@link IndexDocValues} instance will be initialized without |
| * consuming a significant amount of memory. |
| * |
| * @param dir |
| * the directory to load the {@link IndexDocValues} from. |
| * @param id |
| * the file ID in the {@link Directory} to load the values from. |
| * @param mode |
| * the mode used to store the values |
| * @param fixedSize |
| * <code>true</code> iff the values are stored with fixed-size, |
| * otherwise <code>false</code> |
| * @param maxDoc |
| * the number of document values stored for the given ID |
| * @return an initialized {@link IndexDocValues} instance. |
| * @throws IOException |
| * if an {@link IOException} occurs |
| */ |
| public static IndexDocValues getValues(Directory dir, String id, Mode mode, |
| boolean fixedSize, int maxDoc) throws IOException { |
| // TODO -- I can peek @ header to determing fixed/mode? |
| if (fixedSize) { |
| if (mode == Mode.STRAIGHT) { |
| return new FixedStraightBytesImpl.Reader(dir, id, maxDoc); |
| } else if (mode == Mode.DEREF) { |
| return new FixedDerefBytesImpl.Reader(dir, id, maxDoc); |
| } else if (mode == Mode.SORTED) { |
| return new FixedSortedBytesImpl.Reader(dir, id, maxDoc); |
| } |
| } else { |
| if (mode == Mode.STRAIGHT) { |
| return new VarStraightBytesImpl.Reader(dir, id, maxDoc); |
| } else if (mode == Mode.DEREF) { |
| return new VarDerefBytesImpl.Reader(dir, id, maxDoc); |
| } else if (mode == Mode.SORTED) { |
| return new VarSortedBytesImpl.Reader(dir, id, maxDoc); |
| } |
| } |
| |
| throw new IllegalArgumentException("Illegal Mode: " + mode); |
| } |
| |
  // TODO open up this API?
  /**
   * Base {@link Source} for the <tt>byte[]</tt> variants. The constructor
   * eagerly copies the data file's payload into an in-memory
   * {@link PagedBytes} and freezes it, so value lookups never touch the
   * {@link IndexInput}s again.
   */
  static abstract class BytesBaseSource extends Source {
    private final PagedBytes pagedBytes;
    // data file input; always non-null in practice but close() is defensive
    protected final IndexInput datIn;
    // index file input; null for STRAIGHT variants (no index needed)
    protected final IndexInput idxIn;
    // page size is 2^15 = 32768 bytes, matching the max value length
    protected final static int PAGED_BYTES_BITS = 15;
    // frozen, random-access view over the copied data
    protected final PagedBytes.Reader data;
    // number of payload bytes copied from datIn
    protected final long totalLengthInBytes;

    /**
     * Copies <code>bytesToRead</code> bytes from <code>datIn</code> (starting
     * at its current file pointer) into <code>pagedBytes</code> and freezes
     * the result for random access.
     *
     * @throws IOException
     *           if reading from the data file fails
     */
    protected BytesBaseSource(IndexInput datIn, IndexInput idxIn,
        PagedBytes pagedBytes, long bytesToRead) throws IOException {
      assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
          + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
      this.datIn = datIn;
      this.totalLengthInBytes = bytesToRead;
      this.pagedBytes = pagedBytes;
      this.pagedBytes.copy(datIn, bytesToRead);
      data = pagedBytes.freeze(true);
      this.idxIn = idxIn;
    }

    /**
     * Closes the frozen data pages and both inputs. The nested
     * try/finally blocks guarantee each close is attempted even if an
     * earlier one throws.
     */
    public void close() throws IOException {
      try {
        data.close(); // close data
      } finally {
        try {
          if (datIn != null) {
            datIn.close();
          }
        } finally {
          if (idxIn != null) {// if straight - no index needed
            idxIn.close();
          }
        }
      }
    }

    /**
     * Returns one greater than the largest possible document number.
     */
    protected abstract int maxDoc();

    @Override
    public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
      return new SourceEnum(attrSource, type(), this, maxDoc()) {
        // Advances to the first doc >= target whose stored value is
        // non-empty; a zero-length value is treated as "no value set".
        @Override
        public int advance(int target) throws IOException {
          if (target >= numDocs) {
            return pos = NO_MORE_DOCS;
          }
          while (source.getBytes(target, bytesRef).length == 0) {
            if (++target >= numDocs) {
              return pos = NO_MORE_DOCS;
            }
          }
          return pos = target;
        }
      };
    }

  }
| |
  /**
   * Base {@link SortedSource} for the sorted <tt>byte[]</tt> variants. Like
   * {@link BytesBaseSource} it eagerly copies the data file into a frozen
   * {@link PagedBytes}; values are addressed by ord via {@link #deref(int, BytesRef)}.
   */
  static abstract class BytesBaseSortedSource extends SortedSource {
    // data file input
    protected final IndexInput datIn;
    // index file input (ord -> offset mapping)
    protected final IndexInput idxIn;
    // shared empty value returned for documents without a value
    protected final BytesRef defaultValue = new BytesRef();
    // page size is 2^15 = 32768 bytes, matching the max value length
    protected final static int PAGED_BYTES_BITS = 15;
    private final PagedBytes pagedBytes;
    // frozen, random-access view over the copied data
    protected final PagedBytes.Reader data;
    // comparator used by binarySearch; never null (defaults to UTF-8 order)
    private final Comparator<BytesRef> comp;

    /**
     * Copies <code>bytesToRead</code> bytes from <code>datIn</code> into
     * <code>pagedBytes</code> and freezes it. A <code>null</code>
     * <code>comp</code> falls back to the UTF-8 sorted-as-unicode comparator.
     *
     * @throws IOException
     *           if reading from the data file fails
     */
    protected BytesBaseSortedSource(IndexInput datIn, IndexInput idxIn,
        Comparator<BytesRef> comp, PagedBytes pagedBytes, long bytesToRead)
        throws IOException {
      assert bytesToRead <= datIn.length() : " file size is less than the expected size diff: "
          + (bytesToRead - datIn.length()) + " pos: " + datIn.getFilePointer();
      this.datIn = datIn;
      this.pagedBytes = pagedBytes;
      this.pagedBytes.copy(datIn, bytesToRead);
      data = pagedBytes.freeze(true);
      this.idxIn = idxIn;
      this.comp = comp == null ? BytesRef.getUTF8SortedAsUnicodeComparator()
          : comp;

    }

    @Override
    public BytesRef getByOrd(int ord, BytesRef bytesRef) {
      assert ord >= 0;
      return deref(ord, bytesRef);
    }

    /** Closes both inputs; the finally block ensures idxIn is attempted even if datIn throws. */
    protected void closeIndexInput() throws IOException {
      try {
        if (datIn != null) {
          datIn.close();
        }
      } finally {
        if (idxIn != null) {// if straight
          idxIn.close();
        }
      }
    }

    /**
     * Returns the largest doc id + 1 in this doc values source
     */
    protected abstract int maxDoc();

    /**
     * Copies the value for the given ord to the given {@link BytesRef} and
     * returns it.
     */
    protected abstract BytesRef deref(int ord, BytesRef bytesRef);

    /**
     * Standard binary search over the ord range [low, high] using
     * {@link #deref(int, BytesRef)} to materialize candidates and
     * <code>comp</code> for ordering. Returns the matching ord, or
     * <code>-(insertionPoint + 1)</code> if <code>b</code> is not present.
     * <code>bytesRef</code> is used as scratch space.
     */
    protected int binarySearch(BytesRef b, BytesRef bytesRef, int low,
        int high) {
      int mid = 0;
      while (low <= high) {
        mid = (low + high) >>> 1; // unsigned shift avoids overflow for large ords
        deref(mid, bytesRef);
        final int cmp = comp.compare(bytesRef, b);
        if (cmp < 0) {
          low = mid + 1;
        } else if (cmp > 0) {
          high = mid - 1;
        } else {
          return mid;
        }
      }
      assert comp.compare(bytesRef, b) != 0;
      return -(low + 1);
    }

    @Override
    public ValuesEnum getEnum(AttributeSource attrSource) throws IOException {
      return new SourceEnum(attrSource, type(), this, maxDoc()) {

        // Advances to the first doc >= target whose stored value is
        // non-empty; a zero-length value is treated as "no value set".
        @Override
        public int advance(int target) throws IOException {
          if (target >= numDocs) {
            return pos = NO_MORE_DOCS;
          }
          while (source.getBytes(target, bytesRef).length == 0) {
            if (++target >= numDocs) {
              return pos = NO_MORE_DOCS;
            }
          }
          return pos = target;
        }
      };
    }
  }
| |
  // TODO: open up this API?!
  /**
   * Base {@link Writer} for the <tt>byte[]</tt> variants. Creates the data
   * file (and optionally the index file) up front, writing a codec header to
   * each; if any creation step fails both outputs are closed again so no
   * handles leak.
   */
  static abstract class BytesWriterBase extends Writer {
    private final String id;
    // index output; null unless initIndex was true
    protected IndexOutput idxOut;
    // data output; always created
    protected IndexOutput datOut;
    // scratch ref wired to the current merge enum via setNextEnum
    protected BytesRef bytesRef;
    // byte pool used by subclasses; may be null (see finish)
    protected final ByteBlockPool pool;

    /**
     * Creates the "<id>.dat" output (and "<id>.idx" when
     * <code>initIndex</code> is true) and writes the codec header to each.
     * On failure, any output already opened is closed via
     * {@link IOUtils#closeSafely}.
     *
     * @throws IOException
     *           if the output files can not be created
     */
    protected BytesWriterBase(Directory dir, String id, String codecName,
        int version, boolean initIndex, ByteBlockPool pool,
        AtomicLong bytesUsed) throws IOException {
      super(bytesUsed);
      this.id = id;
      this.pool = pool;
      datOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
          DATA_EXTENSION));
      boolean success = false;
      try {
        CodecUtil.writeHeader(datOut, codecName, version);
        if (initIndex) {
          idxOut = dir.createOutput(IndexFileNames.segmentFileName(id, "",
              INDEX_EXTENSION));
          CodecUtil.writeHeader(idxOut, codecName, version);
        } else {
          idxOut = null;
        }
        success = true;
      } finally {
        if (!success) {
          IOUtils.closeSafely(true, datOut, idxOut);
        }
      }
    }

    /**
     * Must be called only with increasing docIDs. It's OK for some docIDs to be
     * skipped; they will be filled with 0 bytes.
     */
    @Override
    public abstract void add(int docID, BytesRef bytes) throws IOException;

    /** Closes both outputs, then resets the pool (if any) to release its buffers. */
    @Override
    public void finish(int docCount) throws IOException {
      try {
        IOUtils.closeSafely(false, datOut, idxOut);
      } finally {
        if (pool != null) {
          pool.reset();
        }
      }
    }

    // Adds the value currently exposed by the enum set via setNextEnum.
    @Override
    protected void add(int docID) throws IOException {
      add(docID, bytesRef);
    }

    // Adds the bytes from the given per-doc values; a null value is skipped.
    @Override
    public void add(int docID, PerDocFieldValues docValues) throws IOException {
      final BytesRef ref;
      if ((ref = docValues.getBytes()) != null) {
        add(docID, ref);
      }
    }

    // Remembers the enum's bytes ref so add(int) can read the current value.
    @Override
    protected void setNextEnum(ValuesEnum valuesEnum) {
      bytesRef = valuesEnum.bytes();
    }

    /** Reports the data file (always) and the index file (when created) to the collection. */
    @Override
    public void files(Collection<String> files) throws IOException {
      assert datOut != null;
      files.add(IndexFileNames.segmentFileName(id, "", DATA_EXTENSION));
      if (idxOut != null) { // called after flush - so this must be initialized
        // if needed or present
        final String idxFile = IndexFileNames.segmentFileName(id, "",
            INDEX_EXTENSION);
        files.add(idxFile);
      }
    }
  }
| |
  /**
   * Opens all necessary files, but does not read any data in until you call
   * {@link #load}. The codec header of the data file (and, when
   * <code>doIndex</code> is set, of the index file) is validated at
   * construction time; on failure both inputs are closed again.
   */
  static abstract class BytesReaderBase extends IndexDocValues {
    // index file input; null unless doIndex was true
    protected final IndexInput idxIn;
    // data file input; always opened
    protected final IndexInput datIn;
    // codec version read from the data file header
    protected final int version;
    protected final String id;

    /**
     * Opens "<id>.dat" (and "<id>.idx" when <code>doIndex</code> is true) and
     * checks each codec header against <code>codecName</code>/<code>maxVersion</code>.
     *
     * @throws IOException
     *           if a file can not be opened or a header check fails
     */
    protected BytesReaderBase(Directory dir, String id, String codecName,
        int maxVersion, boolean doIndex) throws IOException {
      this.id = id;
      datIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
          Writer.DATA_EXTENSION));
      boolean success = false;
      try {
        version = CodecUtil.checkHeader(datIn, codecName, maxVersion, maxVersion);
        if (doIndex) {
          idxIn = dir.openInput(IndexFileNames.segmentFileName(id, "",
              Writer.INDEX_EXTENSION));
          final int version2 = CodecUtil.checkHeader(idxIn, codecName,
              maxVersion, maxVersion);
          assert version == version2; // both files must be written with the same version
        } else {
          idxIn = null;
        }
        success = true;
      } finally {
        if (!success) {
          closeInternal();
        }
      }
    }

    /**
     * clones and returns the data {@link IndexInput}
     */
    protected final IndexInput cloneData() {
      assert datIn != null;
      return (IndexInput) datIn.clone();
    }

    /**
     * clones and returns the indexing {@link IndexInput}
     */
    protected final IndexInput cloneIndex() {
      assert idxIn != null;
      return (IndexInput) idxIn.clone();
    }

    @Override
    public void close() throws IOException {
      try {
        super.close();
      } finally {
        closeInternal();
      }
    }

    // Closes both inputs; the finally block ensures idxIn is attempted even
    // if closing datIn throws.
    private void closeInternal() throws IOException {
      try {
        datIn.close();
      } finally {
        if (idxIn != null) {
          idxIn.close();
        }
      }
    }
  }
| |
| } |