lucene/src/java/org/apache/lucene/index/values/FixedSortedBytesImpl.java - lucene-solr - Git at Google

 package org.apache.lucene.index.values;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.Comparator;
 import java.util.concurrent.atomic.AtomicLong;

 import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
 import org.apache.lucene.index.values.Bytes.BytesReaderBase;
 import org.apache.lucene.index.values.Bytes.BytesWriterBase;
 import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.CodecUtil;
 import org.apache.lucene.util.PagedBytes;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.ByteBlockPool.Allocator;
 import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
 import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
 import org.apache.lucene.util.packed.PackedInts;

 // Stores fixed-length byte[] by deref, ie when two docs
 // have the same value, they store only 1 byte[]

 /**
  * @lucene.experimental
  */
 class FixedSortedBytesImpl {

   static final String CODEC_NAME = "FixedSortedBytes";
   static final int VERSION_START = 0;
   static final int VERSION_CURRENT = VERSION_START;

   static class Writer extends BytesWriterBase {
     private int size = -1;
     private int[] docToEntry;
     private final Comparator<BytesRef> comp;

     private final BytesRefHash hash = new BytesRefHash(pool,
         BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
             BytesRefHash.DEFAULT_CAPACITY, bytesUsed));

     public Writer(Directory dir, String id, Comparator<BytesRef> comp,
         AtomicLong bytesUsed) throws IOException {
       this(dir, id, comp, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
           bytesUsed);
     }

     public Writer(Directory dir, String id, Comparator<BytesRef> comp,
         Allocator allocator, AtomicLong bytesUsed) throws IOException {
       super(dir, id, CODEC_NAME, VERSION_CURRENT, true,
           new ByteBlockPool(allocator), bytesUsed);
       docToEntry = new int[1];
       // docToEntry[0] = -1;
       bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
       this.comp = comp;
     }

     @Override
     public void add(int docID, BytesRef bytes) throws IOException {
       if (bytes.length == 0)
         return; // default - skip it
       if (size == -1) {
         size = bytes.length;
         datOut.writeInt(size);
       } else if (bytes.length != size) {
         throw new IllegalArgumentException("expected bytes size=" + size
             + " but got " + bytes.length);
       }
       if (docID >= docToEntry.length) {
         final int[] newArray = new int[ArrayUtil.oversize(1 + docID,
             RamUsageEstimator.NUM_BYTES_INT)];
         System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
         bytesUsed.addAndGet((newArray.length - docToEntry.length)
             * RamUsageEstimator.NUM_BYTES_INT);
         docToEntry = newArray;
       }
       int e = hash.add(bytes);
       docToEntry[docID] = 1 + (e < 0 ? (-e) - 1 : e);
     }

     // Important that we get docCount, in case there were
     // some last docs that we didn't see
     @Override
     public void finish(int docCount) throws IOException {
       try {
         if (size == -1) {// no data added
           datOut.writeInt(size);
         }
         final int[] sortedEntries = hash.sort(comp);
         final int count = hash.size();
         int[] address = new int[count];
         // first dump bytes data, recording address as we go
         for (int i = 0; i < count; i++) {
           final int e = sortedEntries[i];
           final BytesRef bytes = hash.get(e, new BytesRef());
           assert bytes.length == size;
           datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
           address[e] = 1 + i;
         }

         idxOut.writeInt(count);

         // next write index
         PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
             PackedInts.bitsRequired(count));
         final int limit;
         if (docCount > docToEntry.length) {
           limit = docToEntry.length;
         } else {
           limit = docCount;
         }
         for (int i = 0; i < limit; i++) {
           final int e = docToEntry[i];
           if (e == 0) {
             // null is encoded as zero
             w.add(0);
           } else {
             assert e > 0 && e <= count : "index must  0 > && <= " + count
                 + " was: " + e;
             w.add(address[e - 1]);
           }
         }

         for (int i = limit; i < docCount; i++) {
           w.add(0);
         }
         w.finish();
       } finally {
         super.finish(docCount);
         bytesUsed.addAndGet((-docToEntry.length)
             * RamUsageEstimator.NUM_BYTES_INT);
         docToEntry = null;
         hash.close();
       }
     }
   }

   public static class Reader extends BytesReaderBase {
     private final int size;

     public Reader(Directory dir, String id, int maxDoc) throws IOException {
       super(dir, id, CODEC_NAME, VERSION_START, true);
       size = datIn.readInt();
     }

     @Override
     public org.apache.lucene.index.values.IndexDocValues.Source load()
         throws IOException {
       return loadSorted(null);
     }

     @Override
     public SortedSource loadSorted(Comparator<BytesRef> comp)
         throws IOException {
       final IndexInput idxInput = cloneIndex();
       final IndexInput datInput = cloneData();
       datInput.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
       idxInput.seek(CodecUtil.headerLength(CODEC_NAME));
       return new Source(datInput, idxInput, size, idxInput.readInt(), comp);
     }

     private static class Source extends BytesBaseSortedSource {

       private final PackedInts.Reader index;
       private final int numValue;
       private final int size;

       public Source(IndexInput datIn, IndexInput idxIn, int size,
           int numValues, Comparator<BytesRef> comp) throws IOException {
         super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), size
             * numValues);
         this.size = size;
         this.numValue = numValues;
         index = PackedInts.getReader(idxIn);
         closeIndexInput();
       }

       @Override
       public int ord(int docID) {
         return (int) index.get(docID) -1;
       }

       @Override
       public int getByValue(BytesRef bytes, BytesRef tmpRef) {
         return binarySearch(bytes, tmpRef, 0, numValue - 1);
       }

       @Override
       public int getValueCount() {
         return numValue;
       }

       @Override
       protected BytesRef deref(int ord, BytesRef bytesRef) {
         return data.fillSlice(bytesRef, (ord * size), size);
       }

       @Override
       public ValueType type() {
         return ValueType.BYTES_FIXED_SORTED;
       }

       @Override
       protected int maxDoc() {
         return index.size();
       }
     }

     @Override
     public ValuesEnum getEnum(AttributeSource source) throws IOException {
       // do unsorted
       return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
     }

     @Override
     public ValueType type() {
       return ValueType.BYTES_FIXED_SORTED;
     }
   }
 }
	package org.apache.lucene.index.values;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.Comparator;
	import java.util.concurrent.atomic.AtomicLong;

	import org.apache.lucene.index.values.Bytes.BytesBaseSortedSource;
	import org.apache.lucene.index.values.Bytes.BytesReaderBase;
	import org.apache.lucene.index.values.Bytes.BytesWriterBase;
	import org.apache.lucene.index.values.FixedDerefBytesImpl.Reader.DerefBytesEnum;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.store.IndexInput;
	import org.apache.lucene.util.ArrayUtil;
	import org.apache.lucene.util.AttributeSource;
	import org.apache.lucene.util.ByteBlockPool;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.BytesRefHash;
	import org.apache.lucene.util.CodecUtil;
	import org.apache.lucene.util.PagedBytes;
	import org.apache.lucene.util.RamUsageEstimator;
	import org.apache.lucene.util.ByteBlockPool.Allocator;
	import org.apache.lucene.util.ByteBlockPool.DirectTrackingAllocator;
	import org.apache.lucene.util.BytesRefHash.TrackingDirectBytesStartArray;
	import org.apache.lucene.util.packed.PackedInts;

	// Stores fixed-length byte[] by deref, ie when two docs
	// have the same value, they store only 1 byte[]

	/**
	* @lucene.experimental
	*/
	class FixedSortedBytesImpl {

	static final String CODEC_NAME = "FixedSortedBytes";
	static final int VERSION_START = 0;
	static final int VERSION_CURRENT = VERSION_START;

	static class Writer extends BytesWriterBase {
	private int size = -1;
	private int[] docToEntry;
	private final Comparator<BytesRef> comp;

	private final BytesRefHash hash = new BytesRefHash(pool,
	BytesRefHash.DEFAULT_CAPACITY, new TrackingDirectBytesStartArray(
	BytesRefHash.DEFAULT_CAPACITY, bytesUsed));

	public Writer(Directory dir, String id, Comparator<BytesRef> comp,
	AtomicLong bytesUsed) throws IOException {
	this(dir, id, comp, new DirectTrackingAllocator(ByteBlockPool.BYTE_BLOCK_SIZE, bytesUsed),
	bytesUsed);
	}

	public Writer(Directory dir, String id, Comparator<BytesRef> comp,
	Allocator allocator, AtomicLong bytesUsed) throws IOException {
	super(dir, id, CODEC_NAME, VERSION_CURRENT, true,
	new ByteBlockPool(allocator), bytesUsed);
	docToEntry = new int[1];
	// docToEntry[0] = -1;
	bytesUsed.addAndGet(RamUsageEstimator.NUM_BYTES_INT);
	this.comp = comp;
	}

	@Override
	public void add(int docID, BytesRef bytes) throws IOException {
	if (bytes.length == 0)
	return; // default - skip it
	if (size == -1) {
	size = bytes.length;
	datOut.writeInt(size);
	} else if (bytes.length != size) {
	throw new IllegalArgumentException("expected bytes size=" + size
	+ " but got " + bytes.length);
	}
	if (docID >= docToEntry.length) {
	final int[] newArray = new int[ArrayUtil.oversize(1 + docID,
	RamUsageEstimator.NUM_BYTES_INT)];
	System.arraycopy(docToEntry, 0, newArray, 0, docToEntry.length);
	bytesUsed.addAndGet((newArray.length - docToEntry.length)
	* RamUsageEstimator.NUM_BYTES_INT);
	docToEntry = newArray;
	}
	int e = hash.add(bytes);
	docToEntry[docID] = 1 + (e < 0 ? (-e) - 1 : e);
	}

	// Important that we get docCount, in case there were
	// some last docs that we didn't see
	@Override
	public void finish(int docCount) throws IOException {
	try {
	if (size == -1) {// no data added
	datOut.writeInt(size);
	}
	final int[] sortedEntries = hash.sort(comp);
	final int count = hash.size();
	int[] address = new int[count];
	// first dump bytes data, recording address as we go
	for (int i = 0; i < count; i++) {
	final int e = sortedEntries[i];
	final BytesRef bytes = hash.get(e, new BytesRef());
	assert bytes.length == size;
	datOut.writeBytes(bytes.bytes, bytes.offset, bytes.length);
	address[e] = 1 + i;
	}

	idxOut.writeInt(count);

	// next write index
	PackedInts.Writer w = PackedInts.getWriter(idxOut, docCount,
	PackedInts.bitsRequired(count));
	final int limit;
	if (docCount > docToEntry.length) {
	limit = docToEntry.length;
	} else {
	limit = docCount;
	}
	for (int i = 0; i < limit; i++) {
	final int e = docToEntry[i];
	if (e == 0) {
	// null is encoded as zero
	w.add(0);
	} else {
	assert e > 0 && e <= count : "index must 0 > && <= " + count
	+ " was: " + e;
	w.add(address[e - 1]);
	}
	}

	for (int i = limit; i < docCount; i++) {
	w.add(0);
	}
	w.finish();
	} finally {
	super.finish(docCount);
	bytesUsed.addAndGet((-docToEntry.length)
	* RamUsageEstimator.NUM_BYTES_INT);
	docToEntry = null;
	hash.close();
	}
	}
	}

	public static class Reader extends BytesReaderBase {
	private final int size;

	public Reader(Directory dir, String id, int maxDoc) throws IOException {
	super(dir, id, CODEC_NAME, VERSION_START, true);
	size = datIn.readInt();
	}

	@Override
	public org.apache.lucene.index.values.IndexDocValues.Source load()
	throws IOException {
	return loadSorted(null);
	}

	@Override
	public SortedSource loadSorted(Comparator<BytesRef> comp)
	throws IOException {
	final IndexInput idxInput = cloneIndex();
	final IndexInput datInput = cloneData();
	datInput.seek(CodecUtil.headerLength(CODEC_NAME) + 4);
	idxInput.seek(CodecUtil.headerLength(CODEC_NAME));
	return new Source(datInput, idxInput, size, idxInput.readInt(), comp);
	}

	private static class Source extends BytesBaseSortedSource {

	private final PackedInts.Reader index;
	private final int numValue;
	private final int size;

	public Source(IndexInput datIn, IndexInput idxIn, int size,
	int numValues, Comparator<BytesRef> comp) throws IOException {
	super(datIn, idxIn, comp, new PagedBytes(PAGED_BYTES_BITS), size
	* numValues);
	this.size = size;
	this.numValue = numValues;
	index = PackedInts.getReader(idxIn);
	closeIndexInput();
	}

	@Override
	public int ord(int docID) {
	return (int) index.get(docID) -1;
	}

	@Override
	public int getByValue(BytesRef bytes, BytesRef tmpRef) {
	return binarySearch(bytes, tmpRef, 0, numValue - 1);
	}

	@Override
	public int getValueCount() {
	return numValue;
	}

	@Override
	protected BytesRef deref(int ord, BytesRef bytesRef) {
	return data.fillSlice(bytesRef, (ord * size), size);
	}

	@Override
	public ValueType type() {
	return ValueType.BYTES_FIXED_SORTED;
	}

	@Override
	protected int maxDoc() {
	return index.size();
	}
	}

	@Override
	public ValuesEnum getEnum(AttributeSource source) throws IOException {
	// do unsorted
	return new DerefBytesEnum(source, cloneData(), cloneIndex(), size);
	}

	@Override
	public ValueType type() {
	return ValueType.BYTES_FIXED_SORTED;
	}
	}
	}