/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.memory;
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.BYTES;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.NUMBER;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_NUMERIC;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_NUMERIC_SINGLETON;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_SET;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.SORTED_SET_SINGLETON;
import static org.apache.lucene.codecs.memory.DirectDocValuesProducer.VERSION_CURRENT;
/**
* Writer for {@link DirectDocValuesFormat}
*/
class DirectDocValuesConsumer extends DocValuesConsumer {
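// data holds the encoded values themselves; meta holds one entry per field
// (field number, entry type byte, and file pointers/counts into data)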
IndexOutput data, meta;
final int maxDoc;
DirectDocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
maxDoc = state.segmentInfo.maxDoc();
boolean success = false;
try {
String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.createOutput(dataName, state.context);
CodecUtil.writeIndexHeader(data, dataCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
meta = state.directory.createOutput(metaName, state.context);
CodecUtil.writeIndexHeader(meta, metaCodec, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(this);
}
}
}
@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(NUMBER);
addNumericFieldValues(field, LegacyDocValuesIterables.numericIterable(field, valuesProducer, maxDoc));
}
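// Numeric layout: meta records the data start pointer, then (after a first pass
// over the values to find min/max and whether any value is missing) the value
// count, the missing-bitset location (or -1), and the byte width; a second pass
// writes every value into data at that fixed width.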
private void addNumericFieldValues(FieldInfo field, Iterable<Number> values) throws IOException {
meta.writeLong(data.getFilePointer());
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
boolean missing = false;
long count = 0;
for (Number nv : values) {
if (nv != null) {
long v = nv.longValue();
minValue = Math.min(minValue, v);
maxValue = Math.max(maxValue, v);
} else {
missing = true;
}
count++;
if (count >= DirectDocValuesFormat.MAX_SORTED_SET_ORDS) {
throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, must be <= " + DirectDocValuesFormat.MAX_SORTED_SET_ORDS + " values/total ords");
}
}
meta.writeInt((int) count);
if (missing) {
long start = data.getFilePointer();
writeMissingBitset(values);
meta.writeLong(start);
meta.writeLong(data.getFilePointer() - start);
} else {
meta.writeLong(-1L);
}
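// pick the smallest fixed width (1, 2, 4 or 8 bytes) that can represent every value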
byte byteWidth;
if (minValue >= Byte.MIN_VALUE && maxValue <= Byte.MAX_VALUE) {
byteWidth = 1;
} else if (minValue >= Short.MIN_VALUE && maxValue <= Short.MAX_VALUE) {
byteWidth = 2;
} else if (minValue >= Integer.MIN_VALUE && maxValue <= Integer.MAX_VALUE) {
byteWidth = 4;
} else {
byteWidth = 8;
}
meta.writeByte(byteWidth);
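// second pass: write each value at the chosen width; missing values are
// written as 0 and distinguished by the missing bitset above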
for (Number nv : values) {
long v;
if (nv != null) {
v = nv.longValue();
} else {
v = 0;
}
switch (byteWidth) {
case 1:
data.writeByte((byte) v);
break;
case 2:
data.writeShort((short) v);
break;
case 4:
data.writeInt((int) v);
break;
case 8:
data.writeLong(v);
break;
}
}
}
@Override
public void close() throws IOException {
boolean success = false;
try {
if (meta != null) {
meta.writeVInt(-1); // write EOF marker
CodecUtil.writeFooter(meta); // write checksum
}
if (data != null) {
CodecUtil.writeFooter(data);
}
success = true;
} finally {
if (success) {
IOUtils.close(data, meta);
} else {
IOUtils.closeWhileHandlingException(data, meta);
}
data = meta = null;
}
}
@Override
public void addBinaryField(FieldInfo field, final DocValuesProducer valuesProducer) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(BYTES);
addBinaryFieldValues(field, LegacyDocValuesIterables.binaryIterable(field, valuesProducer, maxDoc));
}
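// Binary layout: all byte[] values are concatenated into data; meta records the
// start pointer, total byte length, document count and missing-bitset location;
// data then gets one int start offset per document plus a final end offset.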
private void addBinaryFieldValues(FieldInfo field, final Iterable<BytesRef> values) throws IOException {
// write the byte[] data
final long startFP = data.getFilePointer();
boolean missing = false;
long totalBytes = 0;
int count = 0;
for (BytesRef v : values) {
if (v != null) {
data.writeBytes(v.bytes, v.offset, v.length);
totalBytes += v.length;
if (totalBytes > DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH) {
throw new IllegalArgumentException("DocValuesField \"" + field.name + "\" is too large, cannot have more than DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH (" + DirectDocValuesFormat.MAX_TOTAL_BYTES_LENGTH + ") bytes");
}
} else {
missing = true;
}
count++;
}
meta.writeLong(startFP);
meta.writeInt((int) totalBytes);
meta.writeInt(count);
if (missing) {
long start = data.getFilePointer();
writeMissingBitset(values);
meta.writeLong(start);
meta.writeLong(data.getFilePointer() - start);
} else {
meta.writeLong(-1L);
}
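// per-document start offsets into the concatenated bytes, plus one final end offset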
int addr = 0;
for (BytesRef v : values) {
data.writeInt(addr);
if (v != null) {
addr += v.length;
}
}
data.writeInt(addr);
}
// TODO: in some cases representing missing with minValue-1 wouldn't take up additional space and so on,
// but this is very simple, and algorithms only check this for values of 0 anyway (doesn't slow down normal decode)
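// Writes one long per 64 documents; bit (docID % 64) is set when that document has a value.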
void writeMissingBitset(Iterable<?> values) throws IOException {
long bits = 0;
int count = 0;
for (Object v : values) {
if (count == 64) {
data.writeLong(bits);
count = 0;
bits = 0;
}
if (v != null) {
bits |= 1L << (count & 0x3f);
}
count++;
}
if (count > 0) {
data.writeLong(bits);
}
}
@Override
public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(SORTED);
// write the ordinals as numerics
addNumericFieldValues(field, LegacyDocValuesIterables.sortedOrdIterable(valuesProducer, field, maxDoc));
// write the values as binary
addBinaryFieldValues(field, LegacyDocValuesIterables.valuesIterable(valuesProducer.getSorted(field)));
}
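// Sorted numeric: if no document has more than one value, the values are
// written as a single numeric entry; otherwise per-document addresses
// (aggregated counts) are written first, followed by all values.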
@Override
public void addSortedNumericField(FieldInfo field, final DocValuesProducer valuesProducer) throws IOException {
final Iterable<Number> docToValueCount = LegacyDocValuesIterables.sortedNumericToDocCount(valuesProducer, field, maxDoc);
final Iterable<Number> values = LegacyDocValuesIterables.sortedNumericToValues(valuesProducer, field);
meta.writeVInt(field.number);
if (isSingleValued(docToValueCount)) {
meta.writeByte(SORTED_NUMERIC_SINGLETON);
addNumericFieldValues(field, singletonView(docToValueCount, values, null));
} else {
meta.writeByte(SORTED_NUMERIC);
// First write docToValueCounts, except we "aggregate" the
// counts so they turn into addresses, and add a final
// value = the total aggregate:
addNumericFieldValues(field, countToAddressIterator(docToValueCount));
// Write values for all docs, appended into one big
// numerics:
addNumericFieldValues(field, values);
}
}
// note: this might not be the most efficient... but it's fairly simple
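// Sorted set: the singleton case writes one ord per document plus the term bytes;
// the general case writes per-document ord addresses, all ords, then the term bytes.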
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
Iterable<BytesRef> values = LegacyDocValuesIterables.valuesIterable(valuesProducer.getSortedSet(field));
Iterable<Number> docToOrdCount = LegacyDocValuesIterables.sortedSetOrdCountIterable(valuesProducer, field, maxDoc);
Iterable<Number> ords = LegacyDocValuesIterables.sortedSetOrdsIterable(valuesProducer, field);
meta.writeVInt(field.number);
if (isSingleValued(docToOrdCount)) {
meta.writeByte(SORTED_SET_SINGLETON);
// Write ordinals for all docs, appended into one big
// numerics:
addNumericFieldValues(field, singletonView(docToOrdCount, ords, -1L));
// write the values as binary
addBinaryFieldValues(field, values);
} else {
meta.writeByte(SORTED_SET);
// First write docToOrdCounts, except we "aggregate" the
// counts so they turn into addresses, and add a final
// value = the total aggregate:
addNumericFieldValues(field, countToAddressIterator(docToOrdCount));
// Write ordinals for all docs, appended into one big
// numerics:
addNumericFieldValues(field, ords);
// write the values as binary
addBinaryFieldValues(field, values);
}
}
/**
* Just aggregates the count values so they become
* "addresses", and adds one more value in the end
* (the final sum)
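* (e.g. counts [2, 0, 3] become addresses [0, 2, 2, 5])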
*/
private Iterable<Number> countToAddressIterator(final Iterable<Number> counts) {
return new Iterable<Number>() {
@Override
public Iterator<Number> iterator() {
final Iterator<Number> iter = counts.iterator();
return new Iterator<Number>() {
long sum;
boolean ended;
@Override
public boolean hasNext() {
return iter.hasNext() || !ended;
}
@Override
public Number next() {
long toReturn = sum;
if (iter.hasNext()) {
Number n = iter.next();
if (n != null) {
sum += n.longValue();
}
} else if (!ended) {
ended = true;
} else {
assert false;
}
return toReturn;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
};
}
}