| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.backward_codecs.lucene80; |
| |
| import static org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; |
| import static org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat.NUMERIC_BLOCK_SHIFT; |
| import static org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE; |
| |
| import java.io.Closeable; |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Map; |
| import java.util.Set; |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.DocValuesConsumer; |
| import org.apache.lucene.codecs.DocValuesProducer; |
| import org.apache.lucene.index.BinaryDocValues; |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.EmptyDocValuesProducer; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.SegmentWriteState; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedNumericDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.SortedSetSelector; |
| import org.apache.lucene.store.ByteArrayDataOutput; |
| import org.apache.lucene.store.ByteBuffersDataOutput; |
| import org.apache.lucene.store.ByteBuffersIndexOutput; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefBuilder; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.MathUtil; |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.compress.LZ4; |
| import org.apache.lucene.util.compress.LZ4.FastCompressionHashTable; |
| import org.apache.lucene.util.packed.DirectMonotonicWriter; |
| import org.apache.lucene.util.packed.DirectWriter; |
| |
/** Writer for {@link Lucene80DocValuesFormat}. */
| final class Lucene80DocValuesConsumer extends DocValuesConsumer implements Closeable { |
| |
| final Lucene80DocValuesFormat.Mode mode; |
| IndexOutput data, meta; |
| final int maxDoc; |
| private final SegmentWriteState state; |
| private byte[] termsDictBuffer; |
| |
  /** Expert: Creates a new writer. */
| public Lucene80DocValuesConsumer( |
| SegmentWriteState state, |
| String dataCodec, |
| String dataExtension, |
| String metaCodec, |
| String metaExtension, |
| Lucene80DocValuesFormat.Mode mode) |
| throws IOException { |
| this.mode = mode; |
| if (Lucene80DocValuesFormat.Mode.BEST_COMPRESSION == this.mode) { |
| this.termsDictBuffer = new byte[1 << 14]; |
| } |
| boolean success = false; |
| try { |
| this.state = state; |
| String dataName = |
| IndexFileNames.segmentFileName( |
| state.segmentInfo.name, state.segmentSuffix, dataExtension); |
| data = state.directory.createOutput(dataName, state.context); |
| CodecUtil.writeIndexHeader( |
| data, |
| dataCodec, |
| Lucene80DocValuesFormat.VERSION_CURRENT, |
| state.segmentInfo.getId(), |
| state.segmentSuffix); |
| String metaName = |
| IndexFileNames.segmentFileName( |
| state.segmentInfo.name, state.segmentSuffix, metaExtension); |
| meta = state.directory.createOutput(metaName, state.context); |
| CodecUtil.writeIndexHeader( |
| meta, |
| metaCodec, |
| Lucene80DocValuesFormat.VERSION_CURRENT, |
| state.segmentInfo.getId(), |
| state.segmentSuffix); |
| maxDoc = state.segmentInfo.maxDoc(); |
| success = true; |
| } finally { |
| if (!success) { |
| IOUtils.closeWhileHandlingException(this); |
| } |
| } |
| } |
| |
| @Override |
| public void close() throws IOException { |
| boolean success = false; |
| try { |
| if (meta != null) { |
| meta.writeInt(-1); // write EOF marker |
| CodecUtil.writeFooter(meta); // write checksum |
| } |
| if (data != null) { |
| CodecUtil.writeFooter(data); // write checksum |
| } |
| success = true; |
| } finally { |
| if (success) { |
| IOUtils.close(data, meta); |
| } else { |
| IOUtils.closeWhileHandlingException(data, meta); |
| } |
| meta = data = null; |
| } |
| } |
| |
| @Override |
| public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) |
| throws IOException { |
| meta.writeInt(field.number); |
| meta.writeByte(Lucene80DocValuesFormat.NUMERIC); |
| |
| writeValues( |
| field, |
| new EmptyDocValuesProducer() { |
| @Override |
| public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException { |
| return DocValues.singleton(valuesProducer.getNumeric(field)); |
| } |
| }); |
| } |
| |
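  /**
   * Tracks the min, max and count of the values seen so far, and accumulates in {@code
   * spaceInBits} the storage that packing those values as deltas from the minimum would require.
   * {@link #writeValues} runs one tracker over the whole field and one that is reset every {@code
   * NUMERIC_BLOCK_SIZE} values, so the blocked estimate can be compared against the single-block
   * estimate.
   */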
| private static class MinMaxTracker { |
| long min, max, numValues, spaceInBits; |
| |
| MinMaxTracker() { |
| reset(); |
| spaceInBits = 0; |
| } |
| |
| private void reset() { |
| min = Long.MAX_VALUE; |
| max = Long.MIN_VALUE; |
| numValues = 0; |
| } |
| |
| /** Accumulate a new value. */ |
| void update(long v) { |
| min = Math.min(min, v); |
| max = Math.max(max, v); |
| ++numValues; |
| } |
| |
| /** Update the required space. */ |
| void finish() { |
| if (max > min) { |
| spaceInBits += DirectWriter.unsignedBitsRequired(max - min) * numValues; |
| } |
| } |
| |
| /** Update space usage and get ready for accumulating values for the next block. */ |
| void nextBlock() { |
| finish(); |
| reset(); |
| } |
| } |
| |
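  // Writes the values of a numeric field in two passes over the iterator. The first pass gathers
  // statistics: global and per-block min/max, the GCD of the deltas from the minimum (useful for
  // values sharing a common stride, e.g. timestamps rounded to the nearest second), and the set
  // of unique values as long as there are at most 256 of them. Based on those statistics the
  // values are then encoded either as indices into a sorted lookup table, as per-block packed
  // deltas, or as a single packed block of (value - min) / gcd.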
| private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { |
| SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); |
| int numDocsWithValue = 0; |
| MinMaxTracker minMax = new MinMaxTracker(); |
| MinMaxTracker blockMinMax = new MinMaxTracker(); |
| long gcd = 0; |
| Set<Long> uniqueValues = new HashSet<>(); |
| for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { |
| for (int i = 0, count = values.docValueCount(); i < count; ++i) { |
| long v = values.nextValue(); |
| |
| if (gcd != 1) { |
| if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) { |
            // In that case v - minMax.min might overflow and make the GCD computation return
            // wrong results. Since such extreme values are unlikely, we simply give up on GCD
            // compression for them.
| gcd = 1; |
| } else if (minMax.numValues != 0) { // minValue needs to be set first |
| gcd = MathUtil.gcd(gcd, v - minMax.min); |
| } |
| } |
| |
| minMax.update(v); |
| blockMinMax.update(v); |
| if (blockMinMax.numValues == NUMERIC_BLOCK_SIZE) { |
| blockMinMax.nextBlock(); |
| } |
| |
| if (uniqueValues != null && uniqueValues.add(v) && uniqueValues.size() > 256) { |
| uniqueValues = null; |
| } |
| } |
| |
| numDocsWithValue++; |
| } |
| |
| minMax.finish(); |
| blockMinMax.finish(); |
| |
| final long numValues = minMax.numValues; |
| long min = minMax.min; |
| final long max = minMax.max; |
| assert blockMinMax.spaceInBits <= minMax.spaceInBits; |
| |
| if (numDocsWithValue == 0) { // meta[-2, 0]: No documents with values |
| meta.writeLong(-2); // docsWithFieldOffset |
| meta.writeLong(0L); // docsWithFieldLength |
| meta.writeShort((short) -1); // jumpTableEntryCount |
| meta.writeByte((byte) -1); // denseRankPower |
    } else if (numDocsWithValue == maxDoc) { // meta[-1, 0]: All documents have values
| meta.writeLong(-1); // docsWithFieldOffset |
| meta.writeLong(0L); // docsWithFieldLength |
| meta.writeShort((short) -1); // jumpTableEntryCount |
| meta.writeByte((byte) -1); // denseRankPower |
| } else { // meta[data.offset, data.length]: IndexedDISI structure for documents with values |
| long offset = data.getFilePointer(); |
| meta.writeLong(offset); // docsWithFieldOffset |
| values = valuesProducer.getSortedNumeric(field); |
| final short jumpTableEntryCount = |
| IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); |
| meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength |
| meta.writeShort(jumpTableEntryCount); |
| meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); |
| } |
| |
| meta.writeLong(numValues); |
| final int numBitsPerValue; |
| boolean doBlocks = false; |
| Map<Long, Integer> encode = null; |
    if (min >= max) { // meta[-1]: All values are equal (or the field has no values)
| numBitsPerValue = 0; |
| meta.writeInt(-1); // tablesize |
| } else { |
| if (uniqueValues != null |
| && uniqueValues.size() > 1 |
| && DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1) |
| < DirectWriter.unsignedBitsRequired((max - min) / gcd)) { |
| numBitsPerValue = DirectWriter.unsignedBitsRequired(uniqueValues.size() - 1); |
| final Long[] sortedUniqueValues = uniqueValues.toArray(new Long[0]); |
| Arrays.sort(sortedUniqueValues); |
| meta.writeInt(sortedUniqueValues.length); // tablesize |
| for (Long v : sortedUniqueValues) { |
| meta.writeLong(v); // table[] entry |
| } |
| encode = new HashMap<>(); |
| for (int i = 0; i < sortedUniqueValues.length; ++i) { |
| encode.put(sortedUniqueValues[i], i); |
| } |
| min = 0; |
| gcd = 1; |
| } else { |
| uniqueValues = null; |
| // we do blocks if that appears to save 10+% storage |
| doBlocks = |
| minMax.spaceInBits > 0 && (double) blockMinMax.spaceInBits / minMax.spaceInBits <= 0.9; |
| if (doBlocks) { |
| numBitsPerValue = 0xFF; |
| meta.writeInt(-2 - NUMERIC_BLOCK_SHIFT); // tablesize |
| } else { |
| numBitsPerValue = DirectWriter.unsignedBitsRequired((max - min) / gcd); |
| if (gcd == 1 |
| && min > 0 |
| && DirectWriter.unsignedBitsRequired(max) |
| == DirectWriter.unsignedBitsRequired(max - min)) { |
| min = 0; |
| } |
| meta.writeInt(-1); // tablesize |
| } |
| } |
| } |
| |
| meta.writeByte((byte) numBitsPerValue); |
| meta.writeLong(min); |
| meta.writeLong(gcd); |
| long startOffset = data.getFilePointer(); |
| meta.writeLong(startOffset); // valueOffset |
| long jumpTableOffset = -1; |
| if (doBlocks) { |
| jumpTableOffset = writeValuesMultipleBlocks(valuesProducer.getSortedNumeric(field), gcd); |
| } else if (numBitsPerValue != 0) { |
| writeValuesSingleBlock( |
| valuesProducer.getSortedNumeric(field), numValues, numBitsPerValue, min, gcd, encode); |
| } |
| meta.writeLong(data.getFilePointer() - startOffset); // valuesLength |
| meta.writeLong(jumpTableOffset); |
| return new long[] {numDocsWithValue, numValues}; |
| } |
| |
| private void writeValuesSingleBlock( |
| SortedNumericDocValues values, |
| long numValues, |
| int numBitsPerValue, |
| long min, |
| long gcd, |
| Map<Long, Integer> encode) |
| throws IOException { |
| DirectWriter writer = DirectWriter.getInstance(data, numValues, numBitsPerValue); |
| for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { |
| for (int i = 0, count = values.docValueCount(); i < count; ++i) { |
| long v = values.nextValue(); |
| if (encode == null) { |
| writer.add((v - min) / gcd); |
| } else { |
| writer.add(encode.get(v)); |
| } |
| } |
| } |
| writer.finish(); |
| } |
| |
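  // Each block holds NUMERIC_BLOCK_SIZE values packed with its own bits-per-value ("vBPV"), so a
  // few outlier values only inflate the blocks they occur in. The absolute start offsets of all
  // blocks are appended after the value data as a jump table, letting readers seek to a block
  // without decoding its predecessors.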
| // Returns the offset to the jump-table for vBPV |
| private long writeValuesMultipleBlocks(SortedNumericDocValues values, long gcd) |
| throws IOException { |
| long[] offsets = new long[ArrayUtil.oversize(1, Long.BYTES)]; |
| int offsetsIndex = 0; |
| final long[] buffer = new long[NUMERIC_BLOCK_SIZE]; |
| final ByteBuffersDataOutput encodeBuffer = ByteBuffersDataOutput.newResettableInstance(); |
| int upTo = 0; |
| for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { |
| for (int i = 0, count = values.docValueCount(); i < count; ++i) { |
| buffer[upTo++] = values.nextValue(); |
| if (upTo == NUMERIC_BLOCK_SIZE) { |
| offsets = ArrayUtil.grow(offsets, offsetsIndex + 1); |
| offsets[offsetsIndex++] = data.getFilePointer(); |
| writeBlock(buffer, NUMERIC_BLOCK_SIZE, gcd, encodeBuffer); |
| upTo = 0; |
| } |
| } |
| } |
| if (upTo > 0) { |
| offsets = ArrayUtil.grow(offsets, offsetsIndex + 1); |
| offsets[offsetsIndex++] = data.getFilePointer(); |
| writeBlock(buffer, upTo, gcd, encodeBuffer); |
| } |
| |
    // All blocks have been written. Flush the offset jump-table
| final long offsetsOrigo = data.getFilePointer(); |
| for (int i = 0; i < offsetsIndex; i++) { |
| data.writeLong(offsets[i]); |
| } |
| data.writeLong(offsetsOrigo); |
| return offsetsOrigo; |
| } |
| |
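  // A block starts with a single header byte. If all values in the block are identical, the
  // header byte is 0 and only the shared value follows. Otherwise the header holds bitsPerValue
  // and is followed by the block minimum, the byte length of the packed data, and the
  // (value - min) / gcd deltas packed with DirectWriter. E.g. with gcd = 10 the block
  // {100, 110, 130} stores min = 100, a bits-per-value derived from max - min = 30, and the
  // packed deltas {0, 1, 3}.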
| private void writeBlock(long[] values, int length, long gcd, ByteBuffersDataOutput buffer) |
| throws IOException { |
| assert length > 0; |
| long min = values[0]; |
| long max = values[0]; |
| for (int i = 1; i < length; ++i) { |
| final long v = values[i]; |
| assert Math.floorMod(values[i] - min, gcd) == 0; |
| min = Math.min(min, v); |
| max = Math.max(max, v); |
| } |
| if (min == max) { |
| data.writeByte((byte) 0); |
| data.writeLong(min); |
| } else { |
| final int bitsPerValue = DirectWriter.unsignedBitsRequired(max - min); |
| buffer.reset(); |
| assert buffer.size() == 0; |
| final DirectWriter w = DirectWriter.getInstance(buffer, length, bitsPerValue); |
| for (int i = 0; i < length; ++i) { |
| w.add((values[i] - min) / gcd); |
| } |
| w.finish(); |
| data.writeByte((byte) bitsPerValue); |
| data.writeLong(min); |
| data.writeInt(Math.toIntExact(buffer.size())); |
| buffer.copyTo(data); |
| } |
| } |
| |
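  /**
   * Buffers binary values until {@code BINARY_DOCS_PER_COMPRESSED_BLOCK} of them have
   * accumulated, then flushes them as one block: the per-document lengths are written first,
   * followed by the LZ4-compressed concatenation of the values. The start pointer of every block
   * is spilled to a temporary file and later turned into a DirectMonotonic address table by
   * {@link #writeMetaData()}.
   */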
| class CompressedBinaryBlockWriter implements Closeable { |
| final FastCompressionHashTable ht = new FastCompressionHashTable(); |
| int uncompressedBlockLength = 0; |
| int maxUncompressedBlockLength = 0; |
| int numDocsInCurrentBlock = 0; |
| final int[] docLengths = new int[Lucene80DocValuesFormat.BINARY_DOCS_PER_COMPRESSED_BLOCK]; |
| byte[] block = BytesRef.EMPTY_BYTES; |
| int totalChunks = 0; |
| long maxPointer = 0; |
| final long blockAddressesStart; |
| |
| private final IndexOutput tempBinaryOffsets; |
| |
| public CompressedBinaryBlockWriter() throws IOException { |
| tempBinaryOffsets = |
| state.directory.createTempOutput( |
| state.segmentInfo.name, "binary_pointers", state.context); |
| boolean success = false; |
| try { |
| CodecUtil.writeHeader( |
| tempBinaryOffsets, |
| Lucene80DocValuesFormat.META_CODEC + "FilePointers", |
| Lucene80DocValuesFormat.VERSION_CURRENT); |
| blockAddressesStart = data.getFilePointer(); |
| success = true; |
| } finally { |
| if (success == false) { |
| IOUtils.closeWhileHandlingException(this); // self-close because constructor caller can't |
| } |
| } |
| } |
| |
| void addDoc(int doc, BytesRef v) throws IOException { |
| docLengths[numDocsInCurrentBlock] = v.length; |
| block = ArrayUtil.grow(block, uncompressedBlockLength + v.length); |
| System.arraycopy(v.bytes, v.offset, block, uncompressedBlockLength, v.length); |
| uncompressedBlockLength += v.length; |
| numDocsInCurrentBlock++; |
| if (numDocsInCurrentBlock == Lucene80DocValuesFormat.BINARY_DOCS_PER_COMPRESSED_BLOCK) { |
| flushData(); |
| } |
| } |
| |
| private void flushData() throws IOException { |
| if (numDocsInCurrentBlock > 0) { |
| // Write offset to this block to temporary offsets file |
| totalChunks++; |
| long thisBlockStartPointer = data.getFilePointer(); |
| |
        // Optimisation: check whether all lengths are the same
| boolean allLengthsSame = true; |
| for (int i = 1; i < Lucene80DocValuesFormat.BINARY_DOCS_PER_COMPRESSED_BLOCK; i++) { |
| if (docLengths[i] != docLengths[i - 1]) { |
| allLengthsSame = false; |
| break; |
| } |
| } |
| if (allLengthsSame) { |
| // Only write one value shifted. Steal a bit to indicate all other lengths are the same |
| int onlyOneLength = (docLengths[0] << 1) | 1; |
| data.writeVInt(onlyOneLength); |
| } else { |
| for (int i = 0; i < Lucene80DocValuesFormat.BINARY_DOCS_PER_COMPRESSED_BLOCK; i++) { |
| if (i == 0) { |
| // Write first value shifted and steal a bit to indicate other lengths are to follow |
| int multipleLengths = (docLengths[0] << 1); |
| data.writeVInt(multipleLengths); |
| } else { |
| data.writeVInt(docLengths[i]); |
| } |
| } |
| } |
| maxUncompressedBlockLength = Math.max(maxUncompressedBlockLength, uncompressedBlockLength); |
| LZ4.compress(block, 0, uncompressedBlockLength, data, ht); |
| numDocsInCurrentBlock = 0; |
        // Reset to zeroes because the full array is always read and written, even for a
        // partial final block
| Arrays.fill(docLengths, 0); |
| uncompressedBlockLength = 0; |
| maxPointer = data.getFilePointer(); |
| tempBinaryOffsets.writeVLong(maxPointer - thisBlockStartPointer); |
| } |
| } |
| |
| void writeMetaData() throws IOException { |
| if (totalChunks == 0) { |
| return; |
| } |
| |
| long startDMW = data.getFilePointer(); |
| meta.writeLong(startDMW); |
| |
| meta.writeVInt(totalChunks); |
| meta.writeVInt(Lucene80DocValuesFormat.BINARY_BLOCK_SHIFT); |
| meta.writeVInt(maxUncompressedBlockLength); |
| meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT); |
| |
| CodecUtil.writeFooter(tempBinaryOffsets); |
| IOUtils.close(tempBinaryOffsets); |
      // Write the compressed block offsets to the meta file by reading them back from the temp
      // file
| try (ChecksumIndexInput filePointersIn = |
| state.directory.openChecksumInput(tempBinaryOffsets.getName(), IOContext.READONCE)) { |
| CodecUtil.checkHeader( |
| filePointersIn, |
| Lucene80DocValuesFormat.META_CODEC + "FilePointers", |
| Lucene80DocValuesFormat.VERSION_CURRENT, |
| Lucene80DocValuesFormat.VERSION_CURRENT); |
| Throwable priorE = null; |
| try { |
| final DirectMonotonicWriter filePointers = |
| DirectMonotonicWriter.getInstance( |
| meta, data, totalChunks, DIRECT_MONOTONIC_BLOCK_SHIFT); |
| long fp = blockAddressesStart; |
| for (int i = 0; i < totalChunks; ++i) { |
| filePointers.add(fp); |
| fp += filePointersIn.readVLong(); |
| } |
| if (maxPointer < fp) { |
| throw new CorruptIndexException( |
| "File pointers don't add up (" + fp + " vs expected " + maxPointer + ")", |
| filePointersIn); |
| } |
| filePointers.finish(); |
| } catch (Throwable e) { |
| priorE = e; |
| } finally { |
| CodecUtil.checkFooter(filePointersIn, priorE); |
| } |
| } |
| // Write the length of the DMW block in the data |
| meta.writeLong(data.getFilePointer() - startDMW); |
| } |
| |
| @Override |
| public void close() throws IOException { |
| if (tempBinaryOffsets != null) { |
| IOUtils.close(tempBinaryOffsets); |
| state.directory.deleteFile(tempBinaryOffsets.getName()); |
| } |
| } |
| } |
| |
| @Override |
| public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { |
| field.putAttribute(Lucene80DocValuesFormat.MODE_KEY, mode.name()); |
| meta.writeInt(field.number); |
| meta.writeByte(Lucene80DocValuesFormat.BINARY); |
| |
| switch (mode) { |
| case BEST_SPEED: |
| doAddUncompressedBinaryField(field, valuesProducer); |
| break; |
| case BEST_COMPRESSION: |
| doAddCompressedBinaryField(field, valuesProducer); |
| break; |
| default: |
| throw new AssertionError(); |
| } |
| } |
| |
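  // BEST_SPEED layout: the raw bytes of all values are concatenated in the data file. When
  // lengths vary, a DirectMonotonic table of numDocsWithField + 1 start addresses is written so
  // the value of the i-th document with the field can be sliced out directly; with a fixed
  // length, addresses are implicit.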
| private void doAddUncompressedBinaryField(FieldInfo field, DocValuesProducer valuesProducer) |
| throws IOException { |
| BinaryDocValues values = valuesProducer.getBinary(field); |
| long start = data.getFilePointer(); |
| meta.writeLong(start); // dataOffset |
| int numDocsWithField = 0; |
| int minLength = Integer.MAX_VALUE; |
| int maxLength = 0; |
| for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { |
| numDocsWithField++; |
| BytesRef v = values.binaryValue(); |
| int length = v.length; |
| data.writeBytes(v.bytes, v.offset, v.length); |
| minLength = Math.min(length, minLength); |
| maxLength = Math.max(length, maxLength); |
| } |
| assert numDocsWithField <= maxDoc; |
| meta.writeLong(data.getFilePointer() - start); // dataLength |
| |
| if (numDocsWithField == 0) { |
| meta.writeLong(-2); // docsWithFieldOffset |
| meta.writeLong(0L); // docsWithFieldLength |
| meta.writeShort((short) -1); // jumpTableEntryCount |
| meta.writeByte((byte) -1); // denseRankPower |
| } else if (numDocsWithField == maxDoc) { |
| meta.writeLong(-1); // docsWithFieldOffset |
| meta.writeLong(0L); // docsWithFieldLength |
| meta.writeShort((short) -1); // jumpTableEntryCount |
| meta.writeByte((byte) -1); // denseRankPower |
| } else { |
| long offset = data.getFilePointer(); |
| meta.writeLong(offset); // docsWithFieldOffset |
| values = valuesProducer.getBinary(field); |
| final short jumpTableEntryCount = |
| IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); |
| meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength |
| meta.writeShort(jumpTableEntryCount); |
| meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); |
| } |
| |
| meta.writeInt(numDocsWithField); |
| meta.writeInt(minLength); |
| meta.writeInt(maxLength); |
| if (maxLength > minLength) { |
| start = data.getFilePointer(); |
| meta.writeLong(start); |
| meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT); |
| |
| final DirectMonotonicWriter writer = |
| DirectMonotonicWriter.getInstance( |
| meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT); |
| long addr = 0; |
| writer.add(addr); |
| values = valuesProducer.getBinary(field); |
| for (int doc = values.nextDoc(); |
| doc != DocIdSetIterator.NO_MORE_DOCS; |
| doc = values.nextDoc()) { |
| addr += values.binaryValue().length; |
| writer.add(addr); |
| } |
| writer.finish(); |
| meta.writeLong(data.getFilePointer() - start); |
| } |
| } |
| |
| private void doAddCompressedBinaryField(FieldInfo field, DocValuesProducer valuesProducer) |
| throws IOException { |
| try (CompressedBinaryBlockWriter blockWriter = new CompressedBinaryBlockWriter()) { |
| BinaryDocValues values = valuesProducer.getBinary(field); |
| long start = data.getFilePointer(); |
| meta.writeLong(start); // dataOffset |
| int numDocsWithField = 0; |
| int minLength = Integer.MAX_VALUE; |
| int maxLength = 0; |
| for (int doc = values.nextDoc(); |
| doc != DocIdSetIterator.NO_MORE_DOCS; |
| doc = values.nextDoc()) { |
| numDocsWithField++; |
| BytesRef v = values.binaryValue(); |
| blockWriter.addDoc(doc, v); |
| int length = v.length; |
| minLength = Math.min(length, minLength); |
| maxLength = Math.max(length, maxLength); |
| } |
| blockWriter.flushData(); |
| |
| assert numDocsWithField <= maxDoc; |
| meta.writeLong(data.getFilePointer() - start); // dataLength |
| |
| if (numDocsWithField == 0) { |
| meta.writeLong(-2); // docsWithFieldOffset |
| meta.writeLong(0L); // docsWithFieldLength |
| meta.writeShort((short) -1); // jumpTableEntryCount |
| meta.writeByte((byte) -1); // denseRankPower |
| } else if (numDocsWithField == maxDoc) { |
| meta.writeLong(-1); // docsWithFieldOffset |
| meta.writeLong(0L); // docsWithFieldLength |
| meta.writeShort((short) -1); // jumpTableEntryCount |
| meta.writeByte((byte) -1); // denseRankPower |
| } else { |
| long offset = data.getFilePointer(); |
| meta.writeLong(offset); // docsWithFieldOffset |
| values = valuesProducer.getBinary(field); |
| final short jumpTableEntryCount = |
| IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); |
| meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength |
| meta.writeShort(jumpTableEntryCount); |
| meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); |
| } |
| |
| meta.writeInt(numDocsWithField); |
| meta.writeInt(minLength); |
| meta.writeInt(maxLength); |
| |
| blockWriter.writeMetaData(); |
| } |
| } |
| |
| @Override |
| public void addSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException { |
| meta.writeInt(field.number); |
| meta.writeByte(Lucene80DocValuesFormat.SORTED); |
| doAddSortedField(field, valuesProducer); |
| } |
| |
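  // A sorted field is stored as per-document ordinals packed with DirectWriter, plus a terms
  // dictionary (written by addTermsDict) that maps ordinals back to their byte values.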
| private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) |
| throws IOException { |
| SortedDocValues values = valuesProducer.getSorted(field); |
| int numDocsWithField = 0; |
| for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { |
| numDocsWithField++; |
| } |
| |
| if (numDocsWithField == 0) { |
| meta.writeLong(-2); // docsWithFieldOffset |
| meta.writeLong(0L); // docsWithFieldLength |
| meta.writeShort((short) -1); // jumpTableEntryCount |
| meta.writeByte((byte) -1); // denseRankPower |
| } else if (numDocsWithField == maxDoc) { |
| meta.writeLong(-1); // docsWithFieldOffset |
| meta.writeLong(0L); // docsWithFieldLength |
| meta.writeShort((short) -1); // jumpTableEntryCount |
| meta.writeByte((byte) -1); // denseRankPower |
| } else { |
| long offset = data.getFilePointer(); |
| meta.writeLong(offset); // docsWithFieldOffset |
| values = valuesProducer.getSorted(field); |
      final short jumpTableEntryCount =
          IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
      meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
      meta.writeShort(jumpTableEntryCount);
| meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); |
| } |
| |
| meta.writeInt(numDocsWithField); |
| if (values.getValueCount() <= 1) { |
| meta.writeByte((byte) 0); // bitsPerValue |
| meta.writeLong(0L); // ordsOffset |
| meta.writeLong(0L); // ordsLength |
| } else { |
| int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1); |
| meta.writeByte((byte) numberOfBitsPerOrd); // bitsPerValue |
| long start = data.getFilePointer(); |
| meta.writeLong(start); // ordsOffset |
| DirectWriter writer = DirectWriter.getInstance(data, numDocsWithField, numberOfBitsPerOrd); |
| values = valuesProducer.getSorted(field); |
| for (int doc = values.nextDoc(); |
| doc != DocIdSetIterator.NO_MORE_DOCS; |
| doc = values.nextDoc()) { |
| writer.add(values.ordValue()); |
| } |
| writer.finish(); |
| meta.writeLong(data.getFilePointer() - start); // ordsLength |
| } |
| |
| addTermsDict(DocValues.singleton(valuesProducer.getSorted(field))); |
| } |
| |
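  // The terms dictionary groups terms into blocks: the first term of each block is written in
  // full and its file offset recorded in a DirectMonotonic address table, while subsequent terms
  // only store the suffix that differs from the previous term. In BEST_COMPRESSION mode, once the
  // dictionary holds more than TERMS_DICT_BLOCK_COMPRESSION_THRESHOLD terms, each block is
  // additionally LZ4-compressed through termsDictBuffer.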
| private void addTermsDict(SortedSetDocValues values) throws IOException { |
| final long size = values.getValueCount(); |
| meta.writeVLong(size); |
| boolean compress = |
| Lucene80DocValuesFormat.Mode.BEST_COMPRESSION == mode |
| && values.getValueCount() |
| > Lucene80DocValuesFormat.TERMS_DICT_BLOCK_COMPRESSION_THRESHOLD; |
| int code, blockMask, shift; |
| if (compress) { |
| code = Lucene80DocValuesFormat.TERMS_DICT_BLOCK_LZ4_CODE; |
| blockMask = Lucene80DocValuesFormat.TERMS_DICT_BLOCK_LZ4_MASK; |
| shift = Lucene80DocValuesFormat.TERMS_DICT_BLOCK_LZ4_SHIFT; |
| } else { |
| code = shift = Lucene80DocValuesFormat.TERMS_DICT_BLOCK_SHIFT; |
| blockMask = Lucene80DocValuesFormat.TERMS_DICT_BLOCK_MASK; |
| } |
| |
| meta.writeInt(code); |
| meta.writeInt(DIRECT_MONOTONIC_BLOCK_SHIFT); |
| ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput(); |
| ByteBuffersIndexOutput addressOutput = |
| new ByteBuffersIndexOutput(addressBuffer, "temp", "temp"); |
| long numBlocks = (size + blockMask) >>> shift; |
| DirectMonotonicWriter writer = |
| DirectMonotonicWriter.getInstance( |
| meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); |
| |
| BytesRefBuilder previous = new BytesRefBuilder(); |
| long ord = 0; |
| long start = data.getFilePointer(); |
| int maxLength = 0, maxBlockLength = 0; |
| TermsEnum iterator = values.termsEnum(); |
| |
| FastCompressionHashTable ht = null; |
| ByteArrayDataOutput bufferedOutput = null; |
| if (compress) { |
| ht = new FastCompressionHashTable(); |
| bufferedOutput = new ByteArrayDataOutput(termsDictBuffer); |
| } |
| |
| for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { |
| if ((ord & blockMask) == 0) { |
| if (compress && bufferedOutput.getPosition() > 0) { |
| maxBlockLength = |
| Math.max(maxBlockLength, compressAndGetTermsDictBlockLength(bufferedOutput, ht)); |
| bufferedOutput.reset(termsDictBuffer); |
| } |
| |
| writer.add(data.getFilePointer() - start); |
| data.writeVInt(term.length); |
| data.writeBytes(term.bytes, term.offset, term.length); |
| } else { |
| final int prefixLength = StringHelper.bytesDifference(previous.get(), term); |
| final int suffixLength = term.length - prefixLength; |
| assert suffixLength > 0; // terms are unique |
| DataOutput blockOutput; |
| if (compress) { |
          // Will write at most suffixLength + 1 header byte + 2 vInts of up to 5 bytes each.
          // Grow the buffer if needed.
| bufferedOutput = maybeGrowBuffer(bufferedOutput, suffixLength + 11); |
| blockOutput = bufferedOutput; |
| } else { |
| blockOutput = data; |
| } |
| blockOutput.writeByte( |
| (byte) (Math.min(prefixLength, 15) | (Math.min(15, suffixLength - 1) << 4))); |
| if (prefixLength >= 15) { |
| blockOutput.writeVInt(prefixLength - 15); |
| } |
| if (suffixLength >= 16) { |
| blockOutput.writeVInt(suffixLength - 16); |
| } |
| blockOutput.writeBytes(term.bytes, term.offset + prefixLength, suffixLength); |
| } |
| maxLength = Math.max(maxLength, term.length); |
| previous.copyBytes(term); |
| ++ord; |
| } |
| // Compress and write out the last block |
| if (compress && bufferedOutput.getPosition() > 0) { |
| maxBlockLength = |
| Math.max(maxBlockLength, compressAndGetTermsDictBlockLength(bufferedOutput, ht)); |
| } |
| |
| writer.finish(); |
| meta.writeInt(maxLength); |
| if (compress) { |
| // Write one more int for storing max block length. For compressed terms dict only. |
| meta.writeInt(maxBlockLength); |
| } |
| meta.writeLong(start); |
| meta.writeLong(data.getFilePointer() - start); |
| start = data.getFilePointer(); |
| addressBuffer.copyTo(data); |
| meta.writeLong(start); |
| meta.writeLong(data.getFilePointer() - start); |
| |
| // Now write the reverse terms index |
| writeTermsIndex(values); |
| } |
| |
| private int compressAndGetTermsDictBlockLength( |
| ByteArrayDataOutput bufferedOutput, FastCompressionHashTable ht) throws IOException { |
| int uncompressedLength = bufferedOutput.getPosition(); |
| data.writeVInt(uncompressedLength); |
| long before = data.getFilePointer(); |
| LZ4.compress(termsDictBuffer, 0, uncompressedLength, data, ht); |
| int compressedLength = (int) (data.getFilePointer() - before); |
    // The block length is used to size the buffer for decompression. In the corner case where
    // the compressed length is bigger than the uncompressed length, return the larger of the two.
| return Math.max(uncompressedLength, compressedLength); |
| } |
| |
| private ByteArrayDataOutput maybeGrowBuffer(ByteArrayDataOutput bufferedOutput, int termLength) { |
| int pos = bufferedOutput.getPosition(), originalLength = termsDictBuffer.length; |
| if (pos + termLength >= originalLength - 1) { |
| termsDictBuffer = ArrayUtil.grow(termsDictBuffer, originalLength + termLength); |
| bufferedOutput = new ByteArrayDataOutput(termsDictBuffer, pos, termsDictBuffer.length - pos); |
| } |
| return bufferedOutput; |
| } |
| |
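  // The reverse index samples every 2^TERMS_DICT_REVERSE_INDEX_SHIFT-th term, storing only the
  // shortest prefix (sort key) that distinguishes it from the last term of the previous interval.
  // A reader can binary-search these prefixes to locate the right region of the terms dictionary
  // before scanning it linearly.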
| private void writeTermsIndex(SortedSetDocValues values) throws IOException { |
| final long size = values.getValueCount(); |
| meta.writeInt(Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT); |
| long start = data.getFilePointer(); |
| |
| long numBlocks = |
| 1L |
| + ((size + Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) |
| >>> Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT); |
| ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput(); |
| DirectMonotonicWriter writer; |
| try (ByteBuffersIndexOutput addressOutput = |
| new ByteBuffersIndexOutput(addressBuffer, "temp", "temp")) { |
| writer = |
| DirectMonotonicWriter.getInstance( |
| meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT); |
| TermsEnum iterator = values.termsEnum(); |
| BytesRefBuilder previous = new BytesRefBuilder(); |
| long offset = 0; |
| long ord = 0; |
| for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { |
| if ((ord & Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) == 0) { |
| writer.add(offset); |
| final int sortKeyLength; |
| if (ord == 0) { |
| // no previous term: no bytes to write |
| sortKeyLength = 0; |
| } else { |
| sortKeyLength = StringHelper.sortKeyLength(previous.get(), term); |
| } |
| offset += sortKeyLength; |
| data.writeBytes(term.bytes, term.offset, sortKeyLength); |
| } else if ((ord & Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) |
| == Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) { |
| previous.copyBytes(term); |
| } |
| ++ord; |
| } |
| writer.add(offset); |
| writer.finish(); |
| meta.writeLong(start); |
| meta.writeLong(data.getFilePointer() - start); |
| start = data.getFilePointer(); |
| addressBuffer.copyTo(data); |
| meta.writeLong(start); |
| meta.writeLong(data.getFilePointer() - start); |
| } |
| } |
| |
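  // Sorted numeric fields reuse writeValues for the flattened values. When some documents hold
  // more than one value, an extra DirectMonotonic table of numDocsWithField + 1 cumulative value
  // counts maps each document to the range of its values.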
| @Override |
| public void addSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer) |
| throws IOException { |
| meta.writeInt(field.number); |
| meta.writeByte(Lucene80DocValuesFormat.SORTED_NUMERIC); |
| |
| long[] stats = writeValues(field, valuesProducer); |
| int numDocsWithField = Math.toIntExact(stats[0]); |
| long numValues = stats[1]; |
| assert numValues >= numDocsWithField; |
| |
| meta.writeInt(numDocsWithField); |
| if (numValues > numDocsWithField) { |
| long start = data.getFilePointer(); |
| meta.writeLong(start); |
| meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT); |
| |
| final DirectMonotonicWriter addressesWriter = |
| DirectMonotonicWriter.getInstance( |
| meta, data, numDocsWithField + 1L, DIRECT_MONOTONIC_BLOCK_SHIFT); |
| long addr = 0; |
| addressesWriter.add(addr); |
| SortedNumericDocValues values = valuesProducer.getSortedNumeric(field); |
| for (int doc = values.nextDoc(); |
| doc != DocIdSetIterator.NO_MORE_DOCS; |
| doc = values.nextDoc()) { |
| addr += values.docValueCount(); |
| addressesWriter.add(addr); |
| } |
| addressesWriter.finish(); |
| meta.writeLong(data.getFilePointer() - start); |
| } |
| } |
| |
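  // If every document with the field has exactly one ordinal (numDocsWithField == numOrds), the
  // field is written in the single-valued SORTED layout behind a multiValued flag of 0.
  // Otherwise, per-document ordinal ranges are recorded through an addresses table, mirroring
  // sorted numerics.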
| @Override |
| public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) |
| throws IOException { |
| meta.writeInt(field.number); |
| meta.writeByte(Lucene80DocValuesFormat.SORTED_SET); |
| |
| SortedSetDocValues values = valuesProducer.getSortedSet(field); |
| int numDocsWithField = 0; |
| long numOrds = 0; |
| for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { |
| numDocsWithField++; |
| for (long ord = values.nextOrd(); |
| ord != SortedSetDocValues.NO_MORE_ORDS; |
| ord = values.nextOrd()) { |
| numOrds++; |
| } |
| } |
| |
| if (numDocsWithField == numOrds) { |
| meta.writeByte((byte) 0); // multiValued (0 = singleValued) |
| doAddSortedField( |
| field, |
| new EmptyDocValuesProducer() { |
| @Override |
| public SortedDocValues getSorted(FieldInfo field) throws IOException { |
| return SortedSetSelector.wrap( |
| valuesProducer.getSortedSet(field), SortedSetSelector.Type.MIN); |
| } |
| }); |
| return; |
| } |
| meta.writeByte((byte) 1); // multiValued (1 = multiValued) |
| |
| assert numDocsWithField != 0; |
| if (numDocsWithField == maxDoc) { |
| meta.writeLong(-1); // docsWithFieldOffset |
| meta.writeLong(0L); // docsWithFieldLength |
| meta.writeShort((short) -1); // jumpTableEntryCount |
| meta.writeByte((byte) -1); // denseRankPower |
| } else { |
| long offset = data.getFilePointer(); |
| meta.writeLong(offset); // docsWithFieldOffset |
| values = valuesProducer.getSortedSet(field); |
| final short jumpTableEntryCount = |
| IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER); |
| meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength |
| meta.writeShort(jumpTableEntryCount); |
| meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER); |
| } |
| |
| int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1); |
| meta.writeByte((byte) numberOfBitsPerOrd); // bitsPerValue |
| long start = data.getFilePointer(); |
| meta.writeLong(start); // ordsOffset |
| DirectWriter writer = DirectWriter.getInstance(data, numOrds, numberOfBitsPerOrd); |
| values = valuesProducer.getSortedSet(field); |
| for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { |
| for (long ord = values.nextOrd(); |
| ord != SortedSetDocValues.NO_MORE_ORDS; |
| ord = values.nextOrd()) { |
| writer.add(ord); |
| } |
| } |
| writer.finish(); |
| meta.writeLong(data.getFilePointer() - start); // ordsLength |
| |
| meta.writeInt(numDocsWithField); |
| start = data.getFilePointer(); |
| meta.writeLong(start); // addressesOffset |
| meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT); |
| |
| final DirectMonotonicWriter addressesWriter = |
| DirectMonotonicWriter.getInstance( |
| meta, data, numDocsWithField + 1, DIRECT_MONOTONIC_BLOCK_SHIFT); |
| long addr = 0; |
| addressesWriter.add(addr); |
| values = valuesProducer.getSortedSet(field); |
| for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) { |
| values.nextOrd(); |
| addr++; |
| while (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) { |
| addr++; |
| } |
| addressesWriter.add(addr); |
| } |
| addressesWriter.finish(); |
| meta.writeLong(data.getFilePointer() - start); // addressesLength |
| |
| addTermsDict(values); |
| } |
| } |