exec/java-exec/src/main/java/org/apache/drill/exec/record/RecordBatchSizer.java - drill - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.drill.exec.record;

 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
 import java.util.Collection;
 import java.util.Map;
 import org.apache.drill.common.map.CaseInsensitiveMap;
 import org.apache.drill.common.types.TypeProtos;
 import org.apache.drill.common.types.TypeProtos.DataMode;
 import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.exec.expr.TypeHelper;
 import org.apache.drill.exec.memory.AllocationManager.BufferLedger;
 import org.apache.drill.exec.memory.BaseAllocator;
 import org.apache.drill.exec.record.selection.SelectionVector2;
 import org.apache.drill.exec.vector.AllocationHelper;
 import org.apache.drill.exec.vector.NullableVector;
 import org.apache.drill.exec.vector.UInt1Vector;
 import org.apache.drill.exec.vector.UInt4Vector;
 import org.apache.drill.exec.vector.UntypedNullVector;
 import org.apache.drill.exec.vector.ValueVector;
 import org.apache.drill.exec.vector.complex.AbstractMapVector;
 import org.apache.drill.exec.vector.complex.AbstractRepeatedMapVector;
 import org.apache.drill.exec.vector.complex.RepeatedListVector;
 import org.apache.drill.exec.vector.complex.RepeatedValueVector;
 import org.apache.drill.exec.vector.VariableWidthVector;

 import com.google.common.collect.Sets;
 import org.apache.drill.exec.vector.complex.RepeatedVariableWidthVectorLike;
 import org.bouncycastle.util.Strings;

 import static org.apache.drill.exec.vector.AllocationHelper.STD_REPETITION_FACTOR;

 /**
  * Given a record batch or vector container, determines the actual memory
  * consumed by each column, the average row, and the entire record batch.
  */

 public class RecordBatchSizer {
   private static final int OFFSET_VECTOR_WIDTH = UInt4Vector.VALUE_WIDTH;
   private static final int BIT_VECTOR_WIDTH = UInt1Vector.VALUE_WIDTH;

   public static long multiplyByFactors(long size, double... factors)
   {
     double doubleSize = size;

     for (double factor: factors) {
       doubleSize *= factor;
     }

     return (long) doubleSize;
   }

   public static long multiplyByFactor(long size, double factor)
   {
     return (long) ((size) * factor);
   }

   /**
    * Column size information.
    */
   public class ColumnSize {
     public final String prefix;
     public final MaterializedField metadata;

     /**
      * This is the total size of just pure data for the column
      * for all entries.
      */

     private int totalDataSize;

     /**
      * This is the total size of data for the column + additional
      * metadata vector overhead we add for cardinality, variable length etc.
      * for all entries.
      */

     private int totalNetSize;

     /**
      * Number of occurrences of the value in the batch. This is trivial
      * for top-level scalars: it is the record count. For a top-level
      * repeated vector, this is the number of arrays, also the record
      * count. For a value nested inside a repeated map, it is the
      * total number of values across all maps, and may be less than,
      * greater than (but unlikely) same as the row count.
      */

     private final int valueCount;

     /**
      * Total number of elements for a repeated type, or same as
      * valueCount if this is a non-repeated type. That is, a batch
      * of 100 rows may have an array with 10 elements per row.
      * In this case, the element count is 1000.
      */

     private int elementCount;

     /**
      * The estimated, average number of elements per parent value.
      * Always 1 for a non-repeated type. For a repeated type,
      * this is the average entries per array (per repeated element).
      */

     private float cardinality;

     /**
      * Indicates if it is variable width column.
      * For map columns, this is true if any of the children is variable
      * width column.
      */

     private boolean isVariableWidth;

     /**
      * Indicates if cardinality is repeated(top level only).
      */

     private boolean isRepeated;

     /**
      * Indicates if cardinality is optional i.e. nullable(top level only).
      */
     private boolean isOptional;

     /**
      * Child columns if this is a map column.
      */
     private final Map<String, ColumnSize> children = CaseInsensitiveMap.newHashMap();

     /**
      * Returns true if there is an accurate std size. Otherwise it returns false.
      * @return True if there is an accurate std size. Otherwise it returns false.
      */
     public boolean hasStdDataSize() {
       return !isVariableWidth && !isRepeated;
     }

     /**
      * std pure data size per entry from Drill metadata, based on type.
      * Does not include metadata vector overhead we add for cardinality,
      * variable length etc.
      * For variable-width columns, we use 50 as std size for entry width.
      * For repeated column, we assume repetition of 10.
      */
     public int getStdDataSizePerEntry() {
       int stdDataSize;

       try {
         stdDataSize = TypeHelper.getSize(metadata.getType());

         // For variable width, typeHelper includes offset vector width. Adjust for that.
         if (isVariableWidth) {
           stdDataSize -= OFFSET_VECTOR_WIDTH;
         }

         if (isRepeated) {
           stdDataSize = stdDataSize * STD_REPETITION_FACTOR;
         }
       } catch (Exception e) {
         // For unsupported types, just set stdSize to 0.
         // Map, Union, List etc.
         stdDataSize = 0;
       }

       // Add sizes of children.
       for (ColumnSize columnSize : children.values()) {
         stdDataSize += columnSize.getStdDataSizePerEntry();
       }

       if (isRepeatedList()) {
         stdDataSize = stdDataSize * STD_REPETITION_FACTOR;
       }

       return stdDataSize;
     }

     /**
      * std net size per entry taking into account additional metadata vectors
      * we add on top for variable length, cardinality etc.
      * For variable-width columns, we use 50 as std data size for entry width.
      * For repeated column, we assume repetition of 10.
      */
     public int getStdNetSizePerEntry() {
       return getStdNetSizePerEntryCommon(metadata.getType(), isOptional, isRepeated, isRepeatedList(), children);
     }

     /**
      * This is the average actual per entry data size in bytes. Does not
      * include any overhead of metadata vectors.
      * For repeated columns, it is average for the repeated array, not
      * individual entry in the array.
      */
     public int getDataSizePerEntry() {
       return safeDivide(getTotalDataSize(), getValueCount());
     }

     /**
      * This is the average per entry size of just pure data plus
      * overhead of additional vectors we add on top like bits vector,
      * offset vector etc. This
      * size is larger than the actual data size since this size includes per-
      * column overhead for additional vectors we add for
      * cardinality, variable length etc.
      */
     public int getNetSizePerEntry() {
       return safeDivide(getTotalNetSize(), getValueCount());
     }

     /**
      * This returns actual entry size if rowCount > 0 or allocation size otherwise.
      * Use this for the cases when you might get empty batches with schema
      * and you still need to do memory calculations based on just schema.
      */
     public int getAllocSizePerEntry() {
       if (rowCount() != 0) {
         return getNetSizePerEntry();
       }

       int stdNetSize;
       try {
         stdNetSize = TypeHelper.getSize(metadata.getType());

         // TypeHelper estimates 50 bytes for variable length. That is pretty high number
         // to use as approximation for empty batches. Use 8 instead.
         switch (metadata.getType().getMinorType()) {
           case VARBINARY:
           case VARCHAR:
           case VAR16CHAR:
           case VARDECIMAL:
             stdNetSize = 4 + 8;
             break;
           default:
         }
       } catch (Exception e) {
         stdNetSize = 0;
       }

       if (isOptional) {
         stdNetSize += BIT_VECTOR_WIDTH;
       }

       if (isRepeated) {
         stdNetSize = (stdNetSize * STD_REPETITION_FACTOR) + OFFSET_VECTOR_WIDTH;
       }

       for (ColumnSize columnSize : children.values()) {
         stdNetSize += columnSize.getAllocSizePerEntry();
       }

       if (isRepeatedList()) {
         stdNetSize = (stdNetSize * STD_REPETITION_FACTOR) + OFFSET_VECTOR_WIDTH;
       }

       return stdNetSize;
     }

     /**
      * If there is an accurate std net size, that is returned. Otherwise the net size is returned.
      * @return If there is an accurate std net size, that is returned. Otherwise the net size is returned.
      */
     public int getStdNetOrNetSizePerEntry() {
       if (hasStdDataSize()) {
         return getStdNetSizePerEntry();
       } else {
         return getNetSizePerEntry();
       }
     }

     /**
      * This is the total data size for the column, including children for map
      * columns. Does not include any overhead of metadata vectors.
      */
     public int getTotalDataSize() {
       int dataSize = this.totalDataSize;
       for (ColumnSize columnSize : children.values()) {
         dataSize += columnSize.getTotalDataSize();
       }
       return dataSize;
     }

     /**
      * This is the total net size for the column, including children for map
      * columns. Includes overhead of metadata vectors.
      */
     public int getTotalNetSize() {
       return this.totalNetSize;
     }

     public int getValueCount() {
       return valueCount;
     }

     public int getElementCount() {
       return elementCount;
     }

     public float getCardinality() {
       return cardinality;
     }

     public boolean isVariableWidth() {
       return isVariableWidth;
     }

     public Map<String, ColumnSize> getChildren() {
       return children;
     }

     public boolean isComplex() {
       switch (metadata.getType().getMinorType()) {
         case LIST:
         case MAP:
         case DICT:
         case UNION:
           return true;
         default:
           return false;
       }
     }

     public boolean isRepeatedList() {
       if (metadata.getType().getMinorType() == MinorType.LIST &&
         metadata.getDataMode() == DataMode.REPEATED) {
         return true;
       }
       return false;
     }

     /**
      * This is the average per entry width, used for vector allocation.
      */
     private int getEntryWidthForAlloc() {
       int width = 0;
       if (isVariableWidth) {
         width = getAllocSizePerEntry() - OFFSET_VECTOR_WIDTH;

         // Subtract out the bits (is-set) vector width
         if (isOptional) {
           width -= BIT_VECTOR_WIDTH;
         }

         if (isRepeated && rowCount() == 0) {
           return (safeDivide(width, STD_REPETITION_FACTOR));
         }
       }

       return (safeDivide(width, getEntryCardinalityForAlloc()));
     }

     /**
      * This is the average per entry cardinality, used for vector allocation.
      */
     private float getEntryCardinalityForAlloc() {
       return getCardinality() == 0 ? (isRepeated ? STD_REPETITION_FACTOR : 1) :getCardinality();
     }

     public ColumnSize(ValueVector v, String prefix) {
       this.prefix = prefix;
       valueCount = v.getAccessor().getValueCount();
       metadata = v.getField();
       isVariableWidth = (v instanceof VariableWidthVector || v instanceof RepeatedVariableWidthVectorLike);
       elementCount = valueCount;
       cardinality = valueCount == 0 ? 0 : 1;
       totalNetSize = v.getPayloadByteCount(valueCount);

       // Special case. For union and list vectors, it is very complex
       // to figure out raw data size. Make it same as net size.
       if (metadata.getType().getMinorType() == MinorType.UNION ||
         (metadata.getType().getMinorType() == MinorType.LIST && v.getField().getDataMode() != DataMode.REPEATED)) {
         totalDataSize = totalNetSize;
       }

       switch(v.getField().getDataMode()) {
         case REPEATED:
           isRepeated = true;
           elementCount  = getElementCount(v);
           cardinality = valueCount == 0 ? 0 : elementCount * 1.0f / valueCount;

           // For complex types, there is nothing more to do for top columns.
           // Data size is calculated recursively for children later.
           if (isComplex()) {
             return;
           }

           // Calculate pure data size.
           if (isVariableWidth) {
             VariableWidthVector dataVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector());
             totalDataSize = dataVector.getCurrentSizeInBytes();
           } else {
             ValueVector dataVector = ((RepeatedValueVector) v).getDataVector();
             totalDataSize = dataVector.getPayloadByteCount(elementCount);
           }

           break;
         case OPTIONAL:
           isOptional = true;

           // For complex types, there is nothing more to do for top columns.
           // Data size is calculated recursively for children later.
           if (isComplex()) {
             return;
           }

           // Calculate pure data size.
           if (isVariableWidth) {
             VariableWidthVector variableWidthVector = ((VariableWidthVector) ((NullableVector) v).getValuesVector());
             totalDataSize = variableWidthVector.getCurrentSizeInBytes();
           } else {
             // Another special case.
             if (v instanceof UntypedNullVector) {
               return;
             }
             totalDataSize = ((NullableVector) v).getValuesVector().getPayloadByteCount(valueCount);
           }
           break;

         case REQUIRED:
           // For complex types, there is nothing more to do for top columns.
           // Data size is calculated recursively for children later.
           if (isComplex()) {
             return;
           }

           // Calculate pure data size.
           if (isVariableWidth) {
             totalDataSize = ((VariableWidthVector) v).getCurrentSizeInBytes();
           } else {
             totalDataSize = v.getPayloadByteCount(valueCount);
           }
           break;

         default:
             break;
       }
     }

     private int getElementCount(ValueVector v) {
       // Repeated vectors are special: they have an associated offset vector
       // that changes the value count of the contained vectors.
       UInt4Vector offsetVector = ((RepeatedValueVector) v).getOffsetVector();
       int childCount = valueCount == 0 ? 0 : offsetVector.getAccessor().get(valueCount);

       return childCount;
     }

     private void allocateMap(AbstractMapVector map, int recordCount) {
       if (map instanceof AbstractRepeatedMapVector) {
         ((AbstractRepeatedMapVector) map).allocateOffsetsNew(recordCount);
           recordCount *= Math.round(getEntryCardinalityForAlloc());
         }

       for (ValueVector vector : map) {
         children.get(vector.getField().getName()).allocateVector(vector, recordCount);
       }
     }

     private void allocateRepeatedList(RepeatedListVector vector, int recordCount) {
       vector.allocateOffsetsNew(recordCount);
       recordCount *= Math.round(getEntryCardinalityForAlloc());
       ColumnSize child = children.get(vector.getField().getName());
       if (vector.getDataVector() != null) {
         child.allocateVector(vector.getDataVector(), recordCount);
       }
     }

     public void allocateVector(ValueVector vector, int recordCount) {
       if (vector instanceof AbstractMapVector) {
         allocateMap((AbstractMapVector) vector, recordCount);
         return;
       }

       if (vector instanceof RepeatedListVector) {
         allocateRepeatedList((RepeatedListVector) vector, recordCount);
         return;
       }

       AllocationHelper.allocate(vector, recordCount, getEntryWidthForAlloc(), getEntryCardinalityForAlloc());
     }

     @Override
     public String toString() {
       StringBuilder buf = new StringBuilder()
           .append(prefix)
           .append(metadata.getName())
           .append("(type: ")
           .append(metadata.getType().getMode().name())
           .append(" ")
           .append(metadata.getType().getMinorType().name())
           .append(", count: ")
           .append(valueCount);
       if (metadata.getDataMode() == DataMode.REPEATED) {
         buf.append(", elements: ")
            .append(elementCount)
            .append(", per-array: ")
            .append(cardinality);
       }
       buf.append(", Per entry: std data size: ")
          .append(getStdDataSizePerEntry())
          .append(", std net size: ")
          .append(getStdNetSizePerEntry())
          .append(", actual data size: ")
          .append(getDataSizePerEntry())
          .append(", actual net size: ")
          .append(getNetSizePerEntry())
          .append("  Totals: data size: ")
          .append(getTotalDataSize())
          .append(", net size: ")
          .append(getTotalNetSize())
          .append(")");
       return buf.toString();
     }

     /**
      * Add a single vector initializer to a collection for the entire batch.
      * Uses the observed column size information to predict the size needed
      * when allocating a new vector for the same data. Adds a hint only for
      * variable-width or repeated types; no extra information is needed for
      * fixed width, non-repeated columns.
      *
      * @param initializer the vector initializer to hold the hints
      * for this column
      */

     public void buildVectorInitializer(VectorInitializer initializer) {
       int width = 0;
       switch(metadata.getType().getMinorType()) {
       case VAR16CHAR:
       case VARBINARY:
       case VARCHAR:

         // Subtract out the offset vector width
         width = getNetSizePerEntry() - OFFSET_VECTOR_WIDTH;

         // Subtract out the bits (is-set) vector width
         if (metadata.getDataMode() == DataMode.OPTIONAL) {
           width -= BIT_VECTOR_WIDTH;
         }
         break;
       default:
         break;
       }
       String name = prefix + metadata.getName();
       if (metadata.getDataMode() == DataMode.REPEATED) {
         if (width > 0) {
           // Estimated width is width of entire column. Divide
           // by element count to get per-element size.
           initializer.variableWidthArray(name, width / cardinality, cardinality);
         } else {
           initializer.fixedWidthArray(name, cardinality);
         }
       }
       else if (width > 0) {
         initializer.variableWidth(name, width);
       }

       for (ColumnSize columnSize : children.values()) {
         columnSize.buildVectorInitializer(initializer);
       }
     }

   }

    public static int getStdNetSizePerEntryCommon(TypeProtos.MajorType majorType, boolean isOptional, boolean isRepeated,
                                                  boolean isRepeatedList, Map<String, ColumnSize> children) {
     int stdNetSize;
     try {
       stdNetSize = TypeHelper.getSize(majorType);
     } catch (Exception e) {
       stdNetSize = 0;
     }

     if (isOptional) {
       stdNetSize += BIT_VECTOR_WIDTH;
     }

     if (isRepeated) {
       stdNetSize = (stdNetSize * STD_REPETITION_FACTOR) + OFFSET_VECTOR_WIDTH;
     }

     if (children != null) {
       for (ColumnSize columnSize : children.values()) {
         stdNetSize += columnSize.getStdNetSizePerEntry();
       }
     }

     if (isRepeatedList) {
       stdNetSize = (stdNetSize * STD_REPETITION_FACTOR) + OFFSET_VECTOR_WIDTH;
     }

     return stdNetSize;
   }

   private ColumnSize getComplexColumn(String path) {
     String[] segments = Strings.split(path, '.');
     Map<String, ColumnSize> map = columnSizes;
     return getComplexColumnImpl(segments, 0, map);
   }

   private ColumnSize getComplexColumnImpl(String[] segments, int level, Map<String, ColumnSize> map) {
     ColumnSize result = map.get(segments[level]);
     if (result == null || level == segments.length - 1) {
       return result;
     }
     map = result.getChildren();
     if (map == null) {
       return null;
     }
     return getComplexColumnImpl(segments, level + 1, map);
   }

   public ColumnSize getColumn(String name) {
     final RecordBatchSizer.ColumnSize columnSize =  columnSizes.get(name);
     if (columnSize != null) {
       return columnSize;
     } else {
       return getComplexColumn(name);
     }
   }

   // This keeps information for only top level columns. Information for nested
   // columns can be obtained from children of topColumns.
   private final Map<String, ColumnSize> columnSizes = new QuoteInsensitiveMap(CaseInsensitiveMap.newHashMap());

   /**
    * This field is used by the convenience method {@link #columnsList()}.
    */
   private final List<ColumnSize> columnSizesList = new ArrayList<>();

   /**
    * Number of records (rows) in the batch.
    */
   private final int rowCount;
   /**
    * Actual batch size summing all buffers used to store data
    * for the batch.
    */
   private long accountedMemorySize;
   /**
    * Actual row width computed by dividing total batch memory by the
    * record count.
    */
   private int grossRowWidth;
   /**
    * Actual row width computed by summing columns. Use this if the
    * vectors are partially full; prevents overestimating row width.
    */
   private int netRowWidth;
   private int netRowWidthCap50;

   /**
    * actual row size if input is not empty. Otherwise, standard size.
    */
   private int rowAllocWidth;
   private int stdRowWidth;

   public SelectionVector2 sv2 = null;
   private int sv2Size;

   private int avgDensity;

   private final Set<BufferLedger> ledgers = Sets.newIdentityHashSet();

   private long netBatchSize;

   /**
    *  Maximum width of a column; used for memory estimation in case of Varchars
    */

   public int maxSize;

   /**
    *  Count the nullable columns; used for memory estimation
    */

   public int nullableCount;


   public RecordBatchSizer(RecordBatch batch) {
     this(batch,
       (batch.getSchema() == null ? null : (batch.getSchema().getSelectionVectorMode() == BatchSchema.SelectionVectorMode.TWO_BYTE ?
         batch.getSelectionVector2() : null)));
   }
   /**
    * Create empirical metadata for a record batch given a vector accessible
    * (basically, an iterator over the vectors in the batch.)
    *
    * @param va iterator over the batch's vectors
    */

   public RecordBatchSizer(VectorAccessible va) {
     this(va, null);
   }

   /**
    * Create empirical metadata for a record batch given a vector accessible
    * (basically, an iterator over the vectors in the batch) along with a
    * selection vector for those records. The selection vector is used to
    * pad the estimated row width with the extra two bytes needed per record.
    * The selection vector memory is added to the total memory consumed by
    * this batch.
    *
    * @param va iterator over the batch's vectors
    * @param sv2 selection vector associated with this batch
    */

   public RecordBatchSizer(VectorAccessible va, SelectionVector2 sv2) {
     rowCount = va.getRecordCount();
     for (VectorWrapper<?> vw : va) {
       ColumnSize colSize = measureColumn(vw.getValueVector(), "");
       columnSizes.put(vw.getField().getName(), colSize);
       columnSizesList.add(colSize);
       stdRowWidth += colSize.getStdDataSizePerEntry();
       netBatchSize += colSize.getTotalNetSize();
       maxSize = Math.max(maxSize, colSize.getTotalDataSize());
       if (colSize.metadata.isNullable()) {
         nullableCount++;
       }
       netRowWidth += colSize.getNetSizePerEntry();
     }
     this.sv2 = sv2;
   }

   public void applySv2() {
     if (sv2 == null) {
       return;
     }

     sv2Size = BaseAllocator.nextPowerOfTwo(2 * rowCount);
     avgDensity = safeDivide(netBatchSize * 100L, getActualSize());
     accountedMemorySize += sv2Size;
   }

   /**
    *  Round up (if needed) to the next power of 2 (only up to 64)
    * @param arg Number to round up (must be < 64)
    * @return power of 2 result
    */
   private int roundUpToPowerOf2(int arg) {
     if ( arg <= 2 ) { return 2; }
     if ( arg <= 4 ) { return 4; }
     if ( arg <= 8 ) { return 8; }
     if ( arg <= 16 ) { return 16; }
     if ( arg <= 32 ) { return 32; }
     return 64;
   }

   private ColumnSize measureColumn(ValueVector v, String prefix) {
     ColumnSize colSize = new ColumnSize(v, prefix);
     switch (v.getField().getType().getMinorType()) {
       case MAP:
       case DICT:
         // Maps consume no size themselves. However, their contained
         // vectors do consume space, so visit columns recursively.
         expandMap(colSize, v, prefix + v.getField().getName() + ".");
         break;
       case LIST:
         // complex ListVector cannot be casted to RepeatedListVector.
         // do not expand the list if it is not repeated mode.
         if (v.getField().getDataMode() == DataMode.REPEATED) {
           expandList(colSize, (RepeatedListVector) v, prefix + v.getField().getName() + ".");
         }
         break;
       default:
         v.collectLedgers(ledgers);
     }

     netRowWidthCap50 += ! colSize.isVariableWidth ? colSize.getNetSizePerEntry() :
         8 /* offset vector */ + roundUpToPowerOf2(Math.min(colSize.getNetSizePerEntry(),50));
         // above change 8 to 4 after DRILL-5446 is fixed

     return colSize;
   }

   private void expandMap(ColumnSize colSize, ValueVector mapVector, String prefix) {
     for (ValueVector vector : mapVector) {
       colSize.children.put(vector.getField().getName(), measureColumn(vector, prefix));
     }

     // For a repeated map, we need the memory for the offset vector (only).
     // Map elements are recursively expanded above.
     if (mapVector.getField().getDataMode() == DataMode.REPEATED) {
       ((RepeatedValueVector) mapVector).getOffsetVector().collectLedgers(ledgers);
     }
   }

   private void expandList(ColumnSize colSize, RepeatedListVector vector, String prefix) {
     colSize.children.put(vector.getField().getName(), measureColumn(vector.getDataVector(), prefix));

     // Determine memory for the offset vector (only).
     vector.collectLedgers(ledgers);
   }

   public static int safeDivide(long num, long denom) {
     if (denom == 0) {
       return 0;
     }
     return (int) Math.ceil((double) num / denom);
   }

   public static int safeDivide(int num, int denom) {
     if (denom == 0) {
       return 0;
     }
     return (int) Math.ceil((double) num / denom);
   }

   public static int safeDivide(int num, float denom) {
     if (denom == 0) {
       return 0;
     }
     return (int) Math.ceil((double) num / denom);
   }

   public static int safeDivide(int num, double denom) {
     if (denom == 0) {
       return 0;
     }
     return (int) Math.ceil(num / denom);
   }

   public int rowCount() { return rowCount; }

   public int getStdRowWidth() {
     if (stdRowWidth != 0) {
       return stdRowWidth;
     }

     for (ColumnSize columnSize : columnSizes.values()) {
       stdRowWidth += columnSize.getStdDataSizePerEntry();
     }

     return stdRowWidth;
   }

   public int getRowAllocWidth() {
     if (rowAllocWidth != 0) {
       return rowAllocWidth;
     }

     for (ColumnSize columnSize : columnSizes.values()) {
       rowAllocWidth += columnSize.getAllocSizePerEntry();
     }

     return rowAllocWidth;
   }

   public long getActualSize() {
     if (accountedMemorySize != 0) {
       return accountedMemorySize;
     }

     for (BufferLedger ledger : ledgers) {
       accountedMemorySize += ledger.getAccountedSize();
     }

     if (sv2 != null) {
       sv2Size = sv2.getBuffer(false).capacity();
       accountedMemorySize += sv2Size;
     }

     return accountedMemorySize;
   }

   public int getGrossRowWidth() {
     if (grossRowWidth != 0) {
       return grossRowWidth;
     }

     grossRowWidth = safeDivide(getActualSize(), rowCount);

     return grossRowWidth;
   }

   public int getAvgDensity() {
     return safeDivide(netBatchSize * 100L, getActualSize());
   }


   public int getNetRowWidth() { return netRowWidth; }
   public Map<String, ColumnSize> columns() { return columnSizes; }

   /**
    * This is a convenience method to get the sizes of columns in the same order that the corresponding value vectors
    * are stored within a {@link org.apache.drill.exec.record.VectorAccessible}.
    * @return The sizes of columns in the same order that the corresponding value vectors are stored within a
    * {@link org.apache.drill.exec.record.VectorAccessible}.
    */
   public List<ColumnSize> columnsList() { return columnSizesList; }

   /**
    * Compute the "real" width of the row, taking into account each varchar column size
    * (historically capped at 50, and rounded up to power of 2 to match drill buf allocation)
    * and null marking columns.
    * @return "real" width of the row
    */
   public int getNetRowWidthCap50() { return netRowWidthCap50 + nullableCount; }
   public boolean hasSv2() { return sv2 != null; }
   public long getNetBatchSize() { return netBatchSize; }
   public int getMaxAvgColumnSize() { return safeDivide(maxSize, rowCount); }

   @Override
   public String toString() {
     StringBuilder buf = new StringBuilder();

     buf.append("Batch size: {");
     buf.append( "  Records: " );
     buf.append(rowCount);
     buf.append(", Total size: ");
     buf.append(accountedMemorySize);
     buf.append(", Data size: ");
     buf.append(netBatchSize);
     buf.append(", Gross row width: ");
     buf.append(grossRowWidth);
     buf.append(", Net row width: ");
     buf.append(netRowWidth);
     buf.append(", Density: ");
     buf.append(avgDensity);
     buf.append("% }\n");

     buf.append("Batch schema & sizes: {\n");
     for (ColumnSize colSize : columnSizes.values()) {
       buf.append("  ");
       buf.append(colSize.toString());
       buf.append(" }\n");
     }
     buf.append(" }\n");
     return buf.toString();
   }

   /**
    * The column size information gathered here represents empirically-derived
    * schema metadata. Use that metadata to create an instance of a class that
    * allocates memory for new vectors based on the observed size information.
    * The caller provides the row count; the size information here provides
    * column widths and the number of elements in each array.
    */

   public VectorInitializer buildVectorInitializer() {
     VectorInitializer initializer = new VectorInitializer();
     for (ColumnSize colSize : columnSizes.values()) {
       colSize.buildVectorInitializer(initializer);
     }
     return initializer;
   }

   public void allocateVectors(VectorContainer container, int recordCount) {
     for (VectorWrapper<?> w : container) {
       ColumnSize colSize = columnSizes.get(w.getField().getName());
       colSize.allocateVector(w.getValueVector(), recordCount);
     }
   }

   /**
    * A map that can handle quoted and unquoted column names; ideally this logic temporary and
    * should be removed as soon as all readers standardize handling of missing columns. Quoted columns
    * have been added in DRILL-4264.
    */
   private static final class QuoteInsensitiveMap implements Map<String, ColumnSize> {
     /** Original Map */
     private final Map<String, ColumnSize> originalMap;

     private QuoteInsensitiveMap(Map<String, ColumnSize> originalMap) {
       this.originalMap = originalMap;
     }

     @Override
     public int size() {
       return originalMap.size();
     }

     @Override
     public boolean isEmpty() {
       return originalMap.isEmpty();
     }

     @Override
     public boolean containsKey(Object key) {
       return originalMap.containsKey(key);
     }

     @Override
     public boolean containsValue(Object value) {
       return originalMap.containsValue(value);
     }

     @Override
     public ColumnSize get(Object key) {
       ColumnSize value = originalMap.get(key);

       if (value == null) {
         value = originalMap.get(quoteString(key));
       }
       return value;
     }

     @Override
     public ColumnSize put(String key, ColumnSize value) {
       return originalMap.put(key, value);
     }

     @Override
     public ColumnSize remove(Object key) {
       ColumnSize value = originalMap.remove(key);

       if (value == null) {
         value = originalMap.remove(quoteString(key));
       }
       return value;
     }

     @Override
     public void putAll(Map<? extends String, ? extends ColumnSize> m) {
       originalMap.putAll(m);
     }

     @Override
     public void clear() {
       originalMap.clear();
     }

     @Override
     public Set<String> keySet() {
       return originalMap.keySet();
     }

     @Override
     public Collection<ColumnSize> values() {
       return originalMap.values();
     }

     @Override
     public Set<Entry<String, ColumnSize>> entrySet() {
       return originalMap.entrySet();
     }

     private String quoteString(Object key) {
       if (key instanceof String) {
         return "`" + key + '`';
       }
       throw new IllegalArgumentException();
     }

   }
 }