Simple rename of unreleased quantization parameter (#12811)

commit: a26a80c89c2e625607b8d2ddb74230b57076d1da [log] [tgz]
author: Benjamin Trent <ben.w.trent@gmail.com> Wed Nov 15 15:00:12 2023 -0500
committer: GitHub <noreply@github.com> Wed Nov 15 15:00:12 2023 -0500
tree: 36c8d2f77087873464268ea968034be060c55a72
parent: 05a336ea69efb5e8c9f99d0424811154834ec665 [diff]
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java
index 23d607a..1b45c7f 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswScalarQuantizedVectorsFormat.java

@@ -84,8 +84,8 @@
    * @param beamWidth the size of the queue maintained during graph construction.
    * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
    *     larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
-   * @param configuredQuantile the quantile for scalar quantizing the vectors, when `null` it is
-   *     calculated based on the vector field dimensions.
+   * @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
+   *     it is calculated based on the vector field dimensions.
    * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
    *     generated by this format to do the merge
    */
@@ -93,7 +93,7 @@
       int maxConn,
       int beamWidth,
       int numMergeWorkers,
-      Float configuredQuantile,
+      Float confidenceInterval,
       ExecutorService mergeExec) {
     super("Lucene99HnswScalarQuantizedVectorsFormat");
     if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
@@ -122,7 +122,7 @@
     }
     this.numMergeWorkers = numMergeWorkers;
     this.mergeExec = mergeExec;
-    this.flatVectorsFormat = new Lucene99ScalarQuantizedVectorsFormat(configuredQuantile);
+    this.flatVectorsFormat = new Lucene99ScalarQuantizedVectorsFormat(confidenceInterval);
   }
 
   @Override

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java
index f6550a2..27e6ef9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsFormat.java

@@ -43,17 +43,17 @@
 
   private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat();
 
-  /** The minimum quantile */
-  private static final float MINIMUM_QUANTILE = 0.9f;
+  /** The minimum confidence interval */
+  private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f;
 
-  /** The maximum quantile */
-  private static final float MAXIMUM_QUANTILE = 1f;
+  /** The maximum confidence interval */
+  private static final float MAXIMUM_CONFIDENCE_INTERVAL = 1f;
 
   /**
-   * Controls the quantile used to scalar quantize the vectors the default quantile is calculated as
-   * `1-1/(vector_dimensions + 1)`
+   * Controls the confidence interval used to scalar quantize the vectors. When `null`, the default
+   * value is calculated as `1-1/(vector_dimensions + 1)`.
    */
-  final Float quantile;
+  final Float confidenceInterval;
 
   /** Constructs a format using default graph construction parameters */
   public Lucene99ScalarQuantizedVectorsFormat() {
@@ -63,24 +63,26 @@
   /**
    * Constructs a format using the given graph construction parameters.
    *
-   * @param quantile the quantile for scalar quantizing the vectors, when `null` it is calculated
-   *     based on the vector field dimensions.
+   * @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
+   *     it is calculated based on the vector field dimensions.
    */
-  public Lucene99ScalarQuantizedVectorsFormat(Float quantile) {
-    if (quantile != null && (quantile < MINIMUM_QUANTILE || quantile > MAXIMUM_QUANTILE)) {
+  public Lucene99ScalarQuantizedVectorsFormat(Float confidenceInterval) {
+    if (confidenceInterval != null
+        && (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL
+            || confidenceInterval > MAXIMUM_CONFIDENCE_INTERVAL)) {
       throw new IllegalArgumentException(
-          "quantile must be between "
-              + MINIMUM_QUANTILE
+          "confidenceInterval must be between "
+              + MINIMUM_CONFIDENCE_INTERVAL
               + " and "
-              + MAXIMUM_QUANTILE
-              + "; quantile="
-              + quantile);
+              + MAXIMUM_CONFIDENCE_INTERVAL
+              + "; confidenceInterval="
+              + confidenceInterval);
     }
-    this.quantile = quantile;
+    this.confidenceInterval = confidenceInterval;
   }
 
-  static float calculateDefaultQuantile(int vectorDimension) {
-    return Math.max(MINIMUM_QUANTILE, 1f - (1f / (vectorDimension + 1)));
+  static float calculateDefaultConfidenceInterval(int vectorDimension) {
+    return Math.max(MINIMUM_CONFIDENCE_INTERVAL, 1f - (1f / (vectorDimension + 1)));
   }
 
   @Override
@@ -88,8 +90,8 @@
     return NAME
         + "(name="
         + NAME
-        + ", quantile="
-        + quantile
+        + ", confidenceInterval="
+        + confidenceInterval
         + ", rawVectorFormat="
         + rawVectorFormat
         + ")";
@@ -98,7 +100,7 @@
   @Override
   public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
     return new Lucene99ScalarQuantizedVectorsWriter(
-        state, quantile, rawVectorFormat.fieldsWriter(state));
+        state, confidenceInterval, rawVectorFormat.fieldsWriter(state));
   }
 
   @Override

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java
index aaf706c..e9c488a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsReader.java

@@ -303,10 +303,10 @@
       dimension = input.readVInt();
       size = input.readInt();
       if (size > 0) {
-        float configuredQuantile = Float.intBitsToFloat(input.readInt());
+        float confidenceInterval = Float.intBitsToFloat(input.readInt());
         float minQuantile = Float.intBitsToFloat(input.readInt());
         float maxQuantile = Float.intBitsToFloat(input.readInt());
-        scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, configuredQuantile);
+        scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, confidenceInterval);
       } else {
         scalarQuantizer = null;
       }

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
index e74217b..760e56d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99ScalarQuantizedVectorsWriter.java

@@ -19,7 +19,7 @@
 
 import static org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
 import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.QUANTIZED_VECTOR_COMPONENT;
-import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.calculateDefaultQuantile;
+import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval;
 import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
 import static org.apache.lucene.util.RamUsageEstimator.shallowSizeOfInstance;
 
@@ -91,14 +91,14 @@
 
   private final List<FieldWriter> fields = new ArrayList<>();
   private final IndexOutput meta, quantizedVectorData;
-  private final Float quantile;
+  private final Float confidenceInterval;
   private final FlatVectorsWriter rawVectorDelegate;
   private boolean finished;
 
   Lucene99ScalarQuantizedVectorsWriter(
-      SegmentWriteState state, Float quantile, FlatVectorsWriter rawVectorDelegate)
+      SegmentWriteState state, Float confidenceInterval, FlatVectorsWriter rawVectorDelegate)
       throws IOException {
-    this.quantile = quantile;
+    this.confidenceInterval = confidenceInterval;
     segmentWriteState = state;
     String metaFileName =
         IndexFileNames.segmentFileName(
@@ -142,12 +142,12 @@
   public FlatFieldVectorsWriter<?> addField(
       FieldInfo fieldInfo, KnnFieldVectorsWriter<?> indexWriter) throws IOException {
     if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32)) {
-      float quantile =
-          this.quantile == null
-              ? calculateDefaultQuantile(fieldInfo.getVectorDimension())
-              : this.quantile;
+      float confidenceInterval =
+          this.confidenceInterval == null
+              ? calculateDefaultConfidenceInterval(fieldInfo.getVectorDimension())
+              : this.confidenceInterval;
       FieldWriter quantizedWriter =
-          new FieldWriter(quantile, fieldInfo, segmentWriteState.infoStream, indexWriter);
+          new FieldWriter(confidenceInterval, fieldInfo, segmentWriteState.infoStream, indexWriter);
       fields.add(quantizedWriter);
       indexWriter = quantizedWriter;
     }
@@ -169,16 +169,16 @@
       DocsWithFieldSet docsWithField =
           writeQuantizedVectorData(quantizedVectorData, byteVectorValues);
       long vectorDataLength = quantizedVectorData.getFilePointer() - vectorDataOffset;
-      float quantile =
-          this.quantile == null
-              ? calculateDefaultQuantile(fieldInfo.getVectorDimension())
-              : this.quantile;
+      float confidenceInterval =
+          this.confidenceInterval == null
+              ? calculateDefaultConfidenceInterval(fieldInfo.getVectorDimension())
+              : this.confidenceInterval;
       writeMeta(
           fieldInfo,
           segmentWriteState.segmentInfo.maxDoc(),
           vectorDataOffset,
           vectorDataLength,
-          quantile,
+          confidenceInterval,
           mergedQuantizationState.getLowerQuantile(),
           mergedQuantizationState.getUpperQuantile(),
           docsWithField);
@@ -251,7 +251,7 @@
         maxDoc,
         vectorDataOffset,
         vectorDataLength,
-        quantile,
+        confidenceInterval,
         fieldData.minQuantile,
         fieldData.maxQuantile,
         fieldData.docsWithField);
@@ -262,7 +262,7 @@
       int maxDoc,
       long vectorDataOffset,
       long vectorDataLength,
-      Float configuredQuantizationQuantile,
+      Float confidenceInterval,
       Float lowerQuantile,
       Float upperQuantile,
       DocsWithFieldSet docsWithField)
@@ -279,9 +279,9 @@
       assert Float.isFinite(lowerQuantile) && Float.isFinite(upperQuantile);
       meta.writeInt(
           Float.floatToIntBits(
-              configuredQuantizationQuantile != null
-                  ? configuredQuantizationQuantile
-                  : calculateDefaultQuantile(field.getVectorDimension())));
+              confidenceInterval != null
+                  ? confidenceInterval
+                  : calculateDefaultConfidenceInterval(field.getVectorDimension())));
       meta.writeInt(Float.floatToIntBits(lowerQuantile));
       meta.writeInt(Float.floatToIntBits(upperQuantile));
     }
@@ -344,7 +344,7 @@
         maxDoc,
         vectorDataOffset,
         quantizedVectorLength,
-        quantile,
+        confidenceInterval,
         fieldData.minQuantile,
         fieldData.maxQuantile,
         newDocsWithField);
@@ -374,11 +374,11 @@
   private ScalarQuantizer mergeQuantiles(FieldInfo fieldInfo, MergeState mergeState)
       throws IOException {
     assert fieldInfo.getVectorEncoding() == VectorEncoding.FLOAT32;
-    float quantile =
-        this.quantile == null
-            ? calculateDefaultQuantile(fieldInfo.getVectorDimension())
-            : this.quantile;
-    return mergeAndRecalculateQuantiles(mergeState, fieldInfo, quantile);
+    float confidenceInterval =
+        this.confidenceInterval == null
+            ? calculateDefaultConfidenceInterval(fieldInfo.getVectorDimension())
+            : this.confidenceInterval;
+    return mergeAndRecalculateQuantiles(mergeState, fieldInfo, confidenceInterval);
   }
 
   private ScalarQuantizedCloseableRandomVectorScorerSupplier mergeOneFieldToIndex(
@@ -408,16 +408,16 @@
           quantizationDataInput, quantizationDataInput.length() - CodecUtil.footerLength());
       long vectorDataLength = quantizedVectorData.getFilePointer() - vectorDataOffset;
       CodecUtil.retrieveChecksum(quantizationDataInput);
-      float quantile =
-          this.quantile == null
-              ? calculateDefaultQuantile(fieldInfo.getVectorDimension())
-              : this.quantile;
+      float confidenceInterval =
+          this.confidenceInterval == null
+              ? calculateDefaultConfidenceInterval(fieldInfo.getVectorDimension())
+              : this.confidenceInterval;
       writeMeta(
           fieldInfo,
           segmentWriteState.segmentInfo.maxDoc(),
           vectorDataOffset,
           vectorDataLength,
-          quantile,
+          confidenceInterval,
           mergedQuantizationState.getLowerQuantile(),
           mergedQuantizationState.getUpperQuantile(),
           docsWithField);
@@ -446,7 +446,9 @@
   }
 
   static ScalarQuantizer mergeQuantiles(
-      List<ScalarQuantizer> quantizationStates, List<Integer> segmentSizes, float quantile) {
+      List<ScalarQuantizer> quantizationStates,
+      List<Integer> segmentSizes,
+      float confidenceInterval) {
     assert quantizationStates.size() == segmentSizes.size();
     if (quantizationStates.isEmpty()) {
       return null;
@@ -464,7 +466,7 @@
     }
     lowerQuantile /= totalCount;
     upperQuantile /= totalCount;
-    return new ScalarQuantizer(lowerQuantile, upperQuantile, quantile);
+    return new ScalarQuantizer(lowerQuantile, upperQuantile, confidenceInterval);
   }
 
   /**
@@ -521,7 +523,7 @@
   }
 
   static ScalarQuantizer mergeAndRecalculateQuantiles(
-      MergeState mergeState, FieldInfo fieldInfo, float quantile) throws IOException {
+      MergeState mergeState, FieldInfo fieldInfo, float confidenceInterval) throws IOException {
     List<ScalarQuantizer> quantizationStates = new ArrayList<>(mergeState.liveDocs.length);
     List<Integer> segmentSizes = new ArrayList<>(mergeState.liveDocs.length);
     for (int i = 0; i < mergeState.liveDocs.length; i++) {
@@ -536,7 +538,8 @@
         segmentSizes.add(fvv.size());
       }
     }
-    ScalarQuantizer mergedQuantiles = mergeQuantiles(quantizationStates, segmentSizes, quantile);
+    ScalarQuantizer mergedQuantiles =
+        mergeQuantiles(quantizationStates, segmentSizes, confidenceInterval);
     // Segments no providing quantization state indicates that their quantiles were never
     // calculated.
     // To be safe, we should always recalculate given a sample set over all the float vectors in the
@@ -545,7 +548,7 @@
     if (mergedQuantiles == null || shouldRecomputeQuantiles(mergedQuantiles, quantizationStates)) {
       FloatVectorValues vectorValues =
           KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
-      mergedQuantiles = ScalarQuantizer.fromVectors(vectorValues, quantile);
+      mergedQuantiles = ScalarQuantizer.fromVectors(vectorValues, confidenceInterval);
     }
     return mergedQuantiles;
   }
@@ -599,7 +602,7 @@
     private static final long SHALLOW_SIZE = shallowSizeOfInstance(FieldWriter.class);
     private final List<float[]> floatVectors;
     private final FieldInfo fieldInfo;
-    private final float quantile;
+    private final float confidenceInterval;
     private final InfoStream infoStream;
     private final boolean normalize;
     private float minQuantile = Float.POSITIVE_INFINITY;
@@ -609,12 +612,12 @@
 
     @SuppressWarnings("unchecked")
     FieldWriter(
-        float quantile,
+        float confidenceInterval,
         FieldInfo fieldInfo,
         InfoStream infoStream,
         KnnFieldVectorsWriter<?> indexWriter) {
       super((KnnFieldVectorsWriter<float[]>) indexWriter);
-      this.quantile = quantile;
+      this.confidenceInterval = confidenceInterval;
       this.fieldInfo = fieldInfo;
       this.normalize = fieldInfo.getVectorSimilarityFunction() == VectorSimilarityFunction.COSINE;
       this.floatVectors = new ArrayList<>();
@@ -635,15 +638,15 @@
               new FloatVectorWrapper(
                   floatVectors,
                   fieldInfo.getVectorSimilarityFunction() == VectorSimilarityFunction.COSINE),
-              quantile);
+              confidenceInterval);
       minQuantile = quantizer.getLowerQuantile();
       maxQuantile = quantizer.getUpperQuantile();
       if (infoStream.isEnabled(QUANTIZED_VECTOR_COMPONENT)) {
         infoStream.message(
             QUANTIZED_VECTOR_COMPONENT,
             "quantized field="
-                + " quantile="
-                + quantile
+                + " confidenceInterval="
+                + confidenceInterval
                 + " minQuantile="
                 + minQuantile
                 + " maxQuantile="
@@ -654,7 +657,7 @@
 
     ScalarQuantizer createQuantizer() {
       assert finished;
-      return new ScalarQuantizer(minQuantile, maxQuantile, quantile);
+      return new ScalarQuantizer(minQuantile, maxQuantile, confidenceInterval);
     }
 
     @Override

diff --git a/lucene/core/src/java/org/apache/lucene/util/ScalarQuantizer.java b/lucene/core/src/java/org/apache/lucene/util/ScalarQuantizer.java
index d5c5646..41ec4fe 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ScalarQuantizer.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ScalarQuantizer.java

@@ -28,15 +28,15 @@
 /**
  * Will scalar quantize float vectors into `int8` byte values. This is a lossy transformation.
  * Scalar quantization works by first calculating the quantiles of the float vector values. The
- * quantiles are calculated using the configured quantile/confidence interval. The [minQuantile,
- * maxQuantile] are then used to scale the values into the range [0, 127] and bucketed into the
- * nearest byte values.
+ * quantiles are calculated using the configured confidence interval. The [minQuantile, maxQuantile]
+ * are then used to scale the values into the range [0, 127] and bucketed into the nearest byte
+ * values.
  *
  * <h2>How Scalar Quantization Works</h2>
  *
- * <p>The basic mathematical equations behind this are fairly straight forward. Given a float vector
- * `v` and a quantile `q` we can calculate the quantiles of the vector values [minQuantile,
- * maxQuantile].
+ * <p>The basic mathematical equations behind this are fairly straight forward and based on min/max
+ * normalization. Given a float vector `v` and a confidenceInterval `q` we can calculate the
+ * quantiles of the vector values [minQuantile, maxQuantile].
  *
  * <pre class="prettyprint">
  *   byte = (float - minQuantile) * 127/(maxQuantile - minQuantile)
@@ -69,21 +69,20 @@
 
   private final float alpha;
   private final float scale;
-  private final float minQuantile, maxQuantile, configuredQuantile;
+  private final float minQuantile, maxQuantile, confidenceInterval;
 
   /**
    * @param minQuantile the lower quantile of the distribution
    * @param maxQuantile the upper quantile of the distribution
-   * @param configuredQuantile The configured quantile/confidence interval used to calculate the
-   *     quantiles.
+   * @param confidenceInterval The configured confidence interval used to calculate the quantiles.
    */
-  public ScalarQuantizer(float minQuantile, float maxQuantile, float configuredQuantile) {
+  public ScalarQuantizer(float minQuantile, float maxQuantile, float confidenceInterval) {
     assert maxQuantile >= minQuantile;
     this.minQuantile = minQuantile;
     this.maxQuantile = maxQuantile;
     this.scale = 127f / (maxQuantile - minQuantile);
     this.alpha = (maxQuantile - minQuantile) / 127f;
-    this.configuredQuantile = configuredQuantile;
+    this.confidenceInterval = confidenceInterval;
   }
 
   /**
@@ -171,8 +170,8 @@
     return maxQuantile;
   }
 
-  public float getConfiguredQuantile() {
-    return configuredQuantile;
+  public float getConfidenceInterval() {
+    return confidenceInterval;
   }
 
   public float getConstantMultiplier() {
@@ -186,8 +185,8 @@
         + minQuantile
         + ", maxQuantile="
         + maxQuantile
-        + ", configuredQuantile="
-        + configuredQuantile
+        + ", confidenceInterval="
+        + confidenceInterval
         + '}';
   }
 
@@ -201,17 +200,17 @@
    * #SCALAR_QUANTIZATION_SAMPLE_SIZE} will be read and the quantiles calculated.
    *
    * @param floatVectorValues the float vector values from which to calculate the quantiles
-   * @param quantile the quantile/confidence interval used to calculate the quantiles
+   * @param confidenceInterval the confidence interval used to calculate the quantiles
    * @return A new {@link ScalarQuantizer} instance
    * @throws IOException if there is an error reading the float vector values
    */
-  public static ScalarQuantizer fromVectors(FloatVectorValues floatVectorValues, float quantile)
-      throws IOException {
-    assert 0.9f <= quantile && quantile <= 1f;
+  public static ScalarQuantizer fromVectors(
+      FloatVectorValues floatVectorValues, float confidenceInterval) throws IOException {
+    assert 0.9f <= confidenceInterval && confidenceInterval <= 1f;
     if (floatVectorValues.size() == 0) {
-      return new ScalarQuantizer(0f, 0f, quantile);
+      return new ScalarQuantizer(0f, 0f, confidenceInterval);
     }
-    if (quantile == 1f) {
+    if (confidenceInterval == 1f) {
       float min = Float.POSITIVE_INFINITY;
       float max = Float.NEGATIVE_INFINITY;
       while (floatVectorValues.nextDoc() != NO_MORE_DOCS) {
@@ -220,7 +219,7 @@
           max = Math.max(max, v);
         }
       }
-      return new ScalarQuantizer(min, max, quantile);
+      return new ScalarQuantizer(min, max, confidenceInterval);
     }
     int dim = floatVectorValues.dimension();
     if (floatVectorValues.size() < SCALAR_QUANTIZATION_SAMPLE_SIZE) {
@@ -231,8 +230,8 @@
         System.arraycopy(floatVector, 0, values, copyOffset, floatVector.length);
         copyOffset += dim;
       }
-      float[] upperAndLower = getUpperAndLowerQuantile(values, quantile);
-      return new ScalarQuantizer(upperAndLower[0], upperAndLower[1], quantile);
+      float[] upperAndLower = getUpperAndLowerQuantile(values, confidenceInterval);
+      return new ScalarQuantizer(upperAndLower[0], upperAndLower[1], confidenceInterval);
     }
     int numFloatVecs = floatVectorValues.size();
     // Reservoir sample the vector ordinals we want to read
@@ -258,22 +257,23 @@
       System.arraycopy(floatVector, 0, values, copyOffset, floatVector.length);
       copyOffset += dim;
     }
-    float[] upperAndLower = getUpperAndLowerQuantile(values, quantile);
-    return new ScalarQuantizer(upperAndLower[0], upperAndLower[1], quantile);
+    float[] upperAndLower = getUpperAndLowerQuantile(values, confidenceInterval);
+    return new ScalarQuantizer(upperAndLower[0], upperAndLower[1], confidenceInterval);
   }
 
   /**
    * Takes an array of floats, sorted or not, and returns a minimum and maximum value. These values
-   * are such that they reside on the `(1 - quantile)/2` and `quantile/2` percentiles. Example:
-   * providing floats `[0..100]` and asking for `90` quantiles will return `5` and `95`.
+   * are such that they reside on the `(1 - confidenceInterval)/2` and
+   * `(1 + confidenceInterval)/2` percentiles. Example: providing floats `[0..100]` and asking for
+   * a confidence interval of `0.9` will return `5` and `95`.
    *
    * @param arr array of floats
-   * @param quantileFloat the configured quantile
+   * @param confidenceInterval the configured confidence interval
    * @return lower and upper quantile values
    */
-  static float[] getUpperAndLowerQuantile(float[] arr, float quantileFloat) {
-    assert 0.9f <= quantileFloat && quantileFloat <= 1f;
-    int selectorIndex = (int) (arr.length * (1f - quantileFloat) / 2f + 0.5f);
+  static float[] getUpperAndLowerQuantile(float[] arr, float confidenceInterval) {
+    assert 0.9f <= confidenceInterval && confidenceInterval <= 1f;
+    int selectorIndex = (int) (arr.length * (1f - confidenceInterval) / 2f + 0.5f);
     if (selectorIndex > 0) {
       Selector selector = new FloatSelector(arr);
       selector.select(0, arr.length, arr.length - selectorIndex);

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java
index f8e1fc1..2c866bd 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswQuantizedVectorsFormat.java

@@ -37,6 +37,7 @@
 import org.apache.lucene.index.VectorSimilarityFunction;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
+import org.apache.lucene.util.SameThreadExecutorService;
 import org.apache.lucene.util.ScalarQuantizer;
 import org.apache.lucene.util.VectorUtil;
 
@@ -64,11 +65,12 @@
     for (int i = 0; i < numVectors; i++) {
       vectors.add(randomVector(dim));
     }
-    float quantile = Lucene99ScalarQuantizedVectorsFormat.calculateDefaultQuantile(dim);
+    float confidenceInterval =
+        Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval(dim);
     ScalarQuantizer scalarQuantizer =
         ScalarQuantizer.fromVectors(
             new Lucene99ScalarQuantizedVectorsWriter.FloatVectorWrapper(vectors, normalize),
-            quantile);
+            confidenceInterval);
     float[] expectedCorrections = new float[numVectors];
     byte[][] expectedVectors = new byte[numVectors][];
     for (int i = 0; i < numVectors; i++) {
@@ -148,7 +150,38 @@
           }
         };
     String expectedString =
-        "Lucene99HnswScalarQuantizedVectorsFormat(name=Lucene99HnswScalarQuantizedVectorsFormat, maxConn=10, beamWidth=20, flatVectorFormat=Lucene99ScalarQuantizedVectorsFormat(name=Lucene99ScalarQuantizedVectorsFormat, quantile=0.9, rawVectorFormat=Lucene99FlatVectorsFormat()))";
+        "Lucene99HnswScalarQuantizedVectorsFormat(name=Lucene99HnswScalarQuantizedVectorsFormat, maxConn=10, beamWidth=20, flatVectorFormat=Lucene99ScalarQuantizedVectorsFormat(name=Lucene99ScalarQuantizedVectorsFormat, confidenceInterval=0.9, rawVectorFormat=Lucene99FlatVectorsFormat()))";
     assertEquals(expectedString, customCodec.knnVectorsFormat().toString());
   }
+
+  public void testLimits() {
+    expectThrows(
+        IllegalArgumentException.class, () -> new Lucene99HnswScalarQuantizedVectorsFormat(-1, 20));
+    expectThrows(
+        IllegalArgumentException.class, () -> new Lucene99HnswScalarQuantizedVectorsFormat(0, 20));
+    expectThrows(
+        IllegalArgumentException.class, () -> new Lucene99HnswScalarQuantizedVectorsFormat(20, 0));
+    expectThrows(
+        IllegalArgumentException.class, () -> new Lucene99HnswScalarQuantizedVectorsFormat(20, -1));
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> new Lucene99HnswScalarQuantizedVectorsFormat(512 + 1, 20));
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> new Lucene99HnswScalarQuantizedVectorsFormat(20, 3201));
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> new Lucene99HnswScalarQuantizedVectorsFormat(20, 100, 0, 1.1f, null));
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> new Lucene99HnswScalarQuantizedVectorsFormat(20, 100, 0, 0.8f, null));
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> new Lucene99HnswScalarQuantizedVectorsFormat(20, 100, 100, null, null));
+    expectThrows(
+        IllegalArgumentException.class,
+        () ->
+            new Lucene99HnswScalarQuantizedVectorsFormat(
+                20, 100, 1, null, new SameThreadExecutorService()));
+  }
 }

diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormat.java
index 085a203..c444802 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene99/TestLucene99HnswVectorsFormat.java

@@ -21,6 +21,7 @@
 import org.apache.lucene.codecs.KnnVectorsFormat;
 import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
 import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.SameThreadExecutorService;
 
 public class TestLucene99HnswVectorsFormat extends BaseKnnVectorsFormatTestCase {
   @Override
@@ -48,5 +49,10 @@
     expectThrows(IllegalArgumentException.class, () -> new Lucene99HnswVectorsFormat(20, -1));
     expectThrows(IllegalArgumentException.class, () -> new Lucene99HnswVectorsFormat(512 + 1, 20));
     expectThrows(IllegalArgumentException.class, () -> new Lucene99HnswVectorsFormat(20, 3201));
+    expectThrows(
+        IllegalArgumentException.class, () -> new Lucene99HnswVectorsFormat(20, 100, 100, null));
+    expectThrows(
+        IllegalArgumentException.class,
+        () -> new Lucene99HnswVectorsFormat(20, 100, 1, new SameThreadExecutorService()));
   }
 }

diff --git a/lucene/core/src/test/org/apache/lucene/util/TestScalarQuantizedVectorSimilarity.java b/lucene/core/src/test/org/apache/lucene/util/TestScalarQuantizedVectorSimilarity.java
index 9e3996c..addcbf2 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestScalarQuantizedVectorSimilarity.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestScalarQuantizedVectorSimilarity.java

@@ -32,10 +32,11 @@
     int numVecs = 100;
 
     float[][] floats = randomFloats(numVecs, dims);
-    for (float quantile : new float[] {0.9f, 0.95f, 0.99f, (1 - 1f / (dims + 1)), 1f}) {
-      float error = Math.max((100 - quantile) * 0.01f, 0.01f);
+    for (float confidenceInterval : new float[] {0.9f, 0.95f, 0.99f, (1 - 1f / (dims + 1)), 1f}) {
+      float error = Math.max((100 - confidenceInterval) * 0.01f, 0.01f);
       FloatVectorValues floatVectorValues = fromFloats(floats);
-      ScalarQuantizer scalarQuantizer = ScalarQuantizer.fromVectors(floatVectorValues, quantile);
+      ScalarQuantizer scalarQuantizer =
+          ScalarQuantizer.fromVectors(floatVectorValues, confidenceInterval);
       byte[][] quantized = new byte[floats.length][];
       float[] offsets =
           quantizeVectors(scalarQuantizer, floats, quantized, VectorSimilarityFunction.EUCLIDEAN);
@@ -61,10 +62,11 @@
 
     float[][] floats = randomFloats(numVecs, dims);
 
-    for (float quantile : new float[] {0.9f, 0.95f, 0.99f, (1 - 1f / (dims + 1)), 1f}) {
-      float error = Math.max((100 - quantile) * 0.01f, 0.01f);
+    for (float confidenceInterval : new float[] {0.9f, 0.95f, 0.99f, (1 - 1f / (dims + 1)), 1f}) {
+      float error = Math.max((100 - confidenceInterval) * 0.01f, 0.01f);
       FloatVectorValues floatVectorValues = fromFloatsNormalized(floats);
-      ScalarQuantizer scalarQuantizer = ScalarQuantizer.fromVectors(floatVectorValues, quantile);
+      ScalarQuantizer scalarQuantizer =
+          ScalarQuantizer.fromVectors(floatVectorValues, confidenceInterval);
       byte[][] quantized = new byte[floats.length][];
       float[] offsets =
           quantizeVectorsNormalized(
@@ -94,10 +96,11 @@
     for (float[] fs : floats) {
       VectorUtil.l2normalize(fs);
     }
-    for (float quantile : new float[] {0.9f, 0.95f, 0.99f, (1 - 1f / (dims + 1)), 1f}) {
-      float error = Math.max((100 - quantile) * 0.01f, 0.01f);
+    for (float confidenceInterval : new float[] {0.9f, 0.95f, 0.99f, (1 - 1f / (dims + 1)), 1f}) {
+      float error = Math.max((100 - confidenceInterval) * 0.01f, 0.01f);
       FloatVectorValues floatVectorValues = fromFloats(floats);
-      ScalarQuantizer scalarQuantizer = ScalarQuantizer.fromVectors(floatVectorValues, quantile);
+      ScalarQuantizer scalarQuantizer =
+          ScalarQuantizer.fromVectors(floatVectorValues, confidenceInterval);
       byte[][] quantized = new byte[floats.length][];
       float[] offsets =
           quantizeVectors(scalarQuantizer, floats, quantized, VectorSimilarityFunction.DOT_PRODUCT);
@@ -123,10 +126,11 @@
     int numVecs = 100;
 
     float[][] floats = randomFloats(numVecs, dims);
-    for (float quantile : new float[] {0.9f, 0.95f, 0.99f, (1 - 1f / (dims + 1)), 1f}) {
-      float error = Math.max((100 - quantile) * 0.5f, 0.5f);
+    for (float confidenceInterval : new float[] {0.9f, 0.95f, 0.99f, (1 - 1f / (dims + 1)), 1f}) {
+      float error = Math.max((100 - confidenceInterval) * 0.5f, 0.5f);
       FloatVectorValues floatVectorValues = fromFloats(floats);
-      ScalarQuantizer scalarQuantizer = ScalarQuantizer.fromVectors(floatVectorValues, quantile);
+      ScalarQuantizer scalarQuantizer =
+          ScalarQuantizer.fromVectors(floatVectorValues, confidenceInterval);
       byte[][] quantized = new byte[floats.length][];
       float[] offsets =
           quantizeVectors(
commit	a26a80c89c2e625607b8d2ddb74230b57076d1da	[log] [tgz]
author	Benjamin Trent <ben.w.trent@gmail.com>	Wed Nov 15 15:00:12 2023 -0500
committer	GitHub <noreply@github.com>	Wed Nov 15 15:00:12 2023 -0500
tree	36c8d2f77087873464268ea968034be060c55a72
parent	05a336ea69efb5e8c9f99d0424811154834ec665 [diff]