blob: 78ad53cf4fa64677b1551b8b1cf39297da8eca5b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.lucene94;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.lucene.codecs.lucene90.IndexedDISI;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.packed.DirectMonotonicReader;
/** Read the vector values from the index input. This supports both iterated and random access. */
abstract class OffHeapVectorValues extends VectorValues
implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
protected final int dimension;
protected final int size;
protected final IndexInput slice;
protected final BytesRef binaryValue;
protected final ByteBuffer byteBuffer;
protected final int byteSize;
protected final float[] value;
OffHeapVectorValues(int dimension, int size, IndexInput slice, int byteSize) {
this.dimension = dimension;
this.size = size;
this.slice = slice;
this.byteSize = byteSize;
byteBuffer = ByteBuffer.allocate(byteSize);
value = new float[dimension];
binaryValue = new BytesRef(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
}
@Override
public int dimension() {
return dimension;
}
@Override
public int size() {
return size;
}
@Override
public long cost() {
return size;
}
@Override
public float[] vectorValue(int targetOrd) throws IOException {
slice.seek((long) targetOrd * byteSize);
slice.readFloats(value, 0, value.length);
return value;
}
@Override
public BytesRef binaryValue(int targetOrd) throws IOException {
readValue(targetOrd);
return binaryValue;
}
private void readValue(int targetOrd) throws IOException {
slice.seek((long) targetOrd * byteSize);
slice.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
}
public abstract int ordToDoc(int ord);
static OffHeapVectorValues load(
Lucene94HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
if (fieldEntry.docsWithFieldOffset == -2) {
return new EmptyOffHeapVectorValues(fieldEntry.dimension);
}
IndexInput bytesSlice =
vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
int byteSize =
switch (fieldEntry.vectorEncoding) {
case BYTE -> fieldEntry.dimension;
case FLOAT32 -> fieldEntry.dimension * Float.BYTES;
};
if (fieldEntry.docsWithFieldOffset == -1) {
return new DenseOffHeapVectorValues(
fieldEntry.dimension, fieldEntry.size, bytesSlice, byteSize);
} else {
return new SparseOffHeapVectorValues(fieldEntry, vectorData, bytesSlice, byteSize);
}
}
abstract Bits getAcceptOrds(Bits acceptDocs);
static class DenseOffHeapVectorValues extends OffHeapVectorValues {
private int doc = -1;
public DenseOffHeapVectorValues(int dimension, int size, IndexInput slice, int byteSize) {
super(dimension, size, slice, byteSize);
}
@Override
public float[] vectorValue() throws IOException {
slice.seek((long) doc * byteSize);
slice.readFloats(value, 0, value.length);
return value;
}
@Override
public BytesRef binaryValue() throws IOException {
slice.seek((long) doc * byteSize);
slice.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize, false);
return binaryValue;
}
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int advance(int target) throws IOException {
assert docID() < target;
if (target >= size) {
return doc = NO_MORE_DOCS;
}
return doc = target;
}
@Override
public RandomAccessVectorValues randomAccess() throws IOException {
return new DenseOffHeapVectorValues(dimension, size, slice.clone(), byteSize);
}
@Override
public int ordToDoc(int ord) {
return ord;
}
@Override
Bits getAcceptOrds(Bits acceptDocs) {
return acceptDocs;
}
}
private static class SparseOffHeapVectorValues extends OffHeapVectorValues {
private final DirectMonotonicReader ordToDoc;
private final IndexedDISI disi;
// dataIn was used to init a new IndexedDIS for #randomAccess()
private final IndexInput dataIn;
private final Lucene94HnswVectorsReader.FieldEntry fieldEntry;
public SparseOffHeapVectorValues(
Lucene94HnswVectorsReader.FieldEntry fieldEntry,
IndexInput dataIn,
IndexInput slice,
int byteSize)
throws IOException {
super(fieldEntry.dimension, fieldEntry.size, slice, byteSize);
this.fieldEntry = fieldEntry;
final RandomAccessInput addressesData =
dataIn.randomAccessSlice(fieldEntry.addressesOffset, fieldEntry.addressesLength);
this.dataIn = dataIn;
this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta, addressesData);
this.disi =
new IndexedDISI(
dataIn,
fieldEntry.docsWithFieldOffset,
fieldEntry.docsWithFieldLength,
fieldEntry.jumpTableEntryCount,
fieldEntry.denseRankPower,
fieldEntry.size);
}
@Override
public float[] vectorValue() throws IOException {
slice.seek((long) (disi.index()) * byteSize);
slice.readFloats(value, 0, value.length);
return value;
}
@Override
public BytesRef binaryValue() throws IOException {
slice.seek((long) (disi.index()) * byteSize);
slice.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize, false);
return binaryValue;
}
@Override
public int docID() {
return disi.docID();
}
@Override
public int nextDoc() throws IOException {
return disi.nextDoc();
}
@Override
public int advance(int target) throws IOException {
assert docID() < target;
return disi.advance(target);
}
@Override
public RandomAccessVectorValues randomAccess() throws IOException {
return new SparseOffHeapVectorValues(fieldEntry, dataIn, slice.clone(), byteSize);
}
@Override
public int ordToDoc(int ord) {
return (int) ordToDoc.get(ord);
}
@Override
Bits getAcceptOrds(Bits acceptDocs) {
if (acceptDocs == null) {
return null;
}
return new Bits() {
@Override
public boolean get(int index) {
return acceptDocs.get(ordToDoc(index));
}
@Override
public int length() {
return size;
}
};
}
}
private static class EmptyOffHeapVectorValues extends OffHeapVectorValues {
public EmptyOffHeapVectorValues(int dimension) {
super(dimension, 0, null, 0);
}
private int doc = -1;
@Override
public int dimension() {
return super.dimension();
}
@Override
public int size() {
return 0;
}
@Override
public float[] vectorValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public BytesRef binaryValue() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int advance(int target) throws IOException {
return doc = NO_MORE_DOCS;
}
@Override
public long cost() {
return 0;
}
@Override
public RandomAccessVectorValues randomAccess() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public float[] vectorValue(int targetOrd) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public BytesRef binaryValue(int targetOrd) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public int ordToDoc(int ord) {
throw new UnsupportedOperationException();
}
@Override
Bits getAcceptOrds(Bits acceptDocs) {
return null;
}
}
}