blob: ab2e40ae9a9127d812596369e935d394dc9834bb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { popcnt_bit_range } from './util/bit';
import { VectorLike, Vector } from './vector';
import { VectorType, TypedArray, TypedArrayConstructor, Dictionary } from './type';
import { Int, Bool, FlatListType, List, FixedSizeList, Struct, Map_ } from './type';
import { DataType, FlatType, ListType, NestedType, SingleNestedType, DenseUnion, SparseUnion } from './type';
export function toTypedArray<T extends TypedArray>(ArrayType: TypedArrayConstructor<T>, values?: T | ArrayLike<number> | Iterable<number> | null): T {
if (!ArrayType && ArrayBuffer.isView(values)) { return values; }
return values instanceof ArrayType ? values
: !values || !ArrayBuffer.isView(values) ? ArrayType.from(values || [])
: new ArrayType(values.buffer, values.byteOffset, values.byteLength / ArrayType.BYTES_PER_ELEMENT);
}
export type Data<T extends DataType> = DataTypes<T>[T['TType']] & BaseData<T>;
export interface DataTypes<T extends DataType> {
/* [Type.NONE]*/ 0: BaseData<T>;
/* [Type.Null]*/ 1: FlatData<T>;
/* [Type.Int]*/ 2: FlatData<T>;
/* [Type.Float]*/ 3: FlatData<T>;
/* [Type.Binary]*/ 4: FlatListData<T>;
/* [Type.Utf8]*/ 5: FlatListData<T>;
/* [Type.Bool]*/ 6: BoolData;
/* [Type.Decimal]*/ 7: FlatData<T>;
/* [Type.Date]*/ 8: FlatData<T>;
/* [Type.Time]*/ 9: FlatData<T>;
/* [Type.Timestamp]*/ 10: FlatData<T>;
/* [Type.Interval]*/ 11: FlatData<T>;
/* [Type.List]*/ 12: ListData<List<T>>;
/* [Type.Struct]*/ 13: NestedData<Struct>;
/* [Type.Union]*/ 14: UnionData;
/* [Type.FixedSizeBinary]*/ 15: FlatData<T>;
/* [Type.FixedSizeList]*/ 16: SingleNestedData<FixedSizeList<T>>;
/* [Type.Map]*/ 17: NestedData<Map_>;
/* [Type.DenseUnion]*/ DenseUnion: DenseUnionData;
/*[Type.SparseUnion]*/ SparseUnion: SparseUnionData;
/*[ Type.Dictionary]*/ Dictionary: DictionaryData<any>;
}
// When slicing, we do not know the null count of the sliced range without
// doing some computation. To avoid doing this eagerly, we set the null count
// to -1 (any negative number will do). When Array::null_count is called the
// first time, the null count will be computed. See ARROW-33
export type kUnknownNullCount = -1;
export const kUnknownNullCount = -1;
export class BaseData<T extends DataType = DataType> implements VectorLike {
public type: T;
public length: number;
public offset: number;
// @ts-ignore
public childData: Data<any>[];
protected _nullCount: number | kUnknownNullCount;
protected /* [VectorType.OFFSET]:*/ 0?: Int32Array;
protected /* [VectorType.DATA]:*/ 1?: T['TArray'];
protected /*[VectorType.VALIDITY]:*/ 2?: Uint8Array;
protected /* [VectorType.TYPE]:*/ 3?: Int8Array;
constructor(type: T, length: number, offset?: number, nullCount?: number) {
this.type = type;
this.length = Math.floor(Math.max(length || 0, 0));
this.offset = Math.floor(Math.max(offset || 0, 0));
this._nullCount = Math.floor(Math.max(nullCount || 0, -1));
}
public get typeId() { return this.type.TType; }
public get nullBitmap() { return this[VectorType.VALIDITY]; }
public get nullCount() {
let nullCount = this._nullCount;
let nullBitmap: Uint8Array | undefined;
if (nullCount === -1 && (nullBitmap = this[VectorType.VALIDITY])) {
this._nullCount = nullCount = this.length - popcnt_bit_range(nullBitmap, this.offset, this.offset + this.length);
}
return nullCount;
}
public clone<R extends T>(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) {
return new BaseData(type, length, offset, nullCount);
}
public slice(offset: number, length: number) {
return length <= 0 ? this : this.sliceInternal(this.clone(
this.type, length, this.offset + offset, +(this._nullCount === 0) - 1
) as any, offset, length);
}
protected sliceInternal(clone: this, offset: number, length: number) {
let arr: any;
// If typeIds exist, slice the typeIds buffer
(arr = this[VectorType.TYPE]) && (clone[VectorType.TYPE] = this.sliceData(arr, offset, length));
// If offsets exist, only slice the offsets buffer
(arr = this[VectorType.OFFSET]) && (clone[VectorType.OFFSET] = this.sliceOffsets(arr, offset, length)) ||
// Otherwise if no offsets, slice the data buffer
(arr = this[VectorType.DATA]) && (clone[VectorType.DATA] = this.sliceData(arr, offset, length));
return clone;
}
protected sliceData(data: T['TArray'] & TypedArray, offset: number, length: number) {
return data.subarray(offset, offset + length);
}
protected sliceOffsets(valueOffsets: Int32Array, offset: number, length: number) {
return valueOffsets.subarray(offset, offset + length + 1);
}
}
export class FlatData<T extends FlatType> extends BaseData<T> {
public /* [VectorType.DATA]:*/ 1: T['TArray'];
public /*[VectorType.VALIDITY]:*/ 2: Uint8Array;
public get values() { return this[VectorType.DATA]; }
constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, data: Iterable<number>, offset?: number, nullCount?: number) {
super(type, length, offset, nullCount);
this[VectorType.DATA] = toTypedArray(this.ArrayType, data);
this[VectorType.VALIDITY] = toTypedArray(Uint8Array, nullBitmap);
}
public get ArrayType(): T['ArrayType'] { return this.type.ArrayType; }
public clone<R extends T>(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) {
return new (this.constructor as any)(type, length, this[VectorType.VALIDITY], this[VectorType.DATA], offset, nullCount) as FlatData<R>;
}
}
export class BoolData extends FlatData<Bool> {
protected sliceData(data: Uint8Array) { return data; }
}
export class FlatListData<T extends FlatListType> extends FlatData<T> {
public /* [VectorType.OFFSET]:*/ 0: Int32Array;
public /* [VectorType.DATA]:*/ 1: T['TArray'];
public /*[VectorType.VALIDITY]:*/ 2: Uint8Array;
public get values() { return this[VectorType.DATA]; }
public get valueOffsets() { return this[VectorType.OFFSET]; }
constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, valueOffsets: Iterable<number>, data: T['TArray'], offset?: number, nullCount?: number) {
super(type, length, nullBitmap, data, offset, nullCount);
this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets);
}
public clone<R extends T>(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) {
return new FlatListData(type, length, this[VectorType.VALIDITY], this[VectorType.OFFSET], this[VectorType.DATA], offset, nullCount) as FlatListData<R>;
}
}
export class DictionaryData<T extends DataType> extends BaseData<Dictionary<T>> {
protected _dictionary: Vector<T>;
protected _indicies: Data<Int<any>>;
public get indicies() { return this._indicies; }
public get dictionary() { return this._dictionary; }
constructor(type: Dictionary<T>, dictionary: Vector<T>, indicies: Data<Int<any>>) {
super(type, indicies.length, (indicies as any)._nullCount);
this._indicies = indicies;
this._dictionary = dictionary;
this.length = this._indicies.length;
}
public get nullCount() { return this._indicies.nullCount; }
public clone<R extends Dictionary<T>>(type: R, length = this.length, offset = this.offset) {
const data = this._dictionary.data.clone(type.dictionary as any);
return new DictionaryData<R>(
this.type as any,
this._dictionary.clone(data) as any,
this._indicies.slice(offset - this.offset, length)
) as any;
}
protected sliceInternal(clone: this, _offset: number, _length: number) {
clone.length = clone._indicies.length;
clone._nullCount = (clone._indicies as any)._nullCount;
return clone;
}
}
export class NestedData<T extends NestedType = NestedType> extends BaseData<T> {
public /*[VectorType.VALIDITY]:*/ 2: Uint8Array;
constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, childData: Data<any>[], offset?: number, nullCount?: number) {
super(type, length, offset, nullCount);
this.childData = childData;
this[VectorType.VALIDITY] = toTypedArray(Uint8Array, nullBitmap);
}
public clone<R extends T>(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) {
return new NestedData<R>(type, length, this[VectorType.VALIDITY], this.childData, offset, nullCount);
}
protected sliceInternal(clone: this, offset: number, length: number) {
if (!this[VectorType.OFFSET]) {
clone.childData = this.childData.map((child) => child.slice(offset, length));
}
return super.sliceInternal(clone, offset, length);
}
}
export class SingleNestedData<T extends SingleNestedType> extends NestedData<T> {
protected _valuesData: Data<T>;
public get values() { return this._valuesData; }
constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, valueChildData: Data<T>, offset?: number, nullCount?: number) {
super(type, length, nullBitmap, [valueChildData], offset, nullCount);
this._valuesData = valueChildData;
}
}
export class ListData<T extends ListType> extends SingleNestedData<T> {
public /* [VectorType.OFFSET]:*/ 0: Int32Array;
public /*[VectorType.VALIDITY]:*/ 2: Uint8Array;
public get valueOffsets() { return this[VectorType.OFFSET]; }
constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, valueOffsets: Iterable<number>, valueChildData: Data<T>, offset?: number, nullCount?: number) {
super(type, length, nullBitmap, valueChildData, offset, nullCount);
this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets);
}
public clone<R extends T>(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) {
return new ListData<R>(type, length, this[VectorType.VALIDITY], this[VectorType.OFFSET], this._valuesData as any, offset, nullCount);
}
}
export class UnionData<T extends (DenseUnion | SparseUnion) = any> extends NestedData<T> {
public /* [VectorType.TYPE]:*/ 3: T['TArray'];
public get typeIds() { return this[VectorType.TYPE]; }
constructor(type: T, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable<number>, childData: Data<any>[], offset?: number, nullCount?: number) {
super(type, length, nullBitmap, childData, offset, nullCount);
this[VectorType.TYPE] = toTypedArray(Int8Array, typeIds);
}
public clone<R extends T>(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) {
return new UnionData<R>(type, length, this[VectorType.VALIDITY], this[VectorType.TYPE], this.childData, offset, nullCount);
}
}
export class SparseUnionData extends UnionData<SparseUnion> {
constructor(type: SparseUnion, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable<number>, childData: Data<any>[], offset?: number, nullCount?: number) {
super(type, length, nullBitmap, typeIds, childData, offset, nullCount);
}
public clone<R extends SparseUnion>(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) {
return new SparseUnionData(
type,
length,
this[VectorType.VALIDITY],
this[VectorType.TYPE],
this.childData,
offset, nullCount
) as any as UnionData<R>;
}
}
export class DenseUnionData extends UnionData<DenseUnion> {
public /* [VectorType.OFFSET]:*/ 0: Int32Array;
public get valueOffsets() { return this[VectorType.OFFSET]; }
constructor(type: DenseUnion, length: number, nullBitmap: Uint8Array | null | undefined, typeIds: Iterable<number>, valueOffsets: Iterable<number>, childData: Data<any>[], offset?: number, nullCount?: number) {
super(type, length, nullBitmap, typeIds, childData, offset, nullCount);
this[VectorType.OFFSET] = toTypedArray(Int32Array, valueOffsets);
}
public clone<R extends DenseUnion>(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) {
return new DenseUnionData(
type,
length,
this[VectorType.VALIDITY],
this[VectorType.TYPE],
this[VectorType.OFFSET],
this.childData,
offset, nullCount
) as any as UnionData<R>;
}
}
export class ChunkedData<T extends DataType> extends BaseData<T> {
// @ts-ignore
protected _chunkData: Data<T>[];
protected _chunkVectors: Vector<T>[];
protected _chunkOffsets: Uint32Array;
public get chunkVectors() { return this._chunkVectors; }
public get chunkOffsets() { return this._chunkOffsets; }
public get chunkData() {
return this._chunkData || (
this._chunkData = this._chunkVectors.map(({ data }) => data));
}
constructor(type: T, length: number, chunkVectors: Vector<T>[], offset?: number, nullCount?: number, chunkOffsets?: Uint32Array) {
super(type, length, offset, nullCount);
this._chunkVectors = chunkVectors;
this._chunkOffsets = chunkOffsets || ChunkedData.computeOffsets(chunkVectors);
}
public get nullCount() {
let nullCount = this._nullCount;
if (nullCount === -1) {
this._nullCount = nullCount = this._chunkVectors.reduce((x, c) => x + c.nullCount, 0);
}
return nullCount;
}
public clone<R extends T>(type: R, length = this.length, offset = this.offset, nullCount = this._nullCount) {
return new ChunkedData<R>(
type, length,
this._chunkVectors.map((vec) => vec.clone(vec.data.clone(type))) as any,
offset, nullCount, this._chunkOffsets
);
}
protected sliceInternal(clone: this, offset: number, length: number) {
const chunks = this._chunkVectors;
const offsets = this._chunkOffsets;
const chunkSlices: Vector<T>[] = [];
for (let childIndex = -1, numChildren = chunks.length; ++childIndex < numChildren;) {
const child = chunks[childIndex];
const childLength = child.length;
const childOffset = offsets[childIndex];
// If the child is to the right of the slice boundary, exclude
if (childOffset >= offset + length) { continue; }
// If the child is to the left of of the slice boundary, exclude
if (offset >= childOffset + childLength) { continue; }
// If the child is between both left and right boundaries, include w/o slicing
if (childOffset >= offset && (childOffset + childLength) <= offset + length) {
chunkSlices.push(child);
continue;
}
// If the child overlaps one of the slice boundaries, include that slice
const begin = Math.max(0, offset - childOffset);
const end = begin + Math.min(childLength - begin, (offset + length) - childOffset);
chunkSlices.push(child.slice(begin, end));
}
clone._chunkVectors = chunkSlices;
clone._chunkOffsets = ChunkedData.computeOffsets(chunkSlices);
return clone;
}
static computeOffsets<T extends DataType>(childVectors: Vector<T>[]) {
const childOffsets = new Uint32Array(childVectors.length + 1);
for (let index = 0, length = childOffsets.length, childOffset = childOffsets[0] = 0; ++index < length;) {
childOffsets[index] = (childOffset += childVectors[index - 1].length);
}
return childOffsets;
}
}