// blob: fa1d16efcba72b3f9de4b22d70624d821afe0b8d [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import { Data, ChunkedData, FlatData, BoolData, FlatListData, NestedData, DictionaryData } from './data';
import { VisitorNode, TypeVisitor, VectorVisitor } from './visitor';
import { DataType, ListType, FlatType, NestedType, FlatListType, TimeUnit } from './type';
import { IterableArrayLike, Precision, DateUnit, IntervalUnit, UnionMode } from './type';
/** Structural shape exposing a `length` and a `nullCount`; implemented by `Vector`. */
export interface VectorLike {
    length: number;
    nullCount: number;
}
/**
 * Element-access strategy used by a `Vector`. `Vector` forwards `isValid`, `get`,
 * `set`, `toArray`, `indexOf` and iteration to its view.
 */
export interface View<T extends DataType> {
    /** Produces a view of the same kind over a different `Data` instance. */
    clone(data: Data<T>): this;

    /** Reports whether the slot at `index` is valid. */
    isValid(index: number): boolean;

    /** Reads the value at `index`; the result may be `null`. */
    get(index: number): T['TValue'] | null;

    /** Writes `value` at `index`. */
    set(index: number, value: T['TValue']): void;

    /** Returns the position of `search`. */
    indexOf(search: T['TValue']): number;

    /** Materializes the values as an array-like, iterable collection. */
    toArray(): IterableArrayLike<T['TValue'] | null>;

    /** Iterates over the values (entries may be `null`). */
    [Symbol.iterator](): IterableIterator<T['TValue'] | null>;
}
/**
 * Base class for every vector type in this module. A vector pairs a `Data`
 * instance with a `View` and forwards element access to that view.
 */
export class Vector<T extends DataType = any> implements VectorLike, View<T>, VisitorNode {
    /** Builds the vector matching `data.type` by delegating to `createVector`. */
    public static create<T extends DataType>(data: Data<T>): Vector<T> {
        return createVector(data);
    }

    /**
     * Folds `others` onto `source` from left to right. While the accumulated
     * value is falsy, the next vector simply replaces it; otherwise the next
     * vector is appended with the instance `concat`.
     */
    public static concat<T extends DataType>(source?: Vector<T> | null, ...others: Vector<T>[]): Vector<T> {
        let joined = source!;
        for (let i = 0; i < others.length; ++i) {
            const next = others[i];
            joined = joined ? joined.concat(next) : next;
        }
        return joined;
    }

    public type: T;
    public length: number;
    public readonly data: Data<T>;
    public readonly view: View<T>;

    /**
     * @param data backing data; its `type` and `length` are copied onto the vector
     * @param view requested view; it may be replaced, see the branches below
     */
    constructor(data: Data<T>, view: View<T>) {
        this.data = data;
        this.type = data.type;
        this.length = data.length;
        if ((<any> data instanceof ChunkedData) && !(view instanceof ChunkedView)) {
            // Chunked data is always read through a ChunkedView.
            this.view = new ChunkedView(data);
        } else if (view instanceof ValidityView) {
            // Already validity-aware: use as given.
            this.view = view;
        } else {
            // Wrap the view in a ValidityView only when the data has a non-empty
            // null bitmap and reports at least one null.
            const nulls = data.nullBitmap;
            this.view = (nulls && nulls.length > 0 && data.nullCount > 0)
                ? new ValidityView(data, view)
                : view;
        }
    }

    /** `nullCount` of the backing data. */
    public get nullCount() { return this.data.nullCount; }

    /** `nullBitmap` of the backing data. */
    public get nullBitmap() { return this.data.nullBitmap; }

    public get [Symbol.toStringTag]() {
        return `Vector<${this.type[Symbol.toStringTag]}>`;
    }

    /** JSON representation: the result of `toArray()`. */
    public toJSON(): any {
        return this.toArray();
    }

    /**
     * Creates a new instance of this vector's own constructor around `data`.
     * When `view` is omitted, the current view is cloned onto `data`.
     */
    public clone<R extends T>(data: Data<R>, view: View<R> = this.view.clone(data) as any): this {
        const VectorCtor = this.constructor as any;
        return new VectorCtor(data, view);
    }

    public isValid(index: number): boolean {
        return this.view.isValid(index);
    }

    public get(index: number): T['TValue'] | null {
        return this.view.get(index);
    }

    public set(index: number, value: T['TValue']): void {
        return this.view.set(index, value);
    }

    public toArray(): IterableArrayLike<T['TValue'] | null> {
        return this.view.toArray();
    }

    public indexOf(value: T['TValue']) {
        return this.view.indexOf(value);
    }

    public [Symbol.iterator](): IterableIterator<T['TValue'] | null> {
        return this.view[Symbol.iterator]();
    }

    /**
     * Appends `others` to this vector and returns a chunked vector over all of
     * them. Falsy entries are dropped; if none remain, `this` is returned as is.
     * If this vector is already chunked, its existing chunks are reused rather
     * than nesting this vector as a single chunk.
     */
    public concat(...others: Vector<T>[]): this {
        const rest = others.filter(Boolean);
        if (rest.length === 0) {
            return this;
        }
        const { view } = this;
        const vecs = view instanceof ChunkedView
            ? [...view.chunkVectors, ...rest]
            : [this, ...rest];
        const offsets = ChunkedData.computeOffsets(vecs);
        const chunksLength = offsets[offsets.length - 1];
        const chunkedData = new ChunkedData(this.type, chunksLength, vecs, 0, -1, offsets);
        return this.clone(chunkedData, new ChunkedView(chunkedData)) as this;
    }

    /**
     * Returns a vector over a sub-range of the data.
     *
     * `begin` (default 0) and `end` (default `length`) are multiplied by the
     * view's `size` property (1 when absent). Negative bounds are wrapped
     * relative to `length`, and the bounds are swapped when `end` lands before
     * `begin`. The resulting span, clamped to `length`, is passed with the start
     * position to `data.slice`.
     */
    public slice(begin?: number, end?: number): this {
        const length = this.length;
        const size = (this.view as any).size || 1;
        let from = (begin || 0) * size;
        let to = (typeof end === 'number' ? end : length) * size;
        if (to < 0) { to = length - (to * -1) % length; }
        if (from < 0) { from = length - (from * -1) % length; }
        if (to < from) {
            const swapped = from;
            from = to;
            to = swapped;
        }
        const span = to - from;
        // A non-finite (e.g. NaN) or negative span collapses to zero.
        const count = (!isFinite(span) || span < 0) ? 0 : span;
        const slicedData = this.data.slice(from, Math.min(count, length));
        return this.clone(slicedData, this.view.clone(slicedData)) as this;
    }

    /** Dispatches `visitor` on this vector's type. */
    public acceptTypeVisitor(visitor: TypeVisitor): any {
        return TypeVisitor.visitTypeInline(visitor, this.type);
    }

    /** Dispatches `visitor` on this vector's type, passing the vector itself along. */
    public acceptVectorVisitor(visitor: VectorVisitor): any {
        return VectorVisitor.visitTypeInline(visitor, this.type, this);
    }
}
/** Base class for vectors whose data exposes a single `values` buffer. */
export abstract class FlatVector<T extends FlatType> extends Vector<T> {
    /** The `values` of the backing data. */
    public get values() {
        return this.data.values;
    }

    /** Int32 reinterpretation starting at offset 0 with stride 2. */
    public lows(): IntVector<Int32> {
        return this.asInt32(0, 2);
    }

    /** Int32 reinterpretation starting at offset 1 with stride 2. */
    public highs(): IntVector<Int32> {
        return this.asInt32(1, 2);
    }

    /**
     * Re-types the backing data as `Int32` and wraps it in an `IntVector` read
     * through a `PrimitiveView` with the given `stride`.
     *
     * @param offset when positive, the re-typed data is first sliced with
     *               `(offset, this.length - offset)`
     * @param stride stride handed to `PrimitiveView`; the returned vector's
     *               `length` is set to `this.length / stride`, truncated to an int
     */
    public asInt32(offset: number = 0, stride: number = 2): IntVector<Int32> {
        const retyped = (this.data as FlatData<any>).clone(new Int32());
        const data = offset > 0 ? retyped.slice(offset, this.length - offset) : retyped;
        const int32s = new IntVector(data, new PrimitiveView(data, stride));
        int32s.length = (this.length / stride) | 0;
        return int32s;
    }
}
/** Base class for list-like vectors whose data exposes `values` and `valueOffsets`. */
export abstract class ListVectorBase<T extends (ListType | FlatListType)> extends Vector<T> {
    /** The `values` of the backing data. */
    public get values() {
        return this.data.values;
    }

    /** The `valueOffsets` of the backing data. */
    public get valueOffsets() {
        return this.data.valueOffsets;
    }

    /** Offset stored for the element at `index`. */
    public getValueOffset(index: number) {
        return this.valueOffsets[index];
    }

    /** Difference between the offsets stored at `index + 1` and `index`. */
    public getValueLength(index: number) {
        const { valueOffsets } = this;
        return valueOffsets[index + 1] - valueOffsets[index];
    }
}
/** Base class for vectors with child vectors, read through a `NestedView`. */
export abstract class NestedVector<T extends NestedType> extends Vector<T> {
    // @ts-ignore
    public readonly view: NestedView<T>;
    // @ts-ignore
    protected _childData: Data<any>[];

    /** Child vector at `index`, as provided by the view. */
    public getChildAt<R extends DataType = DataType>(index: number): Vector<R> | null {
        return this.view.getChildAt<R>(index);
    }

    /**
     * Child data of this vector, computed on first access and cached in
     * `_childData` afterwards.
     */
    public get childData(): Data<any>[] {
        // Serve the cached reference when one exists.
        const cached = this._childData;
        if (cached) {
            return cached;
        }
        const data = this.data;
        if (!(<any> data instanceof ChunkedData)) {
            // Non-chunked data: cache and return the NestedData's own childData.
            return this._childData = (data as NestedData<T>).childData;
        }
        // Chunked data: gather the i-th child vector of every chunk into column i,
        // concatenate each column into a single vector, and use that vector's data
        // as the i-th childData entry.
        const chunks = ((data as ChunkedData<T>).chunkVectors as NestedVector<T>[]);
        const columns: (Vector<T> | null)[][] = [];
        chunks.forEach((chunk) => {
            chunk.childData.forEach((_, i) => {
                const column = columns[i] || (columns[i] = []);
                column.push(chunk.getChildAt(i));
            });
        });
        return this._childData = columns.map((vecs) => Vector.concat<T>(...vecs).data);
    }
}
import { List, Binary, Utf8, Bool, } from './type';
import { Null, Int, Float, Decimal, Date_, Time, Timestamp, Interval } from './type';
import { Uint8, Uint16, Uint32, Uint64, Int8, Int16, Int32, Int64, Float16, Float32, Float64 } from './type';
import { Struct, Union, SparseUnion, DenseUnion, FixedSizeBinary, FixedSizeList, Map_, Dictionary } from './type';
import { ChunkedView } from './vector/chunked';
import { DictionaryView } from './vector/dictionary';
import { ListView, FixedSizeListView, BinaryView, Utf8View } from './vector/list';
import { UnionView, DenseUnionView, NestedView, StructView, MapView } from './vector/nested';
import { FlatView, NullView, BoolView, ValidityView, PrimitiveView, FixedSizeView, Float16View } from './vector/flat';
import { DateDayView, DateMillisecondView, IntervalYearMonthView } from './vector/flat';
import { TimestampDayView, TimestampSecondView, TimestampMillisecondView, TimestampMicrosecondView, TimestampNanosecondView } from './vector/flat';
import { packBools } from './util/bit';
/** Vector of `Null` type; reads through a `NullView` unless another view is supplied. */
export class NullVector extends Vector<Null> {
    constructor(
        data: Data<Null>,
        view: View<Null> = new NullView(data)
    ) {
        super(data, view);
    }
}
/** Vector of `Bool` type; reads through a `BoolView` unless another view is supplied. */
export class BoolVector extends Vector<Bool> {
    /**
     * Builds a `BoolVector` from booleans: the input is packed with `packBools`
     * and wrapped in a `BoolData` of the input's `length` with no null bitmap.
     */
    public static from(data: IterableArrayLike<boolean>) {
        const type = new Bool();
        const length = data.length;
        const packed = packBools(data);
        return new BoolVector(new BoolData(type, length, null, packed));
    }

    /** The `values` of the backing data. */
    public get values() {
        return this.data.values;
    }

    constructor(
        data: Data<Bool>,
        view: View<Bool> = new BoolView(data)
    ) {
        super(data, view);
    }
}
/** Vector of integer types. */
export class IntVector<T extends Int = Int<any>> extends FlatVector<T> {
    public static from(data: Int8Array): IntVector<Int8>;
    public static from(data: Int16Array): IntVector<Int16>;
    public static from(data: Int32Array): IntVector<Int32>;
    public static from(data: Uint8Array): IntVector<Uint8>;
    public static from(data: Uint16Array): IntVector<Uint16>;
    public static from(data: Uint32Array): IntVector<Uint32>;
    public static from(data: Int32Array, is64: true): IntVector<Int64>;
    public static from(data: Uint32Array, is64: true): IntVector<Uint64>;
    /**
     * Wraps a typed array in an `IntVector` without a null bitmap.
     *
     * With `is64 === true`, an `Int32Array` becomes `Int64` and anything else
     * becomes `Uint64`. Otherwise the integer type is chosen from the exact
     * constructor of `data`.
     *
     * @throws TypeError when `is64` is not `true` and the constructor of `data`
     *         is not one of the six supported typed arrays
     */
    public static from(data: any, is64?: boolean) {
        if (is64 === true) {
            if (data instanceof Int32Array) {
                return new IntVector(new FlatData(new Int64(), data.length, null, data));
            }
            return new IntVector(new FlatData(new Uint64(), data.length, null, data));
        }
        const ctor = data.constructor;
        if (ctor === Int8Array) {
            return new IntVector(new FlatData(new Int8(), data.length, null, data));
        }
        if (ctor === Int16Array) {
            return new IntVector(new FlatData(new Int16(), data.length, null, data));
        }
        if (ctor === Int32Array) {
            return new IntVector(new FlatData(new Int32(), data.length, null, data));
        }
        if (ctor === Uint8Array) {
            return new IntVector(new FlatData(new Uint8(), data.length, null, data));
        }
        if (ctor === Uint16Array) {
            return new IntVector(new FlatData(new Uint16(), data.length, null, data));
        }
        if (ctor === Uint32Array) {
            return new IntVector(new FlatData(new Uint32(), data.length, null, data));
        }
        throw new TypeError('Unrecognized Int data');
    }

    /**
     * Default view for integer data: a `FlatView` up to 32 bits, otherwise a
     * `FixedSizeView` whose size is `bitWidth / 32` truncated to an int.
     */
    static defaultView<T extends Int>(data: Data<T>) {
        const { bitWidth } = data.type;
        if (bitWidth <= 32) {
            return new FlatView(data);
        }
        return new FixedSizeView(data, (bitWidth / 32) | 0);
    }

    constructor(
        data: Data<T>,
        view: View<T> = IntVector.defaultView(data)
    ) {
        super(data, view);
    }
}
/** Vector of floating-point types. */
export class FloatVector<T extends Float = Float<any>> extends FlatVector<T> {
    public static from(data: Uint16Array): FloatVector<Float16>;
    public static from(data: Float32Array): FloatVector<Float32>;
    public static from(data: Float64Array): FloatVector<Float64>;
    /**
     * Wraps a typed array in a `FloatVector` without a null bitmap. The float
     * type is chosen from the exact constructor of `data`: `Uint16Array` maps to
     * `Float16`, `Float32Array` to `Float32`, `Float64Array` to `Float64`.
     *
     * @throws TypeError for any other constructor
     */
    public static from(data: any) {
        const ctor = data.constructor;
        if (ctor === Uint16Array) {
            return new FloatVector(new FlatData(new Float16(), data.length, null, data));
        }
        if (ctor === Float32Array) {
            return new FloatVector(new FlatData(new Float32(), data.length, null, data));
        }
        if (ctor === Float64Array) {
            return new FloatVector(new FlatData(new Float64(), data.length, null, data));
        }
        throw new TypeError('Unrecognized Float data');
    }

    /** Default view: `Float16View` for half precision, `FlatView` for everything else. */
    static defaultView<T extends Float>(data: Data<T>): FlatView<any> {
        if (data.type.precision !== Precision.HALF) {
            return new FlatView(data);
        }
        return new Float16View(data as Data<Float16>);
    }

    constructor(
        data: Data<T>,
        view: View<T> = FloatVector.defaultView(data)
    ) {
        super(data, view);
    }
}
/** Vector of `Date_` type, with `DAY` and `MILLISECOND` units handled separately. */
export class DateVector extends FlatVector<Date_> {
    /** Default view: `DateDayView` for `DAY`, otherwise `DateMillisecondView` with size 2. */
    static defaultView<T extends Date_>(data: Data<T>) {
        if (data.type.unit === DateUnit.DAY) {
            return new DateDayView(data);
        }
        return new DateMillisecondView(data, 2);
    }

    constructor(
        data: Data<Date_>,
        view: View<Date_> = DateVector.defaultView(data)
    ) {
        super(data, view);
    }

    /** Int32 reinterpretation at offset 0; stride 1 for `DAY`, stride 2 otherwise. */
    public lows(): IntVector<Int32> {
        if (this.type.unit === DateUnit.DAY) {
            return this.asInt32(0, 1);
        }
        return this.asInt32(0, 2);
    }

    /** For `DAY` the same as `lows()`; otherwise Int32 reinterpretation at offset 1, stride 2. */
    public highs(): IntVector<Int32> {
        if (this.type.unit === DateUnit.DAY) {
            return this.asInt32(0, 1);
        }
        return this.asInt32(1, 2);
    }

    /**
     * Re-types the data as `Int32` and wraps it in an `IntVector` read through
     * the timestamp view matching this vector's unit.
     *
     * @throws TypeError when the unit is neither `DAY` nor `MILLISECOND`
     */
    public asEpochMilliseconds(): IntVector<Int32> {
        const data = (this.data as FlatData<any>).clone(new Int32());
        switch (this.type.unit) {
            case DateUnit.DAY: {
                const dayView = new TimestampDayView(data as any, 1) as any;
                return new IntVector(data, dayView);
            }
            case DateUnit.MILLISECOND: {
                const millisecondView = new TimestampMillisecondView(data as any, 2) as any;
                return new IntVector(data, millisecondView);
            }
        }
        throw new TypeError(`Unrecognized date unit "${DateUnit[this.type.unit]}"`);
    }
}
/** Vector of `Decimal` type; reads through a `FixedSizeView` of size 4 by default. */
export class DecimalVector extends FlatVector<Decimal> {
    constructor(
        data: Data<Decimal>,
        view: View<Decimal> = new FixedSizeView(data, 4)
    ) {
        super(data, view);
    }
}
/** Vector of `Time` type. */
export class TimeVector extends FlatVector<Time> {
    /**
     * Default view: a `FlatView` up to 32 bits, otherwise a `FixedSizeView`
     * whose size is `bitWidth / 32` truncated to an int.
     */
    static defaultView<T extends Time>(data: Data<T>) {
        const { bitWidth } = data.type;
        if (bitWidth <= 32) {
            return new FlatView(data);
        }
        return new FixedSizeView(data, (bitWidth / 32) | 0);
    }

    constructor(
        data: Data<Time>,
        view: View<Time> = TimeVector.defaultView(data)
    ) {
        super(data, view);
    }

    /** Int32 reinterpretation at offset 0; stride 1 up to 32 bits, stride 2 otherwise. */
    public lows(): IntVector<Int32> {
        if (this.type.bitWidth <= 32) {
            return this.asInt32(0, 1);
        }
        return this.asInt32(0, 2);
    }

    /** Up to 32 bits the same as `lows()`; otherwise Int32 reinterpretation at offset 1, stride 2. */
    public highs(): IntVector<Int32> {
        if (this.type.bitWidth <= 32) {
            return this.asInt32(0, 1);
        }
        return this.asInt32(1, 2);
    }
}
/** Vector of `Timestamp` type; reads through a `FixedSizeView` of size 2 by default. */
export class TimestampVector extends FlatVector<Timestamp> {
    constructor(
        data: Data<Timestamp>,
        view: View<Timestamp> = new FixedSizeView(data, 2)
    ) {
        super(data, view);
    }

    /**
     * Re-types the data as `Int32` and wraps it in an `IntVector` read through
     * the timestamp view matching this vector's time unit.
     *
     * @throws TypeError when the unit is not one of the four `TimeUnit` cases handled
     */
    public asEpochMilliseconds(): IntVector<Int32> {
        const data = (this.data as FlatData<any>).clone(new Int32());
        switch (this.type.unit) {
            case TimeUnit.SECOND: {
                const secondView = new TimestampSecondView(data as any, 1) as any;
                return new IntVector(data, secondView);
            }
            case TimeUnit.MILLISECOND: {
                const millisecondView = new TimestampMillisecondView(data as any, 2) as any;
                return new IntVector(data, millisecondView);
            }
            case TimeUnit.MICROSECOND: {
                const microsecondView = new TimestampMicrosecondView(data as any, 2) as any;
                return new IntVector(data, microsecondView);
            }
            case TimeUnit.NANOSECOND: {
                const nanosecondView = new TimestampNanosecondView(data as any, 2) as any;
                return new IntVector(data, nanosecondView);
            }
        }
        throw new TypeError(`Unrecognized time unit "${TimeUnit[this.type.unit]}"`);
    }
}
/** Vector of `Interval` type. */
export class IntervalVector extends FlatVector<Interval> {
    /** Default view: `IntervalYearMonthView` for `YEAR_MONTH`, otherwise a `FixedSizeView` of size 2. */
    static defaultView<T extends Interval>(data: Data<T>) {
        if (data.type.unit === IntervalUnit.YEAR_MONTH) {
            return new IntervalYearMonthView(data);
        }
        return new FixedSizeView(data, 2);
    }

    constructor(
        data: Data<Interval>,
        view: View<Interval> = IntervalVector.defaultView(data)
    ) {
        super(data, view);
    }

    /** Int32 reinterpretation at offset 0; stride 1 for `YEAR_MONTH`, stride 2 otherwise. */
    public lows(): IntVector<Int32> {
        if (this.type.unit === IntervalUnit.YEAR_MONTH) {
            return this.asInt32(0, 1);
        }
        return this.asInt32(0, 2);
    }

    /** For `YEAR_MONTH` the same as `lows()`; otherwise Int32 reinterpretation at offset 1, stride 2. */
    public highs(): IntVector<Int32> {
        if (this.type.unit === IntervalUnit.YEAR_MONTH) {
            return this.asInt32(0, 1);
        }
        return this.asInt32(1, 2);
    }
}
/** Vector of `Binary` type; reads through a `BinaryView` by default. */
export class BinaryVector extends ListVectorBase<Binary> {
    constructor(
        data: Data<Binary>,
        view: View<Binary> = new BinaryView(data)
    ) {
        super(data, view);
    }

    /** Returns a `Utf8Vector` over this vector's data re-typed as `Utf8`. */
    public asUtf8() {
        const source = this.data as FlatListData<any>;
        const utf8Data = source.clone(new Utf8());
        return new Utf8Vector(utf8Data);
    }
}
/**
 * Vector of `FixedSizeBinary` type; by default reads through a `FixedSizeView`
 * sized by the type's `byteWidth`.
 */
export class FixedSizeBinaryVector extends FlatVector<FixedSizeBinary> {
    constructor(
        data: Data<FixedSizeBinary>,
        view: View<FixedSizeBinary> = new FixedSizeView(data, data.type.byteWidth)
    ) {
        super(data, view);
    }
}
/** Vector of `Utf8` type; reads through a `Utf8View` by default. */
export class Utf8Vector extends ListVectorBase<Utf8> {
    constructor(
        data: Data<Utf8>,
        view: View<Utf8> = new Utf8View(data)
    ) {
        super(data, view);
    }

    /** Returns a `BinaryVector` over this vector's data re-typed as `Binary`. */
    public asBinary() {
        const source = this.data as FlatListData<any>;
        const binaryData = source.clone(new Binary());
        return new BinaryVector(binaryData);
    }
}
/** Vector of `List<T>` type; reads through a `ListView` by default. */
export class ListVector<T extends DataType = DataType> extends ListVectorBase<List<T>> {
    constructor(
        data: Data<List<T>>,
        view: View<List<T>> = new ListView(data)
    ) {
        super(data, view);
    }
}
/** Vector of `FixedSizeList` type; reads through a `FixedSizeListView` by default. */
export class FixedSizeListVector extends Vector<FixedSizeList> {
    constructor(
        data: Data<FixedSizeList>,
        view: View<FixedSizeList> = new FixedSizeListView(data)
    ) {
        super(data, view);
    }
}
/** Vector of `Map_` type; reads through a `MapView` by default. */
export class MapVector extends NestedVector<Map_> {
    constructor(
        data: Data<Map_>,
        view: View<Map_> = new MapView(data)
    ) {
        super(data, view);
    }

    /**
     * Returns a `StructVector` over this vector's data, re-typed as a `Struct`
     * built from this type's `children`.
     */
    public asStruct() {
        const source = this.data as NestedData<any>;
        const structType = new Struct(this.type.children);
        return new StructVector(source.clone(structType));
    }
}
/** Vector of `Struct` type; reads through a `StructView` by default. */
export class StructVector extends NestedVector<Struct> {
    constructor(
        data: Data<Struct>,
        view: View<Struct> = new StructView(data)
    ) {
        super(data, view);
    }

    /**
     * Returns a `MapVector` over this vector's data, re-typed as a `Map_` built
     * from `keysSorted` and this type's `children`.
     *
     * @param keysSorted forwarded to the `Map_` constructor; defaults to `false`
     */
    public asMap(keysSorted: boolean = false) {
        const source = this.data as NestedData<any>;
        const mapType = new Map_(keysSorted, this.type.children);
        return new MapVector(source.clone(mapType));
    }
}
/**
 * Vector of union types. By default reads through a `UnionView` when the type's
 * mode is `UnionMode.Sparse`, and through a `DenseUnionView` otherwise.
 */
export class UnionVector<T extends (SparseUnion | DenseUnion) = any> extends NestedVector<T> {
    constructor(
        data: Data<T>,
        view: View<T> = (
            data.type.mode === UnionMode.Sparse
                ? new UnionView<SparseUnion>(data as Data<SparseUnion>)
                : new DenseUnionView(data as Data<DenseUnion>)
        ) as any
    ) {
        super(data, view);
    }
}
/**
 * Vector of dictionary-encoded values: an `indicies` vector of integer keys
 * plus a `dictionary` vector the keys point into.
 */
export class DictionaryVector<T extends DataType = DataType> extends Vector<Dictionary<T>> {
    // @ts-ignore
    public readonly indicies: Vector<Int>;
    // @ts-ignore
    public readonly dictionary: Vector<T>;

    /**
     * @param data dictionary data, or chunked data whose chunks are dictionary vectors
     * @param view defaults to a `DictionaryView` over `data.dictionary` and an
     *             `IntVector` built from `data.indicies`
     * @throws TypeError when `data`/`view` are neither a
     *         `DictionaryData`/`DictionaryView` pair nor a
     *         `ChunkedData`/`ChunkedView` pair
     */
    constructor(data: Data<Dictionary<T>>, view: View<Dictionary<T>> = new DictionaryView<T>(data.dictionary, new IntVector(data.indicies))) {
        super(data as Data<any>, view);
        if (data instanceof DictionaryData && view instanceof DictionaryView) {
            this.indicies = view.indicies;
            this.dictionary = data.dictionary;
        } else if (data instanceof ChunkedData && view instanceof ChunkedView) {
            const chunks = view.chunkVectors as DictionaryVector<T>[];
            // Assume the last chunk's dictionary data is the most up-to-date,
            // including data from DictionaryBatches that were marked as deltas
            this.dictionary = chunks[chunks.length - 1].dictionary;
            // Concatenate the per-chunk key vectors, in chunk order, into one key vector.
            this.indicies = chunks.reduce<Vector<Int> | null>(
                (idxs: Vector<Int> | null, dict: DictionaryVector<T>) =>
                    !idxs ? dict.indicies! : idxs.concat(dict.indicies!),
                null
            )!;
        } else {
            throw new TypeError(`Unrecognized DictionaryVector view`);
        }
    }

    /** Key stored at `index` in `indicies`. */
    public getKey(index: number) { return this.indicies.get(index); }

    /** Dictionary value stored under `key`. */
    public getValue(key: number) { return this.dictionary.get(key); }

    /**
     * Position of `value` in the dictionary, as reported by the dictionary's
     * `indexOf`. The argument is a dictionary *value* (`T['TValue']`), matching
     * what `getValue` returns and what `Vector<T>.indexOf` accepts.
     */
    public reverseLookup(value: T['TValue']) { return this.dictionary.indexOf(value); }
}
/**
 * Creates the concrete `Vector` subclass for `data` by visiting `data.type`
 * with a loader whose `visit*` methods each construct the matching vector.
 */
export const createVector = (() => {
    class VectorLoader<T extends DataType> extends TypeVisitor {
        constructor(private data: Data<T>) {
            super();
        }
        visitNull(_type: Null) {
            return new NullVector(this.data);
        }
        visitInt(_type: Int) {
            return new IntVector(this.data);
        }
        visitFloat(_type: Float) {
            return new FloatVector(this.data);
        }
        visitBinary(_type: Binary) {
            return new BinaryVector(this.data);
        }
        visitUtf8(_type: Utf8) {
            return new Utf8Vector(this.data);
        }
        visitBool(_type: Bool) {
            return new BoolVector(this.data);
        }
        visitDecimal(_type: Decimal) {
            return new DecimalVector(this.data);
        }
        visitDate(_type: Date_) {
            return new DateVector(this.data);
        }
        visitTime(_type: Time) {
            return new TimeVector(this.data);
        }
        visitTimestamp(_type: Timestamp) {
            return new TimestampVector(this.data);
        }
        visitInterval(_type: Interval) {
            return new IntervalVector(this.data);
        }
        visitList(_type: List) {
            return new ListVector(this.data);
        }
        visitStruct(_type: Struct) {
            return new StructVector(this.data);
        }
        visitUnion(_type: Union) {
            return new UnionVector(this.data);
        }
        visitFixedSizeBinary(_type: FixedSizeBinary) {
            return new FixedSizeBinaryVector(this.data);
        }
        visitFixedSizeList(_type: FixedSizeList) {
            return new FixedSizeListVector(this.data);
        }
        visitMap(_type: Map_) {
            return new MapVector(this.data);
        }
        visitDictionary(_type: Dictionary) {
            return new DictionaryVector(this.data);
        }
    }
    // The factory sees the loader only through this constructor signature.
    const Loader: new <T extends DataType>(data: Data<T>) => TypeVisitor = VectorLoader;
    return <T extends DataType>(data: Data<T>) => {
        return TypeVisitor.visitTypeInline(new Loader(data), data.type) as Vector<T>;
    };
})();