// blob: 2f138182bbdd9063513a6c0c0e25695d8a850886 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import '../jest-extensions';
import {
Data, Schema, Field, Table, RecordBatch, Column,
Vector, Int32Vector, Float32Vector, Utf8Vector, DictionaryVector,
Struct, Float32, Int32, Dictionary, Utf8, Int8
} from 'apache-arrow';
import { arange } from './utils';
/** Column names used by the test tables, listed in column order. */
const NAMES: (keyof TestDataSchema)[] = ['f32', 'i32', 'dictionary'];
/** Index of each column, both within NAMES and within every expected-values row. */
const F32 = 0;
const I32 = 1;
const DICT = 2;
/**
 * Builds the seven rows that every fixture below starts with.
 * Each row is laid out as [f32, i32, dictionary]; Math.fround coerces the
 * literal to float32 so it compares equal to what a Float32 column stores.
 */
const firstSevenRows = () => [
    [Math.fround(-0.3), -1, 'a'],
    [Math.fround(-0.2), 1, 'b'],
    [Math.fround(-0.1), -1, 'c'],
    [Math.fround(0), 1, 'a'],
    [Math.fround(0.1), -1, 'b'],
    [Math.fround(0.2), 1, 'c'],
    [Math.fround(0.3), -1, 'a']
];
/**
 * Fixtures for the per-table tests. Each entry carries a scenario name, a
 * factory producing the table under test, and a factory producing the rows
 * that table is expected to contain.
 */
export const test_data = [
    {
        name: `single record batch`,
        table: getSingleRecordBatchTable,
        values: firstSevenRows
    }, {
        name: `multiple record batches`,
        table: getMultipleRecordBatchesTable,
        // Same seven rows, followed by the two extra rows of the third batch.
        values: () => [
            ...firstSevenRows(),
            [Math.fround(0.2), 1, 'b'],
            [Math.fround(0.1), -1, 'c'],
        ]
    }, {
        name: `struct`,
        // Unwraps the single 'struct' column back into a three-column table.
        table: () => Table.fromStruct(getStructTable().getColumn('struct')!),
        values: firstSevenRows
    },
];
/**
 * Asserts that `batch` and `table` both reproduce a window of `source`.
 *
 * The batch and table must have equal lengths and the same number of columns
 * as `source`; every column of each must then equal the matching `source`
 * column sliced to `[offset, offset + batch.length)`.
 *
 * @param source table holding the reference data
 * @param offset row in `source` at which the compared window starts
 * @param batch  record batch expected to match the window
 * @param table  table expected to match the window
 */
function compareBatchAndTable(source: Table, offset: number, batch: RecordBatch, table: Table) {
    expect(batch).toHaveLength(table.length);
    expect(table.numCols).toEqual(source.numCols);
    expect(batch.numCols).toEqual(source.numCols);
    const columnCount = source.numCols;
    for (let col = 0; col < columnCount; col++) {
        const expected = source.getColumnAt(col)!.slice(offset, offset + batch.length);
        const fromBatch = batch.getChildAt(col);
        const fromTable = table.getColumnAt(col);
        // The labels and field name are handed to the matcher alongside the vectors.
        const fieldName = source.schema.fields[col].name;
        expect([fromBatch, `batch`, fieldName]).toEqualVector([expected, `source`]);
        expect([fromTable, `table`, fieldName]).toEqualVector([expected, `source`]);
    }
}
describe(`Table`, () => {
// Three ways of asking for a table with no rows; each must report length 0.
test(`can create an empty table`, () => {
    const emptyTable = Table.empty();
    expect(emptyTable).toHaveLength(0);
});
test(`Table.from([]) creates an empty table`, () => {
    const fromEmptyList = Table.from([]);
    expect(fromEmptyList).toHaveLength(0);
});
test(`Table.from() creates an empty table`, () => {
    const fromNoArgs = Table.from();
    expect(fromNoArgs).toHaveLength(0);
});
describe(`new()`, () => {
// Zero-length columns passed to Table.new() should come back empty and still typed.
test(`creates an empty Table with Columns`, () => {
    const emptyInts = Column.new('i32', Data.new(new Int32(), 0, 0));
    const emptyFloats = Column.new('f32', Data.new(new Float32(), 0, 0));
    const table = Table.new(emptyInts, emptyFloats);
    // Look the columns up again by name so the assertions run against the table's copies.
    const i32: typeof emptyInts = table.getColumn('i32')!;
    const f32: typeof emptyFloats = table.getColumn('f32')!;
    expect(table).toHaveLength(0);
    expect(i32).toHaveLength(0);
    expect(f32).toHaveLength(0);
    expect(i32.toArray()).toBeInstanceOf(Int32Array);
    expect(f32.toArray()).toBeInstanceOf(Float32Array);
});
// A single column should be unchanged after being wrapped in a Table.
test(`creates a new Table from a Column`, () => {
    const i32s = new Int32Array(arange(new Array<number>(10)));
    const source = Column.new('i32', Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
    // Same checks are applied to the column before and after Table.new().
    const expectIntactColumn = (column: typeof source) => {
        expect(column.name).toBe('i32');
        expect(column).toHaveLength(i32s.length);
        expect(column.nullable).toBe(true);
        expect(column.nullCount).toBe(0);
    };
    expectIntactColumn(source);
    const table = Table.new(source);
    const fromTable: typeof source = table.getColumnAt(0)!;
    expectIntactColumn(fromTable);
    expect(fromTable).toEqualVector(Int32Vector.from(i32s));
});
// Two equal-length columns should be unchanged after being combined into a Table.
test(`creates a new Table from Columns`, () => {
    const i32s = new Int32Array(arange(new Array<number>(10)));
    const f32s = new Float32Array(arange(new Array<number>(10)));
    const intSource = Column.new('i32', Data.Int(new Int32(), 0, i32s.length, 0, null, i32s));
    const floatSource = Column.new('f32', Data.Float(new Float32(), 0, f32s.length, 0, null, f32s));
    // Same checks are applied to the pair before and after Table.new().
    const expectIntactPair = (ints: typeof intSource, floats: typeof floatSource) => {
        expect(ints.name).toBe('i32');
        expect(floats.name).toBe('f32');
        expect(ints).toHaveLength(i32s.length);
        expect(floats).toHaveLength(f32s.length);
        expect(ints.nullable).toBe(true);
        expect(floats.nullable).toBe(true);
        expect(ints.nullCount).toBe(0);
        expect(floats.nullCount).toBe(0);
    };
    expectIntactPair(intSource, floatSource);
    const table = Table.new(intSource, floatSource);
    const i32: typeof intSource = table.getColumnAt(0)!;
    const f32: typeof floatSource = table.getColumnAt(1)!;
    expectIntactPair(i32, f32);
    expect(i32).toEqualVector(Int32Vector.from(i32s));
    expect(f32).toEqualVector(Float32Vector.from(f32s));
});
// Columns of unequal length, passed as an array: the shorter one is expected
// to grow to the longest sibling's length, with the extra rows counted as nulls.
test(`creates a new Table from Columns with different lengths`, () => {
    const i32s = new Int32Array(arange(new Array<number>(20)));
    const f32s = new Float32Array(arange(new Array<number>(8)));
    const longInts = Column.new('i32', Int32Vector.from(i32s));
    const shortFloats = Column.new('f32', Float32Vector.from(f32s));
    expect(longInts.name).toBe('i32');
    expect(shortFloats.name).toBe('f32');
    expect(longInts).toHaveLength(i32s.length);
    expect(shortFloats).toHaveLength(f32s.length);
    expect(longInts.nullable).toBe(true);
    expect(shortFloats.nullable).toBe(true);
    expect(longInts.nullCount).toBe(0);
    expect(shortFloats.nullCount).toBe(0);

    const table = Table.new([longInts, shortFloats]);
    const i32: typeof longInts = table.getColumnAt(0)!;
    const f32: typeof shortFloats = table.getColumnAt(1)!;
    const addedNulls = i32s.length - f32s.length;
    expect(i32.name).toBe('i32');
    expect(f32.name).toBe('f32');
    expect(i32).toHaveLength(i32s.length);
    expect(f32).toHaveLength(i32s.length); // new length should be the same as the longest sibling
    expect(i32.nullable).toBe(true);
    expect(f32.nullable).toBe(true); // true, with 12 additional nulls
    expect(i32.nullCount).toBe(0);
    expect(f32.nullCount).toBe(addedNulls);

    // 8-byte bitmap with only the first byte set to 0xFF
    // (presumably marking the 8 original rows as valid -- confirm against Data.Float).
    const bitmap = new Uint8Array(8).fill(255, 0, 1);
    const f32Expected = Data.Float(f32.type, 0, i32s.length, addedNulls, bitmap, f32s);
    expect(i32).toEqualVector(Int32Vector.from(i32s));
    expect(f32).toEqualVector(new Float32Vector(f32Expected));
});
// Columns of unequal length and chunk count, passed as a name -> column map:
// the columns take the map's key names, and the shorter (two-chunk) column is
// expected to grow to the longest sibling's length.
test(`creates a new Table from Columns with different lengths and number of inner chunks`, () => {
    const i32s = new Int32Array(arange(new Array<number>(20)));
    const f32s = new Float32Array(arange(new Array<number>(16)));
    const longInts = Column.new('i32', Int32Vector.from(i32s));
    const chunkedFloats = Column.new(
        'f32',
        Float32Vector.from(f32s.slice(0, 8)),
        Float32Vector.from(f32s.slice(8, 16))
    );
    expect(longInts.name).toBe('i32');
    expect(chunkedFloats.name).toBe('f32');
    expect(longInts).toHaveLength(i32s.length);
    expect(chunkedFloats).toHaveLength(f32s.length);
    expect(longInts.nullable).toBe(true);
    expect(chunkedFloats.nullable).toBe(true);
    expect(longInts.nullCount).toBe(0);
    expect(chunkedFloats.nullCount).toBe(0);

    const table = Table.new({ i32Renamed: longInts, f32Renamed: chunkedFloats });
    const i32: typeof longInts = table.getColumn('i32Renamed');
    const f32: typeof chunkedFloats = table.getColumn('f32Renamed');
    const addedNulls = i32s.length - f32s.length;
    expect(i32.name).toBe('i32Renamed');
    expect(f32.name).toBe('f32Renamed');
    expect(i32).toHaveLength(i32s.length);
    expect(f32).toHaveLength(i32s.length); // new length should be the same as the longest sibling
    expect(i32.nullable).toBe(true);
    expect(f32.nullable).toBe(true); // true, with 4 additional nulls
    expect(i32.nullCount).toBe(0);
    expect(f32.nullCount).toBe(addedNulls);

    // 8-byte bitmap with only the first two bytes set to 0xFF
    // (presumably marking the 16 original rows as valid -- confirm against Data.Float).
    const bitmap = new Uint8Array(8).fill(255, 0, 2);
    const f32Expected = Data.Float(f32.type, 0, i32s.length, addedNulls, bitmap, f32s);
    expect(i32).toEqualVector(Int32Vector.from(i32s));
    expect(f32).toEqualVector(new Float32Vector(f32Expected));
});
// Plain typed arrays in a name -> array map should become columns that
// hand back arrays of the same kind and contents.
test(`creates a new Table from Typed Arrays`, () => {
    const rowCount = 10;
    const i32s = Int32Array.from({ length: rowCount }, (_, i) => i);
    const f32s = Float32Array.from({ length: rowCount }, (_, i) => i);
    const table = Table.new({ i32s, f32s });
    const i32 = table.getColumn('i32s')!;
    const f32 = table.getColumn('f32s')!;
    expect(table).toHaveLength(rowCount);
    expect(i32).toHaveLength(rowCount);
    expect(f32).toHaveLength(rowCount);
    const i32Values = i32.toArray();
    const f32Values = f32.toArray();
    expect(i32Values).toBeInstanceOf(Int32Array);
    expect(f32Values).toBeInstanceOf(Float32Array);
    expect(i32Values).toEqual(i32s);
    expect(f32Values).toEqual(f32s);
});
});
// Serializing a table built from a sliced batch must preserve exactly the sliced rows.
test(`Table.serialize() serializes sliced RecordBatches`, () => {
    const table = getSingleRecordBatchTable();
    const batch = table.chunks[0];
    const half = (batch.length / 2) | 0; // integer midpoint
    // Wraps `piece` in a Table, serializes and re-reads it, then compares both
    // the piece and the re-read table against the source rows starting at `offset`.
    const expectRoundTrip = (offset: number, piece: RecordBatch) =>
        compareBatchAndTable(table, offset, piece, Table.from(new Table(piece).serialize()));

    // First compare what happens when slicing from the batch level
    const firstHalf = batch.slice(0, half);
    const secondHalf = batch.slice(half);
    expectRoundTrip(0, firstHalf);
    expectRoundTrip(half, secondHalf);

    // Then compare what happens when creating a RecordBatch by slicing each child individually
    const rebuiltFirst = new RecordBatch(
        firstHalf.schema,
        firstHalf.length,
        firstHalf.schema.fields.map((_, i) => batch.getChildAt(i)!.slice(0, half))
    );
    const rebuiltSecond = new RecordBatch(
        secondHalf.schema,
        secondHalf.length,
        secondHalf.schema.fields.map((_, i) => batch.getChildAt(i)!.slice(half))
    );
    expectRoundTrip(0, rebuiltFirst);
    expectRoundTrip(half, rebuiltSecond);
});
// Runs the same battery of tests against every fixture in test_data.
// Each test builds its own table and expected rows, so a failure while
// constructing a fixture is reported as a failure of that test rather than
// aborting while the suite is still being defined.
for (const datum of test_data) {
    describe(datum.name, () => {
        test(`has the correct length`, () => {
            const table = datum.table();
            const values = datum.values();
            expect(table).toHaveLength(values.length);
        });
        test(`gets expected values`, () => {
            const table = datum.table();
            const values = datum.values();
            for (let i = -1; ++i < values.length;) {
                const row = table.get(i);
                const expected = values[i];
                expect(row.f32).toEqual(expected[F32]);
                expect(row.i32).toEqual(expected[I32]);
                expect(row.dictionary).toEqual(expected[DICT]);
            }
        });
        test(`iterates expected values`, () => {
            let i = 0;
            const table = datum.table();
            const values = datum.values();
            for (const row of table) {
                const expected = values[i++];
                expect(row.f32).toEqual(expected[F32]);
                expect(row.i32).toEqual(expected[I32]);
                expect(row.dictionary).toEqual(expected[DICT]);
            }
            // Without this the test would pass if the iterator yielded too few rows (or none).
            expect(i).toEqual(values.length);
        });
        test(`serialize and de-serialize is a no-op`, () => {
            const table = datum.table();
            const clone = Table.from(table.serialize());
            expect(clone).toEqualTable(table);
        });
        test(`count() returns the correct length`, () => {
            const table = datum.table();
            const values = datum.values();
            expect(table.count()).toEqual(values.length);
        });
        test(`getColumnIndex`, () => {
            const table = datum.table();
            expect(table.getColumnIndex('i32')).toEqual(I32);
            expect(table.getColumnIndex('f32')).toEqual(F32);
            expect(table.getColumnIndex('dictionary')).toEqual(DICT);
        });
        test(`table.select() basic tests`, () => {
            const table = datum.table();
            const values = datum.values();
            const selected = table.select('f32', 'dictionary');
            // The selection keeps the first and third fields of the original schema.
            expect(selected.schema.fields).toHaveLength(2);
            expect(selected.schema.fields[0]).toEqual(table.schema.fields[0]);
            expect(selected.schema.fields[1]).toEqual(table.schema.fields[2]);
            expect(selected).toHaveLength(values.length);
            let idx = 0;
            for (const row of selected) {
                const expected_row = values[idx++];
                expect(row.f32).toEqual(expected_row[F32]);
                expect(row.dictionary).toEqual(expected_row[DICT]);
            }
            // Make sure the loop above actually visited every expected row.
            expect(idx).toEqual(values.length);
        });
    });
}
});
/** Column name -> Arrow data type for the three columns of the test tables. */
type TestDataSchema = {
    f32: Float32;
    i32: Int32;
    dictionary: Dictionary<Utf8, Int8>;
};
/**
 * Builds the three vectors of a test table, in [f32, i32, dictionary] order.
 *
 * @param f32Values   values for the Float32 vector
 * @param i32Values   values for the Int32 vector
 * @param dictIndices Int8 indices into the fixed dictionary ['a', 'b', 'c']
 * @returns the Float32, Int32 and dictionary vectors, in that order
 */
function getTestVectors(f32Values: number[], i32Values: number[], dictIndices: number[]) {
    const f32Vector = Vector.new(Data.Float(new Float32(), 0, f32Values.length, 0, null, f32Values));
    const i32Vector = Vector.new(Data.Int(new Int32(), 0, i32Values.length, 0, null, i32Values));
    const dictionary = Utf8Vector.from(['a', 'b', 'c']);
    const dictVector = DictionaryVector.from(dictionary, new Int8(), dictIndices);
    return [f32Vector, i32Vector, dictVector];
}
/**
 * Creates the seven-row test table from one set of vectors, with columns
 * named after NAMES.
 */
function getSingleRecordBatchTable() {
    const f32Values = [-0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3];
    const i32Values = [-1, 1, -1, 1, -1, 1, -1];
    const dictIndices = [0, 1, 2, 0, 1, 2, 0];
    const columns = getTestVectors(f32Values, i32Values, dictIndices);
    return Table.new<TestDataSchema>(columns, NAMES);
}
/**
 * Creates a nine-row test table made of three record batches of three rows
 * each, all sharing one schema.
 */
function getMultipleRecordBatchesTable() {
    // Empty vectors are built only to read off the data type of each column.
    const types = getTestVectors([], [], []).map((vec) => vec.type);
    const schema = new Schema<TestDataSchema>(
        NAMES.map((name, i) => Field.new(name, types[i]))
    );
    // Every batch holds exactly three rows.
    const makeBatch = (f32Values: number[], i32Values: number[], dictIndices: number[]) =>
        new RecordBatch(schema, 3, getTestVectors(f32Values, i32Values, dictIndices));
    return new Table<TestDataSchema>([
        makeBatch([-0.3, -0.2, -0.1], [-1, 1, -1], [0, 1, 2]),
        makeBatch([0, 0.1, 0.2], [1, -1, 1], [0, 1, 2]),
        makeBatch([0.3, 0.2, 0.1], [-1, 1, -1], [0, 1, 2]),
    ]);
}
/**
 * Creates a table with a single column named 'struct' whose children are the
 * columns of the single-record-batch test table.
 */
function getStructTable() {
    const source = getSingleRecordBatchTable();
    const { fields } = source.schema;
    const structType = new Struct<TestDataSchema>(fields);
    // One child per schema field, taken from the source table by position.
    const children = fields.map((_, i) => source.getColumnAt(i)!);
    const structData = Data.Struct(structType, 0, source.length, 0, null, children);
    return Table.new<{ struct: Struct<TestDataSchema> }>([Vector.new(structData)], ['struct']);
}