| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| /// EXPERIMENTAL: Metadata for n-dimensional sparse arrays, aka "sparse tensors". |
| /// Arrow implementations in general are not required to implement this type |
| |
| include "Tensor.fbs"; |
| |
| namespace org.apache.arrow.flatbuf; |
| |
| /// ---------------------------------------------------------------------- |
| /// EXPERIMENTAL: Data structures for sparse tensors |
| |
| /// Coordinate (COO) format of sparse tensor index. |
| /// |
| /// COO's index list are represented as a NxM matrix, |
| /// where N is the number of non-zero values, |
| /// and M is the number of dimensions of a sparse tensor. |
| /// |
| /// indicesBuffer stores the location and size of the data of this indices |
| /// matrix. The value type and the stride of the indices matrix is |
| /// specified in indicesType and indicesStrides fields. |
| /// |
| /// For example, let X be a 2x3x4x5 tensor, and it has the following |
| /// 6 non-zero values: |
| /// ```text |
| /// X[0, 1, 2, 0] := 1 |
| /// X[1, 1, 2, 3] := 2 |
| /// X[0, 2, 1, 0] := 3 |
| /// X[0, 1, 3, 0] := 4 |
| /// X[0, 1, 2, 1] := 5 |
| /// X[1, 2, 0, 4] := 6 |
| /// ``` |
| /// In COO format, the index matrix of X is the following 4x6 matrix: |
| /// ```text |
| /// [[0, 0, 0, 0, 1, 1], |
| /// [1, 1, 1, 2, 1, 2], |
| /// [2, 2, 3, 1, 2, 0], |
| /// [0, 1, 0, 0, 3, 4]] |
| /// ``` |
| /// When isCanonical is true, the indices is sorted in lexicographical order |
| /// (row-major order), and it does not have duplicated entries. Otherwise, |
| /// the indices may not be sorted, or may have duplicated entries. |
| table SparseTensorIndexCOO { |
| /// The type of values in indicesBuffer |
| indicesType: Int (required); |
| |
| /// Non-negative byte offsets to advance one value cell along each dimension |
| /// If omitted, default to row-major order (C-like). |
| indicesStrides: [long]; |
| |
| /// The location and size of the indices matrix's data |
| indicesBuffer: Buffer (required); |
| |
| /// This flag is true if and only if the indices matrix is sorted in |
| /// row-major order, and does not have duplicated entries. |
| /// This sort order is the same as of Tensorflow's SparseTensor, |
| /// but it is inverse order of SciPy's canonical coo_matrix |
| /// (SciPy employs column-major order for its coo_matrix). |
| isCanonical: bool; |
| } |
| |
| enum SparseMatrixCompressedAxis: short { Row, Column } |
| |
| /// Compressed Sparse format, that is matrix-specific. |
| table SparseMatrixIndexCSX { |
| /// Which axis, row or column, is compressed |
| compressedAxis: SparseMatrixCompressedAxis; |
| |
| /// The type of values in indptrBuffer |
| indptrType: Int (required); |
| |
| /// indptrBuffer stores the location and size of indptr array that |
| /// represents the range of the rows. |
| /// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. |
| /// The length of this array is 1 + (the number of rows), and the type |
| /// of index value is long. |
| /// |
| /// For example, let X be the following 6x4 matrix: |
| /// ```text |
| /// X := [[0, 1, 2, 0], |
| /// [0, 0, 3, 0], |
| /// [0, 4, 0, 5], |
| /// [0, 0, 0, 0], |
| /// [6, 0, 7, 8], |
| /// [0, 9, 0, 0]]. |
| /// ``` |
| /// The array of non-zero values in X is: |
| /// ```text |
| /// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. |
| /// ``` |
| /// And the indptr of X is: |
| /// ```text |
| /// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. |
| /// ``` |
| indptrBuffer: Buffer (required); |
| |
| /// The type of values in indicesBuffer |
| indicesType: Int (required); |
| |
| /// indicesBuffer stores the location and size of the array that |
| /// contains the column indices of the corresponding non-zero values. |
| /// The type of index value is long. |
| /// |
| /// For example, the indices of the above X is: |
| /// ```text |
| /// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. |
| /// ``` |
| /// Note that the indices are sorted in lexicographical order for each row. |
| indicesBuffer: Buffer (required); |
| } |
| |
| /// Compressed Sparse Fiber (CSF) sparse tensor index. |
| table SparseTensorIndexCSF { |
| /// CSF is a generalization of compressed sparse row (CSR) index. |
| /// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf) |
| /// |
| /// CSF index recursively compresses each dimension of a tensor into a set |
| /// of prefix trees. Each path from a root to leaf forms one tensor |
| /// non-zero index. CSF is implemented with two arrays of buffers and one |
| /// arrays of integers. |
| /// |
| /// For example, let X be a 2x3x4x5 tensor and let it have the following |
| /// 8 non-zero values: |
| /// ```text |
| /// X[0, 0, 0, 1] := 1 |
| /// X[0, 0, 0, 2] := 2 |
| /// X[0, 1, 0, 0] := 3 |
| /// X[0, 1, 0, 2] := 4 |
| /// X[0, 1, 1, 0] := 5 |
| /// X[1, 1, 1, 0] := 6 |
| /// X[1, 1, 1, 1] := 7 |
| /// X[1, 1, 1, 2] := 8 |
| /// ``` |
| /// As a prefix tree this would be represented as: |
| /// ```text |
| /// 0 1 |
| /// / \ | |
| /// 0 1 1 |
| /// / / \ | |
| /// 0 0 1 1 |
| /// /| /| | /| | |
| /// 1 2 0 2 0 0 1 2 |
| /// ``` |
| /// The type of values in indptrBuffers |
| indptrType: Int (required); |
| |
| /// indptrBuffers stores the sparsity structure. |
| /// Each two consecutive dimensions in a tensor correspond to a buffer in |
| /// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` |
| /// and `indptrBuffers[dim][i + 1]` signify a range of nodes in |
| /// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. |
| /// |
| /// For example, the indptrBuffers for the above X is: |
| /// ```text |
| /// indptrBuffer(X) = [ |
| /// [0, 2, 3], |
| /// [0, 1, 3, 4], |
| /// [0, 2, 4, 5, 8] |
| /// ]. |
| /// ``` |
| indptrBuffers: [Buffer] (required); |
| |
| /// The type of values in indicesBuffers |
| indicesType: Int (required); |
| |
| /// indicesBuffers stores values of nodes. |
| /// Each tensor dimension corresponds to a buffer in indicesBuffers. |
| /// For example, the indicesBuffers for the above X is: |
| /// ```text |
| /// indicesBuffer(X) = [ |
| /// [0, 1], |
| /// [0, 1, 1], |
| /// [0, 0, 1, 1], |
| /// [1, 2, 0, 2, 0, 0, 1, 2] |
| /// ]. |
| /// ``` |
| indicesBuffers: [Buffer] (required); |
| |
| /// axisOrder stores the sequence in which dimensions were traversed to |
| /// produce the prefix tree. |
| /// For example, the axisOrder for the above X is: |
| /// ```text |
| /// axisOrder(X) = [0, 1, 2, 3]. |
| /// ``` |
| axisOrder: [int] (required); |
| } |
| |
| union SparseTensorIndex { |
| SparseTensorIndexCOO, |
| SparseMatrixIndexCSX, |
| SparseTensorIndexCSF |
| } |
| |
| table SparseTensor { |
| /// The type of data contained in a value cell. |
| /// Currently only fixed-width value types are supported, |
| /// no strings or nested types. |
| type: Type (required); |
| |
| /// The dimensions of the tensor, optionally named. |
| shape: [TensorDim] (required); |
| |
| /// The number of non-zero values in a sparse tensor. |
| non_zero_length: long; |
| |
| /// Sparse tensor index |
| sparseIndex: SparseTensorIndex (required); |
| |
| /// The location and size of the tensor's data |
| data: Buffer (required); |
| } |
| |
| root_type SparseTensor; |