// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Unit tests for SparseTensor and its subclasses (SparseTensorImpl with
// SparseCOOIndex and SparseCSRIndex)

#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include <gtest/gtest.h>

#include "arrow/sparse_tensor.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/testing/util.h"
#include "arrow/type.h"

namespace arrow {
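
// Check that a sparse tensor and its sparse index both report the expected
// format type.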
static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
                                              const SparseTensor& sparse_tensor) {
  ASSERT_EQ(expected, sparse_tensor.format_id());
  ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
}
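
// Check that the nth row of a COO coordinates tensor holds the expected
// coordinates.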
static inline void AssertCOOIndex(
    const std::shared_ptr<SparseCOOIndex::CoordsTensor>& sidx, const int64_t nth,
    const std::vector<int64_t>& expected_values) {
  int64_t n = static_cast<int64_t>(expected_values.size());
  for (int64_t i = 0; i < n; ++i) {
    ASSERT_EQ(expected_values[i], sidx->Value({nth, i}));
  }
}
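
// An empty sparse tensor built from only a type and shape has no non-zero
// entries, but still reports its full logical size (2 * 3 * 4 = 24) and any
// dimension names it was constructed with.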
TEST(TestSparseCOOTensor, CreationEmptyTensor) {
  std::vector<int64_t> shape = {2, 3, 4};
  SparseTensorImpl<SparseCOOIndex> st1(int64(), shape);

  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
  SparseTensorImpl<SparseCOOIndex> st2(int64(), shape, dim_names);

  ASSERT_EQ(0, st1.non_zero_length());
  ASSERT_EQ(0, st2.non_zero_length());

  ASSERT_EQ(24, st1.size());
  ASSERT_EQ(24, st2.size());

  ASSERT_EQ(std::vector<std::string>({"foo", "bar", "baz"}), st2.dim_names());
  ASSERT_EQ("foo", st2.dim_name(0));
  ASSERT_EQ("bar", st2.dim_name(1));
  ASSERT_EQ("baz", st2.dim_name(2));

  ASSERT_EQ(std::vector<std::string>({}), st1.dim_names());
  ASSERT_EQ("", st1.dim_name(0));
  ASSERT_EQ("", st1.dim_name(1));
  ASSERT_EQ("", st1.dim_name(2));
}
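
// Converting a dense NumericTensor<Int64Type> keeps only the 12 non-zero
// values and records their coordinates in a column-major COO index tensor of
// shape {12, 3}: one row of (dim0, dim1, dim2) coordinates per value.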
TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
  std::vector<int64_t> shape = {2, 3, 4};
  std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);

  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
  NumericTensor<Int64Type> tensor1(buffer, shape);
  NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);

  SparseTensorImpl<SparseCOOIndex> st1(tensor1);
  SparseTensorImpl<SparseCOOIndex> st2(tensor2);

  CheckSparseIndexFormatType(SparseTensorFormat::COO, st1);

  ASSERT_EQ(12, st1.non_zero_length());
  ASSERT_TRUE(st1.is_mutable());

  ASSERT_EQ(std::vector<std::string>({"foo", "bar", "baz"}), st2.dim_names());
  ASSERT_EQ("foo", st2.dim_name(0));
  ASSERT_EQ("bar", st2.dim_name(1));
  ASSERT_EQ("baz", st2.dim_name(2));

  ASSERT_EQ(std::vector<std::string>({}), st1.dim_names());
  ASSERT_EQ("", st1.dim_name(0));
  ASSERT_EQ("", st1.dim_name(1));
  ASSERT_EQ("", st1.dim_name(2));

  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});

  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
  ASSERT_EQ(std::string("SparseCOOIndex"), si.ToString());

  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
  ASSERT_TRUE(sidx->is_column_major());

  AssertCOOIndex(sidx, 0, {0, 0, 0});
  AssertCOOIndex(sidx, 1, {0, 0, 2});
  AssertCOOIndex(sidx, 2, {0, 1, 1});
  AssertCOOIndex(sidx, 10, {1, 2, 1});
  AssertCOOIndex(sidx, 11, {1, 2, 3});
}
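
// Same as the previous test, but starting from a type-erased Tensor rather
// than a NumericTensor<Int64Type>.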
TEST(TestSparseCOOTensor, CreationFromTensor) {
  std::vector<int64_t> shape = {2, 3, 4};
  std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);

  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
  Tensor tensor1(int64(), buffer, shape);
  Tensor tensor2(int64(), buffer, shape, {}, dim_names);

  SparseTensorImpl<SparseCOOIndex> st1(tensor1);
  SparseTensorImpl<SparseCOOIndex> st2(tensor2);

  ASSERT_EQ(12, st1.non_zero_length());
  ASSERT_TRUE(st1.is_mutable());

  ASSERT_EQ(std::vector<std::string>({"foo", "bar", "baz"}), st2.dim_names());
  ASSERT_EQ("foo", st2.dim_name(0));
  ASSERT_EQ("bar", st2.dim_name(1));
  ASSERT_EQ("baz", st2.dim_name(2));

  ASSERT_EQ(std::vector<std::string>({}), st1.dim_names());
  ASSERT_EQ("", st1.dim_name(0));
  ASSERT_EQ("", st1.dim_name(1));
  ASSERT_EQ("", st1.dim_name(2));

  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});

  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
  ASSERT_TRUE(sidx->is_column_major());

  AssertCOOIndex(sidx, 0, {0, 0, 0});
  AssertCOOIndex(sidx, 1, {0, 0, 2});
  AssertCOOIndex(sidx, 2, {0, 1, 1});
  AssertCOOIndex(sidx, 10, {1, 2, 1});
  AssertCOOIndex(sidx, 11, {1, 2, 3});
}
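
// Strides of {192, 64, 16} bytes are twice the contiguous strides
// ({96, 32, 8}) for an int64 tensor of shape {2, 3, 4}, so every other
// element in the buffer is padding. The resulting sparse tensor must match
// the one built from contiguous data above.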
TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) {
  std::vector<int64_t> shape = {2, 3, 4};
  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
  std::vector<int64_t> strides = {192, 64, 16};
  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
  Tensor tensor(int64(), buffer, shape, strides);
  SparseTensorImpl<SparseCOOIndex> st(tensor);

  ASSERT_EQ(12, st.non_zero_length());
  ASSERT_TRUE(st.is_mutable());

  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});

  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index());
  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
  ASSERT_TRUE(sidx->is_column_major());

  AssertCOOIndex(sidx, 0, {0, 0, 0});
  AssertCOOIndex(sidx, 1, {0, 0, 2});
  AssertCOOIndex(sidx, 2, {0, 1, 1});
  AssertCOOIndex(sidx, 10, {1, 2, 1});
  AssertCOOIndex(sidx, 11, {1, 2, 3});
}
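
// Converting a dense 6 x 4 matrix to CSR: indptr gets one entry per row plus
// one (7 in total), and indices holds the column of each of the 12 non-zero
// values. Note that three dimension names are passed for a 2-D tensor; the
// surplus name is still stored and retrievable.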
TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
  std::vector<int64_t> shape = {6, 4};
  std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);

  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
  NumericTensor<Int64Type> tensor1(buffer, shape);
  NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);

  SparseTensorImpl<SparseCSRIndex> st1(tensor1);
  SparseTensorImpl<SparseCSRIndex> st2(tensor2);

  CheckSparseIndexFormatType(SparseTensorFormat::CSR, st1);

  ASSERT_EQ(12, st1.non_zero_length());
  ASSERT_TRUE(st1.is_mutable());

  ASSERT_EQ(std::vector<std::string>({"foo", "bar", "baz"}), st2.dim_names());
  ASSERT_EQ("foo", st2.dim_name(0));
  ASSERT_EQ("bar", st2.dim_name(1));
  ASSERT_EQ("baz", st2.dim_name(2));

  ASSERT_EQ(std::vector<std::string>({}), st1.dim_names());
  ASSERT_EQ("", st1.dim_name(0));
  ASSERT_EQ("", st1.dim_name(1));
  ASSERT_EQ("", st1.dim_name(2));

  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});

  const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index());
  ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString());
  ASSERT_EQ(1, si.indptr()->ndim());
  ASSERT_EQ(1, si.indices()->ndim());

  const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
  std::vector<int64_t> indptr_values(indptr_begin,
                                     indptr_begin + si.indptr()->shape()[0]);

  ASSERT_EQ(7, indptr_values.size());
  ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);

  const int64_t* indices_begin =
      reinterpret_cast<const int64_t*>(si.indices()->raw_data());
  std::vector<int64_t> indices_values(indices_begin,
                                      indices_begin + si.indices()->shape()[0]);

  ASSERT_EQ(12, indices_values.size());
  ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
}
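
// The same 6 x 4 matrix stored non-contiguously (strides {64, 16} bytes,
// twice the contiguous {32, 8}) must produce an identical CSR structure.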
TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) {
  std::vector<int64_t> shape = {6, 4};
  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
  std::vector<int64_t> strides = {64, 16};
  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
  Tensor tensor(int64(), buffer, shape, strides);
  SparseTensorImpl<SparseCSRIndex> st(tensor);

  ASSERT_EQ(12, st.non_zero_length());
  ASSERT_TRUE(st.is_mutable());

  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});

  const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index());
  ASSERT_EQ(1, si.indptr()->ndim());
  ASSERT_EQ(1, si.indices()->ndim());

  const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
  std::vector<int64_t> indptr_values(indptr_begin,
                                     indptr_begin + si.indptr()->shape()[0]);

  ASSERT_EQ(7, indptr_values.size());
  ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);

  const int64_t* indices_begin =
      reinterpret_cast<const int64_t*>(si.indices()->raw_data());
  std::vector<int64_t> indices_values(indices_begin,
                                      indices_begin + si.indices()->shape()[0]);

  ASSERT_EQ(12, indices_values.size());
  ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
}

}  // namespace arrow