| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <algorithm> |
| #include <cstdint> |
| #include <cstring> |
| #include <limits> |
| #include <memory> |
| #include <numeric> |
| #include <string> |
| #include <type_traits> |
| #include <vector> |
| |
| #include <gtest/gtest.h> |
| |
| #include "arrow/array.h" |
| #include "arrow/array/diff.h" |
| #include "arrow/compute/api.h" |
| #include "arrow/status.h" |
| #include "arrow/testing/gtest_common.h" |
| #include "arrow/testing/random.h" |
| #include "arrow/testing/util.h" |
| #include "arrow/type.h" |
| #include "arrow/util/logging.h" |
| |
| namespace arrow { |
| |
| using internal::checked_cast; |
| using internal::checked_pointer_cast; |
| |
| constexpr random::SeedType kSeed = 0xdeadbeef; |
| static const auto edits_type = |
| struct_({field("insert", boolean()), field("run_length", int64())}); |
| |
| Status ValidateEditScript(const Array& edits, const Array& base, const Array& target) { |
| // beginning (in base) of the run before the current hunk |
| int64_t base_run_begin = 0; |
| return VisitEditScript(edits, [&](int64_t delete_begin, int64_t delete_end, |
| int64_t insert_begin, int64_t insert_end) { |
| auto target_run_begin = insert_begin - (delete_begin - base_run_begin); |
| if (!base.RangeEquals(base_run_begin, delete_begin, target_run_begin, target)) { |
| return Status::Invalid("base and target were unequal in a run"); |
| } |
| |
| base_run_begin = delete_end; |
| for (int64_t i = insert_begin; i < insert_end; ++i) { |
| for (int64_t d = delete_begin; d < delete_end; ++d) { |
| if (target.RangeEquals(i, i + 1, d, base)) { |
| return Status::Invalid("a deleted element was simultaneously inserted"); |
| } |
| } |
| } |
| |
| return Status::OK(); |
| }); |
| } |
| |
| class DiffTest : public ::testing::Test { |
| protected: |
| DiffTest() : rng_(kSeed) {} |
| |
| void DoDiff() { |
| auto edits = Diff(*base_, *target_, default_memory_pool()); |
| ASSERT_OK(edits.status()); |
| edits_ = edits.ValueOrDie(); |
| ASSERT_OK(edits_->ValidateFull()); |
| ASSERT_TRUE(edits_->type()->Equals(edits_type)); |
| insert_ = checked_pointer_cast<BooleanArray>(edits_->field(0)); |
| run_lengths_ = checked_pointer_cast<Int64Array>(edits_->field(1)); |
| } |
| |
| void DoDiffAndFormat(std::stringstream* out) { |
| DoDiff(); |
| auto formatter = MakeUnifiedDiffFormatter(*base_->type(), out); |
| ASSERT_OK(formatter.status()); |
| ASSERT_OK(formatter.ValueOrDie()(*edits_, *base_, *target_)); |
| } |
| |
| // validate diff and assert that it formats as expected, both directly |
| // and through Array::Equals |
| void AssertDiffAndFormat(const std::string& formatted_expected) { |
| std::stringstream formatted; |
| |
| DoDiffAndFormat(&formatted); |
| ASSERT_EQ(formatted.str(), formatted_expected) << "formatted diff incorrectly"; |
| formatted.str(""); |
| |
| ASSERT_EQ(edits_->length() == 1, |
| base_->Equals(*target_, EqualOptions().diff_sink(&formatted))); |
| ASSERT_EQ(formatted.str(), formatted_expected) |
| << "Array::Equals formatted diff incorrectly"; |
| } |
| |
| void AssertInsertIs(const std::string& insert_json) { |
| AssertArraysEqual(*ArrayFromJSON(boolean(), insert_json), *insert_, /*verbose=*/true); |
| } |
| |
| void AssertRunLengthIs(const std::string& run_lengths_json) { |
| AssertArraysEqual(*ArrayFromJSON(int64(), run_lengths_json), *run_lengths_, |
| /*verbose=*/true); |
| } |
| |
| void BaseAndTargetFromRandomFilter(std::shared_ptr<Array> values, |
| double filter_probability) { |
| std::shared_ptr<Array> base_filter, target_filter; |
| do { |
| base_filter = this->rng_.Boolean(values->length(), filter_probability, 0.0); |
| target_filter = this->rng_.Boolean(values->length(), filter_probability, 0.0); |
| } while (base_filter->Equals(target_filter)); |
| |
| ASSERT_OK_AND_ASSIGN(Datum out_datum, compute::Filter(values, base_filter)); |
| base_ = out_datum.make_array(); |
| |
| ASSERT_OK_AND_ASSIGN(out_datum, compute::Filter(values, target_filter)); |
| target_ = out_datum.make_array(); |
| } |
| |
| void TestBasicsWithUnions(UnionMode::type mode) { |
| ASSERT_OK_AND_ASSIGN( |
| auto type, |
| UnionType::Make({field("foo", utf8()), field("bar", int32())}, {2, 5}, mode)); |
| |
| // insert one |
| base_ = ArrayFromJSON(type, R"([[2, "!"], [5, 3], [5, 13]])"); |
| target_ = ArrayFromJSON(type, R"([[2, "!"], [2, "?"], [5, 3], [5, 13]])"); |
| DoDiff(); |
| AssertInsertIs("[false, true]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // delete one |
| base_ = ArrayFromJSON(type, R"([[2, "!"], [2, "?"], [5, 3], [5, 13]])"); |
| target_ = ArrayFromJSON(type, R"([[2, "!"], [5, 3], [5, 13]])"); |
| DoDiff(); |
| AssertInsertIs("[false, false]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // change one |
| base_ = ArrayFromJSON(type, R"([[5, 3], [2, "!"], [5, 13]])"); |
| target_ = ArrayFromJSON(type, R"([[2, "3"], [2, "!"], [5, 13]])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[0, 0, 2]"); |
| |
| // null out one |
| base_ = ArrayFromJSON(type, R"([[2, "!"], [5, 3], [5, 13]])"); |
| target_ = ArrayFromJSON(type, R"([[2, "!"], [5, 3], null])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[2, 0, 0]"); |
| } |
| |
| random::RandomArrayGenerator rng_; |
| std::shared_ptr<StructArray> edits_; |
| std::shared_ptr<Array> base_, target_; |
| std::shared_ptr<BooleanArray> insert_; |
| std::shared_ptr<Int64Array> run_lengths_; |
| }; |
| |
| TEST_F(DiffTest, Trivial) { |
| base_ = ArrayFromJSON(int32(), "[]"); |
| target_ = ArrayFromJSON(int32(), "[]"); |
| DoDiff(); |
| AssertInsertIs("[false]"); |
| AssertRunLengthIs("[0]"); |
| |
| base_ = ArrayFromJSON(null(), "[null, null]"); |
| target_ = ArrayFromJSON(null(), "[null, null, null, null]"); |
| DoDiff(); |
| AssertInsertIs("[false, true, true]"); |
| AssertRunLengthIs("[2, 0, 0]"); |
| |
| base_ = ArrayFromJSON(int32(), "[1, 2, 3]"); |
| target_ = ArrayFromJSON(int32(), "[1, 2, 3]"); |
| DoDiff(); |
| AssertInsertIs("[false]"); |
| AssertRunLengthIs("[3]"); |
| } |
| |
| TEST_F(DiffTest, Errors) { |
| std::stringstream formatted; |
| |
| base_ = ArrayFromJSON(int32(), "[]"); |
| target_ = ArrayFromJSON(utf8(), "[]"); |
| ASSERT_RAISES(TypeError, Diff(*base_, *target_, default_memory_pool())); |
| |
| ASSERT_FALSE(base_->Equals(*target_, EqualOptions().diff_sink(&formatted))); |
| ASSERT_EQ(formatted.str(), "# Array types differed: int32 vs string\n"); |
| } |
| |
| template <typename ArrowType> |
| class DiffTestWithNumeric : public DiffTest { |
| protected: |
| std::shared_ptr<DataType> type_singleton() { |
| return TypeTraits<ArrowType>::type_singleton(); |
| } |
| }; |
| |
| TYPED_TEST_SUITE(DiffTestWithNumeric, NumericArrowTypes); |
| |
| TYPED_TEST(DiffTestWithNumeric, Basics) { |
| // insert one |
| this->base_ = ArrayFromJSON(this->type_singleton(), "[1, 2, null, 5]"); |
| this->target_ = ArrayFromJSON(this->type_singleton(), "[1, 2, 3, null, 5]"); |
| this->DoDiff(); |
| this->AssertInsertIs("[false, true]"); |
| this->AssertRunLengthIs("[2, 2]"); |
| |
| // delete one |
| this->base_ = ArrayFromJSON(this->type_singleton(), "[1, 2, 3, null, 5]"); |
| this->target_ = ArrayFromJSON(this->type_singleton(), "[1, 2, null, 5]"); |
| this->DoDiff(); |
| this->AssertInsertIs("[false, false]"); |
| this->AssertRunLengthIs("[2, 2]"); |
| |
| // change one |
| this->base_ = ArrayFromJSON(this->type_singleton(), "[1, 2, 3, null, 5]"); |
| this->target_ = ArrayFromJSON(this->type_singleton(), "[1, 2, 23, null, 5]"); |
| this->DoDiff(); |
| this->AssertInsertIs("[false, false, true]"); |
| this->AssertRunLengthIs("[2, 0, 2]"); |
| |
| // null out one |
| this->base_ = ArrayFromJSON(this->type_singleton(), "[1, 2, 3, null, 5]"); |
| this->target_ = ArrayFromJSON(this->type_singleton(), "[1, 2, null, null, 5]"); |
| this->DoDiff(); |
| this->AssertInsertIs("[false, false, true]"); |
| this->AssertRunLengthIs("[2, 1, 1]"); |
| |
| // append some |
| this->base_ = ArrayFromJSON(this->type_singleton(), "[1, 2, 3, null, 5]"); |
| this->target_ = ArrayFromJSON(this->type_singleton(), "[1, 2, 3, null, 5, 6, 7, 8, 9]"); |
| this->DoDiff(); |
| this->AssertInsertIs("[false, true, true, true, true]"); |
| this->AssertRunLengthIs("[5, 0, 0, 0, 0]"); |
| |
| // prepend some |
| this->base_ = ArrayFromJSON(this->type_singleton(), "[1, 2, 3, null, 5]"); |
| this->target_ = ArrayFromJSON(this->type_singleton(), "[6, 4, 2, 0, 1, 2, 3, null, 5]"); |
| this->DoDiff(); |
| this->AssertInsertIs("[false, true, true, true, true]"); |
| this->AssertRunLengthIs("[0, 0, 0, 0, 5]"); |
| } |
| |
| TEST_F(DiffTest, CompareRandomInt64) { |
| for (auto null_probability : {0.0, 0.25}) { |
| auto values = this->rng_.Int64(1 << 10, 0, 127, null_probability); |
| for (const double filter_probability : {0.99, 0.75, 0.5}) { |
| this->BaseAndTargetFromRandomFilter(values, filter_probability); |
| |
| std::stringstream formatted; |
| this->DoDiffAndFormat(&formatted); |
| auto st = ValidateEditScript(*this->edits_, *this->base_, *this->target_); |
| if (!st.ok()) { |
| ASSERT_OK(Status(st.code(), st.message() + "\n" + formatted.str())); |
| } |
| } |
| } |
| } |
| |
| TEST_F(DiffTest, CompareRandomStrings) { |
| for (auto null_probability : {0.0, 0.25}) { |
| auto values = this->rng_.StringWithRepeats(1 << 10, 1 << 8, 0, 32, null_probability); |
| for (const double filter_probability : {0.99, 0.75, 0.5}) { |
| this->BaseAndTargetFromRandomFilter(values, filter_probability); |
| |
| std::stringstream formatted; |
| this->DoDiffAndFormat(&formatted); |
| auto st = ValidateEditScript(*this->edits_, *this->base_, *this->target_); |
| if (!st.ok()) { |
| ASSERT_OK(Status(st.code(), st.message() + "\n" + formatted.str())); |
| } |
| } |
| } |
| } |
| |
| TEST_F(DiffTest, BasicsWithBooleans) { |
| // insert one |
| base_ = ArrayFromJSON(boolean(), R"([true, true, true])"); |
| target_ = ArrayFromJSON(boolean(), R"([true, false, true, true])"); |
| DoDiff(); |
| AssertInsertIs("[false, true]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // delete one |
| base_ = ArrayFromJSON(boolean(), R"([true, false, true, true])"); |
| target_ = ArrayFromJSON(boolean(), R"([true, true, true])"); |
| DoDiff(); |
| AssertInsertIs("[false, false]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // change one |
| base_ = ArrayFromJSON(boolean(), R"([false, false, true])"); |
| target_ = ArrayFromJSON(boolean(), R"([true, false, true])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[0, 0, 2]"); |
| |
| // null out one |
| base_ = ArrayFromJSON(boolean(), R"([true, false, true])"); |
| target_ = ArrayFromJSON(boolean(), R"([true, false, null])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[2, 0, 0]"); |
| } |
| |
| TEST_F(DiffTest, BasicsWithStrings) { |
| // insert one |
| base_ = ArrayFromJSON(utf8(), R"(["give", "a", "break"])"); |
| target_ = ArrayFromJSON(utf8(), R"(["give", "me", "a", "break"])"); |
| DoDiff(); |
| AssertInsertIs("[false, true]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // delete one |
| base_ = ArrayFromJSON(utf8(), R"(["give", "me", "a", "break"])"); |
| target_ = ArrayFromJSON(utf8(), R"(["give", "a", "break"])"); |
| DoDiff(); |
| AssertInsertIs("[false, false]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // change one |
| base_ = ArrayFromJSON(utf8(), R"(["give", "a", "break"])"); |
| target_ = ArrayFromJSON(utf8(), R"(["gimme", "a", "break"])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[0, 0, 2]"); |
| |
| // null out one |
| base_ = ArrayFromJSON(utf8(), R"(["give", "a", "break"])"); |
| target_ = ArrayFromJSON(utf8(), R"(["give", "a", null])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[2, 0, 0]"); |
| } |
| |
| TEST_F(DiffTest, BasicsWithLists) { |
| // insert one |
| base_ = ArrayFromJSON(list(int32()), R"([[2, 3, 1], [], [13]])"); |
| target_ = ArrayFromJSON(list(int32()), R"([[2, 3, 1], [5, 9], [], [13]])"); |
| DoDiff(); |
| AssertInsertIs("[false, true]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // delete one |
| base_ = ArrayFromJSON(list(int32()), R"([[2, 3, 1], [5, 9], [], [13]])"); |
| target_ = ArrayFromJSON(list(int32()), R"([[2, 3, 1], [], [13]])"); |
| DoDiff(); |
| AssertInsertIs("[false, false]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // change one |
| base_ = ArrayFromJSON(list(int32()), R"([[2, 3, 1], [], [13]])"); |
| target_ = ArrayFromJSON(list(int32()), R"([[3, 3, 3], [], [13]])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[0, 0, 2]"); |
| |
| // null out one |
| base_ = ArrayFromJSON(list(int32()), R"([[2, 3, 1], [], [13]])"); |
| target_ = ArrayFromJSON(list(int32()), R"([[2, 3, 1], [], null])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[2, 0, 0]"); |
| } |
| |
| TEST_F(DiffTest, BasicsWithStructs) { |
| auto type = struct_({field("foo", utf8()), field("bar", int32())}); |
| |
| // insert one |
| base_ = ArrayFromJSON(type, R"([{"foo": "!", "bar": 3}, {}, {"bar": 13}])"); |
| target_ = |
| ArrayFromJSON(type, R"([{"foo": "!", "bar": 3}, {"foo": "?"}, {}, {"bar": 13}])"); |
| DoDiff(); |
| AssertInsertIs("[false, true]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // delete one |
| base_ = |
| ArrayFromJSON(type, R"([{"foo": "!", "bar": 3}, {"foo": "?"}, {}, {"bar": 13}])"); |
| target_ = ArrayFromJSON(type, R"([{"foo": "!", "bar": 3}, {}, {"bar": 13}])"); |
| DoDiff(); |
| AssertInsertIs("[false, false]"); |
| AssertRunLengthIs("[1, 2]"); |
| |
| // change one |
| base_ = ArrayFromJSON(type, R"([{"foo": "!", "bar": 3}, {}, {"bar": 13}])"); |
| target_ = ArrayFromJSON(type, R"([{"foo": "!", "bar": 2}, {}, {"bar": 13}])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[0, 0, 2]"); |
| |
| // null out one |
| base_ = ArrayFromJSON(type, R"([{"foo": "!", "bar": 3}, {}, {"bar": 13}])"); |
| target_ = ArrayFromJSON(type, R"([{"foo": "!", "bar": 3}, {}, null])"); |
| DoDiff(); |
| AssertInsertIs("[false, false, true]"); |
| AssertRunLengthIs("[2, 0, 0]"); |
| } |
| |
| TEST_F(DiffTest, BasicsWithSparseUnions) { TestBasicsWithUnions(UnionMode::SPARSE); } |
| |
| TEST_F(DiffTest, BasicsWithDenseUnions) { TestBasicsWithUnions(UnionMode::DENSE); } |
| |
| TEST_F(DiffTest, UnifiedDiffFormatter) { |
| // no changes |
| base_ = ArrayFromJSON(utf8(), R"(["give", "me", "a", "break"])"); |
| target_ = ArrayFromJSON(utf8(), R"(["give", "me", "a", "break"])"); |
| AssertDiffAndFormat(R"()"); |
| |
| // insert one |
| base_ = ArrayFromJSON(utf8(), R"(["give", "a", "break"])"); |
| target_ = ArrayFromJSON(utf8(), R"(["give", "me", "a", "break"])"); |
| AssertDiffAndFormat(R"( |
| @@ -1, +1 @@ |
| +"me" |
| )"); |
| |
| // delete one |
| base_ = ArrayFromJSON(utf8(), R"(["give", "me", "a", "break"])"); |
| target_ = ArrayFromJSON(utf8(), R"(["give", "a", "break"])"); |
| AssertDiffAndFormat(R"( |
| @@ -1, +1 @@ |
| -"me" |
| )"); |
| |
| // change one |
| base_ = ArrayFromJSON(utf8(), R"(["give", "a", "break"])"); |
| target_ = ArrayFromJSON(utf8(), R"(["gimme", "a", "break"])"); |
| AssertDiffAndFormat(R"( |
| @@ -0, +0 @@ |
| -"give" |
| +"gimme" |
| )"); |
| |
| // null out one |
| base_ = ArrayFromJSON(utf8(), R"(["give", "a", "break"])"); |
| target_ = ArrayFromJSON(utf8(), R"(["give", "a", null])"); |
| AssertDiffAndFormat(R"( |
| @@ -2, +2 @@ |
| -"break" |
| +null |
| )"); |
| |
| // strings with escaped chars |
| base_ = ArrayFromJSON(utf8(), R"(["newline:\n", "quote:'", "backslash:\\"])"); |
| target_ = |
| ArrayFromJSON(utf8(), R"(["newline:\n", "tab:\t", "quote:\"", "backslash:\\"])"); |
| AssertDiffAndFormat(R"( |
| @@ -1, +1 @@ |
| -"quote:'" |
| +"tab:\t" |
| +"quote:\"" |
| )"); |
| |
| // date32 |
| base_ = ArrayFromJSON(date32(), R"([0, 1, 2, 31, 4])"); |
| target_ = ArrayFromJSON(date32(), R"([0, 1, 31, 2, 4])"); |
| AssertDiffAndFormat(R"( |
| @@ -2, +2 @@ |
| -1970-01-03 |
| @@ -4, +3 @@ |
| +1970-01-03 |
| )"); |
| |
| // date64 |
| constexpr int64_t ms_per_day = 24 * 60 * 60 * 1000; |
| ArrayFromVector<Date64Type>( |
| {0 * ms_per_day, 1 * ms_per_day, 2 * ms_per_day, 31 * ms_per_day, 4 * ms_per_day}, |
| &base_); |
| ArrayFromVector<Date64Type>( |
| {0 * ms_per_day, 1 * ms_per_day, 31 * ms_per_day, 2 * ms_per_day, 4 * ms_per_day}, |
| &target_); |
| AssertDiffAndFormat(R"( |
| @@ -2, +2 @@ |
| -1970-01-03 |
| @@ -4, +3 @@ |
| +1970-01-03 |
| )"); |
| |
| // timestamp |
| auto x = 678 + 1000000 * (5 + 60 * (4 + 60 * (3 + 24 * int64_t(1)))); |
| ArrayFromVector<TimestampType>(timestamp(TimeUnit::MICRO), {0, 1, x, 2, 4}, &base_); |
| ArrayFromVector<TimestampType>(timestamp(TimeUnit::MICRO), {0, 1, 2, x, 4}, &target_); |
| AssertDiffAndFormat(R"( |
| @@ -2, +2 @@ |
| -1970-01-02 03:04:05.000678 |
| @@ -4, +3 @@ |
| +1970-01-02 03:04:05.000678 |
| )"); |
| |
| // lists |
| base_ = ArrayFromJSON(list(int32()), R"([[2, 3, 1], [], [13], []])"); |
| target_ = ArrayFromJSON(list(int32()), R"([[2, 3, 1], [5, 9], [], [13]])"); |
| AssertDiffAndFormat(R"( |
| @@ -1, +1 @@ |
| +[5, 9] |
| @@ -3, +4 @@ |
| -[] |
| )"); |
| |
| // maps |
| base_ = ArrayFromJSON(map(utf8(), int32()), R"([ |
| [["foo", 2], ["bar", 3], ["baz", 1]], |
| [], |
| [["quux", 13]], |
| [] |
| ])"); |
| target_ = ArrayFromJSON(map(utf8(), int32()), R"([ |
| [["foo", 2], ["bar", 3], ["baz", 1]], |
| [["ytho", 11]], |
| [], |
| [["quux", 13]] |
| ])"); |
| AssertDiffAndFormat(R"( |
| @@ -1, +1 @@ |
| +[{key: "ytho", value: 11}] |
| @@ -3, +4 @@ |
| -[] |
| )"); |
| |
| // structs |
| auto type = struct_({field("foo", utf8()), field("bar", int32())}); |
| base_ = ArrayFromJSON(type, R"([{"foo": "!", "bar": 3}, {}, {"bar": 13}])"); |
| target_ = ArrayFromJSON(type, R"([{"foo": null, "bar": 2}, {}, {"bar": 13}])"); |
| AssertDiffAndFormat(R"( |
| @@ -0, +0 @@ |
| -{foo: "!", bar: 3} |
| +{bar: 2} |
| )"); |
| |
| // unions |
| for (auto union_ : UnionTypeFactories()) { |
| type = union_({field("foo", utf8()), field("bar", int32())}, {2, 5}); |
| base_ = ArrayFromJSON(type, R"([[2, "!"], [5, 3], [5, 13]])"); |
| target_ = ArrayFromJSON(type, R"([[2, "!"], [2, "3"], [5, 13]])"); |
| AssertDiffAndFormat(R"( |
| @@ -1, +1 @@ |
| -{5: 3} |
| +{2: "3"} |
| )"); |
| } |
| |
| for (auto type : {int8(), uint8(), // verify that these are printed as numbers rather |
| // than their ascii characters |
| int16(), uint16()}) { |
| // small difference |
| base_ = ArrayFromJSON(type, "[0, 1, 2, 3, 5, 8, 11, 13, 17]"); |
| target_ = ArrayFromJSON(type, "[2, 3, 5, 7, 11, 13, 17, 19]"); |
| AssertDiffAndFormat(R"( |
| @@ -0, +0 @@ |
| -0 |
| -1 |
| @@ -5, +3 @@ |
| -8 |
| +7 |
| @@ -9, +7 @@ |
| +19 |
| )"); |
| |
| // large difference |
| base_ = ArrayFromJSON(type, "[57, 10, 22, 126, 42]"); |
| target_ = ArrayFromJSON(type, "[58, 57, 75, 93, 53, 8, 22, 42, 79, 11]"); |
| AssertDiffAndFormat(R"( |
| @@ -0, +0 @@ |
| +58 |
| @@ -1, +2 @@ |
| -10 |
| +75 |
| +93 |
| +53 |
| +8 |
| @@ -3, +7 @@ |
| -126 |
| @@ -5, +8 @@ |
| +79 |
| +11 |
| )"); |
| } |
| } |
| |
| TEST_F(DiffTest, DictionaryDiffFormatter) { |
| std::stringstream formatted; |
| |
| // differing indices |
| auto base_dict = ArrayFromJSON(utf8(), R"(["a", "b", "c"])"); |
| auto base_indices = ArrayFromJSON(int8(), "[0, 1, 2, 2, 0, 1]"); |
| ASSERT_OK_AND_ASSIGN(base_, DictionaryArray::FromArrays( |
| dictionary(base_indices->type(), base_dict->type()), |
| base_indices, base_dict)); |
| |
| auto target_dict = base_dict; |
| auto target_indices = ArrayFromJSON(int8(), "[0, 1, 2, 2, 1, 1]"); |
| ASSERT_OK_AND_ASSIGN( |
| target_, |
| DictionaryArray::FromArrays(dictionary(target_indices->type(), target_dict->type()), |
| target_indices, target_dict)); |
| |
| base_->Equals(*target_, EqualOptions().diff_sink(&formatted)); |
| auto formatted_expected_indices = R"(# Dictionary arrays differed |
| ## dictionary diff |
| ## indices diff |
| @@ -4, +4 @@ |
| -0 |
| @@ -6, +5 @@ |
| +1 |
| )"; |
| ASSERT_EQ(formatted.str(), formatted_expected_indices); |
| |
| // Note: Diff doesn't work at the moment with dictionary arrays |
| ASSERT_RAISES(NotImplemented, Diff(*base_, *target_)); |
| |
| // differing dictionaries |
| target_dict = ArrayFromJSON(utf8(), R"(["b", "c", "a"])"); |
| target_indices = base_indices; |
| ASSERT_OK_AND_ASSIGN( |
| target_, |
| DictionaryArray::FromArrays(dictionary(target_indices->type(), target_dict->type()), |
| target_indices, target_dict)); |
| |
| formatted.str(""); |
| base_->Equals(*target_, EqualOptions().diff_sink(&formatted)); |
| auto formatted_expected_values = R"(# Dictionary arrays differed |
| ## dictionary diff |
| @@ -0, +0 @@ |
| -"a" |
| @@ -3, +2 @@ |
| +"a" |
| ## indices diff |
| )"; |
| ASSERT_EQ(formatted.str(), formatted_expected_values); |
| } |
| |
| void MakeSameLength(std::shared_ptr<Array>* a, std::shared_ptr<Array>* b) { |
| auto length = std::min((*a)->length(), (*b)->length()); |
| *a = (*a)->Slice(0, length); |
| *b = (*b)->Slice(0, length); |
| } |
| |
| TEST_F(DiffTest, CompareRandomStruct) { |
| for (auto null_probability : {0.0, 0.25}) { |
| constexpr auto length = 1 << 10; |
| auto int32_values = this->rng_.Int32(length, 0, 127, null_probability); |
| auto utf8_values = this->rng_.String(length, 0, 16, null_probability); |
| for (const double filter_probability : {0.9999, 0.75}) { |
| this->BaseAndTargetFromRandomFilter(int32_values, filter_probability); |
| auto int32_base = this->base_; |
| auto int32_target = this->base_; |
| |
| this->BaseAndTargetFromRandomFilter(utf8_values, filter_probability); |
| auto utf8_base = this->base_; |
| auto utf8_target = this->base_; |
| |
| MakeSameLength(&int32_base, &utf8_base); |
| MakeSameLength(&int32_target, &utf8_target); |
| |
| auto type = struct_({field("i", int32()), field("s", utf8())}); |
| auto base_res = StructArray::Make({int32_base, utf8_base}, type->fields()); |
| ASSERT_OK(base_res.status()); |
| base_ = base_res.ValueOrDie(); |
| auto target_res = StructArray::Make({int32_target, utf8_target}, type->fields()); |
| ASSERT_OK(target_res.status()); |
| target_ = target_res.ValueOrDie(); |
| |
| std::stringstream formatted; |
| this->DoDiffAndFormat(&formatted); |
| auto st = ValidateEditScript(*this->edits_, *this->base_, *this->target_); |
| if (!st.ok()) { |
| ASSERT_OK(Status(st.code(), st.message() + "\n" + formatted.str())); |
| } |
| } |
| } |
| } |
| |
| } // namespace arrow |