blob: 6a0cb220e6cf4a55be1cc1c0fb81033c88895fdf [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pyarrow as pa
import pytest
from hudi import HudiTableBuilder
@pytest.fixture
def base_uri():
return "test://base/uri"
@pytest.fixture
def builder(base_uri):
return HudiTableBuilder(base_uri)
def test_initialization(builder, base_uri):
assert builder.base_uri == base_uri
assert builder.hudi_options == {}
assert builder.storage_options == {}
assert builder.options == {}
def test_from_base_uri(base_uri):
builder = HudiTableBuilder.from_base_uri(base_uri)
assert builder.base_uri == base_uri
@pytest.mark.parametrize(
"method,attr",
[
("with_hudi_option", "hudi_options"),
("with_storage_option", "storage_options"),
("with_option", "options"),
],
)
def test_with_single_option(builder, method, attr):
getattr(builder, method)("key1", "value1")
assert getattr(builder, attr) == {"key1": "value1"}
@pytest.mark.parametrize(
"method,attr",
[
("with_hudi_options", "hudi_options"),
("with_storage_options", "storage_options"),
("with_options", "options"),
],
)
def test_with_multiple_options(builder, method, attr):
options = {"key1": "value1", "key2": "value2"}
getattr(builder, method)(options)
assert getattr(builder, attr) == options
def test_read_table_returns_correct_data(get_sample_table):
table_path = get_sample_table
table = HudiTableBuilder.from_base_uri(table_path).build()
batches = table.read_snapshot()
t = pa.Table.from_batches(batches).select([0, 5, 6, 9]).sort_by("ts")
assert t.to_pylist() == [
{
"_hoodie_commit_time": "20240402144910683",
"ts": 1695046462179,
"uuid": "9909a8b1-2d15-4d3d-8ec9-efc48c536a00",
"fare": 339.0,
},
{
"_hoodie_commit_time": "20240402123035233",
"ts": 1695091554788,
"uuid": "e96c4396-3fad-413a-a942-4cb36106d721",
"fare": 27.7,
},
{
"_hoodie_commit_time": "20240402123035233",
"ts": 1695115999911,
"uuid": "c8abbe79-8d89-47ea-b4ce-4d224bae5bfa",
"fare": 17.85,
},
{
"_hoodie_commit_time": "20240402123035233",
"ts": 1695159649087,
"uuid": "334e26e9-8355-45cc-97c6-c31daf0df330",
"fare": 19.1,
},
{
"_hoodie_commit_time": "20240402123035233",
"ts": 1695516137016,
"uuid": "e3cf430c-889d-4015-bc98-59bdce1e530c",
"fare": 34.15,
},
]
@pytest.mark.parametrize(
"hudi_options,storage_options,options",
[
(
{"hoodie.read.file_group.start_timestamp": "resolved value"},
{"hoodie.read.file_group.start_timestamp": "not taking"},
{"hoodie.read.file_group.start_timestamp": "lower precedence"},
),
(
{},
{"hoodie.read.file_group.start_timestamp": "not taking"},
{"hoodie.read.file_group.start_timestamp": "resolved value"},
),
],
)
def test_setting_table_options(
get_sample_table, hudi_options, storage_options, options
):
table_path = get_sample_table
table = (
HudiTableBuilder.from_base_uri(table_path)
.with_hudi_options(hudi_options)
.with_storage_options(storage_options)
.with_options(options)
.build()
)
assert (
table.hudi_options().get("hoodie.read.file_group.start_timestamp")
== "resolved value"
)