blob: 3bfacdf4818abe902fef0ec2a3eb2732e65848db [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import random
from typing import List
import pytest
from pyiceberg.utils.bin_packing import ListPacker, PackingIterator
INT_MAX = 2147483647
@pytest.mark.parametrize(
"splits, lookback, split_size, open_cost",
[
([random.randint(0, 64) for x in range(200)], 20, 128, 4), # random splits
([], 20, 128, 4), # no splits
(
[0] * 100 + [random.randint(0, 64) in range(10)] + [0] * 100,
20,
128,
4,
), # sparse
],
)
def test_bin_packing(splits: List[int], lookback: int, split_size: int, open_cost: int) -> None:
def weight_func(x: int) -> int:
return max(x, open_cost)
item_list_sums: List[int] = [sum(item) for item in PackingIterator(splits, split_size, lookback, weight_func)]
assert all(split_size >= item_sum >= 0 for item_sum in item_list_sums)
@pytest.mark.parametrize(
"splits, target_weight, lookback, largest_bin_first, expected_lists",
[
(
[36, 36, 36, 36, 73, 110, 128],
128,
2,
True,
[[110], [128], [36, 73], [36, 36, 36]],
),
(
[36, 36, 36, 36, 73, 110, 128],
128,
2,
False,
[[36, 36, 36], [36, 73], [110], [128]],
),
(
[64, 64, 128, 32, 32, 32, 32],
128,
1,
True,
[[64, 64], [128], [32, 32, 32, 32]],
),
(
[64, 64, 128, 32, 32, 32, 32],
128,
1,
False,
[[64, 64], [128], [32, 32, 32, 32]],
),
],
)
def test_bin_packing_lookback(
splits: List[int], target_weight: int, lookback: int, largest_bin_first: bool, expected_lists: List[List[int]]
) -> None:
def weight_func(x: int) -> int:
return x
packer: ListPacker[int] = ListPacker(target_weight, lookback, largest_bin_first)
assert list(PackingIterator(splits, target_weight, lookback, weight_func, largest_bin_first)) == expected_lists
assert list(packer.pack(splits, weight_func)) == expected_lists
@pytest.mark.parametrize(
"splits, target_weight, lookback, largest_bin_first, expected_lists",
[
# Single Lookback Tests
([1, 2, 3, 4, 5], 3, 1, False, [[1, 2], [3], [4], [5]]),
([1, 2, 3, 4, 5], 4, 1, False, [[1, 2], [3], [4], [5]]),
([1, 2, 3, 4, 5], 5, 1, False, [[1], [2, 3], [4], [5]]),
([1, 2, 3, 4, 5], 6, 1, False, [[1, 2, 3], [4], [5]]),
([1, 2, 3, 4, 5], 7, 1, False, [[1, 2], [3, 4], [5]]),
([1, 2, 3, 4, 5], 8, 1, False, [[1, 2], [3, 4], [5]]),
([1, 2, 3, 4, 5], 9, 1, False, [[1, 2, 3], [4, 5]]),
([1, 2, 3, 4, 5], 11, 1, False, [[1, 2, 3], [4, 5]]),
([1, 2, 3, 4, 5], 12, 1, False, [[1, 2], [3, 4, 5]]),
([1, 2, 3, 4, 5], 14, 1, False, [[1], [2, 3, 4, 5]]),
([1, 2, 3, 4, 5], 15, 1, False, [[1, 2, 3, 4, 5]]),
# Unlimited Lookback Tests
([1, 2, 3, 4, 5], 3, INT_MAX, False, [[1, 2], [3], [4], [5]]),
([1, 2, 3, 4, 5], 4, INT_MAX, False, [[2], [1, 3], [4], [5]]),
([1, 2, 3, 4, 5], 5, INT_MAX, False, [[2, 3], [1, 4], [5]]),
([1, 2, 3, 4, 5], 6, INT_MAX, False, [[3], [2, 4], [1, 5]]),
([1, 2, 3, 4, 5], 7, INT_MAX, False, [[1], [3, 4], [2, 5]]),
([1, 2, 3, 4, 5], 8, INT_MAX, False, [[1, 2, 4], [3, 5]]),
([1, 2, 3, 4, 5], 9, INT_MAX, False, [[1, 2, 3], [4, 5]]),
([1, 2, 3, 4, 5], 10, INT_MAX, False, [[2, 3], [1, 4, 5]]),
([1, 2, 3, 4, 5], 11, INT_MAX, False, [[1, 3], [2, 4, 5]]),
([1, 2, 3, 4, 5], 12, INT_MAX, False, [[1, 2], [3, 4, 5]]),
([1, 2, 3, 4, 5], 13, INT_MAX, False, [[2], [1, 3, 4, 5]]),
([1, 2, 3, 4, 5], 14, INT_MAX, False, [[1], [2, 3, 4, 5]]),
([1, 2, 3, 4, 5], 15, INT_MAX, False, [[1, 2, 3, 4, 5]]),
],
)
def test_reverse_bin_packing_lookback(
splits: List[int], target_weight: int, lookback: int, largest_bin_first: bool, expected_lists: List[List[int]]
) -> None:
packer: ListPacker[int] = ListPacker(target_weight, lookback, largest_bin_first)
result = packer.pack_end(splits, lambda x: x)
assert result == expected_lists