# blob: 201dbbbd6d93f081a0024236c8549d41fa3beb0e [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=eval-used,protected-access,redefined-outer-name
from datetime import date
from decimal import Decimal
from typing import Any, Callable
from uuid import UUID
import mmh3 as mmh3
import pytest
from pyiceberg import transforms
from pyiceberg.expressions import (
BoundEqualTo,
BoundGreaterThan,
BoundGreaterThanOrEqual,
BoundIn,
BoundLessThan,
BoundLessThanOrEqual,
BoundNotIn,
BoundNotNull,
BoundNotStartsWith,
BoundReference,
BoundStartsWith,
EqualTo,
GreaterThanOrEqual,
In,
LessThanOrEqual,
NotIn,
NotNull,
NotStartsWith,
Reference,
StartsWith,
)
from pyiceberg.expressions.literals import (
DateLiteral,
DecimalLiteral,
TimestampLiteral,
literal,
)
from pyiceberg.schema import Accessor
from pyiceberg.transforms import (
BucketTransform,
DayTransform,
HourTransform,
IdentityTransform,
MonthTransform,
TimeTransform,
Transform,
TruncateTransform,
UnknownTransform,
VoidTransform,
YearTransform,
)
from pyiceberg.typedef import IcebergBaseModel
from pyiceberg.types import (
BinaryType,
BooleanType,
DateType,
DecimalType,
DoubleType,
FixedType,
FloatType,
IntegerType,
LongType,
NestedField,
PrimitiveType,
StringType,
TimestampType,
TimestamptzType,
TimeType,
UUIDType,
)
from pyiceberg.utils.datetime import (
date_str_to_days,
date_to_days,
time_to_micros,
timestamp_to_micros,
timestamptz_to_micros,
)
@pytest.mark.parametrize(
    "test_input,test_type,expected",
    [
        (1, IntegerType(), 1392991556),
        (34, IntegerType(), 2017239379),
        (34, LongType(), 2017239379),
        (date_to_days(date(2017, 11, 16)), DateType(), -653330422),
        (date_str_to_days("2017-11-16"), DateType(), -653330422),
        (time_to_micros("22:31:08"), TimeType(), -662762989),
        (timestamp_to_micros("2017-11-16T22:31:08"), TimestampType(), -2047944441),
        (timestamptz_to_micros("2017-11-16T14:31:08-08:00"), TimestamptzType(), -2047944441),
        (b"\x00\x01\x02\x03", BinaryType(), -188683207),
        (b"\x00\x01\x02\x03", FixedType(4), -188683207),
        ("iceberg", StringType(), 1210000089),
        (UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7"), UUIDType(), 1488055340),
    ],
)
def test_bucket_hash_values(test_input: Any, test_type: PrimitiveType, expected: Any) -> None:
    """Check the value produced with ``bucket=False`` for each supported primitive type.

    The expected numbers lie far outside ``0..7`` even though ``num_buckets=8``,
    so they are hash values rather than bucket indices.
    """
    hash_fn = BucketTransform(num_buckets=8).transform(test_type, bucket=False)
    assert hash_fn(test_input) == expected
@pytest.mark.parametrize(
    "transform,value,expected",
    [
        (BucketTransform(100).transform(IntegerType()), 34, 79),
        (BucketTransform(100).transform(LongType()), 34, 79),
        (BucketTransform(100).transform(DateType()), 17486, 26),
        (BucketTransform(100).transform(TimeType()), 81068000000, 59),
        (BucketTransform(100).transform(TimestampType()), 1510871468000000, 7),
        (BucketTransform(100).transform(DecimalType(9, 2)), Decimal("14.20"), 59),
        (BucketTransform(100).transform(StringType()), "iceberg", 89),
        (BucketTransform(100).transform(UUIDType()), UUID("f79c3e09-677c-4bbd-a479-3f349cb785e7"), 40),
        (BucketTransform(128).transform(FixedType(3)), b"foo", 32),
        (BucketTransform(128).transform(BinaryType()), b"\x00\x01\x02\x03", 57),
    ],
)
def test_buckets(transform: Callable[[Any], int], value: Any, expected: int) -> None:
    """Check the bucket index that each pre-built bucket callable assigns to a sample value."""
    bucket_index = transform(value)
    assert bucket_index == expected
@pytest.mark.parametrize(
    "type_var",
    [
        BinaryType(),
        DateType(),
        DecimalType(8, 5),
        FixedType(8),
        IntegerType(),
        LongType(),
        StringType(),
        TimestampType(),
        TimestamptzType(),
        TimeType(),
        UUIDType(),
    ],
)
def test_bucket_method(type_var: PrimitiveType) -> None:
    """Exercise the non-hashing API of an 8-bucket ``BucketTransform`` for each source type."""
    bucketer = BucketTransform(8)  # type: ignore

    # repr() must be evaluable and round-trip to an object with the same str().
    rebuilt = eval(repr(bucketer))
    assert str(bucketer) == str(rebuilt)

    assert bucketer.can_transform(type_var)
    assert bucketer.result_type(type_var) == IntegerType()
    assert bucketer.num_buckets == 8
    assert bucketer.apply(None) is None
    assert bucketer.to_human_string(type_var, "test") == "test"
def test_string_with_surrogate_pair() -> None:
    """The string hash must equal mmh3 over the UTF-8 bytes, even with a non-BMP character."""
    text = "string with a surrogate pair: 💰"
    expected_hash = mmh3.hash(text.encode("UTF-8"))
    hash_fn = BucketTransform(100).transform(StringType(), bucket=False)
    assert hash_fn(text) == expected_hash
@pytest.mark.parametrize(
    "date_val,date_transform,expected",
    [
        (47, YearTransform(), "2017"),
        (575, MonthTransform(), "2017-12"),
        (17501, DayTransform(), "2017-12-01"),
    ],
)
def test_date_to_human_string(date_val: int, date_transform: TimeTransform[Any], expected: str) -> None:
    """Year/month/day ordinals render as the corresponding calendar string."""
    rendered = date_transform.to_human_string(DateType(), date_val)
    assert rendered == expected
@pytest.mark.parametrize(
    "date_transform",
    [YearTransform(), MonthTransform(), DayTransform()],
)
def test_none_date_to_human_string(date_transform: TimeTransform[Any]) -> None:
    """A ``None`` value is rendered as the literal string "null"."""
    rendered = date_transform.to_human_string(DateType(), None)
    assert rendered == "null"
def test_hour_to_human_string() -> None:
    """``HourTransform`` renders ``None`` as "null" and an hour ordinal as ``YYYY-MM-DD-HH``."""
    null_rendered = HourTransform().to_human_string(TimestampType(), None)
    assert null_rendered == "null"

    hour_rendered = HourTransform().to_human_string(TimestampType(), 420042)  # type: ignore
    assert hour_rendered == "2017-12-01-18"
@pytest.mark.parametrize(
    "negative_value,time_transform,expected",
    [
        (-1, YearTransform(), "1969"),
        (-1, MonthTransform(), "1969-12"),
        (-1, DayTransform(), "1969-12-31"),
        (-1, HourTransform(), "1969-12-31-23"),
    ],
)
def test_negative_value_to_human_string(negative_value: int, time_transform: TimeTransform[Any], expected: str) -> None:
    """An ordinal of -1 renders as the last year/month/day/hour of 1969."""
    rendered = time_transform.to_human_string(TimestampType(), negative_value)
    assert rendered == expected
@pytest.mark.parametrize(
    "type_var",
    [DateType(), TimestampType(), TimestamptzType()],
)
def test_time_methods(type_var: PrimitiveType) -> None:
    """Check capability, ordering, result type and dedup name of year/month/day transforms."""
    # A fresh instance is built for every assertion, exactly as separate calls would do.
    granularities = (YearTransform, MonthTransform, DayTransform)

    for transform_cls in granularities:
        assert transform_cls().can_transform(type_var)

    for transform_cls in granularities:
        assert transform_cls().preserves_order

    # Year and month yield integers; day yields a date.
    assert YearTransform().result_type(type_var) == IntegerType()
    assert MonthTransform().result_type(type_var) == IntegerType()
    assert DayTransform().result_type(type_var) == DateType()

    for transform_cls in granularities:
        assert transform_cls().dedup_name == "time"
@pytest.mark.parametrize(
    "transform,type_var,value,expected",
    [
        (DayTransform(), DateType(), 17501, 17501),
        (DayTransform(), DateType(), -1, -1),
        (MonthTransform(), DateType(), 17501, 575),
        (MonthTransform(), DateType(), -1, -1),
        (YearTransform(), DateType(), 17501, 47),
        (YearTransform(), DateType(), -1, -1),
        (YearTransform(), TimestampType(), 1512151975038194, 47),
        (YearTransform(), TimestampType(), -1, -1),
        (MonthTransform(), TimestamptzType(), 1512151975038194, 575),
        (MonthTransform(), TimestamptzType(), -1, -1),
        (DayTransform(), TimestampType(), 1512151975038194, 17501),
        (DayTransform(), TimestampType(), -1, -1),
    ],
)
def test_time_apply_method(transform: TimeTransform[Any], type_var: PrimitiveType, value: int, expected: int) -> None:
    """Applying a time transform to a date/timestamp value yields the expected ordinal."""
    apply_fn = transform.transform(type_var)
    assert apply_fn(value) == expected
@pytest.mark.parametrize(
    "type_var",
    [TimestampType(), TimestamptzType()],
)
def test_hour_method(type_var: PrimitiveType) -> None:
    """Check capability, result type, applied value and dedup name of ``HourTransform``."""
    assert HourTransform().can_transform(type_var)
    assert HourTransform().result_type(type_var) == IntegerType()

    to_hours = HourTransform().transform(type_var)
    assert to_hours(1512151975038194) == 420042  # type: ignore

    assert HourTransform().dedup_name == "time"
@pytest.mark.parametrize(
    "transform,other_transform",
    [
        (YearTransform(), MonthTransform()),
        (YearTransform(), DayTransform()),
        (YearTransform(), HourTransform()),
        (MonthTransform(), DayTransform()),
        (MonthTransform(), HourTransform()),
        (DayTransform(), HourTransform()),
    ],
)
def test_satisfies_order_of_method(transform: TimeTransform[Any], other_transform: TimeTransform[Any]) -> None:
    """In each pair the second (finer) transform satisfies the order of the first, not vice versa."""
    coarse, fine = transform, other_transform

    assert coarse.satisfies_order_of(coarse)
    assert fine.satisfies_order_of(coarse)
    assert not coarse.satisfies_order_of(fine)

    # Time transforms never satisfy the order of void or identity transforms.
    assert not coarse.satisfies_order_of(VoidTransform())
    assert not fine.satisfies_order_of(IdentityTransform())
@pytest.mark.parametrize(
    "type_var,value,expected",
    [
        (LongType(), None, "null"),
        (DateType(), 17501, "2017-12-01"),
        (TimeType(), 36775038194, "10:12:55.038194"),
        (TimestamptzType(), 1512151975038194, "2017-12-01T18:12:55.038194+00:00"),
        (TimestampType(), 1512151975038194, "2017-12-01T18:12:55.038194"),
        (LongType(), -1234567890000, "-1234567890000"),
        (StringType(), "a/b/c=d", "a/b/c=d"),
        (DecimalType(9, 2), Decimal("-1.50"), "-1.50"),
        (FixedType(100), b"foo", "Zm9v"),
    ],
)
def test_identity_human_string(type_var: PrimitiveType, value: Any, expected: str) -> None:
    """``IdentityTransform.to_human_string`` formats each value according to its source type."""
    rendered = IdentityTransform().to_human_string(type_var, value)  # type: ignore
    assert rendered == expected
@pytest.mark.parametrize(
    "type_var",
    [
        BinaryType(),
        BooleanType(),
        DateType(),
        DecimalType(8, 2),
        DoubleType(),
        FixedType(16),
        FloatType(),
        IntegerType(),
        LongType(),
        StringType(),
        TimestampType(),
        TimestamptzType(),
        TimeType(),
        UUIDType(),
    ],
)
def test_identity_method(type_var: PrimitiveType) -> None:
    """Identity accepts every primitive type, keeps the type, and returns its input unchanged."""
    identity = IdentityTransform()  # type: ignore

    # repr() must be evaluable and round-trip to an object with the same str().
    rebuilt = eval(repr(identity))
    assert str(identity) == str(rebuilt)

    assert identity.can_transform(type_var)
    assert identity.result_type(type_var) == type_var

    passthrough = identity.transform(type_var)
    assert passthrough("test") == "test"
@pytest.mark.parametrize("type_var", [IntegerType(), LongType()])
@pytest.mark.parametrize(
    "input_var,expected",
    [
        (1, 0),
        (5, 0),
        (9, 0),
        (10, 10),
        (11, 10),
        (-1, -10),
        (-10, -10),
        (-12, -20),
    ],
)
def test_truncate_integer(type_var: PrimitiveType, input_var: int, expected: int) -> None:
    """Width-10 truncation of integers rounds down to a multiple of 10, including for negatives."""
    truncate_fn = TruncateTransform(10).transform(type_var)  # type: ignore
    assert truncate_fn(input_var) == expected
@pytest.mark.parametrize(
    "input_var,expected",
    [
        (Decimal("12.34"), Decimal("12.30")),
        (Decimal("12.30"), Decimal("12.30")),
        (Decimal("12.29"), Decimal("12.20")),
        (Decimal("0.05"), Decimal("0.00")),
        (Decimal("-0.05"), Decimal("-0.10")),
    ],
)
def test_truncate_decimal(input_var: Decimal, expected: Decimal) -> None:
    """Width-10 truncation of a scale-2 decimal rounds down to a multiple of 0.10."""
    truncate_fn = TruncateTransform(10).transform(DecimalType(9, 2))  # type: ignore
    assert truncate_fn(input_var) == expected
@pytest.mark.parametrize(
    "input_var,expected",
    [
        ("abcdefg", "abcde"),
        ("abc", "abc"),
    ],
)
def test_truncate_string(input_var: str, expected: str) -> None:
    """Width-5 truncation keeps at most the first five characters of a string."""
    truncate_fn = TruncateTransform(5).transform(StringType())  # type: ignore
    assert truncate_fn(input_var) == expected
@pytest.mark.parametrize(
    "type_var,value,expected_human_str,expected",
    [
        (BinaryType(), b"\x00\x01\x02\x03", "AAECAw==", b"\x00"),
        (BinaryType(), bytes("\u2603de", "utf-8"), "4piDZGU=", b"\xe2"),
        (DecimalType(8, 5), Decimal("14.21"), "14.21", Decimal("14.21")),
        (IntegerType(), 123, "123", 123),
        (LongType(), 123, "123", 123),
        (StringType(), "foo", "foo", "f"),
        (StringType(), "\u2603de", "\u2603de", "\u2603"),
    ],
)
def test_truncate_method(type_var: PrimitiveType, value: Any, expected_human_str: str, expected: Any) -> None:
    """Exercise the full API of a width-1 ``TruncateTransform`` for each supported type."""
    truncator = TruncateTransform(1)  # type: ignore

    # repr() must be evaluable and round-trip to an object with the same str().
    rebuilt = eval(repr(truncator))
    assert str(truncator) == str(rebuilt)

    assert truncator.can_transform(type_var)
    assert truncator.result_type(type_var) == type_var

    # Human strings show the untruncated value; the applied result is truncated.
    assert truncator.to_human_string(type_var, value) == expected_human_str
    assert truncator.transform(type_var)(value) == expected
    assert truncator.to_human_string(type_var, None) == "null"

    assert truncator.width == 1
    assert truncator.transform(type_var)(None) is None
    assert truncator.preserves_order
    assert truncator.satisfies_order_of(truncator)
def test_unknown_transform() -> None:
    """An unknown transform round-trips via repr, cannot be applied, and reports a string result."""
    unknown = transforms.UnknownTransform("unknown")  # type: ignore

    rebuilt = eval(repr(unknown))
    assert str(unknown) == str(rebuilt)

    # Applying an unknown transform is expected to fail with AttributeError.
    with pytest.raises(AttributeError):
        unknown.transform(StringType())("test")

    assert not unknown.can_transform(FixedType(5))
    assert isinstance(unknown.result_type(BooleanType()), StringType)
def test_void_transform() -> None:
    """Exercise the API of ``VoidTransform``: singleton identity, null output, no ordering."""
    void = VoidTransform()  # type: ignore

    # Every construction yields the same object, and repr() evaluates to an equal one.
    assert void is VoidTransform()
    assert void == eval(repr(void))

    to_void = void.transform(StringType())
    assert to_void("test") is None

    assert void.can_transform(BooleanType())
    assert isinstance(void.result_type(BooleanType()), BooleanType)

    assert not void.preserves_order
    assert void.satisfies_order_of(VoidTransform())
    assert not void.satisfies_order_of(BucketTransform(100))

    assert void.to_human_string(StringType(), "test") == "null"
    assert void.dedup_name == "void"
# Helper model used by the *_deserialize tests in this module: they call
# TestType.parse_raw(...) and read the parsed transform back from `__root__`.
# NOTE(review): presumably a pydantic custom-root model; confirm against IcebergBaseModel.
class TestType(IcebergBaseModel):
    # The whole JSON payload is parsed as a single Transform value.
    __root__: Transform[Any, Any]
def test_bucket_transform_serialize() -> None:
    """A 22-bucket transform serializes to the JSON string "bucket[22]"."""
    serialized = BucketTransform(num_buckets=22).json()
    assert serialized == '"bucket[22]"'
def test_bucket_transform_deserialize() -> None:
    """The JSON string "bucket[22]" parses back into a 22-bucket transform."""
    parsed = TestType.parse_raw('"bucket[22]"')
    assert parsed.__root__ == BucketTransform(num_buckets=22)
def test_bucket_transform_str() -> None:
    """str() of a bucket transform is ``bucket[<n>]``."""
    rendered = str(BucketTransform(num_buckets=22))
    assert rendered == "bucket[22]"
def test_bucket_transform_repr() -> None:
    """repr() of a bucket transform is a constructor-style expression."""
    rendered = repr(BucketTransform(num_buckets=22))
    assert rendered == "BucketTransform(num_buckets=22)"
def test_truncate_transform_serialize() -> None:
    """An unknown transform serializes to its name as a JSON string."""
    # NOTE(review): the test name says "truncate" but the body exercises
    # UnknownTransform; the name looks like a copy-paste leftover.
    serialized = UnknownTransform("unknown").json()
    assert serialized == '"unknown"'
def test_unknown_transform_deserialize() -> None:
    """An unrecognized transform name parses into an ``UnknownTransform``."""
    parsed = TestType.parse_raw('"unknown"')
    assert parsed.__root__ == UnknownTransform("unknown")
def test_unknown_transform_str() -> None:
    """str() of an unknown transform is the name it was built with."""
    rendered = str(UnknownTransform("unknown"))
    assert rendered == "unknown"
def test_unknown_transform_repr() -> None:
    """repr() of an unknown transform is a constructor-style expression."""
    rendered = repr(UnknownTransform("unknown"))
    assert rendered == "UnknownTransform(transform='unknown')"
def test_void_transform_serialize() -> None:
    """The void transform serializes to the JSON string "void"."""
    serialized = VoidTransform().json()
    assert serialized == '"void"'
def test_void_transform_deserialize() -> None:
    """The JSON string "void" parses back into a ``VoidTransform``."""
    parsed = TestType.parse_raw('"void"')
    assert parsed.__root__ == VoidTransform()
def test_void_transform_str() -> None:
    """str() of the void transform is "void"."""
    rendered = str(VoidTransform())
    assert rendered == "void"
def test_void_transform_repr() -> None:
    """repr() of the void transform is a constructor-style expression."""
    rendered = repr(VoidTransform())
    assert rendered == "VoidTransform()"
def test_year_transform_serialize() -> None:
    """The year transform serializes to the JSON string "year"."""
    serialized = YearTransform().json()
    assert serialized == '"year"'
def test_year_transform_deserialize() -> None:
    """The JSON string "year" parses back into a ``YearTransform``."""
    parsed = TestType.parse_raw('"year"')
    assert parsed.__root__ == YearTransform()
def test_month_transform_serialize() -> None:
    """The month transform serializes to the JSON string "month"."""
    serialized = MonthTransform().json()
    assert serialized == '"month"'
def test_month_transform_deserialize() -> None:
    """The JSON string "month" parses back into a ``MonthTransform``."""
    parsed = TestType.parse_raw('"month"')
    assert parsed.__root__ == MonthTransform()
def test_day_transform_serialize() -> None:
    """The day transform serializes to the JSON string "day"."""
    serialized = DayTransform().json()
    assert serialized == '"day"'
def test_day_transform_deserialize() -> None:
    """The JSON string "day" parses back into a ``DayTransform``."""
    parsed = TestType.parse_raw('"day"')
    assert parsed.__root__ == DayTransform()
def test_hour_transform_serialize() -> None:
    """The hour transform serializes to the JSON string "hour"."""
    serialized = HourTransform().json()
    assert serialized == '"hour"'
def test_hour_transform_deserialize() -> None:
    """The JSON string "hour" parses back into an ``HourTransform``."""
    parsed = TestType.parse_raw('"hour"')
    assert parsed.__root__ == HourTransform()
@pytest.mark.parametrize(
    "transform,transform_str",
    [
        (YearTransform(), "year"),
        (MonthTransform(), "month"),
        (DayTransform(), "day"),
        (HourTransform(), "hour"),
    ],
)
def test_datetime_transform_str(transform: TimeTransform[Any], transform_str: str) -> None:
    """str() of each time transform is its lowercase granularity name."""
    rendered = str(transform)
    assert rendered == transform_str
@pytest.mark.parametrize(
    "transform,transform_repr",
    [
        (YearTransform(), "YearTransform()"),
        (MonthTransform(), "MonthTransform()"),
        (DayTransform(), "DayTransform()"),
        (HourTransform(), "HourTransform()"),
    ],
)
def test_datetime_transform_repr(transform: TimeTransform[Any], transform_repr: str) -> None:
    """repr() of each time transform is a no-argument constructor expression."""
    rendered = repr(transform)
    assert rendered == transform_repr
@pytest.fixture
def bound_reference_str() -> BoundReference[str]:
    """Bound reference to an optional string field (id 1, name "field") at position 0."""
    string_field = NestedField(1, "field", StringType(), required=False)
    accessor = Accessor(position=0, inner=None)
    return BoundReference(field=string_field, accessor=accessor)
@pytest.fixture
def bound_reference_date() -> BoundReference[int]:
    """Bound reference to an optional date field (id 1, name "field") at position 0."""
    date_field = NestedField(1, "field", DateType(), required=False)
    accessor = Accessor(position=0, inner=None)
    return BoundReference(field=date_field, accessor=accessor)
@pytest.fixture
def bound_reference_timestamp() -> BoundReference[int]:
    """Bound reference to an optional timestamp field (id 1, name "field") at position 0."""
    timestamp_field = NestedField(1, "field", TimestampType(), required=False)
    accessor = Accessor(position=0, inner=None)
    return BoundReference(field=timestamp_field, accessor=accessor)
@pytest.fixture
def bound_reference_decimal() -> BoundReference[Decimal]:
    """Bound reference to an optional decimal(8, 2) field (id 1, name "field") at position 0."""
    decimal_field = NestedField(1, "field", DecimalType(8, 2), required=False)
    accessor = Accessor(position=0, inner=None)
    return BoundReference(field=decimal_field, accessor=accessor)
@pytest.fixture
def bound_reference_long() -> BoundReference[int]:
    """Bound reference to an optional long field (id 1, name "field") at position 0.

    The field type is ``LongType`` so that it matches both the fixture name and
    the ``BoundReference[int]`` return annotation; it was previously a
    copy-pasted ``DecimalType(8, 2)``.
    """
    long_field = NestedField(1, "field", LongType(), required=False)
    accessor = Accessor(position=0, inner=None)
    return BoundReference(field=long_field, accessor=accessor)
def test_projection_bucket_unary(bound_reference_str: BoundReference[str]) -> None:
    """A not-null predicate projects through a bucket transform as not-null on the partition name."""
    predicate = BoundNotNull(term=bound_reference_str)
    projected = BucketTransform(2).project("name", predicate)
    assert projected == NotNull(term=Reference(name="name"))
def test_projection_bucket_literal(bound_reference_str: BoundReference[str]) -> None:
    """Equality on "data" projects to equality on its bucket (1 of 2)."""
    predicate = BoundEqualTo(term=bound_reference_str, literal=literal("data"))
    projected = BucketTransform(2).project("name", predicate)
    assert projected == EqualTo(term="name", literal=1)
def test_projection_bucket_set_same_bucket(bound_reference_str: BoundReference[str]) -> None:
    """An IN set whose members share one bucket collapses to a single equality."""
    predicate = BoundIn(term=bound_reference_str, literals={literal("hello"), literal("world")})
    projected = BucketTransform(2).project("name", predicate)
    assert projected == EqualTo(term="name", literal=1)
def test_projection_bucket_set_in(bound_reference_str: BoundReference[str]) -> None:
    """An IN set whose members land in different buckets projects to IN over those buckets."""
    predicate = BoundIn(term=bound_reference_str, literals={literal("hello"), literal("world")})
    projected = BucketTransform(3).project("name", predicate)
    assert projected == In(term="name", literals={1, 2})
def test_projection_bucket_set_not_in(bound_reference_str: BoundReference[str]) -> None:
    """A NOT IN predicate yields no projection (None) through a bucket transform."""
    predicate = BoundNotIn(term=bound_reference_str, literals={literal("hello"), literal("world")})
    projected = BucketTransform(3).project("name", predicate)
    assert projected is None
def test_projection_year_unary(bound_reference_date: BoundReference[int]) -> None:
    """A not-null predicate projects through the year transform unchanged in kind."""
    predicate = BoundNotNull(term=bound_reference_date)
    projected = YearTransform().project("name", predicate)
    assert projected == NotNull(term="name")
def test_projection_year_literal(bound_reference_date: BoundReference[int]) -> None:
    """Equality on date ordinal 1925 projects to equality on year ordinal 5."""
    predicate = BoundEqualTo(term=bound_reference_date, literal=DateLiteral(1925))
    projected = YearTransform().project("name", predicate)
    assert projected == EqualTo(term="name", literal=5)
def test_projection_year_set_same_year(bound_reference_date: BoundReference[int]) -> None:
    """An IN set of dates within one year collapses to equality on that year."""
    predicate = BoundIn(term=bound_reference_date, literals={DateLiteral(1925), DateLiteral(1926)})
    projected = YearTransform().project("name", predicate)
    assert projected == EqualTo(term="name", literal=5)
def test_projection_year_set_in(bound_reference_date: BoundReference[int]) -> None:
    """An IN set of dates in different years projects to IN over those year ordinals."""
    predicate = BoundIn(term=bound_reference_date, literals={DateLiteral(1925), DateLiteral(2925)})
    projected = YearTransform().project("name", predicate)
    assert projected == In(term="name", literals={8, 5})
def test_projection_year_set_not_in(bound_reference_date: BoundReference[int]) -> None:
    """A NOT IN predicate yields no projection (None) through the year transform."""
    predicate = BoundNotIn(term=bound_reference_date, literals={DateLiteral(1925), DateLiteral(2925)})
    projected = YearTransform().project("name", predicate)
    assert projected is None
def test_projection_month_unary(bound_reference_date: BoundReference[int]) -> None:
    """A not-null predicate projects through the month transform unchanged in kind."""
    predicate = BoundNotNull(term=bound_reference_date)
    projected = MonthTransform().project("name", predicate)
    assert projected == NotNull(term="name")
def test_projection_month_literal(bound_reference_date: BoundReference[int]) -> None:
    """Equality on date ordinal 1925 projects to equality on month ordinal 63."""
    predicate = BoundEqualTo(term=bound_reference_date, literal=DateLiteral(1925))
    projected = MonthTransform().project("name", predicate)
    assert projected == EqualTo(term="name", literal=63)
def test_projection_month_set_same_month(bound_reference_date: BoundReference[int]) -> None:
    """An IN set of dates within one month collapses to equality on that month."""
    predicate = BoundIn(term=bound_reference_date, literals={DateLiteral(1925), DateLiteral(1926)})
    projected = MonthTransform().project("name", predicate)
    assert projected == EqualTo(term="name", literal=63)
def test_projection_month_set_in(bound_reference_date: BoundReference[int]) -> None:
    """An IN set of dates in different months projects to IN over those month ordinals."""
    predicate = BoundIn(term=bound_reference_date, literals={DateLiteral(1925), DateLiteral(2925)})
    projected = MonthTransform().project("name", predicate)
    assert projected == In(term="name", literals={96, 63})
def test_projection_day_month_not_in(bound_reference_date: BoundReference[int]) -> None:
    """A NOT IN predicate yields no projection (None) through the month transform."""
    # NOTE(review): the name mentions "day" but only MonthTransform is exercised here.
    predicate = BoundNotIn(term=bound_reference_date, literals={DateLiteral(1925), DateLiteral(2925)})
    projected = MonthTransform().project("name", predicate)
    assert projected is None
def test_projection_day_unary(bound_reference_timestamp: BoundReference[int]) -> None:
    """A not-null predicate projects through the day transform unchanged in kind."""
    predicate = BoundNotNull(term=bound_reference_timestamp)
    projected = DayTransform().project("name", predicate)
    assert projected == NotNull(term="name")
def test_projection_day_literal(bound_reference_timestamp: BoundReference[int]) -> None:
    """Equality on a timestamp literal projects to equality on its day ordinal (19)."""
    predicate = BoundEqualTo(term=bound_reference_timestamp, literal=TimestampLiteral(1667696874000))
    projected = DayTransform().project("name", predicate)
    assert projected == EqualTo(term="name", literal=19)
def test_projection_day_set_same_day(bound_reference_timestamp: BoundReference[int]) -> None:
    """An IN set of timestamps within one day collapses to equality on that day."""
    same_day_literals = {TimestampLiteral(1667696874001), TimestampLiteral(1667696874000)}
    predicate = BoundIn(term=bound_reference_timestamp, literals=same_day_literals)
    projected = DayTransform().project("name", predicate)
    assert projected == EqualTo(term="name", literal=19)
def test_projection_day_set_in(bound_reference_timestamp: BoundReference[int]) -> None:
    """An IN set of timestamps on different days projects to IN over those day ordinals."""
    two_day_literals = {TimestampLiteral(1667696874001), TimestampLiteral(1567696874000)}
    predicate = BoundIn(term=bound_reference_timestamp, literals=two_day_literals)
    projected = DayTransform().project("name", predicate)
    assert projected == In(term="name", literals={18, 19})
def test_projection_day_set_not_in(bound_reference_timestamp: BoundReference[int]) -> None:
    """A NOT IN predicate yields no projection (None) through the day transform."""
    excluded = {TimestampLiteral(1567696874), TimestampLiteral(1667696874)}
    predicate = BoundNotIn(term=bound_reference_timestamp, literals=excluded)
    projected = DayTransform().project("name", predicate)
    assert projected is None
def test_projection_day_human(bound_reference_date: BoundReference[int]) -> None:
    """Comparison predicates on a date project to inclusive bounds on the day ordinal.

    Strict ``<`` and ``>`` are asserted to become ``<=`` / ``>=`` on the
    adjacent day.
    """
    date_literal = DateLiteral(17532)

    def _project(bound_predicate: Any) -> Any:
        # A fresh DayTransform is used for every projection.
        return DayTransform().project("dt", bound_predicate)

    # == 2018, 1, 1
    equal_to = _project(BoundEqualTo(term=bound_reference_date, literal=date_literal))
    assert equal_to == EqualTo(term="dt", literal=17532)

    # <= 2018, 1, 1
    less_or_equal = _project(BoundLessThanOrEqual(term=bound_reference_date, literal=date_literal))
    assert less_or_equal == LessThanOrEqual(term="dt", literal=17532)

    # <= 2017, 12, 31
    less_than = _project(BoundLessThan(term=bound_reference_date, literal=date_literal))
    assert less_than == LessThanOrEqual(term="dt", literal=17531)

    # >= 2018, 1, 1
    greater_or_equal = _project(BoundGreaterThanOrEqual(term=bound_reference_date, literal=date_literal))
    assert greater_or_equal == GreaterThanOrEqual(term="dt", literal=17532)

    # >= 2018, 1, 2
    greater_than = _project(BoundGreaterThan(term=bound_reference_date, literal=date_literal))
    assert greater_than == GreaterThanOrEqual(term="dt", literal=17533)
def test_projection_hour_unary(bound_reference_timestamp: BoundReference[int]) -> None:
    """A not-null predicate projects through the hour transform unchanged in kind."""
    predicate = BoundNotNull(term=bound_reference_timestamp)
    projected = HourTransform().project("name", predicate)
    assert projected == NotNull(term="name")
# Sample timestamp in microseconds since the Unix epoch, shared by the hour and
# identity projection tests; the hour tests expect it to map to hour ordinal 463249.
TIMESTAMP_EXAMPLE = 1667696874000000 # Sun Nov 06 2022 01:07:54
# One hour in microseconds (60 min * 60 s * 1000 ms * 1000 us); added to the
# sample timestamp to reach the next hour ordinal.
HOUR_IN_MICROSECONDS = 60 * 60 * 1000 * 1000
def test_projection_hour_literal(bound_reference_timestamp: BoundReference[int]) -> None:
    """Equality on the sample timestamp projects to equality on hour ordinal 463249."""
    predicate = BoundEqualTo(term=bound_reference_timestamp, literal=TimestampLiteral(TIMESTAMP_EXAMPLE))
    projected = HourTransform().project("name", predicate)
    assert projected == EqualTo(term="name", literal=463249)
def test_projection_hour_set_same_hour(bound_reference_timestamp: BoundReference[int]) -> None:
    """An IN set of timestamps within one hour collapses to equality on that hour."""
    same_hour_literals = {TimestampLiteral(TIMESTAMP_EXAMPLE + 1), TimestampLiteral(TIMESTAMP_EXAMPLE)}
    predicate = BoundIn(term=bound_reference_timestamp, literals=same_hour_literals)
    projected = HourTransform().project("name", predicate)
    assert projected == EqualTo(term="name", literal=463249)
def test_projection_hour_set_in(bound_reference_timestamp: BoundReference[int]) -> None:
    """An IN set spanning two consecutive hours projects to IN over both hour ordinals."""
    two_hour_literals = {
        TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS),
        TimestampLiteral(TIMESTAMP_EXAMPLE),
    }
    predicate = BoundIn(term=bound_reference_timestamp, literals=two_hour_literals)
    projected = HourTransform().project("name", predicate)
    assert projected == In(term="name", literals={463249, 463250})
def test_projection_hour_set_not_in(bound_reference_timestamp: BoundReference[int]) -> None:
    """A NOT IN predicate yields no projection (None) through the hour transform."""
    excluded = {
        TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS),
        TimestampLiteral(TIMESTAMP_EXAMPLE),
    }
    predicate = BoundNotIn(term=bound_reference_timestamp, literals=excluded)
    projected = HourTransform().project("name", predicate)
    assert projected is None
def test_projection_identity_unary(bound_reference_timestamp: BoundReference[int]) -> None:
    """A not-null predicate projects through the identity transform unchanged in kind."""
    predicate = BoundNotNull(term=bound_reference_timestamp)
    projected = IdentityTransform().project("name", predicate)
    assert projected == NotNull(term="name")
def test_projection_identity_literal(bound_reference_timestamp: BoundReference[int]) -> None:
    """Equality projects through the identity transform with the literal left unchanged."""
    predicate = BoundEqualTo(term=bound_reference_timestamp, literal=TimestampLiteral(TIMESTAMP_EXAMPLE))
    projected = IdentityTransform().project("name", predicate)
    assert projected == EqualTo(term="name", literal=TimestampLiteral(TIMESTAMP_EXAMPLE))
def test_projection_identity_set_in(bound_reference_timestamp: BoundReference[int]) -> None:
    """An IN predicate projects through the identity transform with the same literal set."""
    predicate = BoundIn(
        term=bound_reference_timestamp,
        literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS), TimestampLiteral(TIMESTAMP_EXAMPLE)},
    )
    projected = IdentityTransform().project("name", predicate)

    expected = In(
        term="name",
        literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS), TimestampLiteral(TIMESTAMP_EXAMPLE)},
    )
    assert projected == expected
def test_projection_identity_set_not_in(bound_reference_timestamp: BoundReference[int]) -> None:
    """A NOT IN predicate projects through the identity transform with the same literal set."""
    predicate = BoundNotIn(
        term=bound_reference_timestamp,
        literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS), TimestampLiteral(TIMESTAMP_EXAMPLE)},
    )
    projected = IdentityTransform().project("name", predicate)

    expected = NotIn(
        term="name",
        literals={TimestampLiteral(TIMESTAMP_EXAMPLE + HOUR_IN_MICROSECONDS), TimestampLiteral(TIMESTAMP_EXAMPLE)},
    )
    assert projected == expected
def test_projection_truncate_string_unary(bound_reference_str: BoundReference[str]) -> None:
    """A not-null predicate projects through a truncate transform unchanged in kind."""
    predicate = BoundNotNull(term=bound_reference_str)
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == NotNull(term="name")
def test_projection_truncate_string_literal_eq(bound_reference_str: BoundReference[str]) -> None:
    """Equality on "data" projects to equality on its width-2 prefix "da"."""
    predicate = BoundEqualTo(term=bound_reference_str, literal=literal("data"))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == EqualTo(term="name", literal=literal("da"))
def test_projection_truncate_string_literal_gt(bound_reference_str: BoundReference[str]) -> None:
    """Project a ``>`` predicate on "data" through a width-2 truncate transform."""
    # NOTE(review): a `>` predicate is compared against EqualTo here; confirm
    # this is the intended projection rather than a greater-than-or-equal form.
    predicate = BoundGreaterThan(term=bound_reference_str, literal=literal("data"))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == EqualTo(term="name", literal=literal("da"))
def test_projection_truncate_string_literal_gte(bound_reference_str: BoundReference[str]) -> None:
    """Project a ``>=`` predicate on "data" through a width-2 truncate transform."""
    # NOTE(review): a `>=` predicate is compared against EqualTo here; confirm
    # this is the intended projection rather than a greater-than-or-equal form.
    predicate = BoundGreaterThanOrEqual(term=bound_reference_str, literal=literal("data"))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == EqualTo(term="name", literal=literal("da"))
def test_projection_truncate_string_set_same_result(bound_reference_str: BoundReference[str]) -> None:
    """An IN set whose members share a width-2 prefix collapses to equality on that prefix."""
    predicate = BoundIn(term=bound_reference_str, literals={literal("hello"), literal("helloworld")})
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == EqualTo(term="name", literal=literal("he"))
def test_projection_truncate_string_set_in(bound_reference_str: BoundReference[str]) -> None:
    """An IN set with distinct width-3 prefixes projects to IN over those prefixes."""
    predicate = BoundIn(term=bound_reference_str, literals={literal("hello"), literal("world")})
    projected = TruncateTransform(3).project("name", predicate)
    assert projected == In(term="name", literals={literal("hel"), literal("wor")})
def test_projection_truncate_string_set_not_in(bound_reference_str: BoundReference[str]) -> None:
    """A NOT IN predicate yields no projection (None) through a truncate transform."""
    predicate = BoundNotIn(term=bound_reference_str, literals={literal("hello"), literal("world")})
    projected = TruncateTransform(3).project("name", predicate)
    assert projected is None
def test_projection_truncate_decimal_literal_eq(bound_reference_decimal: BoundReference[Decimal]) -> None:
    """Equality on 19.25 projects to equality on 19.24 under a width-2 truncate."""
    predicate = BoundEqualTo(term=bound_reference_decimal, literal=DecimalLiteral(Decimal(19.25)))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == EqualTo(term="name", literal=Decimal("19.24"))
def test_projection_truncate_decimal_literal_gt(bound_reference_decimal: BoundReference[Decimal]) -> None:
    """A ``>`` 19.25 predicate projects to ``>=`` 19.26 under a width-2 truncate."""
    predicate = BoundGreaterThan(term=bound_reference_decimal, literal=DecimalLiteral(Decimal(19.25)))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == GreaterThanOrEqual(term="name", literal=Decimal("19.26"))
def test_projection_truncate_decimal_literal_gte(bound_reference_decimal: BoundReference[Decimal]) -> None:
    """A ``>=`` 19.25 predicate projects to ``>=`` 19.24 under a width-2 truncate."""
    predicate = BoundGreaterThanOrEqual(term=bound_reference_decimal, literal=DecimalLiteral(Decimal(19.25)))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == GreaterThanOrEqual(term="name", literal=Decimal("19.24"))
def test_projection_truncate_decimal_in(bound_reference_decimal: BoundReference[Decimal]) -> None:
    """An IN set of decimals projects to IN over their width-2 truncations."""
    # Decimal(18.15) is built from a float, so the expected value carries the
    # float's full binary expansion.
    predicate = BoundIn(term=bound_reference_decimal, literals={literal(Decimal(19.25)), literal(Decimal(18.15))})
    projected = TruncateTransform(2).project("name", predicate)

    expected = In(
        term="name",
        literals={
            Decimal("19.24"),
            Decimal("18.14999999999999857891452847979962825775146484374"),
        },
    )
    assert projected == expected
def test_projection_truncate_long_literal_eq(bound_reference_decimal: BoundReference[Decimal]) -> None:
    """Equality on 19.25 projects to equality on 19.24 under a width-2 truncate."""
    # NOTE(review): named "long" but driven by the decimal fixture and decimal
    # literals; confirm whether a long-typed reference was intended.
    predicate = BoundEqualTo(term=bound_reference_decimal, literal=DecimalLiteral(Decimal(19.25)))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == EqualTo(term="name", literal=Decimal("19.24"))
def test_projection_truncate_long_literal_gt(bound_reference_decimal: BoundReference[Decimal]) -> None:
    """A ``>`` 19.25 predicate projects to ``>=`` 19.26 under a width-2 truncate."""
    # NOTE(review): named "long" but driven by the decimal fixture and decimal
    # literals; confirm whether a long-typed reference was intended.
    predicate = BoundGreaterThan(term=bound_reference_decimal, literal=DecimalLiteral(Decimal(19.25)))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == GreaterThanOrEqual(term="name", literal=Decimal("19.26"))
def test_projection_truncate_long_literal_gte(bound_reference_decimal: BoundReference[Decimal]) -> None:
    """A ``>=`` 19.25 predicate projects to ``>=`` 19.24 under a width-2 truncate."""
    # NOTE(review): named "long" but driven by the decimal fixture and decimal
    # literals; confirm whether a long-typed reference was intended.
    predicate = BoundGreaterThanOrEqual(term=bound_reference_decimal, literal=DecimalLiteral(Decimal(19.25)))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == GreaterThanOrEqual(term="name", literal=Decimal("19.24"))
def test_projection_truncate_long_in(bound_reference_decimal: BoundReference[Decimal]) -> None:
    """An IN set of decimals projects to IN over their width-2 truncations."""
    # NOTE(review): named "long" but driven by the decimal fixture and decimal
    # literals; confirm whether a long-typed reference was intended.
    predicate = BoundIn(
        term=bound_reference_decimal,
        literals={DecimalLiteral(Decimal(19.25)), DecimalLiteral(Decimal(18.15))},
    )
    projected = TruncateTransform(2).project("name", predicate)

    expected = In(
        term="name",
        literals={
            Decimal("19.24"),
            Decimal("18.14999999999999857891452847979962825775146484374"),
        },
    )
    assert projected == expected
def test_projection_truncate_string_starts_with(bound_reference_str: BoundReference[str]) -> None:
    """A starts-with "hello" predicate projects to starts-with on the width-2 prefix "he"."""
    predicate = BoundStartsWith(term=bound_reference_str, literal=literal("hello"))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == StartsWith(term="name", literal=literal("he"))
def test_projection_truncate_string_not_starts_with(bound_reference_str: BoundReference[str]) -> None:
    """A not-starts-with "hello" predicate projects to not-starts-with on the prefix "he"."""
    predicate = BoundNotStartsWith(term=bound_reference_str, literal=literal("hello"))
    projected = TruncateTransform(2).project("name", predicate)
    assert projected == NotStartsWith(term="name", literal=literal("he"))