blob: 319a246fa952d21caaca783cf803aca6d83691a0 [file]
################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
"""
Tests for Identifier parsing, including backtick support for database names with periods.
"""
import unittest
from pypaimon.common.identifier import Identifier
class IdentifierTest(unittest.TestCase):
"""Tests for Identifier.from_string()."""
def test_simple_identifier(self):
"""Simple database.table parsing."""
identifier = Identifier.from_string("mydb.mytable")
self.assertEqual(identifier.database, "mydb")
self.assertEqual(identifier.object, "mytable")
def test_java_compatible_split_on_first_period(self):
"""Java-compatible: splits on first period only, allowing periods in table name."""
identifier = Identifier.from_string("mydb.my.table.name")
self.assertEqual(identifier.database, "mydb")
self.assertEqual(identifier.object, "my.table.name")
def test_backtick_quoted_database_name_with_period(self):
"""Backtick-quoted database name containing a period."""
identifier = Identifier.from_string("`db.name`.table_name")
self.assertEqual(identifier.database, "db.name")
self.assertEqual(identifier.object, "table_name")
def test_backtick_quoted_both_parts(self):
"""Both database and table names backtick-quoted."""
identifier = Identifier.from_string("`db.name`.`table.name`")
self.assertEqual(identifier.database, "db.name")
self.assertEqual(identifier.object, "table.name")
def test_backtick_quoted_database_only(self):
"""Only database name backtick-quoted."""
identifier = Identifier.from_string("`my.database`.simple_table")
self.assertEqual(identifier.database, "my.database")
self.assertEqual(identifier.object, "simple_table")
def test_get_full_name(self):
"""get_full_name() returns database.object format."""
identifier = Identifier.create("mydb", "mytable")
self.assertEqual(identifier.get_full_name(), "mydb.mytable")
def test_constructor_with_branch_encodes_into_object(self):
"""``Identifier(db, table, branch=...)`` mirrors Java's 3-arg constructor."""
identifier = Identifier("mydb", "mytable", branch="feature")
self.assertEqual(identifier.object, "mytable$branch_feature")
self.assertEqual(identifier.get_full_name(), "mydb.mytable$branch_feature")
self.assertEqual(identifier.get_table_name(), "mytable")
self.assertEqual(identifier.get_branch_name(), "feature")
self.assertEqual(identifier.get_branch_name_or_default(), "feature")
self.assertFalse(identifier.is_system_table())
def test_main_branch_is_not_encoded_into_object(self):
"""``main`` (case-insensitive) is the default branch and is not encoded into ``object``."""
for branch in ("main", "MAIN", "Main"):
identifier = Identifier("mydb", "mytable", branch=branch)
self.assertEqual(identifier.object, "mytable")
# Wire-equal to a no-branch identifier.
self.assertEqual(identifier, Identifier("mydb", "mytable"))
def test_get_branch_name_or_default_when_unset(self):
"""``get_branch_name_or_default`` falls back to 'main'."""
identifier = Identifier.create("mydb", "mytable")
self.assertIsNone(identifier.get_branch_name())
self.assertEqual(identifier.get_branch_name_or_default(), "main")
def test_unknown_database_drops_database_segment(self):
"""``UNKNOWN_DATABASE`` is dropped from full name (Java-compatible)."""
identifier = Identifier("unknown", "mytable")
self.assertEqual(identifier.get_full_name(), "mytable")
def test_empty_string_raises_error(self):
"""Empty string should raise ValueError."""
with self.assertRaises(ValueError):
Identifier.from_string("")
def test_whitespace_only_raises_error(self):
"""Whitespace-only string should raise ValueError."""
with self.assertRaises(ValueError):
Identifier.from_string(" ")
def test_no_period_raises_error(self):
"""String without period should raise ValueError."""
with self.assertRaises(ValueError):
Identifier.from_string("nodothere")
def test_unclosed_backtick_raises_error(self):
"""Unclosed backtick should raise ValueError."""
with self.assertRaises(ValueError):
Identifier.from_string("`unclosed.db.mytable")
def test_invalid_backtick_format_raises_error(self):
"""Invalid backtick format (too many parts) should raise ValueError."""
with self.assertRaises(ValueError):
Identifier.from_string("`a`.`b`.`c`")
def test_is_system_table_regular_table(self):
"""A plain table object is not a system table."""
self.assertFalse(Identifier.create("mydb", "mytable").is_system_table())
def test_is_system_table_snapshots_suffix(self):
"""object name '<base>$snapshots' is a system table."""
self.assertTrue(Identifier("mydb", "orders$snapshots").is_system_table())
self.assertTrue(
Identifier("mydb", "orders", system_table="snapshots").is_system_table())
def test_is_system_table_schemas_suffix(self):
"""object name '<base>$schemas' is a system table."""
self.assertTrue(Identifier("mydb", "orders$schemas").is_system_table())
def test_is_system_table_files_suffix(self):
"""object name '<base>$files' is a system table."""
self.assertTrue(Identifier("mydb", "orders$files").is_system_table())
def test_is_system_table_two_parts_with_branch_prefix(self):
"""A '<table>$branch_<name>' object is a branched table, NOT a system table."""
identifier = Identifier("mydb", "orders$branch_dev")
self.assertFalse(identifier.is_system_table())
self.assertEqual(identifier.get_table_name(), "orders")
self.assertEqual(identifier.get_branch_name(), "dev")
self.assertIsNone(identifier.get_system_table_name())
def test_is_system_table_three_parts_branch_and_system(self):
"""A '<table>$branch_<name>$snapshots' object is a system table on a branch."""
identifier = Identifier("mydb", "orders$branch_dev$snapshots")
self.assertTrue(identifier.is_system_table())
self.assertEqual(identifier.get_table_name(), "orders")
self.assertEqual(identifier.get_branch_name(), "dev")
self.assertEqual(identifier.get_system_table_name(), "snapshots")
def test_three_parts_without_branch_prefix_raises(self):
"""A '<table>$<x>$<y>' object without branch prefix is invalid (single '$' allowed)."""
identifier = Identifier("mydb", "orders$schemas$snapshots")
with self.assertRaises(ValueError):
identifier.is_system_table()
def test_constructor_with_branch_and_system_table(self):
"""``Identifier(db, table, branch=..., system_table=...)`` mirrors Java's 4-arg constructor."""
identifier = Identifier(
"mydb", "orders", branch="dev", system_table="snapshots"
)
self.assertEqual(identifier.object, "orders$branch_dev$snapshots")
self.assertEqual(identifier.get_table_name(), "orders")
self.assertEqual(identifier.get_branch_name(), "dev")
self.assertEqual(identifier.get_system_table_name(), "snapshots")
def test_constructor_with_system_table_only(self):
"""Constructor with system_table but no branch."""
identifier = Identifier("mydb", "orders", system_table="files")
self.assertEqual(identifier.object, "orders$files")
self.assertEqual(identifier.get_table_name(), "orders")
self.assertIsNone(identifier.get_branch_name())
self.assertEqual(identifier.get_system_table_name(), "files")
def test_create_is_two_arg_alias(self):
"""``Identifier.create(db, object)`` is a 2-arg alias of the JSON constructor (matches Java)."""
identifier = Identifier.create("db", "orders$snapshots")
self.assertEqual(identifier.object, "orders$snapshots")
self.assertEqual(identifier.get_table_name(), "orders")
self.assertEqual(identifier.get_system_table_name(), "snapshots")
self.assertTrue(identifier.is_system_table())
def test_constructor_forms_are_wire_equivalent(self):
"""The encoding constructor and the @JsonCreator form produce wire-equal identifiers."""
encoded = Identifier(
"mydb", "orders", branch="dev", system_table="snapshots"
)
from_wire = Identifier("mydb", "orders$branch_dev$snapshots")
self.assertEqual(encoded, from_wire)
self.assertEqual(hash(encoded), hash(from_wire))
def test_branch_property_reads_decoded_branch(self):
"""``identifier.branch`` is a read-only alias for ``get_branch_name()``."""
self.assertEqual(Identifier("db", "tbl", branch="dev").branch, "dev")
self.assertEqual(Identifier("db", "tbl$branch_dev").branch, "dev")
self.assertIsNone(Identifier("db", "tbl").branch)
def test_equality_and_hash_ignore_cached_fields(self):
"""Equality and hash depend only on (database, object), matching Java JSON shape."""
a = Identifier("mydb", "orders$branch_dev$snapshots")
b = Identifier(
"mydb", "orders", branch="dev", system_table="snapshots"
)
self.assertEqual(a, b)
self.assertEqual(hash(a), hash(b))
class IdentifierBackwardCompatibilityTest(unittest.TestCase):
"""Locks in that the public surface from before this PR keeps working.
Pre-PR ``Identifier`` exposed three signatures that this PR's
encoding-into-object refactor risked breaking. They must continue to
run without raising; semantics may shift to the wire-correct form
(branches encoded into ``object``), but no caller should hit a
TypeError / AttributeError on an unchanged call site.
"""
def test_constructor_branch_kwarg_still_accepted(self):
# ``Identifier(db, obj, branch=...)`` was the old dataclass init
# signature. It must still construct without error; the branch is
# now encoded into ``object`` so the wire is Java-compatible.
identifier = Identifier(database="db", object="tbl", branch="feature")
self.assertEqual(identifier.object, "tbl$branch_feature")
self.assertEqual(identifier.get_branch_name(), "feature")
def test_constructor_branch_kwarg_none_is_accepted(self):
# Explicit ``branch=None`` (e.g. from JSON deserialization paths)
# must remain a no-op rather than triggering the encoding path.
identifier = Identifier("db", "tbl", branch=None)
self.assertEqual(identifier.object, "tbl")
self.assertIsNone(identifier.get_branch_name())
def test_branch_attribute_read(self):
# Old code that read ``identifier.branch`` keeps reading the
# branch name (now decoded from ``object``).
self.assertEqual(Identifier("db", "tbl", branch="dev").branch, "dev")
self.assertEqual(Identifier("db", "tbl$branch_dev").branch, "dev")
self.assertIsNone(Identifier("db", "tbl").branch)
def test_branch_attribute_write(self):
# Old code that assigned to ``identifier.branch`` keeps working.
# The setter re-encodes ``object`` so the wire is consistent.
identifier = Identifier("db", "tbl")
identifier.branch = "feature"
self.assertEqual(identifier.branch, "feature")
self.assertEqual(identifier.object, "tbl$branch_feature")
self.assertEqual(identifier.get_branch_name(), "feature")
# Clearing back to None drops the encoded branch segment.
identifier.branch = None
self.assertEqual(identifier.object, "tbl")
self.assertIsNone(identifier.branch)
def test_create_two_arg_form_with_dollar_object(self):
# ``Identifier.create(db, "tbl$snapshots")`` was the documented
# way to construct system-table identifiers pre-PR. It must keep
# producing an equivalent system-table identifier.
identifier = Identifier.create("db", "orders$snapshots")
self.assertEqual(identifier.object, "orders$snapshots")
self.assertTrue(identifier.is_system_table())
self.assertEqual(identifier.get_table_name(), "orders")
self.assertEqual(identifier.get_system_table_name(), "snapshots")
if __name__ == '__main__':
unittest.main()