blob: 5bc43302a941f6176f3aae5da27a297cc2956454 [file]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Scan / split implementation for system tables.
System tables materialise their entire contents into a single PyArrow
table that is shipped through a single :class:`SystemSplit`. This keeps
the read pipeline trivially correct: there's no manifest pruning, no
predicate pushdown and no parallelism to coordinate, only metadata
that fits comfortably in memory.
"""
from typing import List, Optional, TYPE_CHECKING
import pyarrow
from pypaimon.manifest.schema.data_file_meta import DataFileMeta
from pypaimon.read.plan import Plan
from pypaimon.read.split import Split
from pypaimon.table.row.generic_row import GenericRow
if TYPE_CHECKING: # pragma: no cover - type-only import
from pypaimon.table.system.system_table import SystemTable
class SystemSplit(Split):
    """In-memory split that wraps a system table's complete contents.

    The PyArrow table handed to the constructor is kept as-is and is
    available through :meth:`arrow_table`. The other properties return
    fixed values: no data files, no partition and bucket ``-1``.
    """

    def __init__(self, arrow_table: pyarrow.Table):
        """Keep ``arrow_table`` as the payload carried by this split."""
        self._arrow_table = arrow_table

    @property
    def row_count(self) -> int:
        """Number of rows in the wrapped table."""
        payload = self._arrow_table
        return payload.num_rows

    @property
    def files(self) -> List[DataFileMeta]:
        """Always a fresh empty list; no data files are referenced."""
        no_files: List[DataFileMeta] = []
        return no_files

    @property
    def partition(self) -> Optional[GenericRow]:
        """Always ``None``; this split carries no partition value."""
        return None

    @property
    def bucket(self) -> int:
        """Always ``-1`` (presumably the "no bucket" marker - confirm)."""
        return -1

    def arrow_table(self) -> pyarrow.Table:
        """Return the wrapped table (a regular method, not a property)."""
        return self._arrow_table
class SystemTableScan:
    """Scan that exposes a whole system table as a single split."""

    def __init__(self, system_table: "SystemTable"):
        """Remember the system table that :meth:`plan` will read from."""
        self.system_table = system_table

    def plan(self) -> Plan:
        """Build the table contents and wrap them in a one-split plan.

        The system table's ``_build_arrow_table()`` is invoked on every
        call; its result is placed in one :class:`SystemSplit`, which is
        the only entry of the returned plan.
        """
        whole_table_split = SystemSplit(self.system_table._build_arrow_table())
        return Plan(_splits=[whole_table_split])