blob: 935d68adea9945f2615b80207e8b0b5fdeb7cbea [file]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Dict, List, Optional
from pypaimon.common.file_io import FileIO
from pypaimon.common.identifier import Identifier
from pypaimon.schema.table_schema import TableSchema
from pypaimon.table.table import Table
# Ordered column names of the fixed object-table schema; kept in sync with
# the Java-side ObjectTable.SCHEMA.
OBJECT_TABLE_FIELD_NAMES = [
    "path",
    "name",
    "length",
    "mtime",
    "atime",
    "owner",
]
class ObjectTable(Table):
    """An object table refers to a directory that contains multiple objects (files).

    Object table provides metadata indexes for unstructured data objects in this
    directory, allowing users to analyze unstructured data in Object Storage.

    This is a read-only table. Write operations are not supported: every
    write/stream/search builder factory on this class raises
    ``NotImplementedError``; only ``new_read_builder`` returns a builder.
    """

    def __init__(
        self,
        file_io: FileIO,
        identifier: Identifier,
        table_schema: TableSchema,
        location: str,
        options: Optional[Dict[str, str]] = None,
        comment: Optional[str] = None,
    ):
        """Create an object table.

        Args:
            file_io: File IO instance, stored as ``self.file_io``.
            identifier: Identifier used to derive ``name()`` and ``full_name()``.
            table_schema: Schema of the table; also the source of the default
                options when ``options`` is not given.
            location: Directory the table refers to. Trailing ``/`` characters
                are stripped before it is stored.
            options: Table options. When ``None``, a copy of
                ``table_schema.options`` is used. An explicitly passed dict
                (including an empty one) is honoured as-is.
            comment: Optional table comment.
        """
        self.file_io = file_io
        self.identifier = identifier
        self._table_schema = table_schema
        self._location = location.rstrip("/")
        # Distinguish "not provided" (None) from "explicitly empty" ({}), and
        # always take a private copy so that later mutation of the caller's
        # dict cannot silently change this table's options.
        if options is not None:
            self._options = dict(options)
        else:
            self._options = dict(table_schema.options)
        self.comment = comment
        # Both key lists are always initialised empty for an object table.
        self.partition_keys: List[str] = []
        self.primary_keys: List[str] = []

    def name(self) -> str:
        """Return the table name taken from the identifier."""
        return self.identifier.get_table_name()

    def full_name(self) -> str:
        """Return the full name taken from the identifier."""
        return self.identifier.get_full_name()

    @property
    def table_schema(self) -> TableSchema:
        """Schema currently attached to this table."""
        return self._table_schema

    @table_schema.setter
    def table_schema(self, value: TableSchema):
        # Only the schema reference is replaced; the stored options are not
        # recomputed from the new schema.
        self._table_schema = value

    def location(self) -> str:
        """Return the table location without trailing ``/``."""
        return self._location

    def options(self) -> Dict[str, str]:
        """Return the options dict held by this table (not a copy)."""
        return self._options

    def copy(self, dynamic_options: Dict[str, str]) -> "ObjectTable":
        """Return a new ``ObjectTable`` with ``dynamic_options`` layered on top.

        Args:
            dynamic_options: Options that override or extend the current
                ones. ``None`` or an empty dict leaves the options unchanged.

        Returns:
            A new table sharing this table's file IO, identifier, schema,
            location and comment, with its own merged options dict.
        """
        new_options = dict(self._options)
        if dynamic_options:
            new_options.update(dynamic_options)
        return ObjectTable(
            file_io=self.file_io,
            identifier=self.identifier,
            table_schema=self._table_schema,
            location=self._location,
            options=new_options,
            comment=self.comment,
        )

    def new_read_builder(self):
        """Return an ``ObjectReadBuilder`` bound to this table."""
        # NOTE(review): imported at call time rather than module level,
        # presumably to avoid a circular import -- confirm before hoisting.
        from pypaimon.table.object.object_read_builder import ObjectReadBuilder

        return ObjectReadBuilder(self)

    def new_batch_write_builder(self):
        """Unsupported; always raises ``NotImplementedError``."""
        raise NotImplementedError(
            "ObjectTable is read-only and does not support batch write."
        )

    def new_stream_read_builder(self):
        """Unsupported; always raises ``NotImplementedError``."""
        raise NotImplementedError("ObjectTable does not support stream read.")

    def new_stream_write_builder(self):
        """Unsupported; always raises ``NotImplementedError``."""
        raise NotImplementedError(
            "ObjectTable is read-only and does not support stream write."
        )

    def new_full_text_search_builder(self):
        """Unsupported; always raises ``NotImplementedError``."""
        raise NotImplementedError(
            "ObjectTable is read-only and does not support full text search."
        )

    def new_vector_search_builder(self):
        """Unsupported; always raises ``NotImplementedError``."""
        raise NotImplementedError(
            "ObjectTable is read-only and does not support vector search."
        )