blob: 649aee425611dc0d3b904fe81a5ed89cf2b50306 [file]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from enum import Enum
from typing import Dict, List, Optional
from pypaimon.common.file_io import FileIO
from pypaimon.common.identifier import Identifier
from pypaimon.schema.table_schema import TableSchema
from pypaimon.table.table import Table
class Format(str, Enum):
ORC = "orc"
PARQUET = "parquet"
CSV = "csv"
TEXT = "text"
JSON = "json"
@classmethod
def parse(cls, file_format: str) -> "Format":
s = (file_format or "parquet").strip().upper()
try:
return cls[s]
except KeyError:
raise ValueError(
f"Format table unsupported file format: {file_format}. "
f"Supported: {[f.name for f in cls]}"
)
class FormatTable(Table):
def __init__(
self,
file_io: FileIO,
identifier: Identifier,
table_schema: TableSchema,
location: str,
format: Format,
options: Optional[Dict[str, str]] = None,
comment: Optional[str] = None,
):
self.file_io = file_io
self.identifier = identifier
self._table_schema = table_schema
self._location = location.rstrip("/")
self._format = format
self._options = options or dict(table_schema.options)
self.comment = comment
self.fields = table_schema.fields
self.field_names = [f.name for f in self.fields]
self.partition_keys = table_schema.partition_keys or []
self.primary_keys: List[str] = [] # format table has no primary key
def name(self) -> str:
return self.identifier.get_table_name()
def full_name(self) -> str:
return self.identifier.get_full_name()
@property
def table_schema(self) -> TableSchema:
return self._table_schema
@table_schema.setter
def table_schema(self, value: TableSchema):
self._table_schema = value
def location(self) -> str:
return self._location
def format(self) -> Format:
return self._format
def options(self) -> Dict[str, str]:
return self._options
def new_read_builder(self):
from pypaimon.table.format.format_read_builder import FormatReadBuilder
return FormatReadBuilder(self)
def new_batch_write_builder(self):
from pypaimon.table.format.format_batch_write_builder import \
FormatBatchWriteBuilder
return FormatBatchWriteBuilder(self)
def new_stream_read_builder(self):
raise NotImplementedError("Format table does not support stream read.")
def new_stream_write_builder(self):
raise NotImplementedError("Format table does not support stream write.")
def new_full_text_search_builder(self):
raise NotImplementedError("Format table does not support full text search.")
def new_vector_search_builder(self):
raise NotImplementedError("Format table does not support vector search.")