################################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
from enum import Enum

from pypaimon.common.memory_size import MemorySize


class CoreOptions(str, Enum):
    """Core options for Paimon."""

    def __str__(self):
        return self.value

    # Basic options
    AUTO_CREATE = "auto-create"
    PATH = "path"
    TYPE = "type"
    BRANCH = "branch"
    BUCKET = "bucket"
    BUCKET_KEY = "bucket-key"
    WAREHOUSE = "warehouse"
    SCAN_MANIFEST_PARALLELISM = "scan.manifest.parallelism"

    # File format options
    FILE_FORMAT = "file.format"
    FILE_FORMAT_ORC = "orc"
    FILE_FORMAT_AVRO = "avro"
    FILE_FORMAT_PARQUET = "parquet"
    FILE_FORMAT_BLOB = "blob"
    FILE_COMPRESSION = "file.compression"
    FILE_COMPRESSION_PER_LEVEL = "file.compression.per.level"
    FILE_FORMAT_PER_LEVEL = "file.format.per.level"
    FILE_BLOCK_SIZE = "file.block-size"
    FILE_BLOB_AS_DESCRIPTOR = "blob-as-descriptor"
    TARGET_FILE_SIZE = "target-file-size"
    BLOB_TARGET_FILE_SIZE = "blob.target-file-size"

    # Scan options
    SCAN_FALLBACK_BRANCH = "scan.fallback-branch"
    INCREMENTAL_BETWEEN_TIMESTAMP = "incremental-between-timestamp"
    SOURCE_SPLIT_TARGET_SIZE = "source.split.target-size"
    SOURCE_SPLIT_OPEN_FILE_COST = "source.split.open-file-cost"

    # Commit options
    COMMIT_USER_PREFIX = "commit.user-prefix"
    ROW_TRACKING_ENABLED = "row-tracking.enabled"
    DATA_EVOLUTION_ENABLED = "data-evolution.enabled"

    @staticmethod
    def get_blob_as_descriptor(options: dict) -> bool:
        """Whether the blob-as-descriptor option is enabled; defaults to false."""
        return options.get(CoreOptions.FILE_BLOB_AS_DESCRIPTOR, "false").lower() == "true"

    @staticmethod
    def get_split_target_size(options: dict) -> int:
        """Get the split target size from options; defaults to 128 MB."""
        if CoreOptions.SOURCE_SPLIT_TARGET_SIZE in options:
            size_str = options[CoreOptions.SOURCE_SPLIT_TARGET_SIZE]
            return MemorySize.parse(size_str).get_bytes()
        return MemorySize.of_mebi_bytes(128).get_bytes()

    @staticmethod
    def get_split_open_file_cost(options: dict) -> int:
        """Get the split open-file cost from options; defaults to 4 MB."""
        if CoreOptions.SOURCE_SPLIT_OPEN_FILE_COST in options:
            cost_str = options[CoreOptions.SOURCE_SPLIT_OPEN_FILE_COST]
            return MemorySize.parse(cost_str).get_bytes()
        return MemorySize.of_mebi_bytes(4).get_bytes()

    @staticmethod
    def get_target_file_size(options: dict, has_primary_key: bool = False) -> int:
        """Get the target file size from options; defaults to 128 MB for
        primary-key tables and 256 MB for append-only tables."""
        if CoreOptions.TARGET_FILE_SIZE in options:
            size_str = options[CoreOptions.TARGET_FILE_SIZE]
            return MemorySize.parse(size_str).get_bytes()
        return MemorySize.of_mebi_bytes(128 if has_primary_key else 256).get_bytes()

    @staticmethod
    def get_blob_target_file_size(options: dict) -> int:
        """Get the blob target file size from options; falls back to the
        regular target file size (256 MB for append-only tables)."""
        if CoreOptions.BLOB_TARGET_FILE_SIZE in options:
            size_str = options[CoreOptions.BLOB_TARGET_FILE_SIZE]
            return MemorySize.parse(size_str).get_bytes()
        return CoreOptions.get_target_file_size(options, has_primary_key=False)
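

# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the pypaimon API): shows how the
# helpers above resolve sizes from a table options dict. It assumes options
# are plain string-to-string mappings and that MemorySize.parse() accepts
# human-readable sizes such as "64 mb"; both are assumptions, so adjust to
# whatever syntax your MemorySize version actually supports.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # No size options set: helpers fall back to their documented defaults
    # (128 MB split target, 4 MB open-file cost, 256 MB append-only target).
    defaults = {}
    print(CoreOptions.get_split_target_size(defaults))     # 134217728
    print(CoreOptions.get_split_open_file_cost(defaults))  # 4194304
    print(CoreOptions.get_target_file_size(defaults))      # 268435456

    # Explicit settings: CoreOptions is a str-mixin Enum, so its members
    # compare (and hash) equal to their string values and can be looked up
    # directly in a plain-string-keyed options dict.
    options = {
        "source.split.target-size": "64 mb",   # assumed MemorySize syntax
        "blob-as-descriptor": "true",
    }
    print(CoreOptions.get_split_target_size(options))   # 67108864 if "64 mb" parses as 64 MiB
    print(CoreOptions.get_blob_as_descriptor(options))  # True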