blob: f52fb3730c28ffb6006c7dab2d22c1ee4d311ec3 [file] [log] [blame]
/*
* Copyright 2024-present Alibaba Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <string>
#include <vector>
#include "paimon/defs.h"
#include "paimon/executor.h"
#include "paimon/memory/memory_pool.h"
#include "paimon/metrics.h"
#include "paimon/result.h"
#include "paimon/status.h"
#include "paimon/type_fwd.h"
#include "paimon/visibility.h"
namespace paimon {
class CommitContext;
class CommitMessage;
/// Interface for commit operations in a file store.
///
/// The `FileStoreCommit` class provides interfaces for committing changes, expiring old snapshots,
/// dropping partitions, and retrieving commit metrics.
class PAIMON_EXPORT FileStoreCommit {
public:
/// Create an instance of `FileStoreCommit`.
///
/// @param context A unique pointer to the `CommitContext` used for commit operations.
///
/// @return A Result containing a unique pointer to the `FileStoreCommit` instance.
static Result<std::unique_ptr<FileStoreCommit>> Create(std::unique_ptr<CommitContext> context);
virtual ~FileStoreCommit() = default;
/// Commit changes to the file store.
///
/// @param commit_messages A vector of commit messages to be committed.
/// @param commit_identifier An optional identifier for the commit operation. Default is
/// `BATCH_WRITE_COMMIT_IDENTIFIER`.
/// @param watermark An optional event-time watermark used to indicate the progress of data
/// processing. Default is std::nullopt.
/// @return Status indicating the success or failure of the commit operation.
virtual Status Commit(const std::vector<std::shared_ptr<CommitMessage>>& commit_messages,
int64_t commit_identifier = BATCH_WRITE_COMMIT_IDENTIFIER,
std::optional<int64_t> watermark = std::nullopt) = 0;
/// Filter out all `std::vector<CommitMessage>` which have been committed and commit the
/// remaining ones.
///
/// Compared to commit, this method will first check if a commit_identifier has been
/// committed, so this method might be slower. A common usage of this method is to retry the
/// commit process after a failure.
///
/// @param commit_identifier_and_messages A map containing all {@link CommitMessage}s in
/// question. The key is the commit_identifier.
///
/// @param watermark An optional event-time watermark used to indicate the progress of data
/// processing. Default is std::nullopt.
/// @return Number of `std::vector<CommitMessage>` committed.
virtual Result<int32_t> FilterAndCommit(
const std::map<int64_t, std::vector<std::shared_ptr<CommitMessage>>>&
commit_identifier_and_messages,
std::optional<int64_t> watermark = std::nullopt) = 0;
/// Overwrite from manifest committable and partition.
///
/// @param partitions A single partition maps each partition key to a partition value. Depending
/// on the user-defined statement, the partition might not include all partition keys. Also
/// note that this partition does not necessarily equal to the partitions of the newly added
/// key-values. This is just the partition to be cleaned up.
/// @param commit_messages Description of the commit messages.
/// @param commit_identifier Unique identifier.
/// @param watermark An optional event-time watermark used to indicate the progress of data
/// processing. Default is std::nullopt.
/// @return Result of the operation.
virtual Status Overwrite(const std::vector<std::map<std::string, std::string>>& partitions,
const std::vector<std::shared_ptr<CommitMessage>>& commit_messages,
int64_t commit_identifier,
std::optional<int64_t> watermark = std::nullopt) = 0;
/// This is a temporary interface for internal use. It will be removed in a future version.
/// Please do not rely on it for long-term use.
///
/// @param partitions Description of the partitions.
/// @param commit_messages Description of the commit messages.
/// @param commit_identifier Unique identifier.
/// @param watermark An optional event-time watermark used to indicate the progress of data
/// processing. Default is std::nullopt.
/// @return Result of the operation.
virtual Result<int32_t> FilterAndOverwrite(
const std::vector<std::map<std::string, std::string>>& partitions,
const std::vector<std::shared_ptr<CommitMessage>>& commit_messages,
int64_t commit_identifier, std::optional<int64_t> watermark = std::nullopt) = 0;
/// If user want to use REST catalog commit, please set
/// `CommitContextBuilder::UseRESTCatalogCommit()`, then call `Commit()` (or
/// `FilterAndCommit()`) normally, then call this method to get the last commit table request,
/// which is a JSON string that can be used to send to REST catalog server.
///
/// @note Temporary interface for internal use, will be removed in the future.
///
/// @return A Result containing a JSON string which including `snapshot` and `statistics`, but
/// excluding `tableId`.
virtual Result<std::string> GetLastCommitTableRequest() = 0;
/// Expire old snapshot in the file store.
///
/// @return Result<int32_t> indicating the number of expired items or an error status.
virtual Result<int32_t> Expire() = 0;
/// Drop specified partitions from the file store.
///
/// @param partitions A vector of partitions to be dropped.
/// @param commit_identifier An identifier for the commit operation.
/// @return Status indicating the success or failure of the drop partition operation.
virtual Status DropPartition(const std::vector<std::map<std::string, std::string>>& partitions,
int64_t commit_identifier) = 0;
/// Retrieve metrics related to commit operations.
///
/// @return A shared pointer to a `Metrics` object containing commit metrics.
virtual std::shared_ptr<Metrics> GetCommitMetrics() const = 0;
};
} // namespace paimon