// blob: f5871950708a47b962743ab4ae1ff76b11c6fa92 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "IcebergNestedField.pb.h"
#include "memory/VeloxColumnarBatch.h"
#include "utils/Metrics.h"
#include "velox/connectors/hive/iceberg/IcebergColumnHandle.h"
#include "velox/connectors/hive/iceberg/IcebergDataSink.h"
namespace gluten {
// Plain counters describing what a writer has produced so far.
//
// Every counter starts at zero. The member order is significant: the struct
// can be initialized positionally, so fields must not be reordered.
struct WriteStats {
  // Amount of data written, in bytes (per the field name).
  uint64_t numWrittenBytes = 0;

  // Count of files written.
  uint32_t numWrittenFiles = 0;

  // Time attributed to write I/O; presumably nanoseconds, per the `Ns` suffix.
  uint64_t writeIOTimeNs = 0;

  // Wall-clock time attributed to writing; presumably nanoseconds, per the
  // `Ns` suffix.
  uint64_t writeWallNs = 0;

  // Defined out of line. NOTE(review): presumably reports whether nothing has
  // been recorded yet -- confirm against the definition.
  bool empty() const;

  // Returns a textual rendering of this object. Defined out of line; the exact
  // format is not visible from this header.
  std::string toString() const;
};
// Writer that feeds columnar batches into a Velox `IcebergDataSink`.
//
// All member functions are defined out of line; this header only fixes the
// interface and the owned state. The `const` data members and the
// `std::unique_ptr` members make the class non-copyable and non-assignable.
class IcebergWriter {
 public:
  // Constructs a writer.
  //
  // rowType          Row type associated with the writer (stored in
  //                  `rowType_`).
  // format           Integer code selecting the output file format; the
  //                  mapping is defined by the implementation, not visible
  //                  here.
  // outputDirectory  Destination directory for the written files.
  // compressionKind  Compression to apply to the output.
  // spec             Iceberg partition spec; NOTE(review): whether a null
  //                  pointer is accepted (e.g. for unpartitioned tables) is
  //                  not visible here -- confirm.
  // field            Nested-field description using the `gluten::` type. The
  //                  `field_` member uses the Velox type of the same name, so
  //                  it is presumably converted during construction.
  // sparkConfs       String key/value configuration supplied by the caller.
  // memoryPool       Memory pool shared with the writer.
  // connectorPool    Memory pool shared with the writer, presumably for the
  //                  connector side.
  IcebergWriter(
      const facebook::velox::RowTypePtr& rowType,
      int32_t format,
      const std::string& outputDirectory,
      facebook::velox::common::CompressionKind compressionKind,
      std::shared_ptr<const facebook::velox::connector::hive::iceberg::IcebergPartitionSpec> spec,
      const gluten::IcebergNestedField& field,
      const std::unordered_map<std::string, std::string>& sparkConfs,
      std::shared_ptr<facebook::velox::memory::MemoryPool> memoryPool,
      std::shared_ptr<facebook::velox::memory::MemoryPool> connectorPool);

  // Hands one columnar batch to the writer. The batch is taken by const
  // reference; the definition lives out of line.
  void write(const VeloxColumnarBatch& batch);

  // Finalizes the write and returns a list of strings produced by the commit.
  // NOTE(review): presumably per-file commit messages from the data sink --
  // confirm the exact contents against the implementation.
  std::vector<std::string> commit();

  // Returns the writer's statistics by value.
  WriteStats writeStats() const;

 private:
  // Row type passed to the constructor.
  facebook::velox::RowTypePtr rowType_;

  // Velox-side nested field description. Its type differs from the
  // constructor's `gluten::IcebergNestedField` parameter.
  const facebook::velox::connector::hive::iceberg::IcebergNestedField field_;

  // Shared memory pools kept alive for the lifetime of the writer.
  std::shared_ptr<facebook::velox::memory::MemoryPool> pool_;
  std::shared_ptr<facebook::velox::memory::MemoryPool> connectorPool_;

  // Connector configuration and session properties.
  std::shared_ptr<facebook::velox::connector::hive::HiveConfig> connectorConfig_;
  std::shared_ptr<facebook::velox::config::ConfigBase> connectorSessionProperties_;

  // Query context owned by this writer. NOTE(review): declaration order is
  // also construction order; do not reorder without checking the constructor.
  std::unique_ptr<facebook::velox::connector::ConnectorQueryCtx> connectorQueryCtx_;

  // The underlying Iceberg data sink, owned exclusively by this writer.
  std::unique_ptr<facebook::velox::connector::hive::iceberg::IcebergDataSink> dataSink_;

  // Records the writer creation time in ns.
  const uint64_t createTimeNs_{0};
};
// Builds an Iceberg partition spec from a serialized byte buffer.
//
// data     Pointer to the first byte of the serialized spec.
// length   Number of bytes available at `data`.
// rowType  Row type the spec refers to.
//
// Returns a shared pointer to an immutable partition spec.
// NOTE(review): the wire format (presumably a serialized protobuf message) and
// the behavior for empty or malformed input are not visible in this header --
// confirm against the definition.
std::shared_ptr<const facebook::velox::connector::hive::iceberg::IcebergPartitionSpec>
parseIcebergPartitionSpec(
    const uint8_t* data,
    const int32_t length,
    facebook::velox::RowTypePtr rowType);
} // namespace gluten