blob: dbfd20a1b7e0c4b225c359e0515228fe0207b4ba [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <string_view>
#include "arrow/util/logging.h"
#include "arrow/util/secure_string.h"
#include "arrow/util/string.h"
#include "parquet/encryption/crypto_factory.h"
#include "parquet/encryption/encryption_internal.h"
#include "parquet/encryption/file_key_unwrapper.h"
#include "parquet/encryption/file_system_key_material_store.h"
#include "parquet/encryption/key_toolkit_internal.h"
using arrow::util::SecureString;
namespace parquet::encryption {
// Hands the given KMS client factory over to the key toolkit owned by this
// CryptoFactory. The shared_ptr is moved into the toolkit call.
void CryptoFactory::RegisterKmsClientFactory(
    std::shared_ptr<KmsClientFactory> kms_client_factory) {
  auto& toolkit = *key_toolkit_;
  toolkit.RegisterKmsClientFactory(std::move(kms_client_factory));
}
std::shared_ptr<FileEncryptionProperties> CryptoFactory::GetFileEncryptionProperties(
const KmsConnectionConfig& kms_connection_config,
const EncryptionConfiguration& encryption_config, const std::string& file_path,
const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
if (!encryption_config.uniform_encryption && encryption_config.column_keys.empty()) {
throw ParquetException("Either column_keys or uniform_encryption must be set");
} else if (encryption_config.uniform_encryption &&
!encryption_config.column_keys.empty()) {
throw ParquetException("Cannot set both column_keys and uniform_encryption");
}
const std::string& footer_key_id = encryption_config.footer_key;
const std::string& column_key_str = encryption_config.column_keys;
std::shared_ptr<FileKeyMaterialStore> key_material_store = nullptr;
if (!encryption_config.internal_key_material) {
try {
key_material_store =
FileSystemKeyMaterialStore::Make(file_path, file_system, false);
} catch (ParquetException& e) {
std::stringstream ss;
ss << "Failed to get key material store.\n" << e.what() << "\n";
throw ParquetException(ss.str());
}
}
FileKeyWrapper key_wrapper(key_toolkit_.get(), kms_connection_config,
key_material_store, encryption_config.cache_lifetime_seconds,
encryption_config.double_wrapping);
int32_t dek_length_bits = encryption_config.data_key_length_bits;
if (!internal::ValidateKeyLength(dek_length_bits)) {
std::ostringstream ss;
ss << "Wrong data key length : " << dek_length_bits;
throw ParquetException(ss.str());
}
int dek_length = dek_length_bits / 8;
SecureString footer_key(dek_length, '\0');
RandBytes(footer_key.as_span().data(), footer_key.size());
std::string footer_key_metadata =
key_wrapper.GetEncryptionKeyMetadata(footer_key, footer_key_id, true);
FileEncryptionProperties::Builder properties_builder =
FileEncryptionProperties::Builder(footer_key);
properties_builder.footer_key_metadata(std::move(footer_key_metadata));
properties_builder.algorithm(encryption_config.encryption_algorithm);
if (!encryption_config.uniform_encryption) {
ColumnPathToEncryptionPropertiesMap encrypted_columns =
GetColumnEncryptionProperties(dek_length, column_key_str, &key_wrapper);
properties_builder.encrypted_columns(std::move(encrypted_columns));
if (encryption_config.plaintext_footer) {
properties_builder.set_plaintext_footer();
}
}
if (key_material_store != nullptr) {
key_material_store->SaveMaterial();
}
return properties_builder.build();
}
// Parses the column keys property and produces one ColumnEncryptionProperties
// entry per listed column.
//
// Format, as parsed below: entries are separated by ';', each entry is
// "<key id>:<column>,<column>,...". Whitespace around every token is trimmed
// and entries that are empty after trimming are skipped. For each column a
// random key of `dek_length` bytes is generated and its key metadata is
// obtained from `key_wrapper`.
//
// Throws ParquetException on a malformed entry, an empty key id, an empty
// column name, a column listed more than once, or when no column ends up
// configured.
ColumnPathToEncryptionPropertiesMap CryptoFactory::GetColumnEncryptionProperties(
    int dek_length, const std::string& column_keys, FileKeyWrapper* key_wrapper) {
  ColumnPathToEncryptionPropertiesMap result;

  for (std::string_view raw_entry : ::arrow::internal::SplitString(column_keys, ';')) {
    const std::string entry = ::arrow::internal::TrimString(std::string(raw_entry));
    if (entry.empty()) {
      continue;
    }

    // Each entry must be exactly "<key id>:<column list>".
    const std::vector<::std::string_view> id_and_columns =
        ::arrow::internal::SplitString(entry, ':');
    if (id_and_columns.size() != 2) {
      throw ParquetException(
          "Incorrect key to columns mapping in column keys property: [" + entry + "]");
    }

    const std::string key_id =
        ::arrow::internal::TrimString(std::string(id_and_columns[0]));
    if (key_id.empty()) {
      throw ParquetException("Empty key name in column keys property.");
    }

    const std::string column_list =
        ::arrow::internal::TrimString(std::string(id_and_columns[1]));
    const std::vector<::std::string_view> raw_columns =
        ::arrow::internal::SplitString(column_list, ',');
    // NOTE(review): if SplitString always yields at least one element this
    // branch cannot trigger and an empty list is reported by the per-column
    // check below instead - confirm against SplitString.
    if (raw_columns.empty()) {
      throw ParquetException("No columns to encrypt defined for key: " + key_id);
    }

    for (std::string_view raw_column : raw_columns) {
      const std::string column = ::arrow::internal::TrimString(std::string(raw_column));
      if (column.empty()) {
        throw ParquetException("Empty column name in column keys property for key: " +
                               key_id);
      }
      if (result.count(column) > 0) {
        throw ParquetException("Multiple keys defined for the same column: " + column);
      }

      // Every column gets its own freshly generated random key.
      SecureString data_key(dek_length, '\0');
      RandBytes(data_key.as_span().data(), data_key.size());
      const std::string key_metadata =
          key_wrapper->GetEncryptionKeyMetadata(data_key, key_id, false);

      ColumnEncryptionProperties::Builder column_builder;
      column_builder.key(data_key);
      column_builder.key_metadata(key_metadata);
      std::shared_ptr<ColumnEncryptionProperties> column_properties =
          column_builder.build();
      result.insert({column, column_properties});
    }
  }

  if (result.empty()) {
    throw ParquetException("No column keys configured in column keys property.");
  }
  return result;
}
std::shared_ptr<FileDecryptionProperties> CryptoFactory::GetFileDecryptionProperties(
const KmsConnectionConfig& kms_connection_config,
const DecryptionConfiguration& decryption_config, const std::string& file_path,
const std::shared_ptr<::arrow::fs::FileSystem>& file_system) {
auto key_retriever = std::make_shared<FileKeyUnwrapper>(
key_toolkit_, kms_connection_config, decryption_config.cache_lifetime_seconds,
file_path, file_system);
return FileDecryptionProperties::Builder()
.key_retriever(std::move(key_retriever))
->plaintext_files_allowed()
->build();
}
// Forwards a master key rotation request for `parquet_file_path` on
// `file_system` to the key toolkit, passing every argument through unchanged.
void CryptoFactory::RotateMasterKeys(
    const KmsConnectionConfig& kms_connection_config,
    const std::string& parquet_file_path,
    const std::shared_ptr<::arrow::fs::FileSystem>& file_system, bool double_wrapping,
    double cache_lifetime_seconds) {
  auto& toolkit = *key_toolkit_;
  toolkit.RotateMasterKeys(kms_connection_config, parquet_file_path, file_system,
                           double_wrapping, cache_lifetime_seconds);
}
} // namespace parquet::encryption