blob: a365b4df4bf9febfbf9cf21d355d1a71ab36e2ff [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <functional>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

#include "arrow/util/secure_string.h"
#include "parquet/schema.h"
namespace parquet {
// Forward declarations: the types below are only named through pointers,
// references or smart pointers in this header, so their full definitions are
// not needed here.
namespace encryption {
// Held by Decryptor through a std::unique_ptr.
class AesDecryptor;
// Not referenced elsewhere in this header.
class AesEncryptor;
} // namespace encryption
// Passed by const pointer to the static Get*DecryptorFactory functions.
class ColumnCryptoMetaData;
// Passed through a std::shared_ptr to RetrieveColumnKeyIfEmpty.
class DecryptionKeyRetriever;
// Held by InternalFileDecryptor through a std::shared_ptr.
class FileDecryptionProperties;
// An object handling decryption using well-known encryption parameters
//
// CAUTION: Decryptor objects are not thread-safe.
class PARQUET_EXPORT Decryptor {
 public:
  // Takes ownership of `decryptor` and receives the key, the file AAD, the
  // initial AAD and the memory pool by value / raw pointer. The pool is not
  // owned by this object.
  Decryptor(std::unique_ptr<encryption::AesDecryptor> decryptor,
            ::arrow::util::SecureString key, std::string file_aad, std::string aad,
            ::arrow::MemoryPool* pool);
  // Declared here and defined out of line: encryption::AesDecryptor is only
  // forward-declared in this header, so the std::unique_ptr member cannot be
  // destroyed from an implicitly-defined inline destructor.
  ~Decryptor();

  // Returns the file AAD held by this decryptor.
  const std::string& file_aad() const { return file_aad_; }

  // Replaces the AAD held by this decryptor with a copy of `aad`.
  void UpdateAad(const std::string& aad) { aad_ = aad; }

  // Returns the memory pool pointer held by this decryptor. Const-qualified
  // because it only reads a member, matching file_aad() above and
  // InternalFileDecryptor::pool().
  ::arrow::MemoryPool* pool() const { return pool_; }

  // Length conversions between ciphertext and plaintext sizes.
  // NOTE(review): the exact size difference is decided by the definitions,
  // which are not visible in this header.
  [[nodiscard]] int32_t PlaintextLength(int32_t ciphertext_len) const;
  [[nodiscard]] int32_t CiphertextLength(int32_t plaintext_len) const;

  // Decrypts `ciphertext` into the caller-provided `plaintext` buffer.
  // NOTE(review): the return value is presumably the number of plaintext bytes
  // produced, and `plaintext` is presumably expected to hold at least
  // PlaintextLength(ciphertext.size()) bytes; confirm against the definition.
  int32_t Decrypt(::arrow::util::span<const uint8_t> ciphertext,
                  ::arrow::util::span<uint8_t> plaintext);

 private:
  std::unique_ptr<encryption::AesDecryptor> aes_decryptor_;
  ::arrow::util::SecureString key_;
  std::string file_aad_;
  std::string aad_;
  ::arrow::MemoryPool* pool_;
};
// Holds the decryption settings of one file (properties, file AAD, cipher
// algorithm, footer key metadata, memory pool) and hands out Decryptor
// instances for the footer and for individual columns.
class InternalFileDecryptor {
 public:
  explicit InternalFileDecryptor(std::shared_ptr<FileDecryptionProperties> properties,
                                 const std::string& file_aad,
                                 ParquetCipher::type algorithm,
                                 const std::string& footer_key_metadata,
                                 ::arrow::MemoryPool* pool);

  // --- Read-only accessors for the stored members ---

  const std::shared_ptr<FileDecryptionProperties>& properties() const {
    return properties_;
  }

  const std::string& file_aad() const { return file_aad_; }

  ParquetCipher::type algorithm() const { return algorithm_; }

  const std::string& footer_key_metadata() const { return footer_key_metadata_; }

  ::arrow::MemoryPool* pool() const { return pool_; }

  // Returns a reference to the footer key. Not a const member function.
  const ::arrow::util::SecureString& GetFooterKey();

  // --- Decryptor creation ---

  // Creates a Decryptor for the Parquet footer.
  std::unique_ptr<Decryptor> GetFooterDecryptor();

  // Creates a Decryptor for the metadata of a column chunk.
  std::unique_ptr<Decryptor> GetColumnMetaDecryptor(
      const std::string& column_path, const std::string& column_key_metadata,
      const std::string& aad = "") {
    constexpr bool kForMetadata = true;
    return GetColumnDecryptor(column_path, column_key_metadata, aad, kForMetadata);
  }

  // Creates a Decryptor for the data of a column chunk.
  std::unique_ptr<Decryptor> GetColumnDataDecryptor(
      const std::string& column_path, const std::string& column_key_metadata,
      const std::string& aad = "") {
    constexpr bool kForMetadata = false;
    return GetColumnDecryptor(column_path, column_key_metadata, aad, kForMetadata);
  }

  // Returns a factory producing Decryptors for column chunk metadata.
  //
  // Typically useful when decryption is expected to run on multiple threads.
  // Static because `file_decryptor` may be null, which is the case when the
  // column is not encrypted.
  static std::function<std::unique_ptr<Decryptor>()> GetColumnMetaDecryptorFactory(
      InternalFileDecryptor* file_decryptor, const ColumnCryptoMetaData* crypto_metadata,
      const std::string& aad = "");

  // Returns a factory producing Decryptors for column chunk data.
  //
  // Typically useful when decryption is expected to run on multiple threads.
  // Static because `file_decryptor` may be null, which is the case when the
  // column is not encrypted.
  static std::function<std::unique_ptr<Decryptor>()> GetColumnDataDecryptorFactory(
      InternalFileDecryptor* file_decryptor, const ColumnCryptoMetaData* crypto_metadata,
      const std::string& aad = "");

 private:
  // --- Private helpers (defined out of line) ---

  ::arrow::util::SecureString GetColumnKey(const std::string& column_path,
                                           const std::string& column_key_metadata);

  static ::arrow::util::SecureString RetrieveColumnKeyIfEmpty(
      ::arrow::util::SecureString column_key, const std::string& column_key_metadata,
      const std::shared_ptr<DecryptionKeyRetriever>& key_retriever);

  std::unique_ptr<Decryptor> GetFooterDecryptor(const std::string& aad, bool metadata);

  // Shared implementation behind GetColumnMetaDecryptor (metadata == true)
  // and GetColumnDataDecryptor (metadata == false).
  std::unique_ptr<Decryptor> GetColumnDecryptor(const std::string& column_path,
                                                const std::string& column_key_metadata,
                                                const std::string& aad, bool metadata);

  std::function<std::unique_ptr<Decryptor>()> GetColumnDecryptorFactory(
      const ColumnCryptoMetaData* crypto_metadata, const std::string& aad, bool metadata);

  // --- Data members (declaration order is significant; do not reorder) ---

  std::shared_ptr<FileDecryptionProperties> properties_;
  // aad_prefix (when present) followed by aad_file_unique
  std::string file_aad_;
  ParquetCipher::type algorithm_;
  std::string footer_key_metadata_;
  ::arrow::MemoryPool* pool_;
  // Guards updates of footer_key_
  std::mutex mutex_;
  ::arrow::util::SecureString footer_key_;
};
// Updates `decryptor` for the module identified by `row_group_ordinal`,
// `column_ordinal` and `module_type`.
// NOTE(review): the definition is not in this header; presumably it derives a
// module-specific AAD from these values and installs it through
// Decryptor::UpdateAad. Confirm against the implementation, including whether
// a null `decryptor` is tolerated.
void UpdateDecryptor(Decryptor* decryptor, int16_t row_group_ordinal,
int16_t column_ordinal, int8_t module_type);
} // namespace parquet