blob: 0af0278b260432d94b1d87814e1ab365406285eb [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <stdio.h>
#include <arrow/io/file.h>
#include "parquet/column_reader.h"
#include "parquet/column_writer.h"
#include "parquet/encryption/test_encryption_util.h"
#include "parquet/file_reader.h"
#include "parquet/file_writer.h"
#include "parquet/platform.h"
#include "parquet/test_util.h"
/*
* This file contains unit-tests for writing encrypted Parquet files with
* different encryption configurations.
* The files are saved in temporary folder and will be deleted after reading
* them in read_configurations_test.cc test.
*
* A detailed description of the Parquet Modular Encryption specification can be found
* here:
* https://github.com/apache/parquet-format/blob/encryption/Encryption.md
*
* Each unit-test creates a single parquet file with eight columns using one of the
* following encryption configurations:
*
* - Encryption configuration 1: Encrypt all columns and the footer with the same key.
* (uniform encryption)
* - Encryption configuration 2: Encrypt two columns and the footer, with different
* keys.
* - Encryption configuration 3: Encrypt two columns, with different keys.
* Don’t encrypt footer (to enable legacy readers)
* - plaintext footer mode.
* - Encryption configuration 4: Encrypt two columns and the footer, with different
* keys. Supply aad_prefix for file identity
* verification.
* - Encryption configuration 5: Encrypt two columns and the footer, with different
* keys. Supply aad_prefix, and call
* disable_aad_prefix_storage to prevent file
* identity storage in file metadata.
* - Encryption configuration 6: Encrypt two columns and the footer, with different
* keys. Use the alternative (AES_GCM_CTR_V1) algorithm.
*/
namespace parquet::encryption::test {
using FileClass = ::arrow::io::FileOutputStream;
std::unique_ptr<TemporaryDir> temp_dir;
class TestEncryptionConfiguration : public ::testing::Test {
public:
static void SetUpTestCase();
protected:
FileEncryptor encryptor_;
std::string path_to_double_field_ = kDoubleFieldName;
std::string path_to_float_field_ = kFloatFieldName;
std::string file_name_;
SecureString kFooterEncryptionKey_ = kFooterEncryptionKey;
SecureString kColumnEncryptionKey1_ = kColumnEncryptionKey1;
SecureString kColumnEncryptionKey2_ = kColumnEncryptionKey2;
std::string kFileName_ = std::string(kFileName);
void EncryptFile(std::shared_ptr<FileEncryptionProperties> encryption_configurations,
std::string file_name) {
std::string file = temp_dir->path().ToString() + file_name;
encryptor_.EncryptFile(file, encryption_configurations);
}
};
// Encryption configuration 1: Encrypt all columns and the footer with the same key.
// (uniform encryption)
TEST_F(TestEncryptionConfiguration, UniformEncryption) {
FileEncryptionProperties::Builder file_encryption_builder_1(kFooterEncryptionKey_);
this->EncryptFile(file_encryption_builder_1.footer_key_metadata("kf")->build(),
"tmp_uniform_encryption.parquet.encrypted");
}
// Encryption configuration 2: Encrypt two columns and the footer, with different keys.
TEST_F(TestEncryptionConfiguration, EncryptTwoColumnsAndTheFooter) {
std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>> encryption_cols2;
encryption_cols2[path_to_double_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1_, "kc1");
encryption_cols2[path_to_float_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey2_, "kc2");
encryption_cols2[kBooleanFieldName] = ColumnEncryptionProperties::Unencrypted();
FileEncryptionProperties::Builder file_encryption_builder_2(kFooterEncryptionKey_);
this->EncryptFile(file_encryption_builder_2.footer_key_metadata("kf")
->encrypted_columns(std::move(encryption_cols2))
->build(),
"tmp_encrypt_columns_and_footer.parquet.encrypted");
}
// Encryption configuration 3: Encrypt two columns, with different keys.
// Don’t encrypt footer.
// (plaintext footer mode, readable by legacy readers)
TEST_F(TestEncryptionConfiguration, EncryptTwoColumnsWithPlaintextFooter) {
std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>> encryption_cols3;
encryption_cols3[path_to_double_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1_, "kc1");
encryption_cols3[path_to_float_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey2_, "kc2");
encryption_cols3[kBooleanFieldName] = ColumnEncryptionProperties::Unencrypted();
FileEncryptionProperties::Builder file_encryption_builder_3(kFooterEncryptionKey_);
this->EncryptFile(file_encryption_builder_3.footer_key_metadata("kf")
->encrypted_columns(std::move(encryption_cols3))
->set_plaintext_footer()
->build(),
"tmp_encrypt_columns_plaintext_footer.parquet.encrypted");
}
// Encryption configuration 4: Encrypt two columns and the footer, with different keys.
// Use aad_prefix.
TEST_F(TestEncryptionConfiguration, EncryptTwoColumnsAndFooterWithAadPrefix) {
std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>> encryption_cols4;
encryption_cols4[path_to_double_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1_, "kc1");
encryption_cols4[path_to_float_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey2_, "kc2");
encryption_cols4[kBooleanFieldName] = ColumnEncryptionProperties::Unencrypted();
FileEncryptionProperties::Builder file_encryption_builder_4(kFooterEncryptionKey_);
this->EncryptFile(file_encryption_builder_4.footer_key_metadata("kf")
->encrypted_columns(std::move(encryption_cols4))
->aad_prefix(kFileName_)
->build(),
"tmp_encrypt_columns_and_footer_aad.parquet.encrypted");
}
// Encryption configuration 5: Encrypt two columns and the footer, with different keys.
// Use aad_prefix and disable_aad_prefix_storage.
TEST_F(TestEncryptionConfiguration,
EncryptTwoColumnsAndFooterWithAadPrefixDisable_aad_prefix_storage) {
std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>> encryption_cols5;
encryption_cols5[path_to_double_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1_, "kc1");
encryption_cols5[path_to_float_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey2_, "kc2");
encryption_cols5[kBooleanFieldName] = ColumnEncryptionProperties::Unencrypted();
FileEncryptionProperties::Builder file_encryption_builder_5(kFooterEncryptionKey_);
this->EncryptFile(
file_encryption_builder_5.encrypted_columns(std::move(encryption_cols5))
->footer_key_metadata("kf")
->aad_prefix(kFileName_)
->disable_aad_prefix_storage()
->build(),
"tmp_encrypt_columns_and_footer_disable_aad_storage.parquet.encrypted");
}
// Encryption configuration 6: Encrypt two columns and the footer, with different keys.
// Use AES_GCM_CTR_V1 algorithm.
TEST_F(TestEncryptionConfiguration, EncryptTwoColumnsAndFooterUseAES_GCM_CTR) {
std::map<std::string, std::shared_ptr<ColumnEncryptionProperties>> encryption_cols6;
encryption_cols6[path_to_double_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey1_, "kc1");
encryption_cols6[path_to_float_field_] =
ColumnEncryptionProperties::WithColumnKey(kColumnEncryptionKey2_, "kc2");
encryption_cols6[kBooleanFieldName] = ColumnEncryptionProperties::Unencrypted();
FileEncryptionProperties::Builder file_encryption_builder_6(kFooterEncryptionKey_);
EXPECT_NO_THROW(
this->EncryptFile(file_encryption_builder_6.footer_key_metadata("kf")
->encrypted_columns(std::move(encryption_cols6))
->algorithm(parquet::ParquetCipher::AES_GCM_CTR_V1)
->build(),
"tmp_encrypt_columns_and_footer_ctr.parquet.encrypted"));
}
// Set temp_dir before running the write/read tests. The encrypted files will
// be written/read from this directory.
void TestEncryptionConfiguration::SetUpTestCase() {
temp_dir = temp_data_dir().ValueOrDie();
}
} // namespace parquet::encryption::test