PARQUET-1227: Thrift crypto metadata structures
Author: Gidon Gershinsky <gidon@il.ibm.com>
Closes #463 from ggershinsky/p1227-thrift-crypto-metadata and squashes the following commits:
579c319 [Gidon Gershinsky] enums -> unions
d07d708 [Gidon Gershinsky] bug fix
ee4b931 [Gidon Gershinsky] multiple keys and ciphers
8d0c179 [Gidon Gershinsky] New Thrift structures for Parquet modular encryption
diff --git a/src/parquet/parquet.thrift b/src/parquet/parquet.thrift
index cfcc1fe..aec8e54 100644
--- a/src/parquet/parquet.thrift
+++ b/src/parquet/parquet.thrift
@@ -661,6 +661,22 @@
13: optional list<PageEncodingStats> encoding_stats;
}
+struct EncryptionWithFooterKey { // declares no fields; referenced as variant 1 of ColumnCryptoMetaData
+}
+
+struct EncryptionWithColumnKey { // describes a column that has its own, column-specific key
+ /** Path of the column in the schema, given as a list of strings **/
+ 1: required list<string> path_in_schema
+
+ /** Optional bytes holding the retrieval metadata of the column-specific key **/
+ 2: optional binary column_key_metadata
+}
+
+union ColumnCryptoMetaData { // Thrift union: one of the variants below is set per column
+ 1: EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY // struct without fields
+ 2: EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY // carries column path and optional key metadata
+}
+
struct ColumnChunk {
/** File where column data is stored. If not set, assumed to be same file as
* metadata. This path is relative to the current file.
@@ -687,6 +703,9 @@
/** Size of ColumnChunk's ColumnIndex, in bytes **/
7: optional i32 column_index_length
+
+ /** Crypto metadata of encrypted columns **/
+ 8: optional ColumnCryptoMetaData crypto_meta_data
}
struct RowGroup {
@@ -869,3 +888,37 @@
7: optional list<ColumnOrder> column_orders;
}
+struct AesGcmV1 { // parameters of the AES_GCM_V1 variant of EncryptionAlgorithm
+ /** Optional retrieval metadata of the AAD (additional authenticated data) used when encrypting pages and structures **/
+ 1: optional binary aad_metadata
+}
+
+struct AesGcmCtrV1 { // parameters of the AES_GCM_CTR_V1 variant of EncryptionAlgorithm
+ /** Optional retrieval metadata of the AAD (additional authenticated data) used when encrypting structures **/
+ 1: optional binary aad_metadata
+}
+
+union EncryptionAlgorithm { // Thrift union: selects one algorithm together with its parameters
+ 1: AesGcmV1 AES_GCM_V1 // AAD metadata covers pages and structures
+ 2: AesGcmCtrV1 AES_GCM_CTR_V1 // AAD metadata covers structures
+}
+
+struct FileCryptoMetaData { // file-level crypto settings: algorithm, footer encryption flag, footer key metadata, footer offset, IV prefix
+ 1: required EncryptionAlgorithm encryption_algorithm // selected algorithm variant and its AAD metadata
+
+ /** Whether the Parquet footer is encrypted; otherwise it is left as plaintext **/
+ 2: required bool encrypted_footer
+
+ /** Optional retrieval metadata of the key used to encrypt the footer
+ * and (possibly) columns **/
+ 3: optional binary footer_key_metadata
+
+ /** Offset of the Parquet footer, encrypted or plaintext (NOTE(review): presumably a byte offset in the file; confirm) **/
+ 4: required i64 footer_offset
+
+ /** If file IVs consist of a fixed part
+ * and variable parts (random or counter), the fixed
+ * part is kept here **/
+ 5: optional binary iv_prefix
+}
+