blob: b2d7038ed2e7af8d096ef94fc228dbc23f6be5fe [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package parquet
import (
format ""
// Constants that will be used as the default values with encryption/decryption
const (
// By default we'll use AesGCM as our encryption algorithm
DefaultEncryptionAlgorithm = AesGcm
MaximalAadMetadataLength int32 = 256
// if encryption is turned on, we will default to also encrypting the footer
DefaultEncryptedFooter = true
DefaultCheckSignature = true
// by default if you set the file decryption properties, we will error
// on any plaintext files unless otherwise specified.
DefaultAllowPlaintextFiles = false
AadFileUniqueLength int32 = 8
type (
	// ColumnPathToDecryptionPropsMap associates a column path with the
	// decryption properties to use for that column.
	ColumnPathToDecryptionPropsMap map[string]*ColumnDecryptionProperties

	// ColumnPathToEncryptionPropsMap associates a column path with the
	// encryption properties to use for that column.
	ColumnPathToEncryptionPropsMap map[string]*ColumnEncryptionProperties
)
// ColumnEncryptionProperties specifies how to encrypt a given column.
//
// Instances are built with NewColumnEncryptionProperties; the fields are only
// reachable through the accessor methods below.
type ColumnEncryptionProperties struct {
	columnPath             string
	encrypted              bool
	encryptedWithFooterKey bool
	key                    string
	keyMetadata            string
	utilized               bool
}

// ColumnPath returns which column these properties are for.
func (ce *ColumnEncryptionProperties) ColumnPath() string { return ce.columnPath }

// IsEncrypted returns true if this column is encrypted.
func (ce *ColumnEncryptionProperties) IsEncrypted() bool { return ce.encrypted }

// IsEncryptedWithFooterKey returns true if this column is encrypted with the
// footer key itself, or false if a separate key is used for this column.
func (ce *ColumnEncryptionProperties) IsEncryptedWithFooterKey() bool {
	return ce.encryptedWithFooterKey
}

// Key returns the key used for encrypting this column if it isn't encrypted by
// the footer key. It is empty when no column key was set or after
// WipeOutEncryptionKey has been called.
func (ce *ColumnEncryptionProperties) Key() string { return ce.key }

// KeyMetadata returns the key identifier which is used with a KeyRetriever to
// get the key for this column if it is not encrypted using the footer key.
func (ce *ColumnEncryptionProperties) KeyMetadata() string { return ce.keyMetadata }

// WipeOutEncryptionKey clears the encryption key by resetting it to the empty
// string; used after completion of file writing.
func (ce *ColumnEncryptionProperties) WipeOutEncryptionKey() { ce.key = "" }

// IsUtilized returns whether or not these properties have already been used.
// If the key is empty then this is always false, regardless of SetUtilized.
func (ce *ColumnEncryptionProperties) IsUtilized() bool {
	return ce.key != "" && ce.utilized
}

// SetUtilized marks these properties as utilized once they are used in
// FileEncryptionProperties, as the encryption key will be wiped out on
// completion of writing.
func (ce *ColumnEncryptionProperties) SetUtilized() { ce.utilized = true }

// Clone returns a new instance of ColumnEncryptionProperties with the same
// column path, key and key metadata. The clone is not marked as utilized.
func (ce *ColumnEncryptionProperties) Clone() *ColumnEncryptionProperties {
	return NewColumnEncryptionProperties(ce.columnPath, WithKey(ce.key), WithKeyMetadata(ce.keyMetadata))
}

// colEncryptConfig accumulates the values set by ColumnEncryptOption functions
// before NewColumnEncryptionProperties builds the final properties.
type colEncryptConfig struct {
	key         string
	keyMetadata string
	encrypted   bool
}

// ColumnEncryptOption is how to specify options to the
// NewColumnEncryptionProperties function.
type ColumnEncryptOption func(*colEncryptConfig)

// WithKey sets a column specific key.
// If key is not set on an encrypted column, the column will be encrypted with
// the footer key. The key length must be either 16, 24, or 32 bytes (the
// length is not validated here).
//
// An empty key is ignored, leaving any previously configured key in place.
// The key is held as a Go string, so wiping it later only resets the stored
// value to "". The caller remains responsible for its own copy of the key.
func WithKey(key string) ColumnEncryptOption {
	return func(c *colEncryptConfig) {
		if key != "" {
			c.key = key
		}
	}
}

// WithKeyMetadata sets the key retrieval metadata, use either KeyMetadata or
// KeyID but not both.
func WithKeyMetadata(keyMeta string) ColumnEncryptOption {
	return func(c *colEncryptConfig) {
		c.keyMetadata = keyMeta
	}
}

// WithKeyID is a convenience function to set the key metadata using a string
// id. Use either KeyMetadata or KeyID, not both.
//
// It panics if keyID is not valid UTF-8. The check runs when WithKeyID is
// called, not when the returned option is applied.
func WithKeyID(keyID string) ColumnEncryptOption {
	if !utf8.ValidString(keyID) {
		panic("parquet: key id should be UTF8 encoded")
	}
	return WithKeyMetadata(keyID)
}

// NewColumnEncryptionProperties constructs properties for the provided column
// path, modified by the options provided.
//
// The column is always marked encrypted. It is considered encrypted with the
// footer key exactly when no column key was supplied.
func NewColumnEncryptionProperties(name string, opts ...ColumnEncryptOption) *ColumnEncryptionProperties {
	cfg := colEncryptConfig{encrypted: true}
	for _, o := range opts {
		o(&cfg)
	}

	// utilized is left at its zero value: fresh properties are unused.
	return &ColumnEncryptionProperties{
		columnPath:             name,
		encrypted:              cfg.encrypted,
		encryptedWithFooterKey: cfg.encrypted && cfg.key == "",
		key:                    cfg.key,
		keyMetadata:            cfg.keyMetadata,
	}
}
// ColumnDecryptionProperties are the specifications for how to decrypt a given column.
type ColumnDecryptionProperties struct {
	columnPath string
	key        string
	utilized   bool
}

// NewColumnDecryptionProperties constructs a new ColumnDecryptionProperties for
// the given column path, modified by the provided options. The result is not
// marked as utilized.
func NewColumnDecryptionProperties(column string, opts ...ColumnDecryptOption) *ColumnDecryptionProperties {
	var cfg columnDecryptConfig
	for _, o := range opts {
		o(&cfg)
	}

	return &ColumnDecryptionProperties{
		columnPath: column,
		key:        cfg.key,
	}
}

// ColumnPath returns which column these properties describe how to decrypt.
func (cd *ColumnDecryptionProperties) ColumnPath() string { return cd.columnPath }

// Key returns the key specified to decrypt this column, or is empty if the
// Footer Key should be used.
func (cd *ColumnDecryptionProperties) Key() string { return cd.key }

// IsUtilized returns whether or not these properties have been used for
// decryption already.
func (cd *ColumnDecryptionProperties) IsUtilized() bool { return cd.utilized }

// SetUtilized is used by the reader to specify when we've decrypted the column
// and have used the key so we know to wipe out the keys.
func (cd *ColumnDecryptionProperties) SetUtilized() { cd.utilized = true }

// WipeOutDecryptionKey resets the key to the empty string; it is called after
// decryption to ensure the key doesn't stick around and get re-used.
func (cd *ColumnDecryptionProperties) WipeOutDecryptionKey() { cd.key = "" }

// Clone returns a new instance of ColumnDecryptionProperties with the same key
// and column. The clone is not marked as utilized.
func (cd *ColumnDecryptionProperties) Clone() *ColumnDecryptionProperties {
	return NewColumnDecryptionProperties(cd.columnPath, WithDecryptKey(cd.key))
}

// columnDecryptConfig accumulates the values set by ColumnDecryptOption
// functions before the properties are built.
type columnDecryptConfig struct {
	key string
}

// ColumnDecryptOption is the type of the options passed for constructing
// Decryption Properties.
type ColumnDecryptOption func(*columnDecryptConfig)

// WithDecryptKey specifies the key to utilize for decryption. An empty key is
// ignored, leaving any previously configured key in place.
func WithDecryptKey(key string) ColumnDecryptOption {
	return func(cfg *columnDecryptConfig) {
		if key != "" {
			cfg.key = key
		}
	}
}
// AADPrefixVerifier is an interface for any object that can be used to verify
// the identity of the file being decrypted.
// It should panic if the provided AAD identity is bad.
//
// In a data set, AAD Prefixes should be collected, and then checked for
// missing files.
type AADPrefixVerifier interface {
	// Verify checks the identity of the file and panics if it is bad.
	//
	// NOTE(review): the method declaration itself was absent from the text
	// under review (only its comment survived); it is restored here as
	// Verify(string) — confirm against existing implementers and callers.
	Verify(string)
}
// DecryptionKeyRetriever is an interface for getting the desired key for
// decryption from metadata. It should take in some metadata identifier and
// return the actual Key to use for decryption.
type DecryptionKeyRetriever interface {
	// GetKey returns the decryption key identified by keyMetadata.
	GetKey(keyMetadata []byte) string
}
// FileDecryptionProperties define the File Level configuration for decrypting a parquet file. Once constructed they are
// read only.
type FileDecryptionProperties struct {
footerKey string
aadPrefix string
checkPlaintextFooterIntegrity bool
plaintextAllowed bool
utilized bool
columnDecryptProps ColumnPathToDecryptionPropsMap
Verifier AADPrefixVerifier
KeyRetriever DecryptionKeyRetriever
// NewFileDecryptionProperties takes in the options for constructing a new FileDecryptionProperties object, otherwise
// it will use the default configuration which will check footer integrity of a plaintext footer for an encrypted file
// for unencrypted parquet files, the decryption properties should not be set.
func NewFileDecryptionProperties(opts ...FileDecryptionOption) *FileDecryptionProperties {
var cfg fileDecryptConfig
cfg.checkFooterIntegrity = DefaultCheckSignature
cfg.plaintextAllowed = DefaultAllowPlaintextFiles
for _, o := range opts {
return &FileDecryptionProperties{
Verifier: cfg.verifier,
footerKey: cfg.footerKey,
checkPlaintextFooterIntegrity: cfg.checkFooterIntegrity,
KeyRetriever: cfg.retriever,
aadPrefix: cfg.aadPrefix,
columnDecryptProps: cfg.colDecrypt,
plaintextAllowed: cfg.plaintextAllowed,
utilized: false,
// ColumnKey returns the key to be used for decrypting the provided column.
func (fd *FileDecryptionProperties) ColumnKey(path string) string {
if d, ok := fd.columnDecryptProps[path]; ok {
if d != nil {
return d.Key()
return ""
// FooterKey returns the key utilized for decrypting the Footer if encrypted and any columns that are encrypted with
// the footer key.
func (fd *FileDecryptionProperties) FooterKey() string { return fd.footerKey }
// AadPrefix returns the prefix to be supplied for constructing the identification strings when decrypting
func (fd *FileDecryptionProperties) AadPrefix() string { return fd.aadPrefix }
// PlaintextFooterIntegrity returns whether or not an integrity check will be performed on a plaintext footer for an
// encrypted file.
func (fd *FileDecryptionProperties) PlaintextFooterIntegrity() bool {
return fd.checkPlaintextFooterIntegrity
// PlaintextFilesAllowed returns whether or not this instance of decryption properties are allowed on a plaintext file.
func (fd *FileDecryptionProperties) PlaintextFilesAllowed() bool { return fd.plaintextAllowed }
// SetUtilized is called to mark this instance as utilized once it is used to read a file. A single instance
// can be used for reading one file only. Setting this ensures the keys will be wiped out upon completion of file reading.
func (fd *FileDecryptionProperties) SetUtilized() { fd.utilized = true }
// IsUtilized returns whether or not this instance has been used to decrypt a file. If the footer key and prefix are
// empty and there are no column decryption properties, then this is always false.
func (fd *FileDecryptionProperties) IsUtilized() bool {
if fd.footerKey == "" && len(fd.columnDecryptProps) == 0 && fd.aadPrefix == "" {
return false
return fd.utilized
// WipeOutDecryptionKeys will clear all the keys for this instance including the column level ones, this will be called
// after this instance has been utilized.
func (fd *FileDecryptionProperties) WipeOutDecryptionKeys() {
fd.footerKey = ""
for _, cd := range fd.columnDecryptProps {
// Clone returns a new instance of these properties, changing the prefix if set (keeping the same prefix if left empty)
func (fd *FileDecryptionProperties) Clone(newAadPrefix string) *FileDecryptionProperties {
keyCopy := fd.footerKey
colDecryptMapCopy := make(ColumnPathToDecryptionPropsMap)
for k, v := range fd.columnDecryptProps {
colDecryptMapCopy[k] = v.Clone()
if newAadPrefix == "" {
newAadPrefix = fd.aadPrefix
return &FileDecryptionProperties{
footerKey: keyCopy,
KeyRetriever: fd.KeyRetriever,
checkPlaintextFooterIntegrity: fd.checkPlaintextFooterIntegrity,
Verifier: fd.Verifier,
columnDecryptProps: colDecryptMapCopy,
aadPrefix: newAadPrefix,
plaintextAllowed: fd.plaintextAllowed,
utilized: false,
type fileDecryptConfig struct {
footerKey string
aadPrefix string
verifier AADPrefixVerifier
colDecrypt ColumnPathToDecryptionPropsMap
retriever DecryptionKeyRetriever
checkFooterIntegrity bool
plaintextAllowed bool
// FileDecryptionOption is how to supply options to constructing a new FileDecryptionProperties instance.
type FileDecryptionOption func(*fileDecryptConfig)
// WithFooterKey sets an explicit footer key. If Applied on a file that contains footer key
// metadata the metadata will be ignored, the footer will be decrypted/verified with this key.
// If the explicit key is not set, footer key will be fetched from the key retriever.
// With explcit keys or AAD prefix, new encryption properties object must be created for each
// encrypted file.
// Explicit encryption keys (footer and column) are cloned.
// Upon completion of file reading, the cloned encryption keys in the properties will be wiped out
// Caller is responsible for wiping out the input key array
// footer key length must be either 16, 24, or 32 bytes
func WithFooterKey(key string) FileDecryptionOption {
return func(cfg *fileDecryptConfig) {
if key != "" {
cfg.footerKey = key
// WithPrefixVerifier supplies a verifier object to use for verifying the AAD Prefixes stored in the file.
func WithPrefixVerifier(verifier AADPrefixVerifier) FileDecryptionOption {
return func(cfg *fileDecryptConfig) {
if verifier != nil {
cfg.verifier = verifier
// WithColumnKeys sets explicit column keys.
// It's also possible to set a key retriever on this property object.
// Upon file decryption, availability of explicit keys is checked before invocation
// of the retreiver callback.
// If an explicit key is available for a footer or a column, its key metadata will be ignored.
func WithColumnKeys(decrypt ColumnPathToDecryptionPropsMap) FileDecryptionOption {
return func(cfg *fileDecryptConfig) {
if len(decrypt) == 0 {
if len(cfg.colDecrypt) != 0 {
panic("column properties already set")
for _, v := range decrypt {
if v.IsUtilized() {
panic("parquet: column properties utilized in another file")
cfg.colDecrypt = decrypt
// WithKeyRetriever sets a key retriever callback. It's also possible to set explicit footer or column keys.
func WithKeyRetriever(retriever DecryptionKeyRetriever) FileDecryptionOption {
return func(cfg *fileDecryptConfig) {
if retriever != nil {
cfg.retriever = retriever
// DisableFooterSignatureVerification skips integrity verification of plaintext footers.
// If not called, integrity of plaintext footers will be checked in runtime, and will panic
// if the footer signing key is not available
// or if the footer content and signature don't match
func DisableFooterSignatureVerification() FileDecryptionOption {
return func(cfg *fileDecryptConfig) {
cfg.checkFooterIntegrity = false
// WithPlaintextAllowed sets allowing plaintext files.
// By default, reading plaintext (unencrypted) files is not allowed when using
// a decryptor.
// In order to detect files that were not encrypted by mistake.
// However the default behavior can be overridden by using this method.
func WithPlaintextAllowed() FileDecryptionOption {
return func(cfg *fileDecryptConfig) {
cfg.plaintextAllowed = true
// WithDecryptAadPrefix explicitly supplies the file aad prefix.
// A must when a prefix is used for file encryption, but not stored in the file.
func WithDecryptAadPrefix(prefix string) FileDecryptionOption {
return func(cfg *fileDecryptConfig) {
if prefix != "" {
cfg.aadPrefix = prefix
// Algorithm describes how something was encrypted, representing the EncryptionAlgorithm object from the
// parquet.thrift file.
type Algorithm struct {
Algo Cipher
Aad struct {
AadPrefix []byte
AadFileUnique []byte
SupplyAadPrefix bool
// ToThrift returns an instance to be used for serializing when writing a file.
func (e Algorithm) ToThrift() *format.EncryptionAlgorithm {
if e.Algo == AesGcm {
return &format.EncryptionAlgorithm{
AES_GCM_V1: &format.AesGcmV1{
AadPrefix: e.Aad.AadPrefix,
AadFileUnique: e.Aad.AadFileUnique,
SupplyAadPrefix: &e.Aad.SupplyAadPrefix,
return &format.EncryptionAlgorithm{
AES_GCM_CTR_V1: &format.AesGcmCtrV1{
AadPrefix: e.Aad.AadPrefix,
AadFileUnique: e.Aad.AadFileUnique,
SupplyAadPrefix: &e.Aad.SupplyAadPrefix,
// AlgorithmFromThrift converts the thrift object to the Algorithm struct for easier usage.
func AlgorithmFromThrift(enc *format.EncryptionAlgorithm) (ret Algorithm) {
if enc.IsSetAES_GCM_V1() {
ret.Algo = AesGcm
ret.Aad.AadFileUnique = enc.AES_GCM_V1.AadFileUnique
ret.Aad.AadPrefix = enc.AES_GCM_V1.AadPrefix
ret.Aad.SupplyAadPrefix = *enc.AES_GCM_V1.SupplyAadPrefix
ret.Algo = AesCtr
ret.Aad.AadFileUnique = enc.AES_GCM_CTR_V1.AadFileUnique
ret.Aad.AadPrefix = enc.AES_GCM_CTR_V1.AadPrefix
ret.Aad.SupplyAadPrefix = *enc.AES_GCM_CTR_V1.SupplyAadPrefix
// FileEncryptionProperties describe how to encrypt a parquet file when writing data.
type FileEncryptionProperties struct {
alg Algorithm
footerKey string
footerKeyMetadata string
encryptedFooter bool
fileAad string
utilized bool
storeAadPrefixInFile bool
aadPrefix string
encryptedCols ColumnPathToEncryptionPropsMap
// EncryptedFooter returns if the footer for this file should be encrypted or left in plaintext.
func (fe *FileEncryptionProperties) EncryptedFooter() bool { return fe.encryptedFooter }
// Algorithm returns the description of how we will perform the encryption, the algorithm, prefixes, and so on.
func (fe *FileEncryptionProperties) Algorithm() Algorithm { return fe.alg }
// FooterKey returns the actual key used to encrypt the footer if it is encrypted, or to encrypt any columns which
// will be encrypted with it rather than their own keys.
func (fe *FileEncryptionProperties) FooterKey() string { return fe.footerKey }
// FooterKeyMetadata is used for retrieving a key from the key retriever in order to set the footer key
func (fe *FileEncryptionProperties) FooterKeyMetadata() string { return fe.footerKeyMetadata }
// FileAad returns the aad identification to be used at the file level which gets concatenated with the row and column
// information for encrypting data.
func (fe *FileEncryptionProperties) FileAad() string { return fe.fileAad }
// IsUtilized returns whether or not this instance has been used to encrypt a file
func (fe *FileEncryptionProperties) IsUtilized() bool { return fe.utilized }
// SetUtilized is called after writing a file. A FileEncryptionProperties object can be used for writing one file only,
// the encryption keys will be wiped out upon completion of writing the file.
func (fe *FileEncryptionProperties) SetUtilized() { fe.utilized = true }
// EncryptedColumns returns the mapping of column paths to column encryption properties
func (fe *FileEncryptionProperties) EncryptedColumns() ColumnPathToEncryptionPropsMap {
return fe.encryptedCols
// ColumnEncryptionProperties returns the properties for encrypting a given column.
// This may be nil for columns that aren't encrypted or may be default properties.
func (fe *FileEncryptionProperties) ColumnEncryptionProperties(path string) *ColumnEncryptionProperties {
if len(fe.encryptedCols) == 0 {
return NewColumnEncryptionProperties(path)
if c, ok := fe.encryptedCols[path]; ok {
return c
return nil
// Clone allows returning an identical property setup for another file with the option to update the aadPrefix,
// (if given the empty string, the current aad prefix will be used) since a single instance can only be used
// to encrypt one file before wiping out the keys.
func (fe *FileEncryptionProperties) Clone(newAadPrefix string) *FileEncryptionProperties {
footerKeyCopy := fe.footerKey
encryptedColsCopy := make(ColumnPathToEncryptionPropsMap)
for k, v := range fe.encryptedCols {
encryptedColsCopy[k] = v.Clone()
if newAadPrefix == "" {
newAadPrefix = fe.aadPrefix
opts := []EncryptOption{
WithAlg(fe.alg.Algo), WithFooterKeyMetadata(fe.footerKeyMetadata),
WithAadPrefix(newAadPrefix), WithEncryptedColumns(encryptedColsCopy),
if !fe.encryptedFooter {
opts = append(opts, WithPlaintextFooter())
if !fe.storeAadPrefixInFile {
opts = append(opts, DisableAadPrefixStorage())
return NewFileEncryptionProperties(footerKeyCopy, opts...)
// WipeOutEncryptionKeys clears all of the encryption keys for this and the columns
func (fe *FileEncryptionProperties) WipeOutEncryptionKeys() {
fe.footerKey = ""
for _, elem := range fe.encryptedCols {
type configEncrypt struct {
cipher Cipher
encryptFooter bool
keyMetadata string
aadprefix string
storeAadPrefixInFile bool
encryptedCols ColumnPathToEncryptionPropsMap
// EncryptOption is used for specifying values when building FileEncryptionProperties
type EncryptOption func(*configEncrypt)
// WithPlaintextFooter sets the writer to write the footer in plain text, otherwise the footer will be encrypted
// too (which is the default behavior).
func WithPlaintextFooter() EncryptOption {
return func(cfg *configEncrypt) {
cfg.encryptFooter = false
// WithAlg sets the encryption algorithm to utilize. (default is AesGcm)
func WithAlg(cipher Cipher) EncryptOption {
return func(cfg *configEncrypt) {
cfg.cipher = cipher
// WithFooterKeyID sets a key retrieval metadata to use (converted from string), this must be a utf8 string.
// use either WithFooterKeyID or WithFooterKeyMetadata, not both.
func WithFooterKeyID(key string) EncryptOption {
if !utf8.ValidString(key) {
panic("parquet: footer key id should be UTF8 encoded")
return WithFooterKeyMetadata(key)
// WithFooterKeyMetadata sets a key retrieval metadata to use for getting the key.
// Use either WithFooterKeyID or WithFooterKeyMetadata, not both.
func WithFooterKeyMetadata(keyMeta string) EncryptOption {
return func(cfg *configEncrypt) {
if keyMeta != "" {
cfg.keyMetadata = keyMeta
// WithAadPrefix sets the AAD prefix to use for encryption and by default will store it in the file
func WithAadPrefix(aadPrefix string) EncryptOption {
return func(cfg *configEncrypt) {
if aadPrefix != "" {
cfg.aadprefix = aadPrefix
cfg.storeAadPrefixInFile = true
// DisableAadPrefixStorage will set the properties to not store the AadPrefix in the file. If this isn't called
// and the AadPrefix is set, then it will be stored. This needs to in the options *after* WithAadPrefix to have an effect.
func DisableAadPrefixStorage() EncryptOption {
return func(cfg *configEncrypt) {
cfg.storeAadPrefixInFile = false
// WithEncryptedColumns sets the map of columns and their properties (keys etc.) If not called, then all columns will
// be encrypted with the footer key. If called, then columns not in the map will be left unencrypted.
func WithEncryptedColumns(encrypted ColumnPathToEncryptionPropsMap) EncryptOption {
none := func(*configEncrypt) {}
if len(encrypted) == 0 {
return none
return func(cfg *configEncrypt) {
if len(cfg.encryptedCols) != 0 {
panic("column properties already set")
for _, v := range encrypted {
if v.IsUtilized() {
panic("column properties utilized in another file")
cfg.encryptedCols = encrypted
// NewFileEncryptionProperties returns a new File Encryption description object using the options provided.
func NewFileEncryptionProperties(footerKey string, opts ...EncryptOption) *FileEncryptionProperties {
var cfg configEncrypt
cfg.cipher = DefaultEncryptionAlgorithm
cfg.encryptFooter = DefaultEncryptedFooter
for _, o := range opts {
props := &FileEncryptionProperties{
footerKey: footerKey,
footerKeyMetadata: cfg.keyMetadata,
encryptedFooter: cfg.encryptFooter,
aadPrefix: cfg.aadprefix,
storeAadPrefixInFile: cfg.storeAadPrefixInFile,
encryptedCols: cfg.encryptedCols,
utilized: false,
aadFileUnique := [AadFileUniqueLength]uint8{}
_, err := rand.Read(aadFileUnique[:])
if err != nil {
supplyAadPrefix := false
if props.aadPrefix == "" {
props.fileAad = string(aadFileUnique[:])
} else {
props.fileAad = props.aadPrefix + string(aadFileUnique[:])
if !props.storeAadPrefixInFile {
supplyAadPrefix = true
props.alg.Algo = cfg.cipher
props.alg.Aad.AadFileUnique = aadFileUnique[:]
props.alg.Aad.SupplyAadPrefix = supplyAadPrefix
if cfg.aadprefix != "" && cfg.storeAadPrefixInFile {
props.alg.Aad.AadPrefix = []byte(props.aadPrefix)
return props