blob: 227325723584ec27047ddcc380f87c74c62c13d9 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package encryption contains the internal helpers for the parquet AES encryption/decryption handling.
//
// Testing for this is done via integration tests in the top-level parquet package, which read and
// write encrypted files with different configurations and match them against the test files in
// parquet-testing.
package encryption
import (
	"bytes"
	"crypto/aes"
	"crypto/cipher"
	"crypto/rand"
	"encoding/binary"
	"io"

	"golang.org/x/xerrors"
	// NOTE(review): the import for the parquet package (parquet.Cipher,
	// parquet.AesGcm) is not visible in this view; keep its existing path here.
)
// Important constants for handling the AES encryption.
const (
	// GcmTagLength is the size in bytes of the GCM authentication tag; it is
	// checked against the AEAD overhead before encrypting.
	GcmTagLength = 16
	// NonceLength is the size in bytes of the nonce that is stored between
	// the length prefix and the encrypted payload.
	NonceLength = 12

	// gcmMode and ctrMode are the values held in the mode field of the
	// encryptor and decryptor.
	gcmMode = 0
	ctrMode = 1
	// ctrIVLen is the size in bytes of a CTR IV: the nonce followed by the
	// 4-byte initial counter.
	ctrIVLen = 16
	// bufferSizeLength is the size in bytes of the little endian length
	// prefix written in front of every encrypted buffer.
	bufferSizeLength = 4
)
// Module constants for constructing the AAD bytes, the order here is
// important as the constants are set via iota: the numeric value of each
// constant is the module type byte of the Parquet modular encryption format.
//
// NOTE(review): only FooterModule, DataPageModule and DataPageHeaderModule are
// referenced in this file; confirm the names of the remaining constants
// against their users elsewhere in the package.
const (
	FooterModule int8 = iota
	ColumnMetaModule
	DataPageModule
	DictPageModule
	DataPageHeaderModule
	DictPageHeaderModule
	ColumnIndexModule
	OffsetIndexModule
)
// aesEncryptor holds the settings used to AES-encrypt a buffer in either GCM
// or CTR mode.
type aesEncryptor struct {
	// mode is either gcmMode or ctrMode.
	mode int
	// ciphertextSizeDelta is the number of bytes added on top of the
	// plaintext: the length prefix, the nonce and, in GCM mode, the tag.
	ciphertextSizeDelta int
}
// NewAesEncryptor constructs an encryptor for the passed in cipher and whether
// or not it's being used to encrypt metadata.
func NewAesEncryptor(alg parquet.Cipher, metadata bool) *aesEncryptor {
ret := &aesEncryptor{}
ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
if metadata || alg == parquet.AesGcm {
ret.mode = gcmMode
ret.ciphertextSizeDelta += GcmTagLength
} else {
ret.mode = ctrMode
return ret
// CiphertextSizeDelta reports how many bytes the encrypted output contains in
// addition to the plaintext it was produced from.
func (a *aesEncryptor) CiphertextSizeDelta() int {
	return a.ciphertextSizeDelta
}
// SignedFooterEncrypt writes the signature for the provided footer bytes using the given key, AAD and nonce.
// It returns the number of bytes that were written to w.
func (a *aesEncryptor) SignedFooterEncrypt(w io.Writer, footer, key, aad, nonce []byte) int {
if a.mode != gcmMode {
panic("must use AES GCM (metadata) encryptor")
block, err := aes.NewCipher(key)
if err != nil {
aead, err := cipher.NewGCM(block)
if err != nil {
if aead.NonceSize() != NonceLength {
panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
if aead.Overhead() != GcmTagLength {
panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
ciphertext := aead.Seal(nil, nonce, footer, aad)
bufferSize := uint32(len(ciphertext) + len(nonce))
// data is written with a prefix of the size written as a little endian 32bit int.
if err := binary.Write(w, binary.LittleEndian, bufferSize); err != nil {
return bufferSizeLength + int(bufferSize)
// Encrypt calculates the ciphertext for src with the given key and aad, then writes it to w.
// Returns the total number of bytes written.
func (a *aesEncryptor) Encrypt(w io.Writer, src, key, aad []byte) int {
block, err := aes.NewCipher(key)
if err != nil {
nonce := make([]byte, NonceLength)
if a.mode == gcmMode {
aead, err := cipher.NewGCM(block)
if err != nil {
if aead.NonceSize() != NonceLength {
panic(xerrors.Errorf("nonce size mismatch %d, %d", aead.NonceSize(), NonceLength))
if aead.Overhead() != GcmTagLength {
panic(xerrors.Errorf("tagsize mismatch %d %d", aead.Overhead(), GcmTagLength))
ciphertext := aead.Seal(nil, nonce, src, aad)
bufferSize := len(ciphertext) + len(nonce)
// data is written with a prefix of the size written as a little endian 32bit int.
if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
return bufferSizeLength + bufferSize
// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
// counter field.
// The first 31 bits of the initial counter field are set to 0, the last bit
// is set to 1.
iv := make([]byte, ctrIVLen)
copy(iv, nonce)
iv[ctrIVLen-1] = 1
bufferSize := NonceLength + len(src)
// data is written with a prefix of the size written as a little endian 32bit int.
if err := binary.Write(w, binary.LittleEndian, uint32(bufferSize)); err != nil {
cipher.StreamWriter{S: cipher.NewCTR(block, iv), W: w}.Write(src)
return bufferSizeLength + bufferSize
// aesDecryptor holds the settings used to decrypt an AES encrypted buffer in
// either GCM or CTR mode.
type aesDecryptor struct {
	// mode is either gcmMode or ctrMode.
	mode int
	// ciphertextSizeDelta is the number of bytes of an encrypted buffer that
	// are not plaintext: the length prefix, the nonce and, in GCM mode, the tag.
	ciphertextSizeDelta int
}
// newAesDecryptor constructs and returns a decryptor for the given cipher type and whether or
// not it is intended to be used for decrypting metadata.
func newAesDecryptor(alg parquet.Cipher, metadata bool) *aesDecryptor {
ret := &aesDecryptor{}
ret.ciphertextSizeDelta = bufferSizeLength + NonceLength
if metadata || alg == parquet.AesGcm {
ret.mode = gcmMode
ret.ciphertextSizeDelta += GcmTagLength
} else {
ret.mode = ctrMode
return ret
// CiphertextSizeDelta reports the number of bytes in the ciphertext that are
// not part of the plaintext. Subtracting it from the total encrypted size
// gives the length of the plaintext after decryption.
func (a *aesDecryptor) CiphertextSizeDelta() int {
	return a.ciphertextSizeDelta
}
// Decrypt returns the plaintext version of the given ciphertext when decrypted
// with the provided key and AAD security bytes.
func (a *aesDecryptor) Decrypt(cipherText, key, aad []byte) []byte {
block, err := aes.NewCipher(key)
if err != nil {
writtenCiphertextLen := binary.LittleEndian.Uint32(cipherText)
cipherLen := writtenCiphertextLen + bufferSizeLength
nonce := cipherText[bufferSizeLength : bufferSizeLength+NonceLength]
if a.mode == gcmMode {
aead, err := cipher.NewGCM(block)
if err != nil {
plain, err := aead.Open(nil, nonce, cipherText[bufferSizeLength+NonceLength:cipherLen], aad)
if err != nil {
return plain
// Parquet CTR IVs are comprised of a 12-byte nonce and a 4-byte initial
// counter field.
// The first 31 bits of the initial counter field are set to 0, the last bit
// is set to 1.
iv := make([]byte, ctrIVLen)
copy(iv, nonce)
iv[ctrIVLen-1] = 1
stream := cipher.NewCTR(block, iv)
dst := make([]byte, len(cipherText)-bufferSizeLength-NonceLength)
stream.XORKeyStream(dst, cipherText[bufferSizeLength+NonceLength:])
return dst
// CreateModuleAad creates the section AAD security bytes for the file, module, row group, column and page.
// This should be used for being passed to the encryptor and decryptor whenever requesting AAD bytes.
func CreateModuleAad(fileAad string, moduleType int8, rowGroupOrdinal, columnOrdinal, pageOrdinal int16) string {
buf := bytes.NewBuffer([]byte(fileAad))
if moduleType == FooterModule {
return buf.String()
binary.Write(buf, binary.LittleEndian, rowGroupOrdinal)
binary.Write(buf, binary.LittleEndian, columnOrdinal)
if DataPageModule != moduleType && DataPageHeaderModule != moduleType {
return buf.String()
binary.Write(buf, binary.LittleEndian, pageOrdinal)
return buf.String()
// CreateFooterAad takes an aadPrefix and constructs the security AAD bytes for encrypting
// and decrypting the parquet footer bytes.
func CreateFooterAad(aadPrefix string) string {
return CreateModuleAad(aadPrefix, FooterModule, -1, -1, -1)
// QuickUpdatePageAad updates aad with the new page ordinal, modifying the
// last two bytes of aad in place (little endian).
//
// It panics if aad is shorter than two bytes, since there is then no page
// ordinal to overwrite.
func QuickUpdatePageAad(aad []byte, newPageOrdinal int16) {
	if len(aad) < 2 {
		panic("aad is too short to hold a page ordinal")
	}
	binary.LittleEndian.PutUint16(aad[len(aad)-2:], uint16(newPageOrdinal))
}