// blob: de8b263394ee0cd4bb06c12c84e2c4d40686c2b8 [file] [log] [blame]
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use buffer::{BufferResult, RefReadBuffer, RefWriteBuffer};
use symmetriccipher::{Encryptor, Decryptor, SynchronousStreamCipher, SymmetricCipherError};
use cryptoutil::{read_u32_le, symm_enc_or_dec, write_u32_le, xor_keystream};
use simd::u32x4;
use std::cmp;
/// The sixteen 32-bit Salsa20 state words, packed into four SIMD vectors.
///
/// The words are not stored in natural matrix order; the lane assignment
/// below is the one `Salsa20::expand` builds and `Salsa20::hash` unpacks.
#[derive(Copy, Clone)]
struct SalsaState {
    /// Lanes hold (x4, x9, x14, x3).
    a: u32x4,
    /// Lanes hold (x8, x13, x2, x7).
    b: u32x4,
    /// Lanes hold (x12, x1, x6, x11).
    c: u32x4,
    /// Lanes hold (x0, x5, x10, x15), i.e. the four constant words.
    d: u32x4,
}
/// Salsa20 stream cipher state (also used for XSalsa20, see `new_xsalsa20`).
///
/// Keystream is produced one 64-byte block at a time into `output` and
/// consumed from there by `process`.
#[derive(Copy)]
pub struct Salsa20 {
/// Input words for the next keystream block (constants, key, nonce and
/// block counter).
state: SalsaState,
/// The most recently generated 64-byte keystream block.
output: [u8; 64],
/// Index of the next unused keystream byte in `output`; the value 64 means
/// the block is exhausted and a new one must be generated before use.
offset: usize,
}
// `Clone` is written by hand rather than derived; it simply forwards to the
// `Copy` implementation.
impl Clone for Salsa20 {
    /// Returns a copy of the cipher, including its current keystream position.
    fn clone(&self) -> Salsa20 {
        *self
    }
}
/// Lane-wise shift amount 7 (first step of each quarter-round).
const S7: u32x4 = u32x4(7, 7, 7, 7);
/// Lane-wise shift amount 9 (second step of each quarter-round).
const S9: u32x4 = u32x4(9, 9, 9, 9);
/// Lane-wise shift amount 13 (third step of each quarter-round).
const S13: u32x4 = u32x4(13, 13, 13, 13);
/// Lane-wise shift amount 18 (fourth step of each quarter-round).
const S18: u32x4 = u32x4(18, 18, 18, 18);
/// The word width; `S32 - shift` gives the complementary right-shift amount
/// used by `add_rotate_xor!`.
const S32: u32x4 = u32x4(32, 32, 32, 32);
// Permutes the lanes of the three given vectors in place so that `rowround`
// can run on whole vectors after `columnround`. `prepare_columnround!` applies
// the inverse permutation.
macro_rules! prepare_rowround {
    ($a: expr, $b: expr, $c: expr) => {{
        // First vector: every lane moves one position up, the last wraps
        // around to the front.
        let u32x4(a0, a1, a2, a3) = $a;
        $a = u32x4(a3, a0, a1, a2);
        // Second vector: the two halves trade places.
        let u32x4(b0, b1, b2, b3) = $b;
        $b = u32x4(b2, b3, b0, b1);
        // Third vector: every lane moves one position down, the first wraps
        // around to the back.
        let u32x4(c0, c1, c2, c3) = $c;
        $c = u32x4(c1, c2, c3, c0);
    }}
}
// Undoes the lane permutation applied by `prepare_rowround!`, restoring the
// layout that `columnround` expects. Operates in place on the three vectors.
macro_rules! prepare_columnround {
    ($a: expr, $b: expr, $c: expr) => {{
        // First vector: every lane moves one position down, the first wraps
        // around to the back.
        let u32x4(a0, a1, a2, a3) = $a;
        $a = u32x4(a1, a2, a3, a0);
        // Second vector: the two halves trade places (self-inverse).
        let u32x4(b0, b1, b2, b3) = $b;
        $b = u32x4(b2, b3, b0, b1);
        // Third vector: every lane moves one position up, the last wraps
        // around to the front.
        let u32x4(c0, c1, c2, c3) = $c;
        $c = u32x4(c3, c0, c1, c2);
    }}
}
// One quarter-round step on whole vectors:
//
//     dst ^= ($a + $b) rotated left by $shift bits, lane by lane
//
// The rotation is assembled from a left shift and the complementary right
// shift (`S32 - $shift`), combined with XOR.
macro_rules! add_rotate_xor {
    ($dst: expr, $a: expr, $b: expr, $shift: expr) => {{
        let sum = $a + $b;
        let rotated = (sum << $shift) ^ (sum >> (S32 - $shift));
        $dst = $dst ^ rotated;
    }}
}
/// Applies the four quarter-round steps to `state` in its column layout.
///
/// Each step feeds the result of the previous one, so the order of the four
/// invocations is significant:
///
/// * `a ^= rotl(d + c, 7)`
/// * `b ^= rotl(a + d, 9)`
/// * `c ^= rotl(b + a, 13)`
/// * `d ^= rotl(c + b, 18)`
fn columnround(state: &mut SalsaState) {
    add_rotate_xor!(state.a, state.d, state.c, S7);
    add_rotate_xor!(state.b, state.a, state.d, S9);
    add_rotate_xor!(state.c, state.b, state.a, S13);
    add_rotate_xor!(state.d, state.c, state.b, S18);
}
/// Applies the four quarter-round steps to `state` after its lanes have been
/// permuted by `prepare_rowround!`.
///
/// Compared with `columnround`, the roles of `a` and `c` are exchanged. Each
/// step feeds the result of the previous one, so the order is significant:
///
/// * `c ^= rotl(d + a, 7)`
/// * `b ^= rotl(c + d, 9)`
/// * `a ^= rotl(c + b, 13)`
/// * `d ^= rotl(a + b, 18)`
fn rowround(state: &mut SalsaState) {
    add_rotate_xor!(state.c, state.d, state.a, S7);
    add_rotate_xor!(state.b, state.c, state.d, S9);
    add_rotate_xor!(state.a, state.c, state.b, S13);
    add_rotate_xor!(state.d, state.a, state.b, S18);
}
impl Salsa20 {
    /// Creates a Salsa20 cipher from a 16- or 32-byte `key` and an 8-byte
    /// `nonce`, positioned at the start of the keystream.
    ///
    /// # Panics
    ///
    /// Panics if `key` is not 16 or 32 bytes long, or if `nonce` is not
    /// 8 bytes long.
    pub fn new(key: &[u8], nonce: &[u8]) -> Salsa20 {
        assert!(key.len() == 16 || key.len() == 32);
        assert!(nonce.len() == 8);
        // `offset == 64` marks the keystream buffer as exhausted, so the first
        // call to `process` generates a block before emitting anything.
        Salsa20 { state: Salsa20::expand(key, nonce), output: [0; 64], offset: 64 }
    }

    /// Creates an XSalsa20 cipher from a 32-byte `key` and a 24-byte `nonce`.
    ///
    /// The first 16 nonce bytes are combined with `key` through
    /// `hsalsa20_hash` to derive a 32-byte subkey; the returned cipher is a
    /// regular Salsa20 instance keyed with that subkey and using the last
    /// 8 nonce bytes as its nonce.
    ///
    /// # Panics
    ///
    /// Panics if `key` is not 32 bytes long or `nonce` is not 24 bytes long.
    pub fn new_xsalsa20(key: &[u8], nonce: &[u8]) -> Salsa20 {
        assert!(key.len() == 32);
        assert!(nonce.len() == 24);
        let mut xsalsa20 = Salsa20 { state: Salsa20::expand(key, &nonce[0..16]), output: [0; 64], offset: 64 };
        let mut new_key = [0; 32];
        xsalsa20.hsalsa20_hash(&mut new_key);
        xsalsa20.state = Salsa20::expand(&new_key, &nonce[16..24]);
        xsalsa20
    }

    /// Builds the initial state from `key` (16 or 32 bytes) and `nonce`.
    ///
    /// With a 16-byte nonce (the HSalsa20 case) nonce bytes 8..16 fill the
    /// words x8 and x9; for any other nonce length those two words, which
    /// serve as the block counter, start at zero and only nonce bytes 0..8
    /// are read.
    fn expand(key: &[u8], nonce: &[u8]) -> SalsaState {
        let constant = match key.len() {
            16 => b"expand 16-byte k",
            32 => b"expand 32-byte k",
            _ => unreachable!(),
        };

        // The state vectors are laid out to facilitate SIMD operation,
        // instead of the natural matrix ordering.
        //
        // * Constant (x0, x5, x10, x15)
        // * Key (x1, x2, x3, x4, x11, x12, x13, x14)
        // * Input (x6, x7, x8, x9)

        // (x11, x12, x13, x14): a 16-byte key is simply used twice.
        let key_tail = if key.len() == 16 { key } else { &key[16..32] };

        // (x8, x9)
        let (x8, x9) = if nonce.len() == 16 {
            // HSalsa uses the full 16 byte nonce.
            (read_u32_le(&nonce[8..12]), read_u32_le(&nonce[12..16]))
        } else {
            (0, 0)
        };

        SalsaState {
            a: u32x4(
                read_u32_le(&key[12..16]),      // x4
                x9,                             // x9
                read_u32_le(&key_tail[12..16]), // x14
                read_u32_le(&key[8..12]),       // x3
            ),
            b: u32x4(
                x8,                             // x8
                read_u32_le(&key_tail[8..12]),  // x13
                read_u32_le(&key[4..8]),        // x2
                read_u32_le(&nonce[4..8])       // x7
            ),
            c: u32x4(
                read_u32_le(&key_tail[4..8]),   // x12
                read_u32_le(&key[0..4]),        // x1
                read_u32_le(&nonce[0..4]),      // x6
                read_u32_le(&key_tail[0..4])    // x11
            ),
            d: u32x4(
                read_u32_le(&constant[0..4]),   // x0
                read_u32_le(&constant[4..8]),   // x5
                read_u32_le(&constant[8..12]),  // x10
                read_u32_le(&constant[12..16]), // x15
            )
        }
    }

    /// Generates the next 64-byte keystream block into `self.output`,
    /// advances the 64-bit block counter (x8 low word, x9 high word) and
    /// resets `self.offset` to 0.
    fn hash(&mut self) {
        // Ten double rounds (column round + row round) on a working copy.
        let mut state = self.state;
        for _ in 0..10 {
            columnround(&mut state);
            prepare_rowround!(state.a, state.b, state.c);
            rowround(&mut state);
            prepare_columnround!(state.a, state.b, state.c);
        }

        // Feed-forward: add the input words to the permuted words, then
        // unpack the SIMD lanes back into natural word order.
        let u32x4(x4, x9, x14, x3) = self.state.a + state.a;
        let u32x4(x8, x13, x2, x7) = self.state.b + state.b;
        let u32x4(x12, x1, x6, x11) = self.state.c + state.c;
        let u32x4(x0, x5, x10, x15) = self.state.d + state.d;
        let words = [
             x0,  x1,  x2,  x3,
             x4,  x5,  x6,  x7,
             x8,  x9, x10, x11,
            x12, x13, x14, x15
        ];
        for (i, &word) in words.iter().enumerate() {
            write_u32_le(&mut self.output[i*4..(i+1)*4], word);
        }

        // Increment the block counter. The low word x8 lives in lane 0 of
        // `b`, so that is the lane that must be tested for wrap-around before
        // carrying into the high word x9 (lane 1 of `a`). Lane 3 of `b` is
        // x7, a nonce word, and must not take part in the carry decision.
        // This relies on lane-wise addition wrapping at 2^32.
        self.state.b = self.state.b + u32x4(1, 0, 0, 0);
        let u32x4(ctr_lo, _, _, _) = self.state.b;
        if ctr_lo == 0 {
            self.state.a = self.state.a + u32x4(0, 1, 0, 0);
        }

        self.offset = 0;
    }

    /// Runs the ten double rounds on the current state and writes eight of
    /// the resulting words, little-endian and without feed-forward, to
    /// `out[0..32]` in the order x0, x5, x10, x15, x6, x7, x8, x9.
    ///
    /// `self` is not modified. Panics (via slice indexing) if `out` is
    /// shorter than 32 bytes; bytes beyond the first 32 are left untouched.
    fn hsalsa20_hash(&mut self, out: &mut [u8]) {
        let mut state = self.state;
        for _ in 0..10 {
            columnround(&mut state);
            prepare_rowround!(state.a, state.b, state.c);
            rowround(&mut state);
            prepare_columnround!(state.a, state.b, state.c);
        }

        let u32x4(_, x9, _, _) = state.a;
        let u32x4(x8, _, _, x7) = state.b;
        let u32x4(_, _, x6, _) = state.c;
        let u32x4(x0, x5, x10, x15) = state.d;
        let words = [
            x0, x5, x10, x15,
            x6, x7, x8, x9
        ];
        for (i, &word) in words.iter().enumerate() {
            write_u32_le(&mut out[i*4..(i+1)*4], word);
        }
    }
}
impl SynchronousStreamCipher for Salsa20 {
    /// XORs `input` with the next `input.len()` keystream bytes and stores
    /// the result in `output`.
    ///
    /// Unused keystream from the current block is kept for the next call, so
    /// splitting data across several calls yields the same result as one call.
    ///
    /// # Panics
    ///
    /// Panics if `input` and `output` differ in length.
    fn process(&mut self, input: &[u8], output: &mut [u8]) {
        assert!(input.len() == output.len());
        let total = input.len();
        let mut done = 0;
        while done < total {
            // Refill the keystream buffer once it has been fully consumed.
            if self.offset == 64 {
                self.hash();
            }

            // Take as much as both the buffered keystream and the remaining
            // input allow.
            let available = 64 - self.offset;
            let remaining = total - done;
            let take = cmp::min(available, remaining);
            let end = done + take;

            xor_keystream(&mut output[done..end], &input[done..end], &self.output[self.offset..]);

            done = end;
            self.offset += take;
        }
    }
}
impl Encryptor for Salsa20 {
    /// Encrypts from `input` into `output` by delegating to
    /// `symm_enc_or_dec`. The trailing `bool` argument is ignored.
    fn encrypt(
        &mut self,
        input: &mut RefReadBuffer,
        output: &mut RefWriteBuffer,
        _: bool
    ) -> Result<BufferResult, SymmetricCipherError> {
        symm_enc_or_dec(self, input, output)
    }
}
impl Decryptor for Salsa20 {
    /// Decrypts from `input` into `output` by delegating to
    /// `symm_enc_or_dec`, exactly as `encrypt` does. The trailing `bool`
    /// argument is ignored.
    fn decrypt(
        &mut self,
        input: &mut RefReadBuffer,
        output: &mut RefWriteBuffer,
        _: bool
    ) -> Result<BufferResult, SymmetricCipherError> {
        symm_enc_or_dec(self, input, output)
    }
}
/// Computes HSalsa20 of a 32-byte `key` and a 16-byte `nonce`, writing the
/// 32-byte result to `out[0..32]`.
///
/// # Panics
///
/// Panics if `key` is not 32 bytes long or `nonce` is not 16 bytes long.
/// Slice indexing in `hsalsa20_hash` also panics if `out` is shorter than
/// 32 bytes.
pub fn hsalsa20(key: &[u8], nonce: &[u8], out: &mut [u8]) {
    assert!(key.len() == 32);
    assert!(nonce.len() == 16);
    let initial = Salsa20::expand(key, nonce);
    // The keystream buffer fields are unused here; only `state` matters.
    let mut hasher = Salsa20 { state: initial, output: [0; 64], offset: 64 };
    hasher.hsalsa20_hash(out);
}
// Known-answer tests for Salsa20 and XSalsa20. Every test encrypts an
// all-zero input, so the bytes in `stream` are the raw keystream.
#[cfg(test)]
mod test {
use std::iter::repeat;
use salsa20::Salsa20;
use symmetriccipher::SynchronousStreamCipher;
use digest::Digest;
use sha2::Sha256;
// 128-bit key, zero nonce: checks the first 64 keystream bytes (one block).
// Per the test name this is presumably ECRYPT test set 1, vector 0.
#[test]
fn test_salsa20_128bit_ecrypt_set_1_vector_0() {
let key = [128u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let nonce = [0u8; 8];
let input = [0u8; 64];
let mut stream = [0u8; 64];
let result =
[0x4D, 0xFA, 0x5E, 0x48, 0x1D, 0xA2, 0x3E, 0xA0,
0x9A, 0x31, 0x02, 0x20, 0x50, 0x85, 0x99, 0x36,
0xDA, 0x52, 0xFC, 0xEE, 0x21, 0x80, 0x05, 0x16,
0x4F, 0x26, 0x7C, 0xB6, 0x5F, 0x5C, 0xFD, 0x7F,
0x2B, 0x4F, 0x97, 0xE0, 0xFF, 0x16, 0x92, 0x4A,
0x52, 0xDF, 0x26, 0x95, 0x15, 0x11, 0x0A, 0x07,
0xF9, 0xE4, 0x60, 0xBC, 0x65, 0xEF, 0x95, 0xDA,
0x58, 0xF7, 0x40, 0xB7, 0xD1, 0xDB, 0xB0, 0xAA];
let mut salsa20 = Salsa20::new(&key, &nonce);
salsa20.process(&input, &mut stream);
assert!(stream[..] == result[..]);
}
// 256-bit key, zero nonce: checks the first 64 keystream bytes (one block).
// Per the test name this is presumably ECRYPT test set 1, vector 0.
#[test]
fn test_salsa20_256bit_ecrypt_set_1_vector_0() {
let key =
[128u8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
let nonce = [0u8; 8];
let input = [0u8; 64];
let mut stream = [0u8; 64];
let result =
[0xE3, 0xBE, 0x8F, 0xDD, 0x8B, 0xEC, 0xA2, 0xE3,
0xEA, 0x8E, 0xF9, 0x47, 0x5B, 0x29, 0xA6, 0xE7,
0x00, 0x39, 0x51, 0xE1, 0x09, 0x7A, 0x5C, 0x38,
0xD2, 0x3B, 0x7A, 0x5F, 0xAD, 0x9F, 0x68, 0x44,
0xB2, 0x2C, 0x97, 0x55, 0x9E, 0x27, 0x23, 0xC7,
0xCB, 0xBD, 0x3F, 0xE4, 0xFC, 0x8D, 0x9A, 0x07,
0x44, 0x65, 0x2A, 0x83, 0xE7, 0x2A, 0x9C, 0x46,
0x18, 0x76, 0xAF, 0x4D, 0x7E, 0xF1, 0xA1, 0x17];
let mut salsa20 = Salsa20::new(&key, &nonce);
salsa20.process(&input, &mut stream);
assert!(stream[..] == result[..]);
}
// Long-stream test: generates 4194304 keystream bytes (65536 blocks) and
// compares the SHA-256 hex digest of the whole stream with a known value.
// This exercises the block counter across many increments.
#[test]
fn test_salsa20_256bit_nacl_vector_2() {
let key = [
0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9,
0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88,
0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9,
0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4
];
let nonce = [
0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
];
let input: Vec<u8> = repeat(0).take(4194304).collect();
let mut stream: Vec<u8> = repeat(0).take(input.len()).collect();
let output_str = "662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2";
let mut salsa20 = Salsa20::new(&key, &nonce);
salsa20.process(input.as_ref(), &mut stream);
let mut sh = Sha256::new();
sh.input(stream.as_ref());
let out_str = sh.result_str();
assert!(&out_str[..] == output_str);
}
// XSalsa20 with a 24-byte nonce: checks 139 keystream bytes, i.e. two full
// blocks plus a partial third block.
#[test]
fn test_xsalsa20_cryptopp() {
let key =
[0x1b, 0x27, 0x55, 0x64, 0x73, 0xe9, 0x85, 0xd4,
0x62, 0xcd, 0x51, 0x19, 0x7a, 0x9a, 0x46, 0xc7,
0x60, 0x09, 0x54, 0x9e, 0xac, 0x64, 0x74, 0xf2,
0x06, 0xc4, 0xee, 0x08, 0x44, 0xf6, 0x83, 0x89];
let nonce =
[0x69, 0x69, 0x6e, 0xe9, 0x55, 0xb6, 0x2b, 0x73,
0xcd, 0x62, 0xbd, 0xa8, 0x75, 0xfc, 0x73, 0xd6,
0x82, 0x19, 0xe0, 0x03, 0x6b, 0x7a, 0x0b, 0x37];
let input = [0u8; 139];
let mut stream = [0u8; 139];
let result =
[0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91,
0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25,
0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65,
0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80,
0x30, 0x9e, 0x64, 0x5a, 0x74, 0xe9, 0xe0, 0xa6,
0x0d, 0x82, 0x43, 0xac, 0xd9, 0x17, 0x7a, 0xb5,
0x1a, 0x1b, 0xeb, 0x8d, 0x5a, 0x2f, 0x5d, 0x70,
0x0c, 0x09, 0x3c, 0x5e, 0x55, 0x85, 0x57, 0x96,
0x25, 0x33, 0x7b, 0xd3, 0xab, 0x61, 0x9d, 0x61,
0x57, 0x60, 0xd8, 0xc5, 0xb2, 0x24, 0xa8, 0x5b,
0x1d, 0x0e, 0xfe, 0x0e, 0xb8, 0xa7, 0xee, 0x16,
0x3a, 0xbb, 0x03, 0x76, 0x52, 0x9f, 0xcc, 0x09,
0xba, 0xb5, 0x06, 0xc6, 0x18, 0xe1, 0x3c, 0xe7,
0x77, 0xd8, 0x2c, 0x3a, 0xe9, 0xd1, 0xa6, 0xf9,
0x72, 0xd4, 0x16, 0x02, 0x87, 0xcb, 0xfe, 0x60,
0xbf, 0x21, 0x30, 0xfc, 0x0a, 0x6f, 0xf6, 0x04,
0x9d, 0x0a, 0x5c, 0x8a, 0x82, 0xf4, 0x29, 0x23,
0x1f, 0x00, 0x80];
let mut xsalsa20 = Salsa20::new_xsalsa20(&key, &nonce);
xsalsa20.process(&input, &mut stream);
assert!(stream[..] == result[..]);
}
}
// Throughput benchmarks for `Salsa20::process` at three buffer sizes. The
// cipher is created once per benchmark, so the keystream position carries over
// from one iteration to the next.
#[cfg(all(test, feature = "with-bench"))]
mod bench {
    use test::Bencher;
    use symmetriccipher::SynchronousStreamCipher;
    use salsa20::Salsa20;

    /// Processes a 10-byte buffer per iteration.
    #[bench]
    pub fn salsa20_10(bh: & mut Bencher) {
        let src = [1u8; 10];
        let mut dst = [0u8; 10];
        let mut cipher = Salsa20::new(&[0; 32], &[0; 8]);
        bh.iter(|| cipher.process(&src, &mut dst));
        bh.bytes = src.len() as u64;
    }

    /// Processes a 1024-byte buffer per iteration.
    #[bench]
    pub fn salsa20_1k(bh: & mut Bencher) {
        let src = [1u8; 1024];
        let mut dst = [0u8; 1024];
        let mut cipher = Salsa20::new(&[0; 32], &[0; 8]);
        bh.iter(|| cipher.process(&src, &mut dst));
        bh.bytes = src.len() as u64;
    }

    /// Processes a 65536-byte buffer per iteration.
    #[bench]
    pub fn salsa20_64k(bh: & mut Bencher) {
        let src = [1u8; 65536];
        let mut dst = [0u8; 65536];
        let mut cipher = Salsa20::new(&[0; 32], &[0; 8]);
        bh.iter(|| cipher.process(&src, &mut dst));
        bh.bytes = src.len() as u64;
    }
}