use crate::cmp::Cmp;
use crate::types::SequenceNumber;
use std::cmp::Ordering;
use std::io::Write;
use integer_encoding::{FixedInt, FixedIntWriter, VarInt, VarIntWriter};
// The following typedefs are used to distinguish between the different key formats used internally
// by different modules.
// TODO: At some point, convert those into actual types with conversions between them. That's a lot
// of boilerplate, but increases type safety.
#[derive(Debug, Clone, Copy, PartialOrd, PartialEq)]
pub enum ValueType {
    TypeDeletion = 0,
    TypeValue = 1,
}
/// A MemtableKey consists of the following elements: [keylen, key, tag, (vallen, value)] where
/// keylen is a varint32 encoding the length of key+tag. tag is a fixed 8-byte segment encoding
/// the entry type and the sequence number. vallen and value are optional components at the end.
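///
/// For illustration (a sketch consistent with `build_memtable_key` and the unit tests below),
/// the entry `"abc" -> "123"` with `ValueType::TypeValue` and sequence number 231 is laid out as:
///
/// ```text
/// keylen=11 | key="abc"    | tag=(231<<8)|1 (fixed64, LE)  | vallen=3 | value="123"
/// [11]      | [97, 98, 99] | [1, 231, 0, 0, 0, 0, 0, 0]    | [3]      | [49, 50, 51]
/// ```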
pub type MemtableKey<'a> = &'a [u8];
/// A UserKey is the actual key supplied by the calling application, without any internal
/// decorations.
pub type UserKey<'a> = &'a [u8];
/// An InternalKey consists of [key, tag], so it's basically a MemtableKey without the initial
/// length specification. This type is used as item type of MemtableIterator, and as the key
/// type of tables.
pub type InternalKey<'a> = &'a [u8];
/// A LookupKey is the first part of a memtable key, consisting of [keylen: varint32, key: *u8,
/// tag: u64].
/// keylen is the length of key plus 8 (for the tag; this is for LevelDB compatibility).
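///
/// For illustration (consistent with the unit tests below), `LookupKey::new(b"abcde", 123)`
/// produces a 14-byte key with `key_offset = 1`:
///
/// ```text
/// keylen=13 | key="abcde"            | tag=(123<<8)|TypeValue (fixed64, LE)
/// [13]      | [97, 98, 99, 100, 101] | [1, 123, 0, 0, 0, 0, 0, 0]
/// ```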
#[derive(Clone, Debug)]
pub struct LookupKey {
    key: Vec<u8>,
    key_offset: usize,
}
const U64_SPACE: usize = 8;
impl LookupKey {
    pub fn new(k: UserKey, s: SequenceNumber) -> LookupKey {
        LookupKey::new_full(k, s, ValueType::TypeValue)
    }

    pub fn new_full(k: UserKey, s: SequenceNumber, t: ValueType) -> LookupKey {
        let mut key = Vec::new();
        let internal_keylen = k.len() + U64_SPACE;
        key.resize(k.len() + internal_keylen.required_space() + U64_SPACE, 0);

        {
            let mut writer = key.as_mut_slice();
            writer
                .write_varint(internal_keylen)
                .expect("write to slice failed");
            writer.write_all(k).expect("write to slice failed");
            writer
                .write_fixedint((s << 8) | t as u64)
                .expect("write to slice failed");
        }

        LookupKey {
            key,
            key_offset: internal_keylen.required_space(),
        }
    }

    /// Returns the full memtable-formatted key.
    pub fn memtable_key(&self) -> MemtableKey {
        self.key.as_slice()
    }

    /// Returns only the user key portion.
    pub fn user_key(&self) -> UserKey {
        &self.key[self.key_offset..self.key.len() - 8]
    }

    /// Returns the user key and tag, i.e. the key in InternalKey format.
    pub fn internal_key(&self) -> InternalKey {
        &self.key[self.key_offset..]
    }
}
/// Parses a tag into (type, sequence number)
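/// For illustration: a tag of `(231 << 8) | 1`, as produced for `TypeValue` entries with
/// sequence number 231, decodes to `(ValueType::TypeValue, 231)`.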
pub fn parse_tag(tag: u64) -> (ValueType, u64) {
    let seq = tag >> 8;
    let typ = tag & 0xff;

    match typ {
        0 => (ValueType::TypeDeletion, seq),
        1 => (ValueType::TypeValue, seq),
        _ => (ValueType::TypeValue, seq),
    }
}
/// A memtable key is a bytestring containing (keylen, key, tag, vallen, val). This function
/// builds such a key. It's called key because the underlying Map implementation will only be
/// concerned with keys; the value field is not used (instead, the value is encoded in the key,
/// and for lookups we just search for the next bigger entry).
/// keylen is the length of key + 8 (to account for the tag)
pub fn build_memtable_key(key: &[u8], value: &[u8], t: ValueType, seq: SequenceNumber) -> Vec<u8> {
    // We are using the original LevelDB approach here -- encoding key and value into the
    // key that is used for insertion into the SkipMap.
    // The format is: [key_size: varint32, key_data: [u8], flags: u64, value_size: varint32,
    // value_data: [u8]]
    let keysize = key.len() + U64_SPACE;
    let valsize = value.len();

    let mut buf = Vec::new();
    buf.resize(
        keysize + valsize + keysize.required_space() + valsize.required_space(),
        0,
    );

    {
        let mut writer = buf.as_mut_slice();
        writer.write_varint(keysize).expect("write to slice failed");
        writer.write_all(key).expect("write to slice failed");
        writer
            .write_fixedint((t as u64) | (seq << 8))
            .expect("write to slice failed");
        writer.write_varint(valsize).expect("write to slice failed");
        writer.write_all(value).expect("write to slice failed");
        assert_eq!(writer.len(), 0);
    }
    buf
}
/// Parses a memtable key and returns (keylen, key offset, tag, vallen, val offset).
/// If the key only contains (keylen, key, tag), the vallen and val offset return values will be
/// meaningless.
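///
/// For illustration, parsing the key built for `("abc" -> "123", TypeValue, seq = 231)` in the
/// unit tests below:
///
/// ```text
/// parse_memtable_key(&[11, 97, 98, 99, 1, 231, 0, 0, 0, 0, 0, 0, 3, 49, 50, 51])
///     == (3, 1, (231 << 8) | 1, 3, 13)   // (keylen, keyoff, tag, vallen, valoff)
/// ```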
pub fn parse_memtable_key(mkey: MemtableKey) -> (usize, usize, u64, usize, usize) {
    let (keylen, mut i): (usize, usize) = VarInt::decode_var(&mkey);
    let keyoff = i;
    i += keylen - 8;

    if mkey.len() > i {
        let tag = FixedInt::decode_fixed(&mkey[i..i + 8]);
        i += 8;
        let (vallen, j): (usize, usize) = VarInt::decode_var(&mkey[i..]);
        i += j;
        let valoff = i;
        (keylen - 8, keyoff, tag, vallen, valoff)
    } else {
        (keylen - 8, keyoff, 0, 0, 0)
    }
}
/// cmp_memtable_key efficiently compares two memtable keys by only parsing what's actually needed.
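///
/// For illustration, assuming a byte-wise user-key comparator: two memtable keys holding the same
/// user key but sequence numbers 7 and 5 compare such that the entry with seq 7 orders first;
/// for identical user keys, newer entries sort before older ones.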
pub fn cmp_memtable_key<'a, 'b>(
    ucmp: &dyn Cmp,
    a: MemtableKey<'a>,
    b: MemtableKey<'b>,
) -> Ordering {
    let (alen, aoff): (usize, usize) = VarInt::decode_var(&a);
    let (blen, boff): (usize, usize) = VarInt::decode_var(&b);
    let userkey_a = &a[aoff..aoff + alen - 8];
    let userkey_b = &b[boff..boff + blen - 8];

    match ucmp.cmp(userkey_a, userkey_b) {
        Ordering::Less => Ordering::Less,
        Ordering::Greater => Ordering::Greater,
        Ordering::Equal => {
            let atag = FixedInt::decode_fixed(&a[aoff + alen - 8..aoff + alen]);
            let btag = FixedInt::decode_fixed(&b[boff + blen - 8..boff + blen]);
            let (_, aseq) = parse_tag(atag);
            let (_, bseq) = parse_tag(btag);

            // Reverse comparison: for equal user keys, the entry with the higher
            // sequence number (i.e. the newer one) sorts first.
            bseq.cmp(&aseq)
        }
    }
}
/// Parse a key in InternalKey format.
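///
/// For illustration (using the same tag encoding as above):
/// `parse_internal_key(&[97, 98, 99, 1, 231, 0, 0, 0, 0, 0, 0])` yields
/// `(ValueType::TypeValue, 231, b"abc" as &[u8])`.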
pub fn parse_internal_key(ikey: InternalKey) -> (ValueType, SequenceNumber, UserKey) {
    if ikey.is_empty() {
        return (ValueType::TypeDeletion, 0, &ikey[0..0]);
    }
    assert!(ikey.len() >= 8);

    let (typ, seq) = parse_tag(FixedInt::decode_fixed(&ikey[ikey.len() - 8..]));
    (typ, seq, &ikey[0..ikey.len() - 8])
}
/// cmp_internal_key efficiently compares keys in InternalKey format by only parsing the parts that
/// are actually needed for a comparison.
pub fn cmp_internal_key<'a, 'b>(
    ucmp: &dyn Cmp,
    a: InternalKey<'a>,
    b: InternalKey<'b>,
) -> Ordering {
    match ucmp.cmp(&a[0..a.len() - 8], &b[0..b.len() - 8]) {
        Ordering::Less => Ordering::Less,
        Ordering::Greater => Ordering::Greater,
        Ordering::Equal => {
            let seqa = parse_tag(FixedInt::decode_fixed(&a[a.len() - 8..])).1;
            let seqb = parse_tag(FixedInt::decode_fixed(&b[b.len() - 8..])).1;

            // Reverse comparison, as in cmp_memtable_key: newer entries sort first.
            seqb.cmp(&seqa)
        }
    }
}
/// truncate_to_userkey performs an in-place conversion from InternalKey to UserKey format.
pub fn truncate_to_userkey(ikey: &mut Vec<u8>) {
    let len = ikey.len();
    assert!(len >= 8);
    ikey.truncate(len - 8);
}
#[cfg(feature = "enclave_unit_test")]
pub mod tests {
    use super::*;
    use teaclave_test_utils::*;

    pub fn run_tests() -> bool {
        run_tests!(test_memtable_lookupkey, test_build_memtable_key,)
    }

    fn test_memtable_lookupkey() {
        let lk1 = LookupKey::new("abcde".as_bytes(), 123);
        let lk2 = LookupKey::new("xyabxy".as_bytes(), 97);

        // Assert correct allocation strategy
        assert_eq!(lk1.key.len(), 14);
        assert_eq!(lk1.key.capacity(), 14);

        assert_eq!(lk1.user_key(), "abcde".as_bytes());
        assert_eq!(u32::decode_var(lk1.memtable_key()), (13, 1));
        assert_eq!(
            lk2.internal_key(),
            vec![120, 121, 97, 98, 120, 121, 1, 97, 0, 0, 0, 0, 0, 0].as_slice()
        );
    }

    fn test_build_memtable_key() {
        assert_eq!(
            build_memtable_key(
                "abc".as_bytes(),
                "123".as_bytes(),
                ValueType::TypeValue,
                231
            ),
            vec![11, 97, 98, 99, 1, 231, 0, 0, 0, 0, 0, 0, 3, 49, 50, 51]
        );
        assert_eq!(
            build_memtable_key("".as_bytes(), "123".as_bytes(), ValueType::TypeValue, 231),
            vec![8, 1, 231, 0, 0, 0, 0, 0, 0, 3, 49, 50, 51]
        );
        assert_eq!(
            build_memtable_key(
                "abc".as_bytes(),
                "123".as_bytes(),
                ValueType::TypeDeletion,
                231
            ),
            vec![11, 97, 98, 99, 0, 231, 0, 0, 0, 0, 0, 0, 3, 49, 50, 51]
        );
        assert_eq!(
            build_memtable_key(
                "abc".as_bytes(),
                "".as_bytes(),
                ValueType::TypeDeletion,
                231
            ),
            vec![11, 97, 98, 99, 0, 231, 0, 0, 0, 0, 0, 0, 0]
        );
    }
}