use std::{ ptr, mem, str, slice, fmt };
use std::ops::{ Index, IndexMut, Deref };
use std::string::String;
use std::vec::Vec;
use codegen::{ DumpGenerator, Generator, PrettyGenerator };
use value::JsonValue;
const KEY_BUF_LEN: usize = 32;
static NULL: JsonValue = JsonValue::Null;
// FNV-1a implementation
//
// While the `Object` is implemented as a binary tree, not a hash table, the
// order in which the tree is balanced makes absolutely no difference as long
// as there is a deterministic left / right ordering with good spread.
// Comparing a hashed `u64` is faster than comparing `&str` or even `&[u8]`;
// for larger objects this yields non-trivial performance benefits.
//
// Additionally this "randomizes" the keys a bit. Should the keys in an object
// be inserted in alphabetical order (an example of such a use case would be
// using an object as a store for entries by ids, where ids are sorted), this
// will prevent the tree from being constructed in a way where the same branch
// of each node is always used, effectively producing linear lookup times. Bad!
//
// Example:
//
// ```
// println!("{}", hash_key(b"10000056"));
// println!("{}", hash_key(b"10000057"));
// println!("{}", hash_key(b"10000058"));
// println!("{}", hash_key(b"10000059"));
// ```
//
// Produces:
//
// ```
// 15043794053238616431 <-- 2nd
// 15043792953726988220 <-- 1st
// 15043800650308385697 <-- 4th
// 15043799550796757486 <-- 3rd
// ```
#[inline]
fn hash_key(key: &[u8]) -> u64 {
// FNV-1a 64-bit offset basis
let mut hash: u64 = 0xcbf29ce484222325;
for byte in key {
hash ^= *byte as u64;
// Multiply by the 64-bit FNV prime, wrapping on overflow
hash = hash.wrapping_mul(0x100000001b3);
}
hash
}
struct Key {
// Internal buffer to store keys that fit within `KEY_BUF_LEN`;
// for longer keys this buffer is unused and the key lives on the heap.
pub buf: [u8; KEY_BUF_LEN],
// Length of the key in bytes.
pub len: usize,
// Cached raw pointer to the key, so that we can cheaply construct
// a `&str` slice from the `Node` without checking whether the key is
// allocated separately on the heap or stored inline in `buf`.
pub ptr: *mut u8,
// A hash of the key; see the FNV-1a explanation above `hash_key`.
pub hash: u64,
}
impl Key {
#[inline]
fn new(hash: u64, len: usize) -> Self {
Key {
buf: [0; KEY_BUF_LEN],
len: len,
ptr: ptr::null_mut(),
hash: hash
}
}
#[inline]
fn as_bytes(&self) -> &[u8] {
unsafe {
slice::from_raw_parts(self.ptr, self.len)
}
}
#[inline]
fn as_str(&self) -> &str {
unsafe {
str::from_utf8_unchecked(self.as_bytes())
}
}
// The `buf` on the `Key` can only be filled once the struct is in its
// final location on the `Vec`'s heap (along with the rest of the `Node`),
// since `ptr` has to point at that location. For that reason the key bytes
// are not copied in `Key::new`, but only after the `Node` has been created
// and allocated.
#[inline]
fn attach(&mut self, key: &[u8]) {
if self.len <= KEY_BUF_LEN {
unsafe {
ptr::copy_nonoverlapping(
key.as_ptr(),
self.buf.as_mut_ptr(),
self.len
);
}
self.ptr = self.buf.as_mut_ptr();
} else {
let mut heap = key.to_vec();
self.ptr = heap.as_mut_ptr();
mem::forget(heap);
}
}
// Since we store `Node`s in a vector, it can reallocate. Whenever that
// happens, `key.ptr` for short keys turns into a dangling pointer and
// needs to be re-cached.
#[inline]
fn fix_ptr(&mut self) {
if self.len <= KEY_BUF_LEN {
self.ptr = self.buf.as_mut_ptr();
}
}
}
// Implement `Sync` and `Send` for `Key` despite the use of raw pointers. The struct
// itself should be memory safe.
unsafe impl Sync for Key {}
unsafe impl Send for Key {}
// Because long keys _can_ be stored separately from the `Key` on heap,
// it's essential to clean up the heap allocation when the `Key` is dropped.
impl Drop for Key {
fn drop(&mut self) {
unsafe {
if self.len > KEY_BUF_LEN {
// Construct a `Vec` from `ptr`. Since the key was allocated with
// `to_vec` from a slice, the capacity is equal to the length.
let heap = Vec::from_raw_parts(
self.ptr,
self.len,
self.len
);
// Now that we have an owned `Vec<u8>`, drop it.
drop(heap);
}
}
}
}
// Just like with `Drop`, `Clone` needs a custom implementation that accounts
// for the fact that the key _can_ be separately heap allocated.
impl Clone for Key {
fn clone(&self) -> Self {
if self.len > KEY_BUF_LEN {
let mut heap = self.as_bytes().to_vec();
let ptr = heap.as_mut_ptr();
mem::forget(heap);
Key {
buf: [0; KEY_BUF_LEN],
len: self.len,
ptr: ptr,
hash: self.hash,
}
} else {
Key {
buf: self.buf,
len: self.len,
ptr: ptr::null_mut(), // requires a `fix_ptr` call after `Node` is on the heap
hash: self.hash,
}
}
}
}
#[derive(Clone)]
struct Node {
// String-esque key abstraction
pub key: Key,
// Value stored.
pub value: JsonValue,
// Vector index of the `Node` whose key hash is smaller than this `Node`'s.
// Defaults to 0; that is unambiguous because the root node can never be
// referenced by any other node.
pub left: usize,
// Same as above, but for `Node`s with a hash larger than this one. If the
// hashes are equal but the keys differ, the lookup defaults to the right
// branch as well.
pub right: usize,
}
impl fmt::Debug for Node {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fmt::Debug::fmt(&(self.key.as_str(), &self.value, self.left, self.right), f)
}
}
impl PartialEq for Node {
fn eq(&self, other: &Node) -> bool {
self.key.hash == other.key.hash &&
self.key.as_bytes() == other.key.as_bytes() &&
self.value == other.value
}
}
impl Node {
#[inline]
fn new(value: JsonValue, hash: u64, len: usize) -> Node {
Node {
key: Key::new(hash, len),
value: value,
left: 0,
right: 0,
}
}
}
/// A binary tree implementation of a string -> `JsonValue` map. You normally
/// don't have to interact with instances of `Object` directly; you are much
/// more likely to use the `JsonValue::Object` variant, which wraps around this
/// struct.
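///
/// A minimal usage sketch (assuming `Object` is reachable at
/// `json::object::Object`, as on docs.rs):
///
/// ```
/// # use json::object::Object;
/// let mut object = Object::new();
///
/// object.insert("foo", "bar".into());
///
/// assert!(object["foo"] == "bar");
/// assert_eq!(object.len(), 1);
/// ```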
#[derive(Debug)]
pub struct Object {
store: Vec<Node>
}
impl Object {
/// Create a new, empty instance of `Object`. An empty `Object` performs no
/// allocation until a value is inserted into it.
#[inline(always)]
pub fn new() -> Self {
Object {
store: Vec::new()
}
}
/// Create a new `Object` with memory preallocated for `capacity` entries.
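///
/// A short sketch (path assumed to be `json::object::Object`); preallocation
/// is only an optimization and doesn't change behavior:
///
/// ```
/// # use json::object::Object;
/// let mut object = Object::with_capacity(10);
///
/// object.insert("answer", 42.into());
///
/// assert_eq!(object.len(), 1);
/// ```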
#[inline(always)]
pub fn with_capacity(capacity: usize) -> Self {
Object {
store: Vec::with_capacity(capacity)
}
}
#[inline]
fn node_at_index_mut(&mut self, index: usize) -> *mut Node {
unsafe { self.store.as_mut_ptr().offset(index as isize) }
}
#[inline(always)]
fn add_node(&mut self, key: &[u8], value: JsonValue, hash: u64) -> usize {
let index = self.store.len();
if index < self.store.capacity() {
// Because we've just checked the capacity, we can avoid
// using `push`, and instead do unsafe magic to memcpy
// the new node at the correct index without additional
// capacity or bound checks.
unsafe {
let node = Node::new(value, hash, key.len());
self.store.set_len(index + 1);
// To whoever gets concerned: I got better results with `copy` than
// `write`. The difference in benchmarks wasn't big though.
ptr::copy_nonoverlapping(
&node as *const Node,
self.store.as_mut_ptr().offset(index as isize),
1,
);
// Since the Node has been copied bit-for-bit, we need to forget the
// original, otherwise it gets dropped here and the copy in the store
// is left pointing at freed memory.
mem::forget(node);
}
unsafe { self.store.get_unchecked_mut(index).key.attach(key) };
} else {
self.store.push(Node::new(value, hash, key.len()));
unsafe { self.store.get_unchecked_mut(index).key.attach(key) };
// Iterate up to `index` (the old length); we don't need to fix
// anything on the `Node` that just got pushed.
for node in self.store.iter_mut().take(index) {
node.key.fix_ptr();
}
}
index
}
/// Insert a new entry, or override an existing one. Note that `key` has
/// to be a `&str` slice and not an owned `String`. The internals of
/// `Object` will handle the heap allocation of the key if needed for
/// better performance.
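///
/// A small sketch showing that inserting an existing key overrides its value
/// (assuming `Object` is reachable at `json::object::Object`):
///
/// ```
/// # use json::object::Object;
/// let mut object = Object::new();
///
/// object.insert("foo", "bar".into());
/// object.insert("foo", 42.into());
///
/// assert!(object["foo"] == 42);
/// assert_eq!(object.len(), 1);
/// ```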
#[inline]
pub fn insert(&mut self, key: &str, value: JsonValue) {
self.insert_index(key, value);
}
pub(crate) fn insert_index(&mut self, key: &str, value: JsonValue) -> usize {
let key = key.as_bytes();
let hash = hash_key(key);
if self.store.len() == 0 {
self.store.push(Node::new(value, hash, key.len()));
self.store[0].key.attach(key);
return 0;
}
let mut node = unsafe { &mut *self.node_at_index_mut(0) };
let mut parent = 0;
loop {
if hash == node.key.hash && key == node.key.as_bytes() {
node.value = value;
return parent;
} else if hash < node.key.hash {
if node.left != 0 {
parent = node.left;
node = unsafe { &mut *self.node_at_index_mut(node.left) };
continue;
}
let index = self.add_node(key, value, hash);
self.store[parent].left = index;
return index;
} else {
if node.right != 0 {
parent = node.right;
node = unsafe { &mut *self.node_at_index_mut(node.right) };
continue;
}
let index = self.add_node(key, value, hash);
self.store[parent].right = index;
return index;
}
}
}
#[inline]
pub(crate) fn override_at(&mut self, index: usize, value: JsonValue) {
self.store[index].value = value;
}
#[inline]
#[deprecated(since="0.11.11", note="Was only meant for internal use")]
pub fn override_last(&mut self, value: JsonValue) {
if let Some(node) = self.store.last_mut() {
node.value = value;
}
}
pub fn get(&self, key: &str) -> Option<&JsonValue> {
if self.store.len() == 0 {
return None;
}
let key = key.as_bytes();
let hash = hash_key(key);
let mut node = unsafe { self.store.get_unchecked(0) };
loop {
if hash == node.key.hash && key == node.key.as_bytes() {
return Some(&node.value);
} else if hash < node.key.hash {
if node.left == 0 {
return None;
}
node = unsafe { self.store.get_unchecked(node.left) };
} else {
if node.right == 0 {
return None;
}
node = unsafe { self.store.get_unchecked(node.right) };
}
}
}
pub fn get_mut(&mut self, key: &str) -> Option<&mut JsonValue> {
if self.store.len() == 0 {
return None;
}
let key = key.as_bytes();
let hash = hash_key(key);
let mut index = 0;
{
let mut node = unsafe { self.store.get_unchecked(0) };
loop {
if hash == node.key.hash && key == node.key.as_bytes() {
break;
} else if hash < node.key.hash {
if node.left == 0 {
return None;
}
index = node.left;
node = unsafe { self.store.get_unchecked(node.left) };
} else {
if node.right == 0 {
return None;
}
index = node.right;
node = unsafe { self.store.get_unchecked(node.right) };
}
}
}
let node = unsafe { self.store.get_unchecked_mut(index) };
Some(&mut node.value)
}
/// Attempts to remove the value behind `key`; if successful, returns the
/// `JsonValue` that was stored behind the `key`.
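///
/// A short sketch (paths assumed as in the other examples in this module):
///
/// ```
/// # use json::object::Object;
/// # use json::JsonValue;
/// let mut object = Object::new();
/// object.insert("foo", "bar".into());
///
/// assert_eq!(object.remove("foo"), Some(JsonValue::from("bar")));
/// assert_eq!(object.remove("foo"), None);
/// ```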
pub fn remove(&mut self, key: &str) -> Option<JsonValue> {
if self.store.len() == 0 {
return None;
}
let key = key.as_bytes();
let hash = hash_key(key);
let mut index = 0;
{
let mut node = unsafe { self.store.get_unchecked(0) };
// Try to find the node
loop {
if hash == node.key.hash && key == node.key.as_bytes() {
break;
} else if hash < node.key.hash {
if node.left == 0 {
return None;
}
index = node.left;
node = unsafe { self.store.get_unchecked(node.left) };
} else {
if node.right == 0 {
return None;
}
index = node.right;
node = unsafe { self.store.get_unchecked(node.right) };
}
}
}
// Removing a node in place would badly break the tree; it's easier to just
// recreate it. This is a very costly operation, but removing entries from
// JSON objects shouldn't happen very often, if at all. Optimizing this can
// wait for better times.
let mut new_object = Object::with_capacity(self.store.len() - 1);
let mut removed = None;
for (i, node) in self.store.iter_mut().enumerate() {
if i == index {
// Rust doesn't let us move the value out of `node`, since it's behind
// a mutable reference. `mem::replace` fixes that.
removed = Some(mem::replace(&mut node.value, JsonValue::Null));
} else {
let value = mem::replace(&mut node.value, JsonValue::Null);
new_object.insert(node.key.as_str(), value);
}
}
mem::swap(self, &mut new_object);
removed
}
#[inline(always)]
pub fn len(&self) -> usize {
self.store.len()
}
#[inline(always)]
pub fn is_empty(&self) -> bool {
self.store.is_empty()
}
/// Wipe the `Object` clear. The capacity will remain untouched.
pub fn clear(&mut self) {
self.store.clear();
}
#[inline(always)]
pub fn iter(&self) -> Iter {
Iter {
inner: self.store.iter()
}
}
#[inline(always)]
pub fn iter_mut(&mut self) -> IterMut {
IterMut {
inner: self.store.iter_mut()
}
}
/// Serializes the object into a compact JSON string.
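///
/// A small sketch (path assumed to be `json::object::Object`); the output is
/// compact, with no extra whitespace:
///
/// ```
/// # use json::object::Object;
/// let mut object = Object::new();
/// object.insert("foo", 42.into());
///
/// assert_eq!(object.dump(), r#"{"foo":42}"#);
/// ```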
pub fn dump(&self) -> String {
let mut gen = DumpGenerator::new();
gen.write_object(self).expect("Can't fail");
gen.consume()
}
/// Pretty-prints the object into a JSON string. Takes the number of spaces
/// to indent nested blocks with as an argument.
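///
/// A small sketch, assuming the crate's standard pretty-printing layout
/// (one member per line, indented, a space after the colon):
///
/// ```
/// # use json::object::Object;
/// let mut object = Object::new();
/// object.insert("foo", 42.into());
///
/// assert_eq!(object.pretty(4), "{\n    \"foo\": 42\n}");
/// ```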
pub fn pretty(&self, spaces: u16) -> String {
let mut gen = PrettyGenerator::new(spaces);
gen.write_object(self).expect("Can't fail");
gen.consume()
}
}
// Custom implementation of `Clone`, as new heap allocation means
// we have to fix key pointers everywhere!
impl Clone for Object {
fn clone(&self) -> Self {
let mut store = self.store.clone();
for node in store.iter_mut() {
node.key.fix_ptr();
}
Object {
store: store
}
}
}
// Because keys can be inserted in a different order, the safe way to
// compare `Object`s is to iterate over one and check that the other
// contains all the same keys with equal values.
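//
// For example (a sketch using the `object!` macro from the crate root):
//
// ```
// let a = object!{ "alpha" => 1, "beta" => 2 };
// let b = object!{ "beta" => 2, "alpha" => 1 };
//
// assert_eq!(a, b); // same entries, different insertion order
// ```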
impl PartialEq for Object {
fn eq(&self, other: &Object) -> bool {
if self.len() != other.len() {
return false;
}
for (key, value) in self.iter() {
match other.get(key) {
Some(ref other_val) => if *other_val != value { return false; },
None => return false
}
}
true
}
}
pub struct Iter<'a> {
inner: slice::Iter<'a, Node>
}
impl<'a> Iter<'a> {
/// Create an empty iterator that always returns `None`
pub fn empty() -> Self {
Iter {
inner: [].iter()
}
}
}
impl<'a> Iterator for Iter<'a> {
type Item = (&'a str, &'a JsonValue);
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(|node| (node.key.as_str(), &node.value))
}
}
impl<'a> DoubleEndedIterator for Iter<'a> {
#[inline(always)]
fn next_back(&mut self) -> Option<Self::Item> {
self.inner.next_back().map(|node| (node.key.as_str(), &node.value))
}
}
pub struct IterMut<'a> {
inner: slice::IterMut<'a, Node>
}
impl<'a> IterMut<'a> {
/// Create an empty iterator that always returns `None`
pub fn empty() -> Self {
IterMut {
inner: [].iter_mut()
}
}
}
impl<'a> Iterator for IterMut<'a> {
type Item = (&'a str, &'a mut JsonValue);
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
self.inner.next().map(|node| (node.key.as_str(), &mut node.value))
}
}
impl<'a> DoubleEndedIterator for IterMut<'a> {
#[inline(always)]
fn next_back(&mut self) -> Option<Self::Item> {
self.inner.next_back().map(|node| (node.key.as_str(), &mut node.value))
}
}
/// Implements indexing by `&str` to easily access object members:
///
/// ## Example
///
/// ```
/// # #[macro_use]
/// # extern crate json;
/// # use json::JsonValue;
/// #
/// # fn main() {
/// let value = object!{
/// "foo" => "bar"
/// };
///
/// if let JsonValue::Object(object) = value {
/// assert!(object["foo"] == "bar");
/// }
/// # }
/// ```
// TODO: doc
impl<'a> Index<&'a str> for Object {
type Output = JsonValue;
fn index(&self, index: &str) -> &JsonValue {
match self.get(index) {
Some(value) => value,
_ => &NULL
}
}
}
impl Index<String> for Object {
type Output = JsonValue;
fn index(&self, index: String) -> &JsonValue {
self.index(index.deref())
}
}
impl<'a> Index<&'a String> for Object {
type Output = JsonValue;
fn index(&self, index: &String) -> &JsonValue {
self.index(index.deref())
}
}
/// Implements mutable indexing by `&str` to easily modify object members:
///
/// ## Example
///
/// ```
/// # #[macro_use]
/// # extern crate json;
/// # use json::JsonValue;
/// #
/// # fn main() {
/// let value = object!{};
///
/// if let JsonValue::Object(mut object) = value {
/// object["foo"] = 42.into();
///
/// assert!(object["foo"] == 42);
/// }
/// # }
/// ```
impl<'a> IndexMut<&'a str> for Object {
fn index_mut(&mut self, index: &str) -> &mut JsonValue {
if self.get(index).is_none() {
self.insert(index, JsonValue::Null);
}
self.get_mut(index).unwrap()
}
}
impl IndexMut<String> for Object {
fn index_mut(&mut self, index: String) -> &mut JsonValue {
self.index_mut(index.deref())
}
}
impl<'a> IndexMut<&'a String> for Object {
fn index_mut(&mut self, index: &String) -> &mut JsonValue {
self.index_mut(index.deref())
}
}