| /* Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define C_LUCY_HASH |
| #define C_LUCY_HASHTOMBSTONE |
| #define LUCY_USE_SHORT_NAMES |
| #define CHY_USE_SHORT_NAMES |
| |
| #include <string.h> |
| #include <stdlib.h> |
| |
| #include "Lucy/Object/VTable.h" |
| |
| #include "Lucy/Object/Hash.h" |
| #include "Lucy/Object/CharBuf.h" |
| #include "Lucy/Object/Err.h" |
| #include "Lucy/Object/VArray.h" |
| #include "Lucy/Store/InStream.h" |
| #include "Lucy/Store/OutStream.h" |
| #include "Lucy/Util/Freezer.h" |
| #include "Lucy/Util/Memory.h" |
| |
| static HashTombStone TOMBSTONE = { |
| HASHTOMBSTONE, |
| {1} |
| }; |
| |
| #define HashEntry lucy_HashEntry |
| |
| typedef struct HashEntry { |
| Obj *key; |
| Obj *value; |
| int32_t hash_sum; |
| } HashEntry; |
| |
| // Reset the iterator. Hash_Iterate must be called to restart iteration. |
| static INLINE void |
| SI_kill_iter(Hash *self); |
| |
| // Return the entry associated with the key, if any. |
| static INLINE HashEntry* |
| SI_fetch_entry(Hash *self, const Obj *key, int32_t hash_sum); |
| |
| // Double the number of buckets and redistribute all entries. |
| static INLINE HashEntry* |
| SI_rebuild_hash(Hash *self); |
| |
| Hash* |
| Hash_new(uint32_t capacity) { |
| Hash *self = (Hash*)VTable_Make_Obj(HASH); |
| return Hash_init(self, capacity); |
| } |
| |
| Hash* |
| Hash_init(Hash *self, uint32_t capacity) { |
| // Allocate enough space to hold the requested number of elements without |
| // triggering a rebuild. |
| uint32_t requested_capacity = capacity < I32_MAX ? capacity : I32_MAX; |
| uint32_t threshold; |
| capacity = 16; |
| while (1) { |
| threshold = (capacity / 3) * 2; |
| if (threshold > requested_capacity) { break; } |
| capacity *= 2; |
| } |
| |
| // Init. |
| self->size = 0; |
| self->iter_tick = -1; |
| |
| // Derive. |
| self->capacity = capacity; |
| self->entries = (HashEntry*)CALLOCATE(capacity, sizeof(HashEntry)); |
| self->threshold = threshold; |
| |
| return self; |
| } |
| |
| void |
| Hash_destroy(Hash *self) { |
| if (self->entries) { |
| Hash_Clear(self); |
| FREEMEM(self->entries); |
| } |
| SUPER_DESTROY(self, HASH); |
| } |
| |
| Hash* |
| Hash_dump(Hash *self) { |
| Hash *dump = Hash_new(self->size); |
| Obj *key; |
| Obj *value; |
| |
| Hash_Iterate(self); |
| while (Hash_Next(self, &key, &value)) { |
| // Since JSON only supports text hash keys, Dump() can only support |
| // text hash keys. |
| CERTIFY(key, CHARBUF); |
| Hash_Store(dump, key, Obj_Dump(value)); |
| } |
| |
| return dump; |
| } |
| |
| Obj* |
| Hash_load(Hash *self, Obj *dump) { |
| Hash *source = (Hash*)CERTIFY(dump, HASH); |
| CharBuf *class_name = (CharBuf*)Hash_Fetch_Str(source, "_class", 6); |
| UNUSED_VAR(self); |
| |
| // Assume that the presence of the "_class" key paired with a valid class |
| // name indicates the output of a Dump rather than an ordinary Hash. */ |
| if (class_name && CB_Is_A(class_name, CHARBUF)) { |
| VTable *vtable = VTable_fetch_vtable(class_name); |
| |
| if (!vtable) { |
| CharBuf *parent_class = VTable_find_parent_class(class_name); |
| if (parent_class) { |
| VTable *parent = VTable_singleton(parent_class, NULL); |
| vtable = VTable_singleton(class_name, parent); |
| DECREF(parent_class); |
| } |
| else { |
| // TODO: Fix Hash_Load() so that it works with ordinary hash |
| // keys named "_class". |
| THROW(ERR, "Can't find class '%o'", class_name); |
| } |
| } |
| |
| // Dispatch to an alternate Load() method. |
| if (vtable) { |
| Obj_load_t load = (Obj_load_t)METHOD(vtable, Obj, Load); |
| if (load == Obj_load) { |
| THROW(ERR, "Abstract method Load() not defined for %o", |
| VTable_Get_Name(vtable)); |
| } |
| else if (load != (Obj_load_t)Hash_load) { // stop inf loop |
| return load(NULL, dump); |
| } |
| } |
| } |
| |
| // It's an ordinary Hash. |
| { |
| Hash *loaded = Hash_new(source->size); |
| Obj *key; |
| Obj *value; |
| |
| Hash_Iterate(source); |
| while (Hash_Next(source, &key, &value)) { |
| Hash_Store(loaded, key, Obj_Load(value, value)); |
| } |
| |
| return (Obj*)loaded; |
| } |
| } |
| |
| void |
| Hash_serialize(Hash *self, OutStream *outstream) { |
| Obj *key; |
| Obj *val; |
| uint32_t charbuf_count = 0; |
| OutStream_Write_C32(outstream, self->size); |
| |
| // Write CharBuf keys first. CharBuf keys are the common case; grouping |
| // them together is a form of run-length-encoding and saves space, since |
| // we omit the per-key class name. |
| Hash_Iterate(self); |
| while (Hash_Next(self, &key, &val)) { |
| if (Obj_Is_A(key, CHARBUF)) { charbuf_count++; } |
| } |
| OutStream_Write_C32(outstream, charbuf_count); |
| Hash_Iterate(self); |
| while (Hash_Next(self, &key, &val)) { |
| if (Obj_Is_A(key, CHARBUF)) { |
| Obj_Serialize(key, outstream); |
| FREEZE(val, outstream); |
| } |
| } |
| |
| // Punt on the classes of the remaining keys. |
| Hash_Iterate(self); |
| while (Hash_Next(self, &key, &val)) { |
| if (!Obj_Is_A(key, CHARBUF)) { |
| FREEZE(key, outstream); |
| FREEZE(val, outstream); |
| } |
| } |
| } |
| |
| Hash* |
| Hash_deserialize(Hash *self, InStream *instream) { |
| uint32_t size = InStream_Read_C32(instream); |
| uint32_t num_charbufs = InStream_Read_C32(instream); |
| uint32_t num_other = size - num_charbufs; |
| CharBuf *key = num_charbufs ? CB_new(0) : NULL; |
| |
| if (self) { Hash_init(self, size); } |
| else { self = Hash_new(size); } |
| |
| // Read key-value pairs with CharBuf keys. |
| while (num_charbufs--) { |
| uint32_t len = InStream_Read_C32(instream); |
| char *key_buf = CB_Grow(key, len); |
| InStream_Read_Bytes(instream, key_buf, len); |
| key_buf[len] = '\0'; |
| CB_Set_Size(key, len); |
| Hash_Store(self, (Obj*)key, THAW(instream)); |
| } |
| DECREF(key); |
| |
| // Read remaining key/value pairs. |
| while (num_other--) { |
| Obj *k = THAW(instream); |
| Hash_Store(self, k, THAW(instream)); |
| DECREF(k); |
| } |
| |
| return self; |
| } |
| |
| void |
| Hash_clear(Hash *self) { |
| HashEntry *entry = (HashEntry*)self->entries; |
| HashEntry *const limit = entry + self->capacity; |
| |
| // Iterate through all entries. |
| for (; entry < limit; entry++) { |
| if (!entry->key) { continue; } |
| DECREF(entry->key); |
| DECREF(entry->value); |
| entry->key = NULL; |
| entry->value = NULL; |
| entry->hash_sum = 0; |
| } |
| |
| self->size = 0; |
| } |
| |
| void |
| Hash_do_store(Hash *self, Obj *key, Obj *value, |
| int32_t hash_sum, bool_t use_this_key) { |
| HashEntry *entries = self->size >= self->threshold |
| ? SI_rebuild_hash(self) |
| : (HashEntry*)self->entries; |
| uint32_t tick = hash_sum; |
| const uint32_t mask = self->capacity - 1; |
| |
| while (1) { |
| tick &= mask; |
| HashEntry *entry = entries + tick; |
| if (entry->key == (Obj*)&TOMBSTONE || !entry->key) { |
| if (entry->key == (Obj*)&TOMBSTONE) { |
| // Take note of diminished tombstone clutter. |
| self->threshold++; |
| } |
| entry->key = use_this_key |
| ? key |
| : Hash_Make_Key(self, key, hash_sum); |
| entry->value = value; |
| entry->hash_sum = hash_sum; |
| self->size++; |
| break; |
| } |
| else if (entry->hash_sum == hash_sum |
| && Obj_Equals(key, entry->key) |
| ) { |
| DECREF(entry->value); |
| entry->value = value; |
| break; |
| } |
| tick++; // linear scan |
| } |
| } |
| |
| void |
| Hash_store(Hash *self, Obj *key, Obj *value) { |
| Hash_do_store(self, key, value, Obj_Hash_Sum(key), false); |
| } |
| |
| void |
| Hash_store_str(Hash *self, const char *key, size_t key_len, Obj *value) { |
| ZombieCharBuf *key_buf = ZCB_WRAP_STR((char*)key, key_len); |
| Hash_do_store(self, (Obj*)key_buf, value, |
| ZCB_Hash_Sum(key_buf), false); |
| } |
| |
| Obj* |
| Hash_make_key(Hash *self, Obj *key, int32_t hash_sum) { |
| UNUSED_VAR(self); |
| UNUSED_VAR(hash_sum); |
| return Obj_Clone(key); |
| } |
| |
| Obj* |
| Hash_fetch_str(Hash *self, const char *key, size_t key_len) { |
| ZombieCharBuf *key_buf = ZCB_WRAP_STR(key, key_len); |
| return Hash_fetch(self, (Obj*)key_buf); |
| } |
| |
| static INLINE HashEntry* |
| SI_fetch_entry(Hash *self, const Obj *key, int32_t hash_sum) { |
| uint32_t tick = hash_sum; |
| HashEntry *const entries = (HashEntry*)self->entries; |
| HashEntry *entry; |
| |
| while (1) { |
| tick &= self->capacity - 1; |
| entry = entries + tick; |
| if (!entry->key) { |
| // Failed to find the key, so return NULL. |
| return NULL; |
| } |
| else if (entry->hash_sum == hash_sum |
| && Obj_Equals(key, entry->key) |
| ) { |
| return entry; |
| } |
| tick++; |
| } |
| } |
| |
| Obj* |
| Hash_fetch(Hash *self, const Obj *key) { |
| HashEntry *entry = SI_fetch_entry(self, key, Obj_Hash_Sum(key)); |
| return entry ? entry->value : NULL; |
| } |
| |
| Obj* |
| Hash_delete(Hash *self, const Obj *key) { |
| HashEntry *entry = SI_fetch_entry(self, key, Obj_Hash_Sum(key)); |
| if (entry) { |
| Obj *value = entry->value; |
| DECREF(entry->key); |
| entry->key = (Obj*)&TOMBSTONE; |
| entry->value = NULL; |
| entry->hash_sum = 0; |
| self->size--; |
| self->threshold--; // limit number of tombstones |
| return value; |
| } |
| else { |
| return NULL; |
| } |
| } |
| |
| Obj* |
| Hash_delete_str(Hash *self, const char *key, size_t key_len) { |
| ZombieCharBuf *key_buf = ZCB_WRAP_STR(key, key_len); |
| return Hash_delete(self, (Obj*)key_buf); |
| } |
| |
| uint32_t |
| Hash_iterate(Hash *self) { |
| SI_kill_iter(self); |
| return self->size; |
| } |
| |
| static INLINE void |
| SI_kill_iter(Hash *self) { |
| self->iter_tick = -1; |
| } |
| |
| bool_t |
| Hash_next(Hash *self, Obj **key, Obj **value) { |
| while (1) { |
| if (++self->iter_tick >= (int32_t)self->capacity) { |
| // Bail since we've completed the iteration. |
| --self->iter_tick; |
| *key = NULL; |
| *value = NULL; |
| return false; |
| } |
| else { |
| HashEntry *const entry |
| = (HashEntry*)self->entries + self->iter_tick; |
| if (entry->key && entry->key != (Obj*)&TOMBSTONE) { |
| // Success! |
| *key = entry->key; |
| *value = entry->value; |
| return true; |
| } |
| } |
| } |
| } |
| |
| Obj* |
| Hash_find_key(Hash *self, const Obj *key, int32_t hash_sum) { |
| HashEntry *entry = SI_fetch_entry(self, key, hash_sum); |
| return entry ? entry->key : NULL; |
| } |
| |
| VArray* |
| Hash_keys(Hash *self) { |
| Obj *key; |
| Obj *val; |
| VArray *keys = VA_new(self->size); |
| Hash_Iterate(self); |
| while (Hash_Next(self, &key, &val)) { |
| VA_push(keys, INCREF(key)); |
| } |
| return keys; |
| } |
| |
| VArray* |
| Hash_values(Hash *self) { |
| Obj *key; |
| Obj *val; |
| VArray *values = VA_new(self->size); |
| Hash_Iterate(self); |
| while (Hash_Next(self, &key, &val)) { VA_push(values, INCREF(val)); } |
| return values; |
| } |
| |
| bool_t |
| Hash_equals(Hash *self, Obj *other) { |
| Hash *twin = (Hash*)other; |
| Obj *key; |
| Obj *val; |
| |
| if (twin == self) { return true; } |
| if (!Obj_Is_A(other, HASH)) { return false; } |
| if (self->size != twin->size) { return false; } |
| |
| Hash_Iterate(self); |
| while (Hash_Next(self, &key, &val)) { |
| Obj *other_val = Hash_Fetch(twin, key); |
| if (!other_val || !Obj_Equals(other_val, val)) { return false; } |
| } |
| |
| return true; |
| } |
| |
| uint32_t |
| Hash_get_capacity(Hash *self) { |
| return self->capacity; |
| } |
| |
| uint32_t |
| Hash_get_size(Hash *self) { |
| return self->size; |
| } |
| |
| static INLINE HashEntry* |
| SI_rebuild_hash(Hash *self) { |
| HashEntry *old_entries = (HashEntry*)self->entries; |
| HashEntry *entry = old_entries; |
| HashEntry *limit = old_entries + self->capacity; |
| |
| SI_kill_iter(self); |
| self->capacity *= 2; |
| self->threshold = (self->capacity / 3) * 2; |
| self->entries = (HashEntry*)CALLOCATE(self->capacity, sizeof(HashEntry)); |
| self->size = 0; |
| |
| for (; entry < limit; entry++) { |
| if (!entry->key || entry->key == (Obj*)&TOMBSTONE) { |
| continue; |
| } |
| Hash_do_store(self, entry->key, entry->value, |
| entry->hash_sum, true); |
| } |
| |
| FREEMEM(old_entries); |
| |
| return (HashEntry*)self->entries; |
| } |
| |
| /***************************************************************************/ |
| |
| uint32_t |
| HashTombStone_get_refcount(HashTombStone* self) { |
| CHY_UNUSED_VAR(self); |
| return 1; |
| } |
| |
| HashTombStone* |
| HashTombStone_inc_refcount(HashTombStone* self) { |
| return self; |
| } |
| |
| uint32_t |
| HashTombStone_dec_refcount(HashTombStone* self) { |
| UNUSED_VAR(self); |
| return 1; |
| } |
| |
| |