| /* Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define C_LUCY_SNAPSHOT |
| #include "Lucy/Util/ToolSet.h" |
| |
| #include "Lucy/Index/Snapshot.h" |
| #include "Lucy/Index/Segment.h" |
| #include "Lucy/Store/Folder.h" |
| #include "Lucy/Util/StringHelper.h" |
| #include "Lucy/Util/IndexFileNames.h" |
| #include "Lucy/Util/Json.h" |
| |
| static VArray* |
| S_clean_segment_contents(VArray *orig); |
| |
// Value written under the "format" key of new snapshot files.  Files whose
// format is greater than this are rejected by Snapshot_read_file().
int32_t Snapshot_current_file_format = 2;

// Value written under the "subformat" key of new snapshot files.
static int32_t Snapshot_current_file_subformat = 1;
| |
| Snapshot* |
| Snapshot_new() { |
| Snapshot *self = (Snapshot*)VTable_Make_Obj(SNAPSHOT); |
| return Snapshot_init(self); |
| } |
| |
| static void |
| S_zero_out(Snapshot *self) { |
| DECREF(self->entries); |
| DECREF(self->path); |
| self->entries = Hash_new(0); |
| self->path = NULL; |
| } |
| |
| Snapshot* |
| Snapshot_init(Snapshot *self) { |
| S_zero_out(self); |
| return self; |
| } |
| |
| void |
| Snapshot_destroy(Snapshot *self) { |
| DECREF(self->entries); |
| DECREF(self->path); |
| SUPER_DESTROY(self, SNAPSHOT); |
| } |
| |
| void |
| Snapshot_add_entry(Snapshot *self, const CharBuf *entry) { |
| Hash_Store(self->entries, (Obj*)entry, INCREF(&EMPTY)); |
| } |
| |
| bool_t |
| Snapshot_delete_entry(Snapshot *self, const CharBuf *entry) { |
| Obj *val = Hash_Delete(self->entries, (Obj*)entry); |
| if (val) { |
| Obj_Dec_RefCount(val); |
| return true; |
| } |
| else { |
| return false; |
| } |
| } |
| |
| VArray* |
| Snapshot_list(Snapshot *self) { |
| return Hash_Keys(self->entries); |
| } |
| |
| uint32_t |
| Snapshot_num_entries(Snapshot *self) { |
| return Hash_Get_Size(self->entries); |
| } |
| |
| void |
| Snapshot_set_path(Snapshot *self, const CharBuf *path) { |
| DECREF(self->path); |
| self->path = path ? CB_Clone(path) : NULL; |
| } |
| |
| CharBuf* |
| Snapshot_get_path(Snapshot *self) { |
| return self->path; |
| } |
| |
| Snapshot* |
| Snapshot_read_file(Snapshot *self, Folder *folder, const CharBuf *path) { |
| // Eliminate all prior data. Pick a snapshot file. |
| S_zero_out(self); |
| self->path = path ? CB_Clone(path) : IxFileNames_latest_snapshot(folder); |
| |
| if (self->path) { |
| Hash *snap_data |
| = (Hash*)CERTIFY(Json_slurp_json(folder, self->path), HASH); |
| Obj *format_obj |
| = CERTIFY(Hash_Fetch_Str(snap_data, "format", 6), OBJ); |
| int32_t format = (int32_t)Obj_To_I64(format_obj); |
| Obj *subformat_obj = Hash_Fetch_Str(snap_data, "subformat", 9); |
| int32_t subformat = subformat_obj |
| ? (int32_t)Obj_To_I64(subformat_obj) |
| : 0; |
| |
| // Verify that we can read the index properly. |
| if (format > Snapshot_current_file_format) { |
| THROW(ERR, "Snapshot format too recent: %i32, %i32", |
| format, Snapshot_current_file_format); |
| } |
| |
| // Build up list of entries. |
| VArray *list = (VArray*)CERTIFY( |
| Hash_Fetch_Str(snap_data, "entries", 7), |
| VARRAY); |
| INCREF(list); |
| if (format == 1 || (format == 2 && subformat < 1)) { |
| VArray *cleaned = S_clean_segment_contents(list); |
| DECREF(list); |
| list = cleaned; |
| } |
| Hash_Clear(self->entries); |
| for (uint32_t i = 0, max = VA_Get_Size(list); i < max; i++) { |
| CharBuf *entry |
| = (CharBuf*)CERTIFY(VA_Fetch(list, i), CHARBUF); |
| Hash_Store(self->entries, (Obj*)entry, INCREF(&EMPTY)); |
| } |
| |
| DECREF(list); |
| DECREF(snap_data); |
| } |
| |
| return self; |
| } |
| |
| static VArray* |
| S_clean_segment_contents(VArray *orig) { |
| // Since Snapshot format 2, no DataReader has depended on individual files |
| // within segment directories being listed. Filter these files because |
| // they cause a problem with FilePurger. |
| VArray *cleaned = VA_new(VA_Get_Size(orig)); |
| for (uint32_t i = 0, max = VA_Get_Size(orig); i < max; i++) { |
| CharBuf *name = (CharBuf*)VA_Fetch(orig, i); |
| if (!Seg_valid_seg_name(name)) { |
| if (CB_Starts_With_Str(name, "seg_", 4)) { |
| continue; // Skip this file. |
| } |
| } |
| VA_Push(cleaned, INCREF(name)); |
| } |
| return cleaned; |
| } |
| |
| |
| void |
| Snapshot_write_file(Snapshot *self, Folder *folder, const CharBuf *path) { |
| Hash *all_data = Hash_new(0); |
| VArray *list = Snapshot_List(self); |
| |
| // Update path. |
| DECREF(self->path); |
| if (path) { |
| self->path = CB_Clone(path); |
| } |
| else { |
| CharBuf *latest = IxFileNames_latest_snapshot(folder); |
| uint64_t gen = latest ? IxFileNames_extract_gen(latest) + 1 : 1; |
| char base36[StrHelp_MAX_BASE36_BYTES]; |
| StrHelp_to_base36(gen, &base36); |
| self->path = CB_newf("snapshot_%s.json", &base36); |
| DECREF(latest); |
| } |
| |
| // Don't overwrite. |
| if (Folder_Exists(folder, self->path)) { |
| THROW(ERR, "Snapshot file '%o' already exists", self->path); |
| } |
| |
| // Sort, then store file names. |
| VA_Sort(list, NULL, NULL); |
| Hash_Store_Str(all_data, "entries", 7, (Obj*)list); |
| |
| // Create a JSON-izable data structure. |
| Hash_Store_Str(all_data, "format", 6, |
| (Obj*)CB_newf("%i32", (int32_t)Snapshot_current_file_format)); |
| Hash_Store_Str(all_data, "subformat", 9, |
| (Obj*)CB_newf("%i32", (int32_t)Snapshot_current_file_subformat)); |
| |
| // Write out JSON-ized data to the new file. |
| Json_spew_json((Obj*)all_data, folder, self->path); |
| |
| DECREF(all_data); |
| } |
| |
| |