| /* Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #define C_LUCY_COMPOUNDFILEWRITER |
| #include "Lucy/Util/ToolSet.h" |
| |
| #include "Lucy/Store/CompoundFileWriter.h" |
| #include "Lucy/Store/Folder.h" |
| #include "Lucy/Store/InStream.h" |
| #include "Lucy/Store/OutStream.h" |
| #include "Lucy/Util/IndexFileNames.h" |
| #include "Lucy/Util/Json.h" |
| |
| int32_t CFWriter_current_file_format = 2; |
| |
| // Helper which does the heavy lifting for CFWriter_consolidate. |
| static void |
| S_do_consolidate(CompoundFileWriter *self); |
| |
| // Clean up files which may be left over from previous merge attempts. |
| static void |
| S_clean_up_old_temp_files(CompoundFileWriter *self); |
| |
| CompoundFileWriter* |
| CFWriter_new(Folder *folder) { |
| CompoundFileWriter *self |
| = (CompoundFileWriter*)VTable_Make_Obj(COMPOUNDFILEWRITER); |
| return CFWriter_init(self, folder); |
| } |
| |
| CompoundFileWriter* |
| CFWriter_init(CompoundFileWriter *self, Folder *folder) { |
| self->folder = (Folder*)INCREF(folder); |
| return self; |
| } |
| |
| void |
| CFWriter_destroy(CompoundFileWriter *self) { |
| DECREF(self->folder); |
| SUPER_DESTROY(self, COMPOUNDFILEWRITER); |
| } |
| |
| void |
| CFWriter_consolidate(CompoundFileWriter *self) { |
| CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11); |
| if (Folder_Exists(self->folder, cfmeta_file)) { |
| THROW(ERR, "Merge already performed for %o", |
| Folder_Get_Path(self->folder)); |
| } |
| else { |
| S_clean_up_old_temp_files(self); |
| S_do_consolidate(self); |
| } |
| } |
| |
| static void |
| S_clean_up_old_temp_files(CompoundFileWriter *self) { |
| Folder *folder = self->folder; |
| CharBuf *cfmeta_temp = (CharBuf*)ZCB_WRAP_STR("cfmeta.json.temp", 16); |
| CharBuf *cf_file = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6); |
| |
| if (Folder_Exists(folder, cf_file)) { |
| if (!Folder_Delete(folder, cf_file)) { |
| THROW(ERR, "Can't delete '%o'", cf_file); |
| } |
| } |
| if (Folder_Exists(folder, cfmeta_temp)) { |
| if (!Folder_Delete(folder, cfmeta_temp)) { |
| THROW(ERR, "Can't delete '%o'", cfmeta_temp); |
| } |
| } |
| } |
| |
| static void |
| S_do_consolidate(CompoundFileWriter *self) { |
| Folder *folder = self->folder; |
| Hash *metadata = Hash_new(0); |
| Hash *sub_files = Hash_new(0); |
| VArray *files = Folder_List(folder, NULL); |
| VArray *merged = VA_new(VA_Get_Size(files)); |
| CharBuf *cf_file = (CharBuf*)ZCB_WRAP_STR("cf.dat", 6); |
| OutStream *outstream = Folder_Open_Out(folder, (CharBuf*)cf_file); |
| uint32_t i, max; |
| bool_t rename_success; |
| |
| if (!outstream) { RETHROW(INCREF(Err_get_error())); } |
| |
| // Start metadata. |
| Hash_Store_Str(metadata, "files", 5, INCREF(sub_files)); |
| Hash_Store_Str(metadata, "format", 6, |
| (Obj*)CB_newf("%i32", CFWriter_current_file_format)); |
| |
| CharBuf *infilepath = CB_new(30); |
| size_t base_len = 0; |
| VA_Sort(files, NULL, NULL); |
| for (i = 0, max = VA_Get_Size(files); i < max; i++) { |
| CharBuf *infilename = (CharBuf*)VA_Fetch(files, i); |
| |
| if (!CB_Ends_With_Str(infilename, ".json", 5)) { |
| InStream *instream = Folder_Open_In(folder, infilename); |
| Hash *file_data = Hash_new(2); |
| int64_t offset, len; |
| |
| if (!instream) { RETHROW(INCREF(Err_get_error())); } |
| |
| // Absorb the file. |
| offset = OutStream_Tell(outstream); |
| OutStream_Absorb(outstream, instream); |
| len = OutStream_Tell(outstream) - offset; |
| |
| // Record offset and length. |
| Hash_Store_Str(file_data, "offset", 6, |
| (Obj*)CB_newf("%i64", offset)); |
| Hash_Store_Str(file_data, "length", 6, |
| (Obj*)CB_newf("%i64", len)); |
| CB_Set_Size(infilepath, base_len); |
| CB_Cat(infilepath, infilename); |
| Hash_Store(sub_files, (Obj*)infilepath, (Obj*)file_data); |
| VA_Push(merged, INCREF(infilename)); |
| |
| // Add filler NULL bytes so that every sub-file begins on a file |
| // position multiple of 8. |
| OutStream_Align(outstream, 8); |
| |
| InStream_Close(instream); |
| DECREF(instream); |
| } |
| } |
| DECREF(infilepath); |
| |
| // Write metadata to cfmeta file. |
| CharBuf *cfmeta_temp = (CharBuf*)ZCB_WRAP_STR("cfmeta.json.temp", 16); |
| CharBuf *cfmeta_file = (CharBuf*)ZCB_WRAP_STR("cfmeta.json", 11); |
| Json_spew_json((Obj*)metadata, (Folder*)self->folder, cfmeta_temp); |
| rename_success = Folder_Rename(self->folder, cfmeta_temp, cfmeta_file); |
| if (!rename_success) { RETHROW(INCREF(Err_get_error())); } |
| |
| // Clean up. |
| OutStream_Close(outstream); |
| DECREF(outstream); |
| DECREF(files); |
| DECREF(metadata); |
| /* |
| CharBuf *merged_file; |
| Obj *ignore; |
| Hash_Iterate(sub_files); |
| while (Hash_Next(sub_files, (Obj**)&merged_file, &ignore)) { |
| if (!Folder_Delete(folder, merged_file)) { |
| CharBuf *mess = MAKE_MESS("Can't delete '%o'", merged_file); |
| DECREF(sub_files); |
| Err_throw_mess(ERR, mess); |
| } |
| } |
| */ |
| DECREF(sub_files); |
| for (uint32_t i = 0, max = VA_Get_Size(merged); i < max; i++) { |
| CharBuf *merged_file = (CharBuf*)VA_Fetch(merged, i); |
| if (!Folder_Delete(folder, merged_file)) { |
| CharBuf *mess = MAKE_MESS("Can't delete '%o'", merged_file); |
| DECREF(merged); |
| Err_throw_mess(ERR, mess); |
| } |
| } |
| DECREF(merged); |
| } |
| |
| |