blob: 58f174d22eb3a0d58de9955868523d4294719c36 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <ctype.h>
#define C_LUCY_SEGMENT
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Index/Segment.h"
#include "Lucy/Store/Folder.h"
#include "Lucy/Util/Json.h"
#include "Lucy/Util/StringHelper.h"
#include "Lucy/Util/IndexFileNames.h"
/* Constructor: create a blank Segment object via the SEGMENT vtable and
 * pass it to Seg_init() together with the requested segment number.
 * Returns whatever Seg_init() returns.
 */
Segment*
Seg_new(int64_t number) {
    return Seg_init((Segment*)VTable_Make_Obj(SEGMENT), number);
}
/* Initialize a Segment.
 *
 * Throws an ERR if `number` is negative.  Otherwise the segment gets an
 * empty metadata hash, a document count of zero, empty field lookup tables
 * and a name derived from `number` via Seg_num_to_name().
 *
 * Returns `self`.
 */
Segment*
Seg_init(Segment *self, int64_t number) {
    // Negative segment numbers are rejected before any state is set up.
    if (number < 0) {
        THROW(ERR, "Segment number %i64 less than 0", number);
    }

    // Identity: the number as given, plus the name derived from it.
    self->number = number;
    self->name   = Seg_num_to_name(number);

    // Bookkeeping starts out empty.
    self->count    = 0;
    self->metadata = Hash_new(0);
    self->by_name  = Hash_new(0);
    self->by_num   = VA_new(2);

    // Slot 0 is occupied by the EMPTY placeholder so that the first field
    // registered through Seg_add_field() receives number 1, not 0.
    VA_Push(self->by_num, INCREF(&EMPTY));

    return self;
}
/* Build the name for segment `number`: the literal prefix "seg_" followed
 * by the number rendered by StrHelp_to_base36().
 *
 * Returns a new CharBuf produced by CB_newf().
 *
 * No range check is performed here; Seg_init() rejects negative numbers
 * before it calls this function.
 */
CharBuf*
Seg_num_to_name(int64_t number) {
    char base36[StrHelp_MAX_BASE36_BYTES];

    // Pass the array itself (which decays to `char*`) rather than its
    // address.  `&base36` has type `char (*)[N]`, which is not the type the
    // "%s" directive reads from the variadic argument list.
    StrHelp_to_base36(number, base36);
    return CB_newf("seg_%s", base36);
}
/* Decide whether `name` looks like a segment name: it must start with
 * "seg_" and every code point after that prefix must be an ASCII
 * alphanumeric character.  A bare "seg_" with nothing after it is accepted.
 *
 * Returns true for a valid name, false otherwise.
 */
bool_t
Seg_valid_seg_name(const CharBuf *name) {
    if (!CB_Starts_With_Str(name, "seg_", 4)) {
        return false;
    }

    // Work on a wrapper so that `name` itself is never modified, and skip
    // past the "seg_" prefix.
    ZombieCharBuf *scratch = ZCB_WRAP(name);
    ZCB_Nip(scratch, 4);

    uint32_t code_point;
    while (0 != (code_point = ZCB_Nip_One(scratch))) {
        // The <ctype.h> classifiers are only defined for arguments that fit
        // in an unsigned char (or equal EOF), so an arbitrary Unicode code
        // point must be range-checked before it reaches isalnum().  Names
        // generated by Seg_num_to_name() consist of "seg_" plus base-36
        // digits, so anything outside ASCII is rejected outright.
        if (code_point > 0x7F || !isalnum((int)code_point)) {
            return false;
        }
    }

    // The loop ends when ZCB_Nip_One() yields 0; only report success if
    // nothing is left over in the scratch buffer at that point.
    return ZCB_Get_Size(scratch) == 0 ? true : false;
}
/* Destructor: drop the references this Segment holds on its name, its
 * metadata hash and both field lookup tables, then chain to the parent
 * class destructor.
 */
void
Seg_destroy(Segment *self) {
    // Members set up by Seg_init().
    DECREF(self->name);
    DECREF(self->metadata);
    DECREF(self->by_name);
    DECREF(self->by_num);

    // Let the superclass finish tearing down the object.
    SUPER_DESTROY(self, SEGMENT);
}
/* Load this segment's state from "<segment name>/segmeta.json" inside
 * `folder`.
 *
 * On success the Segment's metadata hash is replaced by the decoded file
 * content, the document count is taken from the "count" entry of the
 * "segmeta" section (falling back to "doc_count"), and the field lookup
 * tables are rebuilt from the "field_names" list.
 *
 * Returns false if the file could not be slurped, true otherwise.  Throws
 * an ERR if the decoded content does not have the expected shape: top level
 * not a Hash, "segmeta" not a Hash, no count, "field_names" missing or not
 * a VArray, or a field name that is not a CharBuf.
 */
bool_t
Seg_read_file(Segment *self, Folder *folder) {
    CharBuf *filename = CB_newf("%o/segmeta.json", self->name);
    Hash    *metadata = (Hash*)Json_slurp_json(folder, filename);
    Hash    *my_metadata;

    // Bail unless the segmeta file was read successfully.
    DECREF(filename);
    if (!metadata) { return false; }
    CERTIFY(metadata, HASH);

    // Grab metadata for the Segment object itself.
    DECREF(self->metadata);
    self->metadata = metadata;
    my_metadata
        = (Hash*)CERTIFY(Hash_Fetch_Str(self->metadata, "segmeta", 7), HASH);

    // Assign the count; "doc_count" is consulted only when "count" is
    // absent.
    Obj *count = Hash_Fetch_Str(my_metadata, "count", 5);
    if (!count) { count = Hash_Fetch_Str(my_metadata, "doc_count", 9); }
    if (!count) { THROW(ERR, "Missing 'count'"); }
    else { self->count = Obj_To_I64(count); }

    // Get the list of field names.  The data comes from a file on disk, so
    // verify that it really is an array before treating it as one.
    Obj *field_names = Hash_Fetch_Str(my_metadata, "field_names", 11);
    if (field_names == NULL) {
        THROW(ERR, "Failed to extract 'field_names' from metadata");
    }
    VArray   *source_by_num = (VArray*)CERTIFY(field_names, VARRAY);
    uint32_t  num_fields    = VA_Get_Size(source_by_num);

    // Start over with fresh lookup tables sized for the incoming list.
    DECREF(self->by_num);
    DECREF(self->by_name);
    self->by_num  = VA_new(num_fields);
    self->by_name = Hash_new(num_fields);

    // Copy the list of fields from the source, in order, so that each name
    // is assigned the number matching its position in the stored list.
    for (uint32_t i = 0; i < num_fields; i++) {
        CharBuf *name
            = (CharBuf*)CERTIFY(VA_Fetch(source_by_num, i), CHARBUF);
        Seg_Add_Field(self, name);
    }

    return true;
}
/* Serialize this segment's metadata to "<segment name>/segmeta.json"
 * inside `folder`.
 *
 * A "segmeta" section holding the count, name, field name list and a format
 * number is stored into the Segment's metadata hash, and the whole hash is
 * then written out as JSON.  If writing fails, the error reported by
 * Err_get_error() is rethrown.
 */
void
Seg_write_file(Segment *self, Folder *folder) {
    Hash *segmeta = Hash_new(16);

    // Values describing the Segment object itself.
    Obj *count_val  = (Obj*)CB_newf("%i64", self->count);
    Hash_Store_Str(segmeta, "count", 5, count_val);

    Obj *name_val   = (Obj*)CB_Clone(self->name);
    Hash_Store_Str(segmeta, "name", 4, name_val);

    Obj *fields_val = INCREF(self->by_num);
    Hash_Store_Str(segmeta, "field_names", 11, fields_val);

    Obj *format_val = (Obj*)CB_newf("%i32", 1);
    Hash_Store_Str(segmeta, "format", 6, format_val);

    // File the section under "segmeta" in the full metadata hash.
    Hash_Store_Str(self->metadata, "segmeta", 7, (Obj*)segmeta);

    // Dump everything to disk; the path is only needed for this one call.
    CharBuf *path    = CB_newf("%o/segmeta.json", self->name);
    bool_t   written = Json_spew_json((Obj*)self->metadata, folder, path);
    DECREF(path);

    if (!written) {
        RETHROW(INCREF(Err_get_error()));
    }
}
/* Register `field` with this segment and return its field number.
 *
 * If the name is already known, the existing number is returned.  Otherwise
 * the next free number (the current size of the by-number array) is
 * assigned, both lookup tables are updated, and that new number is
 * returned.
 */
int32_t
Seg_add_field(Segment *self, const CharBuf *field) {
    Integer32 *existing
        = (Integer32*)Hash_Fetch(self->by_name, (Obj*)field);

    // Known field: just report the number it already has.
    if (existing != NULL) {
        return Int32_Get_Value(existing);
    }

    // New field: it takes the slot at the end of the by-number array.
    int32_t assigned = VA_Get_Size(self->by_num);
    Hash_Store(self->by_name, (Obj*)field, (Obj*)Int32_new(assigned));
    VA_Push(self->by_num, (Obj*)CB_Clone(field));
    return assigned;
}
/* Accessor for the segment's name.  The stored pointer is returned as-is;
 * no copy is made and the refcount is not touched.
 */
CharBuf*
Seg_get_name(Segment *self) {
    CharBuf *seg_name = self->name;
    return seg_name;
}
/* Accessor for the segment number assigned in Seg_init(). */
int64_t
Seg_get_number(Segment *self) {
    int64_t seg_number = self->number;
    return seg_number;
}
/* Overwrite the segment's count with `count`.  The value is stored without
 * any validation.
 */
void
Seg_set_count(Segment *self, int64_t count) {
    self->count = count;
}
/* Accessor for the segment's current count. */
int64_t
Seg_get_count(Segment *self) {
    int64_t current = self->count;
    return current;
}
/* Add `increment` to the segment's count and return the updated total. */
int64_t
Seg_increment_count(Segment *self, int64_t increment) {
    return self->count += increment;
}
/* Store `value` in the segment's metadata hash under `key`.
 *
 * Each key may be registered only once: if the hash already holds an entry
 * for `key`, an ERR is thrown and nothing is stored.
 */
void
Seg_store_metadata(Segment *self, const CharBuf *key, Obj *value) {
    Obj *already_there = Hash_Fetch(self->metadata, (Obj*)key);

    if (already_there) {
        THROW(ERR, "Metadata key '%o' already registered", key);
    }

    Hash_Store(self->metadata, (Obj*)key, value);
}
/* Variant of Seg_store_metadata() for callers that have the key as a raw
 * character buffer of `key_len` bytes.  The buffer is wrapped in a
 * ZombieCharBuf and the call is forwarded to Seg_Store_Metadata().
 */
void
Seg_store_metadata_str(Segment *self, const char *key, size_t key_len,
                       Obj *value) {
    ZombieCharBuf *wrapped_key = ZCB_WRAP_STR((char*)key, key_len);
    Seg_Store_Metadata(self, (CharBuf*)wrapped_key, value);
}
/* Look up `key` in the segment's metadata hash and return the result of
 * Hash_Fetch() unchanged.
 */
Obj*
Seg_fetch_metadata(Segment *self, const CharBuf *key) {
    Obj *found = Hash_Fetch(self->metadata, (Obj*)key);
    return found;
}
/* Look up a metadata entry by a raw character-buffer key of `len` bytes and
 * return the result of Hash_Fetch_Str() unchanged.
 */
Obj*
Seg_fetch_metadata_str(Segment *self, const char *key, size_t len) {
    Obj *found = Hash_Fetch_Str(self->metadata, key, len);
    return found;
}
/* Accessor for the segment's metadata hash.  The stored pointer is returned
 * as-is; no copy is made.
 */
Hash*
Seg_get_metadata(Segment *self) {
    Hash *all_metadata = self->metadata;
    return all_metadata;
}
/* Order two Segments by segment number.
 *
 * `other` must be a Segment (enforced with CERTIFY).  Returns -1 if this
 * segment's number is lower than the other's, 0 if they are equal, and 1 if
 * it is higher.
 */
int32_t
Seg_compare_to(Segment *self, Obj *other) {
    Segment *rhs    = (Segment*)CERTIFY(other, SEGMENT);
    int64_t  mine   = self->number;
    int64_t  theirs = rhs->number;

    if (mine < theirs)  { return -1; }
    if (mine == theirs) { return 0; }
    return 1;
}
/* Map a field number back to its name.
 *
 * Field number 0 always yields NULL.  For any other number the result is
 * whatever VA_Fetch() returns for that index of the by-number array.
 */
CharBuf*
Seg_field_name(Segment *self, int32_t field_num) {
    if (field_num == 0) {
        return NULL;
    }
    return (CharBuf*)VA_Fetch(self->by_num, field_num);
}
/* Map a field name to its field number.
 *
 * Returns 0 when `field` is NULL or when the name is not present in the
 * by-name table; otherwise returns the number stored for it.
 */
int32_t
Seg_field_num(Segment *self, const CharBuf *field) {
    // No name, no number.
    if (field == NULL) {
        return 0;
    }

    Integer32 *entry = (Integer32*)Hash_Fetch(self->by_name, (Obj*)field);
    if (entry == NULL) {
        return 0;
    }
    return Int32_Get_Value(entry);
}