blob: 7c189c9c891eaa31d85a4cc2233228d660658942 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_SCHEMA
#include <string.h>
#include <ctype.h>
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Analysis/Analyzer.h"
#include "Lucy/Index/Similarity.h"
#include "Lucy/Plan/FieldType.h"
#include "Lucy/Plan/BlobType.h"
#include "Lucy/Plan/NumericType.h"
#include "Lucy/Plan/StringType.h"
#include "Lucy/Plan/FullTextType.h"
#include "Lucy/Plan/Architecture.h"
#include "Lucy/Store/Folder.h"
#include "Lucy/Util/Json.h"
// Scan the array to see if an object testing as Equal is present.  If not,
// push the elem onto the end of the array.  A NULL elem is ignored.
static void
S_add_unique(VArray *array, Obj *elem);
// Register a full-text field: cache its Similarity and Analyzer (also
// recording the Analyzer among the unique analyzers), then store the type.
static void
S_add_text_field(Schema *self, const CharBuf *field, FieldType *type);
// Register a string field: cache its Similarity, then store the type.
static void
S_add_string_field(Schema *self, const CharBuf *field, FieldType *type);
// Register a blob field: only the type itself is stored.
static void
S_add_blob_field(Schema *self, const CharBuf *field, FieldType *type);
// Register a numeric field: only the type itself is stored.
static void
S_add_numeric_field(Schema *self, const CharBuf *field, FieldType *type);
Schema*
Schema_new() {
Schema *self = (Schema*)VTable_Make_Obj(SCHEMA);
return Schema_init(self);
}
// Initialize a Schema: create the empty per-field lookup tables and the
// unique-analyzer array, then obtain the Architecture and the Similarity
// derived from it.  Returns `self`.
Schema*
Schema_init(Schema *self) {
    Architecture *arch;

    // Per-field lookup tables, all starting out empty.
    self->analyzers = Hash_new(0);
    self->types     = Hash_new(0);
    self->sims      = Hash_new(0);

    // The unique-analyzer array is resized to one element up front.
    // NOTE(review): presumably that leaves slot 0 empty so that tick 0 can
    // stand for "no analyzer" -- confirm against VA_Resize.
    self->uniq_analyzers = VA_new(2);
    VA_Resize(self->uniq_analyzers, 1);

    // Ask the (overridable) Architecture factory, then build the default
    // Similarity from the result.
    arch       = Schema_Architecture(self);
    self->arch = arch;
    self->sim  = Arch_Make_Similarity(arch);

    return self;
}
// Destructor: drop the Schema's reference to each member object, then
// chain up to the parent class's destructor.
void
Schema_destroy(Schema *self) {
    // Release every member set up by Schema_init().
    DECREF(self->arch);
    DECREF(self->analyzers);
    DECREF(self->uniq_analyzers);
    DECREF(self->types);
    DECREF(self->sims);
    DECREF(self->sim);

    // Let the superclass finish tearing down the object.
    SUPER_DESTROY(self, SCHEMA);
}
// Append `elem` to `array` (taking a new reference) unless it is NULL or
// the array already holds either the very same object or an object of the
// same class that tests as Equal.
static void
S_add_unique(VArray *array, Obj *elem) {
    uint32_t tick;
    uint32_t size;

    if (elem == NULL) { return; }

    size = VA_Get_Size(array);
    for (tick = 0; tick < size; tick++) {
        Obj *existing = VA_Fetch(array, tick);

        // Empty slots can never match.
        if (existing == NULL) { continue; }

        // Identity match, or same class plus Equals() match.
        if (existing == elem
            || (Obj_Get_VTable(elem) == Obj_Get_VTable(existing)
                && Obj_Equals(elem, existing))
           ) {
            return;
        }
    }

    // No equivalent found: the array gets its own reference.
    VA_Push(array, INCREF(elem));
}
// Two Schemas are equal when `other` is the same object, or when it is a
// Schema whose Architecture, default Similarity and field-type table all
// test as Equal to this one's.
bool_t
Schema_equals(Schema *self, Obj *other) {
    Schema *twin = (Schema*)other;

    if (twin == self)             { return true; }
    if (!Obj_Is_A(other, SCHEMA)) { return false; }

    // Short-circuit evaluation keeps the comparisons in their original
    // order: architecture, then similarity, then the types hash.
    return Arch_Equals(self->arch, (Obj*)twin->arch)
           && Sim_Equals(self->sim, (Obj*)twin->sim)
           && Hash_Equals(self->types, (Obj*)twin->types);
}
// Factory for the Architecture used by this Schema.  This implementation
// ignores the invocant and always returns a new default Architecture.
Architecture*
Schema_architecture(Schema *self) {
    Architecture *arch = Arch_new();
    UNUSED_VAR(self);
    return arch;
}
// Associate `field` with `type`.
//
// If the field is already known, the call is a no-op when the new type
// Equals the existing one and throws otherwise.  New fields are dispatched
// to the helper for their type family (full text, string, blob, numeric);
// any other kind of FieldType causes an exception.
void
Schema_spec_field(Schema *self, const CharBuf *field, FieldType *type) {
    FieldType *current = Schema_Fetch_Type(self, field);

    // Re-specifying a field is only legal with an equivalent type.
    if (current != NULL) {
        if (FType_Equals(type, (Obj*)current)) { return; }
        THROW(ERR, "'%o' assigned conflicting FieldType", field);
    }

    if (FType_Is_A(type, FULLTEXTTYPE)) {
        S_add_text_field(self, field, type);
        return;
    }
    if (FType_Is_A(type, STRINGTYPE)) {
        S_add_string_field(self, field, type);
        return;
    }
    if (FType_Is_A(type, BLOBTYPE)) {
        S_add_blob_field(self, field, type);
        return;
    }
    if (FType_Is_A(type, NUMERICTYPE)) {
        S_add_numeric_field(self, field, type);
        return;
    }

    THROW(ERR, "Unrecognized field type: '%o'", type);
}
// Register a full-text field.  `type` must be a FullTextType (certified).
// The field's Similarity and Analyzer are cached under the field name, the
// Analyzer is recorded among the unique analyzers, and finally the type
// itself is stored.
static void
S_add_text_field(Schema *self, const CharBuf *field, FieldType *type) {
    FullTextType *text_type  = (FullTextType*)CERTIFY(type, FULLTEXTTYPE);
    Similarity   *field_sim  = FullTextType_Make_Similarity(text_type);
    Analyzer     *field_anlz = FullTextType_Get_Analyzer(text_type);
    Obj          *key        = (Obj*)field;

    // Cache helpers.  The Similarity is stored without an extra INCREF
    // (presumably Make_Similarity hands back a fresh reference -- confirm);
    // the Analyzer gets one because it is merely borrowed from the type.
    Hash_Store(self->sims, key, (Obj*)field_sim);
    Hash_Store(self->analyzers, key, INCREF(field_anlz));
    S_add_unique(self->uniq_analyzers, (Obj*)field_anlz);

    // Store FieldType.
    Hash_Store(self->types, key, INCREF(type));
}
// Register a string field.  `type` must be a StringType (certified).  The
// field's Similarity is cached under the field name and then the type
// itself is stored.
static void
S_add_string_field(Schema *self, const CharBuf *field, FieldType *type) {
    StringType *str_type  = (StringType*)CERTIFY(type, STRINGTYPE);
    Similarity *field_sim = StringType_Make_Similarity(str_type);
    Obj        *key       = (Obj*)field;

    // Cache helpers.
    Hash_Store(self->sims, key, (Obj*)field_sim);

    // Store FieldType.
    Hash_Store(self->types, key, INCREF(type));
}
// Register a blob field.  `type` must be a BlobType (certified); a new
// reference to it is stored in the types hash under the field name.
static void
S_add_blob_field(Schema *self, const CharBuf *field, FieldType *type) {
    BlobType *certified = (BlobType*)CERTIFY(type, BLOBTYPE);
    Obj      *key       = (Obj*)field;
    Hash_Store(self->types, key, INCREF(certified));
}
// Register a numeric field.  `type` must be a NumericType (certified); a
// new reference to it is stored in the types hash under the field name.
static void
S_add_numeric_field(Schema *self, const CharBuf *field, FieldType *type) {
    NumericType *certified = (NumericType*)CERTIFY(type, NUMERICTYPE);
    Obj         *key       = (Obj*)field;
    Hash_Store(self->types, key, INCREF(certified));
}
// Look up the FieldType registered for `field` in the types hash and
// return whatever the hash yields for that key.
FieldType*
Schema_fetch_type(Schema *self, const CharBuf *field) {
    Obj *found = Hash_Fetch(self->types, (Obj*)field);
    return (FieldType*)found;
}
// Look up the Analyzer cached for `field`.  A NULL field name yields NULL
// without consulting the hash.
Analyzer*
Schema_fetch_analyzer(Schema *self, const CharBuf *field) {
    if (!field) {
        return NULL;
    }
    return (Analyzer*)Hash_Fetch(self->analyzers, (Obj*)field);
}
// Look up the Similarity cached for `field`.  A NULL field name yields NULL
// without consulting the hash.
Similarity*
Schema_fetch_sim(Schema *self, const CharBuf *field) {
    return field == NULL
           ? NULL
           : (Similarity*)Hash_Fetch(self->sims, (Obj*)field);
}
// Number of fields currently specified, i.e. the size of the types hash.
uint32_t
Schema_num_fields(Schema *self) {
    uint32_t count = Hash_Get_Size(self->types);
    return count;
}
// Accessor for the Schema's Architecture.  Returns the member pointer
// directly; no reference count is adjusted here.
Architecture*
Schema_get_architecture(Schema *self) {
    Architecture *arch = self->arch;
    return arch;
}
// Accessor for the Schema's default Similarity.  Returns the member pointer
// directly; no reference count is adjusted here.
Similarity*
Schema_get_similarity(Schema *self) {
    Similarity *sim = self->sim;
    return sim;
}
// Return the keys of the types hash, i.e. the names of all specified
// fields, as produced by Hash_Keys().
VArray*
Schema_all_fields(Schema *self) {
    VArray *field_names = Hash_Keys(self->types);
    return field_names;
}
// Return the index of the first element of `array` that matches `obj`.
//
// A NULL `obj` matches the first empty (NULL) slot.  A non-NULL `obj`
// matches the first non-NULL element of the same class for which
// Obj_Equals() is true.  Throws if nothing matches.
//
// Declared `static`, like the other S_ helpers in this file, so that the
// symbol has internal linkage and does not leak out of this translation
// unit.
static uint32_t
S_find_in_array(VArray *array, Obj *obj) {
    uint32_t i, max;
    for (i = 0, max = VA_Get_Size(array); i < max; i++) {
        Obj *candidate = VA_Fetch(array, i);
        if (obj == NULL && candidate == NULL) {
            return i;
        }
        else if (obj != NULL && candidate != NULL) {
            // Only compare objects of exactly the same class.
            if (Obj_Get_VTable(obj) == Obj_Get_VTable(candidate)) {
                if (Obj_Equals(obj, candidate)) {
                    return i;
                }
            }
        }
    }
    THROW(ERR, "Couldn't find match for %o", obj);
    UNREACHABLE_RETURN(uint32_t);
}
// Serialize the Schema to a Hash with three entries:
//
//   "_class"    - a clone of the Schema's class name
//   "analyzers" - a dump of the unique-analyzer array
//   "fields"    - a Hash mapping each field name to a dump of its type
//
// Types whose class is exactly FullTextType, StringType or BlobType are
// written via their Dump_For_Schema() form; for FullTextType the "analyzer"
// entry is a stringified index into the unique-analyzer array.  Every other
// type is dumped with the generic FType_Dump().
Hash*
Schema_dump(Schema *self) {
    Hash      *result     = Hash_new(0);
    Hash      *fields_out = Hash_new(Hash_Get_Size(self->types));
    CharBuf   *field_name;
    FieldType *ftype;

    // Record class name, store dumps of unique Analyzers.
    Hash_Store_Str(result, "_class", 6,
                   (Obj*)CB_Clone(Schema_Get_Class_Name(self)));
    Hash_Store_Str(result, "analyzers", 9,
                   (Obj*)VA_Dump(self->uniq_analyzers));

    // The per-field hash is attached first and filled in below.
    Hash_Store_Str(result, "fields", 6, (Obj*)fields_out);

    Hash_Iterate(self->types);
    while (Hash_Next(self->types, (Obj**)&field_name, (Obj**)&ftype)) {
        VTable *klass      = FType_Get_VTable(ftype);
        Obj    *field_dump = NULL;

        if (klass == FULLTEXTTYPE) {
            FullTextType *text_type = (FullTextType*)ftype;
            Hash     *simplified = FullTextType_Dump_For_Schema(text_type);
            Analyzer *analyzer   = FullTextType_Get_Analyzer(text_type);
            uint32_t  tick
                = S_find_in_array(self->uniq_analyzers, (Obj*)analyzer);

            // Store the tick which references a unique analyzer.
            Hash_Store_Str(simplified, "analyzer", 8,
                           (Obj*)CB_newf("%u32", tick));
            field_dump = (Obj*)simplified;
        }
        else if (klass == STRINGTYPE || klass == BLOBTYPE) {
            // Known types with a simplified format and no analyzer.
            field_dump = (Obj*)FType_Dump_For_Schema(ftype);
        }
        else {
            // Unknown FieldType type, so punt.
            field_dump = (Obj*)FType_Dump(ftype);
        }

        Hash_Store(fields_out, (Obj*)field_name, field_dump);
    }

    return result;
}
// Build a new Schema from a dump.
//
// `dump` must be a Hash containing a "_class" CharBuf, a "fields" Hash and
// an "analyzers" VArray; anything else fails certification.  The invocant
// is not used: the class of the result is taken from "_class".
//
// Each entry of "fields" must itself be a Hash.  When it carries a "type"
// string, that string selects the FieldType class to load ("fulltext",
// "string", "blob", "i32_t", "i64_t", "f32_t", "f64_t"); an unrecognized
// string throws.  For "fulltext" the "analyzer" entry is first replaced,
// in the supplied dump itself, by the Analyzer found at that index in the
// loaded analyzers array.  Entries without a "type" string are loaded
// generically and must yield a FieldType.
Schema*
Schema_load(Schema *self, Obj *dump) {
    Hash *source = (Hash*)CERTIFY(dump, HASH);
    CharBuf *class_name
        = (CharBuf*)CERTIFY(Hash_Fetch_Str(source, "_class", 6), CHARBUF);
    VTable *vtable = VTable_singleton(class_name, NULL);
    Schema *loaded = (Schema*)VTable_Make_Obj(vtable);
    Hash *field_dumps
        = (Hash*)CERTIFY(Hash_Fetch_Str(source, "fields", 6), HASH);
    VArray *analyzer_dumps
        = (VArray*)CERTIFY(Hash_Fetch_Str(source, "analyzers", 9), VARRAY);
    VArray *analyzers
        = (VArray*)VA_Load(analyzer_dumps, (Obj*)analyzer_dumps);
    CharBuf *field;
    Hash    *field_dump;
    UNUSED_VAR(self);

    // Start with a blank Schema.
    Schema_init(loaded);
    VA_Grow(loaded->uniq_analyzers, VA_Get_Size(analyzers));

    Hash_Iterate(field_dumps);
    while (Hash_Next(field_dumps, (Obj**)&field, (Obj**)&field_dump)) {
        CharBuf   *type_str;
        FieldType *type = NULL;

        CERTIFY(field_dump, HASH);
        type_str = (CharBuf*)Hash_Fetch_Str(field_dump, "type", 4);

        if (type_str == NULL) {
            // No simplified type tag: fall back to a generic load.
            type = (FieldType*)CERTIFY(
                       Hash_Load(field_dump, (Obj*)field_dump),
                       FIELDTYPE);
        }
        else {
            // Map the type tag to the class that knows how to load it.
            VTable *type_vtable = NULL;

            if (CB_Equals_Str(type_str, "fulltext", 8)) {
                // Replace the "analyzer" tick with the real thing.
                Obj *tick
                    = CERTIFY(Hash_Fetch_Str(field_dump, "analyzer", 8),
                              OBJ);
                Analyzer *analyzer
                    = (Analyzer*)VA_Fetch(analyzers,
                                          (uint32_t)Obj_To_I64(tick));
                if (!analyzer) {
                    THROW(ERR, "Can't find analyzer for '%o'", field);
                }
                Hash_Store_Str(field_dump, "analyzer", 8, INCREF(analyzer));
                type_vtable = FULLTEXTTYPE;
            }
            else if (CB_Equals_Str(type_str, "string", 6)) {
                type_vtable = STRINGTYPE;
            }
            else if (CB_Equals_Str(type_str, "blob", 4)) {
                type_vtable = BLOBTYPE;
            }
            else if (CB_Equals_Str(type_str, "i32_t", 5)) {
                type_vtable = INT32TYPE;
            }
            else if (CB_Equals_Str(type_str, "i64_t", 5)) {
                type_vtable = INT64TYPE;
            }
            else if (CB_Equals_Str(type_str, "f32_t", 5)) {
                type_vtable = FLOAT32TYPE;
            }
            else if (CB_Equals_Str(type_str, "f64_t", 5)) {
                type_vtable = FLOAT64TYPE;
            }
            else {
                THROW(ERR, "Unknown type '%o' for field '%o'", type_str, field);
            }

            type = (FieldType*)VTable_Load_Obj(type_vtable,
                                               (Obj*)field_dump);
        }

        // Registration happens through Schema_Spec_Field(); afterwards the
        // local reference obtained from the load is released.
        Schema_Spec_Field(loaded, field, type);
        DECREF(type);
    }

    DECREF(analyzers);
    return loaded;
}
// Absorb every field definition of `other` into this Schema.
//
// Throws unless this Schema's class is `other`'s class or a descendant of
// it.  Each of other's (field, type) pairs is then passed through
// Schema_Spec_Field(), so a conflicting definition also throws.
void
Schema_eat(Schema *self, Schema *other) {
    CharBuf   *field_name;
    FieldType *field_type;
    Hash      *incoming;

    if (!Schema_Is_A(self, Schema_Get_VTable(other))) {
        THROW(ERR, "%o not a descendent of %o",
              Schema_Get_Class_Name(self), Schema_Get_Class_Name(other));
    }

    incoming = other->types;
    Hash_Iterate(incoming);
    while (Hash_Next(incoming, (Obj**)&field_name, (Obj**)&field_type)) {
        Schema_Spec_Field(self, field_name, field_type);
    }
}
// Write the Schema to `filename` inside `folder` as JSON.
//
// The dump is first written to the temporary file "schema.temp" and then
// renamed into place.  If either step fails, the current global error is
// rethrown.
//
// The rename is attempted only when the JSON file was written
// successfully.  Otherwise a failed write would be reported as a rename
// failure, or, if an old "schema.temp" had survived the best-effort delete
// below, that stale file would be renamed into place and the call would
// appear to succeed.
// NOTE(review): relies on Json_spew_json() returning false (with the
// global error set) on failure -- confirm against Lucy/Util/Json.h.
void
Schema_write(Schema *self, Folder *folder, const CharBuf *filename) {
    Hash *dump = Schema_Dump(self);
    ZombieCharBuf *schema_temp = ZCB_WRAP_STR("schema.temp", 11);
    bool_t success;

    Folder_Delete(folder, (CharBuf*)schema_temp); // Just in case.

    success = Json_spew_json((Obj*)dump, folder, (CharBuf*)schema_temp);
    if (success) {
        success = Folder_Rename(folder, (CharBuf*)schema_temp, filename);
    }

    // Release the dump before a possible non-local exit.
    DECREF(dump);
    if (!success) { RETHROW(INCREF(Err_get_error())); }
}