blob: 6e5615a4d996f96cafd560fd3f34f023ba1fd1df [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_INVERTER
#define C_LUCY_INVERTERENTRY
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Index/Inverter.h"
#include "Lucy/Analysis/Analyzer.h"
#include "Lucy/Analysis/Token.h"
#include "Lucy/Analysis/Inversion.h"
#include "Lucy/Document/Doc.h"
#include "Lucy/Index/Segment.h"
#include "Lucy/Index/Similarity.h"
#include "Lucy/Plan/FieldType.h"
#include "Lucy/Plan/BlobType.h"
#include "Lucy/Plan/NumericType.h"
#include "Lucy/Plan/FullTextType.h"
#include "Lucy/Plan/TextType.h"
#include "Lucy/Plan/Schema.h"
Inverter*
Inverter_new(Schema *schema, Segment *segment) {
Inverter *self = (Inverter*)VTable_Make_Obj(INVERTER);
return Inverter_init(self, schema, segment);
}
Inverter*
Inverter_init(Inverter *self, Schema *schema, Segment *segment) {
// Init.
self->tick = -1;
self->doc = NULL;
self->sorted = false;
self->blank = InvEntry_new(NULL, NULL, 0);
self->current = self->blank;
// Derive.
self->entry_pool = VA_new(Schema_Num_Fields(schema));
self->entries = VA_new(Schema_Num_Fields(schema));
// Assign.
self->schema = (Schema*)INCREF(schema);
self->segment = (Segment*)INCREF(segment);
return self;
}
void
Inverter_destroy(Inverter *self) {
Inverter_Clear(self);
DECREF(self->blank);
DECREF(self->entries);
DECREF(self->entry_pool);
DECREF(self->schema);
DECREF(self->segment);
SUPER_DESTROY(self, INVERTER);
}
uint32_t
Inverter_iterate(Inverter *self) {
self->tick = -1;
if (!self->sorted) {
VA_Sort(self->entries, NULL, NULL);
self->sorted = true;
}
return VA_Get_Size(self->entries);
}
int32_t
Inverter_next(Inverter *self) {
self->current = (InverterEntry*)VA_Fetch(self->entries, ++self->tick);
if (!self->current) { self->current = self->blank; } // Exhausted.
return self->current->field_num;
}
void
Inverter_set_doc(Inverter *self, Doc *doc) {
Inverter_Clear(self); // Zap all cached field values and Inversions.
self->doc = (Doc*)INCREF(doc);
}
void
Inverter_set_boost(Inverter *self, float boost) {
self->boost = boost;
}
float
Inverter_get_boost(Inverter *self) {
return self->boost;
}
Doc*
Inverter_get_doc(Inverter *self) {
return self->doc;
}
CharBuf*
Inverter_get_field_name(Inverter *self) {
return self->current->field;
}
Obj*
Inverter_get_value(Inverter *self) {
return self->current->value;
}
FieldType*
Inverter_get_type(Inverter *self) {
return self->current->type;
}
Analyzer*
Inverter_get_analyzer(Inverter *self) {
return self->current->analyzer;
}
Similarity*
Inverter_get_similarity(Inverter *self) {
return self->current->sim;
}
Inversion*
Inverter_get_inversion(Inverter *self) {
return self->current->inversion;
}
void
Inverter_add_field(Inverter *self, InverterEntry *entry) {
// Get an Inversion, going through analyzer if appropriate.
if (entry->analyzer) {
DECREF(entry->inversion);
entry->inversion = Analyzer_Transform_Text(entry->analyzer,
(CharBuf*)entry->value);
Inversion_Invert(entry->inversion);
}
else if (entry->indexed || entry->highlightable) {
ViewCharBuf *value = (ViewCharBuf*)entry->value;
size_t token_len = ViewCB_Get_Size(value);
Token *seed = Token_new((char*)ViewCB_Get_Ptr8(value),
token_len, 0, token_len, 1.0f, 1);
DECREF(entry->inversion);
entry->inversion = Inversion_new(seed);
DECREF(seed);
Inversion_Invert(entry->inversion); // Nearly a no-op.
}
// Prime the iterator.
VA_Push(self->entries, INCREF(entry));
self->sorted = false;
}
void
Inverter_clear(Inverter *self) {
uint32_t i, max;
for (i = 0, max = VA_Get_Size(self->entries); i < max; i++) {
InvEntry_Clear(VA_Fetch(self->entries, i));
}
VA_Clear(self->entries);
self->tick = -1;
DECREF(self->doc);
self->doc = NULL;
}
InverterEntry*
InvEntry_new(Schema *schema, const CharBuf *field, int32_t field_num) {
InverterEntry *self = (InverterEntry*)VTable_Make_Obj(INVERTERENTRY);
return InvEntry_init(self, schema, field, field_num);
}
InverterEntry*
InvEntry_init(InverterEntry *self, Schema *schema, const CharBuf *field,
int32_t field_num) {
self->field_num = field_num;
self->field = field ? CB_Clone(field) : NULL;
self->inversion = NULL;
if (schema) {
self->analyzer
= (Analyzer*)INCREF(Schema_Fetch_Analyzer(schema, field));
self->sim = (Similarity*)INCREF(Schema_Fetch_Sim(schema, field));
self->type = (FieldType*)INCREF(Schema_Fetch_Type(schema, field));
if (!self->type) { THROW(ERR, "Unknown field: '%o'", field); }
uint8_t prim_id = FType_Primitive_ID(self->type);
switch (prim_id & FType_PRIMITIVE_ID_MASK) {
case FType_TEXT:
self->value = (Obj*)ViewCB_new_from_trusted_utf8(NULL, 0);
break;
case FType_BLOB:
self->value = (Obj*)ViewBB_new(NULL, 0);
break;
case FType_INT32:
self->value = (Obj*)Int32_new(0);
break;
case FType_INT64:
self->value = (Obj*)Int64_new(0);
break;
case FType_FLOAT32:
self->value = (Obj*)Float32_new(0);
break;
case FType_FLOAT64:
self->value = (Obj*)Float64_new(0);
break;
default:
THROW(ERR, "Unrecognized primitive id: %i8", prim_id);
}
self->indexed = FType_Indexed(self->type);
if (self->indexed && FType_Is_A(self->type, NUMERICTYPE)) {
THROW(ERR, "Field '%o' spec'd as indexed, but numerical types cannot "
"be indexed yet", field);
}
if (FType_Is_A(self->type, FULLTEXTTYPE)) {
self->highlightable
= FullTextType_Highlightable((FullTextType*)self->type);
}
}
return self;
}
void
InvEntry_destroy(InverterEntry *self) {
DECREF(self->field);
DECREF(self->value);
DECREF(self->analyzer);
DECREF(self->type);
DECREF(self->sim);
DECREF(self->inversion);
SUPER_DESTROY(self, INVERTERENTRY);
}
void
InvEntry_clear(InverterEntry *self) {
DECREF(self->inversion);
self->inversion = NULL;
}
int32_t
InvEntry_compare_to(InverterEntry *self, Obj *other) {
InverterEntry *competitor
= (InverterEntry*)CERTIFY(other, INVERTERENTRY);
return self->field_num - competitor->field_num;
}