blob: 185685043f1862bfa9a45d00ab1e04f96241af07 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_LEXICONREADER
#define C_LUCY_POLYLEXICONREADER
#define C_LUCY_DEFAULTLEXICONREADER
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Index/LexiconReader.h"
#include "Lucy/Plan/FieldType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Index/PolyLexicon.h"
#include "Lucy/Index/SegLexicon.h"
#include "Lucy/Index/Segment.h"
#include "Lucy/Index/Snapshot.h"
#include "Lucy/Index/TermInfo.h"
#include "Lucy/Store/Folder.h"
/* Initialize the LexiconReader portion of an object.
 *
 * All arguments are forwarded unchanged to DataReader_init().  After the
 * base initializer runs, ABSTRACT_CLASS_CHECK is applied against
 * LEXICONREADER (presumably to reject direct instantiation of the abstract
 * class -- confirm against the macro definition).
 *
 * Returns `self`.
 */
LexiconReader*
LexReader_init(LexiconReader *self, Schema *schema, Folder *folder,
               Snapshot *snapshot, VArray *segments, int32_t seg_tick) {
    DataReader *base = (DataReader*)self;
    DataReader_init(base, schema, folder, snapshot, segments, seg_tick);
    ABSTRACT_CLASS_CHECK(self, LEXICONREADER);
    return self;
}
/* Build a reader that combines several LexiconReaders.
 *
 * `self` is not consulted; the result is always a freshly constructed
 * PolyLexiconReader wrapping `readers` and `offsets`.
 */
LexiconReader*
LexReader_aggregator(LexiconReader *self, VArray *readers, I32Array *offsets) {
    UNUSED_VAR(self);
    PolyLexiconReader *poly = PolyLexReader_new(readers, offsets);
    return (LexiconReader*)poly;
}
/* Allocate a PolyLexiconReader via its VTable and hand it to
 * PolyLexReader_init() together with `readers` and `offsets`.
 *
 * Returns whatever PolyLexReader_init() returns.
 */
PolyLexiconReader*
PolyLexReader_new(VArray *readers, I32Array *offsets) {
    PolyLexiconReader *poly
        = (PolyLexiconReader*)VTable_Make_Obj(POLYLEXICONREADER);
    return PolyLexReader_init(poly, readers, offsets);
}
/* Initialize a PolyLexiconReader.
 *
 * Every element of `readers` is passed through CERTIFY against
 * LEXICONREADER.  The Schema handed to the parent initializer is the first
 * non-NULL schema reported by those readers; it stays NULL when `readers`
 * is empty.  The parent is initialized with NULL folder, snapshot and
 * segments, and a seg_tick of -1.
 *
 * `readers` and `offsets` are INCREF'd and stored on `self`.
 *
 * Returns `self`.
 */
PolyLexiconReader*
PolyLexReader_init(PolyLexiconReader *self, VArray *readers,
                   I32Array *offsets) {
    Schema *schema = NULL;
    const uint32_t num_readers = VA_Get_Size(readers);

    for (uint32_t tick = 0; tick < num_readers; tick++) {
        LexiconReader *child
            = (LexiconReader*)CERTIFY(VA_Fetch(readers, tick), LEXICONREADER);
        if (schema == NULL) {
            schema = LexReader_Get_Schema(child);
        }
    }

    LexReader_init((LexiconReader*)self, schema, NULL, NULL, NULL, -1);

    self->readers = (VArray*)INCREF(readers);
    self->offsets = (I32Array*)INCREF(offsets);

    return self;
}
/* Close every non-NULL child reader and then empty the readers array.
 *
 * Does nothing when `self->readers` is NULL.  The array itself is kept
 * (only cleared), so `self->readers` remains a valid, empty VArray
 * afterwards.
 */
void
PolyLexReader_close(PolyLexiconReader *self) {
    VArray *children = self->readers;
    if (children == NULL) {
        return;
    }

    const uint32_t num_children = VA_Get_Size(children);
    for (uint32_t tick = 0; tick < num_children; tick++) {
        LexiconReader *child = (LexiconReader*)VA_Fetch(children, tick);
        if (child != NULL) {
            LexReader_Close(child);
        }
    }

    VA_Clear(children);
}
/* Destructor: drop the references taken on `offsets` and `readers`, then
 * chain to the parent class destructor.
 */
void
PolyLexReader_destroy(PolyLexiconReader *self) {
    // The two members are independent, so release order does not matter.
    DECREF(self->offsets);
    DECREF(self->readers);
    SUPER_DESTROY(self, POLYLEXICONREADER);
}
/* Return a new PolyLexicon for `field`, optionally positioned at `term`.
 *
 * Returns NULL when:
 *   - `field` is NULL,
 *   - the schema has no type registered for `field`, or
 *   - the PolyLexicon built over the child readers reports zero segment
 *     lexicons (in which case the temporary object is released).
 *
 * When `term` is non-NULL the lexicon is seeked to it before being
 * returned.
 */
Lexicon*
PolyLexReader_lexicon(PolyLexiconReader *self, const CharBuf *field,
                      Obj *term) {
    if (field == NULL) {
        return NULL;
    }

    Schema *schema = PolyLexReader_Get_Schema(self);
    if (Schema_Fetch_Type(schema, field) == NULL) {
        return NULL;
    }

    PolyLexicon *poly_lex = PolyLex_new(field, self->readers);
    if (PolyLex_Get_Num_Seg_Lexicons(poly_lex) == 0) {
        // Nothing to iterate over: discard the empty lexicon.
        DECREF(poly_lex);
        return NULL;
    }

    if (term != NULL) {
        PolyLex_Seek(poly_lex, term);
    }
    return (Lexicon*)poly_lex;
}
/* Sum the document frequency of `term` in `field` across all child
 * readers.  NULL slots in the readers array are skipped.
 *
 * Returns 0 when there are no child readers.
 */
uint32_t
PolyLexReader_doc_freq(PolyLexiconReader *self, const CharBuf *field,
                       Obj *term) {
    uint32_t total = 0;
    const uint32_t num_children = VA_Get_Size(self->readers);

    for (uint32_t tick = 0; tick < num_children; tick++) {
        LexiconReader *child = (LexiconReader*)VA_Fetch(self->readers, tick);
        if (child == NULL) {
            continue;
        }
        total += LexReader_Doc_Freq(child, field, term);
    }

    return total;
}
/* Allocate a DefaultLexiconReader via its VTable and initialize it with
 * DefLexReader_init(), forwarding every argument unchanged.
 *
 * Returns whatever DefLexReader_init() returns.
 */
DefaultLexiconReader*
DefLexReader_new(Schema *schema, Folder *folder, Snapshot *snapshot,
                 VArray *segments, int32_t seg_tick) {
    DefaultLexiconReader *reader
        = (DefaultLexiconReader*)VTable_Make_Obj(DEFAULTLEXICONREADER);
    return DefLexReader_init(reader, schema, folder, snapshot, segments,
                             seg_tick);
}
// Report whether a SegLexicon can safely be built for `field` in `segment`.
//
// Returns false when the schema has no type for the field or the type is
// not indexed.  Otherwise returns whether the per-field lexicon file
// "<seg_name>/lexicon-<field_num>.dat" exists in `folder`, i.e. whether any
// terms are present for this field in this segment.
static bool_t
S_has_data(Schema *schema, Folder *folder, Segment *segment, CharBuf *field) {
    FieldType *type = Schema_Fetch_Type(schema, field);
    if (type == NULL || !FType_Indexed(type)) {
        // Unknown or unindexed field: no lexicon data to look for.
        return false;
    }

    // Probe for the lexicon data file belonging to this field.
    int32_t  field_num = Seg_Field_Num(segment, field);
    CharBuf *seg_name  = Seg_Get_Name(segment);
    CharBuf *path = CB_newf("%o/lexicon-%i32.dat", seg_name, field_num);
    bool_t   exists = Folder_Exists(folder, path);
    DECREF(path);
    return exists;
}
/* Initialize a DefaultLexiconReader.
 *
 * After running the parent initializer, builds `self->lexicons`: a VArray
 * indexed by field number (starting at 1) holding one SegLexicon for each
 * field that the segment knows by name and for which S_has_data() reports
 * lexicon data.  Slots for other fields are left unset.
 *
 * Returns `self`.
 */
DefaultLexiconReader*
DefLexReader_init(DefaultLexiconReader *self, Schema *schema, Folder *folder,
                  Snapshot *snapshot, VArray *segments, int32_t seg_tick) {
    // Parent class setup.
    LexReader_init((LexiconReader*)self, schema, folder, snapshot, segments,
                   seg_tick);
    Segment *segment = DefLexReader_Get_Segment(self);

    // Field numbers run from 1 through the schema's field count.
    const uint32_t num_fields = Schema_Num_Fields(schema);
    const uint32_t limit      = num_fields + 1;
    self->lexicons = VA_new(num_fields);

    for (uint32_t field_num = 1; field_num < limit; field_num++) {
        CharBuf *field = Seg_Field_Name(segment, field_num);
        if (field == NULL) {
            continue;
        }
        if (!S_has_data(schema, folder, segment, field)) {
            continue;
        }
        SegLexicon *seg_lex = SegLex_new(schema, folder, segment, field);
        VA_Store(self->lexicons, field_num, (Obj*)seg_lex);
    }

    return self;
}
/* Release the per-field lexicon array and leave `self->lexicons` NULL so
 * that the destructor's later DECREF sees a NULL pointer.
 */
void
DefLexReader_close(DefaultLexiconReader *self) {
    VArray *doomed = self->lexicons;
    self->lexicons = NULL;
    DECREF(doomed);
}
/* Destructor: drop the reference on the lexicon array (which is NULL if
 * DefLexReader_close() already ran), then chain to the parent class
 * destructor.
 */
void
DefLexReader_destroy(DefaultLexiconReader *self) {
    DECREF(self->lexicons);
    SUPER_DESTROY(self, DEFAULTLEXICONREADER);
}
/* Return a brand-new SegLexicon for `field`, seeked to `term`.
 *
 * The cached lexicon stored at the field's number is used only as a marker
 * that the field has data; it is never returned.  When no cached lexicon
 * exists for the field, NULL is returned.
 */
Lexicon*
DefLexReader_lexicon(DefaultLexiconReader *self, const CharBuf *field,
                     Obj *term) {
    int32_t field_num = Seg_Field_Num(self->segment, field);

    // A missing cache entry means there is no lexicon data for the field.
    if (VA_Fetch(self->lexicons, field_num) == NULL) {
        return NULL;
    }

    SegLexicon *fresh
        = SegLex_new(self->schema, self->folder, self->segment, field);
    SegLex_Seek(fresh, term);
    return (Lexicon*)fresh;
}
// Look up the TermInfo for an exact `target` term in `field`, using the
// cached SegLexicon for that field.
//
// Returns NULL when `field` or `target` is NULL, when no lexicon is cached
// for the field, or when the term the lexicon lands on after seeking does
// not equal `target`.  The returned TermInfo comes straight from the cached
// lexicon (presumably still owned by it -- the public fetch wrapper clones
// the result before returning it).
static TermInfo*
S_find_tinfo(DefaultLexiconReader *self, const CharBuf *field, Obj *target) {
    if (field == NULL || target == NULL) {
        return NULL;
    }

    int32_t field_num = Seg_Field_Num(self->segment, field);
    SegLexicon *seg_lex = (SegLexicon*)VA_Fetch(self->lexicons, field_num);
    if (seg_lex == NULL) {
        return NULL;
    }

    // Advance until the current term is greater than or equal to the target.
    SegLex_Seek(seg_lex, target);

    // Only an exact match counts as a hit.
    Obj *landed = SegLex_Get_Term(seg_lex);
    if (landed == NULL || !Obj_Equals(target, landed)) {
        return NULL;
    }
    return SegLex_Get_Term_Info(seg_lex);
}
/* Return a clone of the TermInfo for `target` in `field`, or NULL when
 * S_find_tinfo() finds no exact match.
 */
TermInfo*
DefLexReader_fetch_term_info(DefaultLexiconReader *self,
                             const CharBuf *field, Obj *target) {
    TermInfo *found = S_find_tinfo(self, field, target);
    if (found == NULL) {
        return NULL;
    }
    return TInfo_Clone(found);
}
/* Return the document frequency recorded for `term` in `field`, or 0 when
 * S_find_tinfo() finds no exact match.
 */
uint32_t
DefLexReader_doc_freq(DefaultLexiconReader *self, const CharBuf *field,
                      Obj *term) {
    TermInfo *found = S_find_tinfo(self, field, term);
    if (found == NULL) {
        return 0;
    }
    return TInfo_Get_Doc_Freq(found);
}