blob: 07737d9902712ce6622ead86ef2cba4dd0f4510e [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_POLYSEARCHER
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Search/PolySearcher.h"
#include "Lucy/Document/HitDoc.h"
#include "Lucy/Index/DocVector.h"
#include "Lucy/Index/PolyReader.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Search/Collector.h"
#include "Lucy/Search/HitQueue.h"
#include "Lucy/Search/Query.h"
#include "Lucy/Search/MatchDoc.h"
#include "Lucy/Search/Matcher.h"
#include "Lucy/Search/Searcher.h"
#include "Lucy/Search/SortSpec.h"
#include "Lucy/Search/TopDocs.h"
#include "Lucy/Search/Compiler.h"
/* Initialize a PolySearcher that fronts every Searcher in `searchers`.
 *
 * Each sub-searcher is assigned a start offset equal to the sum of the
 * Doc_Max values of the searchers that precede it in the array; the sum
 * over all of them becomes this object's doc_max.
 *
 * @param self The object being initialized.
 * @param schema Passed to Searcher_init(); each sub-searcher's Schema must
 * report the same VTable as this one.
 * @param searchers VArray whose elements must all pass
 * CERTIFY(..., SEARCHER).  The array is INCREF'd, not copied.
 * @return self.
 *
 * THROWs ERR ("Conflicting schemas: ...") when a sub-searcher's Schema has
 * a different VTable than `schema`.
 */
PolySearcher*
PolySearcher_init(PolySearcher *self, Schema *schema, VArray *searchers) {
    const uint32_t num_searchers = VA_Get_Size(searchers);
    int32_t *starts_array;
    int32_t doc_max = 0;
    uint32_t i;

    Searcher_init((Searcher*)self, schema);
    self->searchers = (VArray*)INCREF(searchers);
    self->starts = NULL; // Safe cleanup.

    // Validate every searcher up front.  Nothing has been allocated for
    // the starts buffer yet, so a THROW from CERTIFY or from the schema
    // check below cannot leak it.
    for (i = 0; i < num_searchers; i++) {
        Searcher *searcher
            = (Searcher*)CERTIFY(VA_Fetch(searchers, i), SEARCHER);
        Schema *candidate = Searcher_Get_Schema(searcher);
        VTable *orig_vt = Schema_Get_VTable(schema);
        VTable *candidate_vt = Schema_Get_VTable(candidate);

        // Confirm that searchers all use the same schema.
        if (orig_vt != candidate_vt) {
            THROW(ERR, "Conflicting schemas: '%o', '%o'",
                  Schema_Get_Class_Name(schema),
                  Schema_Get_Class_Name(candidate));
        }
    }

    // Derive doc_max and relative start offsets.
    starts_array = (int32_t*)MALLOCATE(num_searchers * sizeof(int32_t));
    for (i = 0; i < num_searchers; i++) {
        Searcher *searcher = (Searcher*)VA_Fetch(searchers, i);
        starts_array[i] = (int32_t)doc_max;
        doc_max += Searcher_Doc_Max(searcher);
    }

    self->doc_max = doc_max;
    // The I32Array is built with the "steal" constructor from the buffer
    // allocated above.
    self->starts = I32Arr_new_steal(starts_array, num_searchers);

    return self;
}
/* Destructor.  Releases the `starts` and `searchers` members, then invokes
 * SUPER_DESTROY for POLYSEARCHER as the final step.
 */
void
PolySearcher_destroy(PolySearcher *self) {
    // The two members are independent objects; release both before
    // SUPER_DESTROY runs.
    DECREF(self->starts);
    DECREF(self->searchers);
    SUPER_DESTROY(self, POLYSEARCHER);
}
/* Fetch the HitDoc for a doc id expressed in this PolySearcher's combined
 * doc id space.
 *
 * The sub-searcher is located with PolyReader_sub_tick() over `starts`; the
 * request is forwarded with the sub-searcher's start offset subtracted, and
 * the returned HitDoc has its doc id set back to the caller's `doc_id`.
 *
 * @param self The PolySearcher.
 * @param doc_id Doc id in the combined space.
 * @return The HitDoc returned by the sub-searcher's Fetch_Doc.
 *
 * THROWs ERR ("Invalid doc id: ...") when no searcher is stored at the
 * computed tick.
 */
HitDoc*
PolySearcher_fetch_doc(PolySearcher *self, int32_t doc_id) {
    uint32_t  tick     = PolyReader_sub_tick(self->starts, doc_id);
    Searcher *searcher = (Searcher*)VA_Fetch(self->searchers, tick);

    // Reject the doc id before `tick` is used to index `starts`, so that a
    // tick with no searcher behind it is reported as an invalid doc id.
    if (!searcher) { THROW(ERR, "Invalid doc id: %i32", doc_id); }

    int32_t offset  = I32Arr_Get(self->starts, tick);
    HitDoc *hit_doc = Searcher_Fetch_Doc(searcher, doc_id - offset);

    // Restore the combined-space id on the result.
    HitDoc_Set_Doc_ID(hit_doc, doc_id);
    return hit_doc;
}
/* Fetch the DocVector for a doc id expressed in this PolySearcher's
 * combined doc id space.
 *
 * The sub-searcher is located with PolyReader_sub_tick() over `starts` and
 * the request is forwarded with that sub-searcher's start offset
 * subtracted from `doc_id`.
 *
 * @param self The PolySearcher.
 * @param doc_id Doc id in the combined space.
 * @return The DocVector returned by the sub-searcher's Fetch_Doc_Vec.
 *
 * THROWs ERR ("Invalid doc id: ...") when no searcher is stored at the
 * computed tick.
 */
DocVector*
PolySearcher_fetch_doc_vec(PolySearcher *self, int32_t doc_id) {
    uint32_t  tick     = PolyReader_sub_tick(self->starts, doc_id);
    Searcher *searcher = (Searcher*)VA_Fetch(self->searchers, tick);

    // Reject the doc id before `tick` is used to index `starts`, so that a
    // tick with no searcher behind it is reported as an invalid doc id.
    if (!searcher) { THROW(ERR, "Invalid doc id: %i32", doc_id); }

    int32_t start = I32Arr_Get(self->starts, tick);
    return Searcher_Fetch_Doc_Vec(searcher, doc_id - start);
}
/* Accessor for the `doc_max` member, which PolySearcher_init() sets to the
 * sum of the sub-searchers' Doc_Max values.
 */
int32_t
PolySearcher_doc_max(PolySearcher *self) {
    const int32_t combined_doc_max = self->doc_max;
    return combined_doc_max;
}
/* Return the sum of Searcher_Doc_Freq(field, term) over every sub-searcher.
 *
 * @param self The PolySearcher.
 * @param field Field name, forwarded unchanged to each sub-searcher.
 * @param term Term object, forwarded unchanged to each sub-searcher.
 * @return The accumulated doc freq; 0 when there are no sub-searchers.
 */
uint32_t
PolySearcher_doc_freq(PolySearcher *self, const CharBuf *field, Obj *term) {
    VArray *const subs = self->searchers;
    const uint32_t num_subs = VA_Get_Size(subs);
    uint32_t total = 0;
    uint32_t tick = 0;

    while (tick < num_subs) {
        Searcher *sub = (Searcher*)VA_Fetch(subs, tick);
        total += Searcher_Doc_Freq(sub, field, term);
        tick++;
    }

    return total;
}
/* Add `base` to the doc id of every MatchDoc in `match_docs`, in place.
 *
 * @param match_docs VArray of MatchDoc objects to adjust.
 * @param base Amount added to each stored doc id.
 */
static void
S_modify_doc_ids(VArray *match_docs, int32_t base) {
    const uint32_t count = VA_Get_Size(match_docs);
    uint32_t tick;

    for (tick = 0; tick < count; tick++) {
        MatchDoc *const hit = (MatchDoc*)VA_Fetch(match_docs, tick);
        const int32_t local_id = MatchDoc_Get_Doc_ID(hit);
        MatchDoc_Set_Doc_ID(hit, local_id + base);
    }
}
/* Run `query` against every sub-searcher and merge the results into a
 * single TopDocs.
 *
 * Each sub-searcher is asked for up to `num_wanted` hits.  The doc ids of
 * its MatchDocs are shifted by that sub-searcher's start offset and the
 * MatchDocs are then offered to a HitQueue created with capacity argument
 * `num_wanted`.  The returned TopDocs carries everything popped from the
 * queue plus the sum of the sub-searchers' total hit counts.
 *
 * @param self The PolySearcher.
 * @param query A Query, or a Compiler (detected via Query_Is_A), in which
 * case it is used directly instead of compiling a new one.
 * @param num_wanted Number of hits requested from each sub-searcher and
 * passed to HitQ_new.
 * @param sort_spec Optional SortSpec; when NULL the HitQueue is created
 * with a NULL schema and a NULL sort spec.
 * @return A new TopDocs.
 */
TopDocs*
PolySearcher_top_docs(PolySearcher *self, Query *query, uint32_t num_wanted,
                      SortSpec *sort_spec) {
    Schema   *schema  = PolySearcher_Get_Schema(self);
    VArray   *subs    = self->searchers;
    I32Array *offsets = self->starts;
    HitQueue *queue;
    Compiler *compiler;
    VArray   *merged;
    TopDocs  *retval;
    uint32_t  hit_count = 0;
    uint32_t  num_subs;
    uint32_t  tick;

    // The queue only receives the schema when there is a sort spec.
    if (sort_spec) {
        queue = HitQ_new(schema, sort_spec, num_wanted);
    }
    else {
        queue = HitQ_new(NULL, NULL, num_wanted);
    }

    // Either way we end up holding one reference to `compiler`, which is
    // released at the bottom of the function.
    if (Query_Is_A(query, COMPILER)) {
        compiler = (Compiler*)INCREF(query);
    }
    else {
        compiler = Query_Make_Compiler(query, (Searcher*)self,
                                       Query_Get_Boost(query));
    }

    num_subs = VA_Get_Size(subs);
    for (tick = 0; tick < num_subs; tick++) {
        Searcher *sub      = (Searcher*)VA_Fetch(subs, tick);
        int32_t   base     = I32Arr_Get(offsets, tick);
        TopDocs  *sub_top  = Searcher_Top_Docs(sub, (Query*)compiler,
                                               num_wanted, sort_spec);
        VArray   *sub_hits = TopDocs_Get_Match_Docs(sub_top);
        uint32_t  num_sub_hits;
        uint32_t  j;

        hit_count += TopDocs_Get_Total_Hits(sub_top);

        // Shift all of this sub-searcher's doc ids into the combined space
        // before any of them are offered to the queue.
        S_modify_doc_ids(sub_hits, base);

        num_sub_hits = VA_Get_Size(sub_hits);
        for (j = 0; j < num_sub_hits; j++) {
            MatchDoc *hit = (MatchDoc*)VA_Fetch(sub_hits, j);
            // Stop offering hits from this sub-searcher at the first one
            // the queue turns down.
            // NOTE(review): presumably valid because sub results arrive in
            // ranked order -- confirm.
            if (!HitQ_Insert(queue, INCREF(hit))) { break; }
        }

        DECREF(sub_top);
    }

    merged = HitQ_Pop_All(queue);
    retval = TopDocs_new(merged, hit_count);

    DECREF(merged);
    DECREF(compiler);
    DECREF(queue);

    return retval;
}
/* Run `query` against every sub-searcher, feeding hits to `collector`.
 *
 * For each sub-searcher, `collector` is wrapped in a new OffsetCollector
 * constructed with that sub-searcher's start offset; the wrapper is what
 * gets passed to Searcher_Collect, and it is released afterwards.
 *
 * @param self The PolySearcher.
 * @param query Query forwarded unchanged to each sub-searcher.
 * @param collector The Collector wrapped by each OffsetCollector.
 */
void
PolySearcher_collect(PolySearcher *self, Query *query,
                     Collector *collector) {
    VArray *const subs = self->searchers;
    I32Array *offsets = self->starts;
    const uint32_t num_subs = VA_Get_Size(subs);
    uint32_t tick;

    for (tick = 0; tick < num_subs; tick++) {
        Searcher *sub = (Searcher*)VA_Fetch(subs, tick);
        int32_t base = I32Arr_Get(offsets, tick);
        OffsetCollector *wrapper = OffsetColl_new(collector, base);

        Searcher_Collect(sub, query, (Collector*)wrapper);
        DECREF(wrapper);
    }
}