blob: 19052150291b82933cd4556da85f15248408e519 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_TEXTSORTCACHE
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Index/SortCache/TextSortCache.h"
#include "Lucy/Index/Segment.h"
#include "Lucy/Plan/FieldType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Store/InStream.h"
#include "Lucy/Store/Folder.h"
TextSortCache*
TextSortCache_new(const CharBuf *field, FieldType *type, int32_t cardinality,
int32_t doc_max, int32_t null_ord, int32_t ord_width,
InStream *ord_in, InStream *ix_in, InStream *dat_in) {
TextSortCache *self = (TextSortCache*)VTable_Make_Obj(TEXTSORTCACHE);
return TextSortCache_init(self, field, type, cardinality, doc_max,
null_ord, ord_width, ord_in, ix_in, dat_in);
}
TextSortCache*
TextSortCache_init(TextSortCache *self, const CharBuf *field,
FieldType *type, int32_t cardinality,
int32_t doc_max, int32_t null_ord, int32_t ord_width,
InStream *ord_in, InStream *ix_in, InStream *dat_in) {
// Validate.
if (!type || !FType_Sortable(type)) {
DECREF(self);
THROW(ERR, "'%o' isn't a sortable field", field);
}
// Memory map ords and super-init.
int64_t ord_len = InStream_Length(ord_in);
void *ords = InStream_Buf(ord_in, (size_t)ord_len);
SortCache_init((SortCache*)self, field, type, ords, cardinality, doc_max,
null_ord, ord_width);
// Validate ords file length.
double bytes_per_doc = self->ord_width / 8.0;
double max_ords = ord_len / bytes_per_doc;
if (max_ords < self->doc_max + 1) {
WARN("ORD WIDTH: %i32 %i32", ord_width, self->ord_width);
THROW(ERR, "Conflict between ord count max %f64 and doc_max %i32 for "
"field %o", max_ords, doc_max, field);
}
// Assign.
self->ord_in = (InStream*)INCREF(ord_in);
self->ix_in = (InStream*)INCREF(ix_in);
self->dat_in = (InStream*)INCREF(dat_in);
return self;
}
void
TextSortCache_destroy(TextSortCache *self) {
if (self->ord_in) {
InStream_Close(self->ord_in);
InStream_Dec_RefCount(self->ord_in);
}
if (self->ix_in) {
InStream_Close(self->ix_in);
InStream_Dec_RefCount(self->ix_in);
}
if (self->dat_in) {
InStream_Close(self->dat_in);
InStream_Dec_RefCount(self->dat_in);
}
SUPER_DESTROY(self, TEXTSORTCACHE);
}
#define NULL_SENTINEL -1
Obj*
TextSortCache_value(TextSortCache *self, int32_t ord, Obj *blank) {
if (ord == self->null_ord) {
return NULL;
}
InStream_Seek(self->ix_in, ord * sizeof(int64_t));
int64_t offset = InStream_Read_I64(self->ix_in);
if (offset == NULL_SENTINEL) {
return NULL;
}
else {
uint32_t next_ord = ord + 1;
int64_t next_offset;
while (1) {
InStream_Seek(self->ix_in, next_ord * sizeof(int64_t));
next_offset = InStream_Read_I64(self->ix_in);
if (next_offset != NULL_SENTINEL) { break; }
next_ord++;
}
// Read character data into CharBuf.
CERTIFY(blank, CHARBUF);
int64_t len = next_offset - offset;
char *ptr = CB_Grow((CharBuf*)blank, (size_t)len);
InStream_Seek(self->dat_in, offset);
InStream_Read_Bytes(self->dat_in, ptr, (size_t)len);
ptr[len] = '\0';
if (!StrHelp_utf8_valid(ptr, (size_t)len)) {
CB_Set_Size((CharBuf*)blank, 0);
THROW(ERR, "Invalid UTF-8 at %i64 in %o", offset,
InStream_Get_Filename(self->dat_in));
}
CB_Set_Size((CharBuf*)blank, (size_t)len);
}
return blank;
}
CharBuf*
TextSortCache_make_blank(TextSortCache *self) {
UNUSED_VAR(self);
return CB_new_from_trusted_utf8("", 0);
}