blob: 539cc9ee3c7d8e0605e7732bb0e5301424aa1dd0 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stdlib.h>
#define TESTLUCY_USE_SHORT_NAMES
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Test/Index/TestSortWriter.h"
#include "Clownfish/TestHarness/TestBatchRunner.h"
#include "Lucy/Analysis/StandardTokenizer.h"
#include "Lucy/Document/Doc.h"
#include "Lucy/Document/HitDoc.h"
#include "Lucy/Index/DocReader.h"
#include "Lucy/Index/Indexer.h"
#include "Lucy/Index/IndexManager.h"
#include "Lucy/Index/PolyReader.h"
#include "Lucy/Index/Segment.h"
#include "Lucy/Index/SegReader.h"
#include "Lucy/Index/SortCache.h"
#include "Lucy/Index/SortReader.h"
#include "Lucy/Index/SortWriter.h"
#include "Lucy/Plan/FullTextType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Plan/StringType.h"
#include "Lucy/Store/RAMFolder.h"
/* Field-name strings shared by the helpers in this file.  They are created
 * by S_init_strings() and released by S_destroy_strings().
 */
static String *name_str;
static String *speed_str;
static String *weight_str;
static String *home_str;
static String *cat_str;
static String *wheels_str;
static String *unused_str;
static String *nope_str;
/* Allocate a new TestSortWriter test batch object via its class. */
TestSortWriter*
TestSortWriter_new() {
    TestSortWriter *self = (TestSortWriter*)Class_Make_Obj(TESTSORTWRITER);
    return self;
}
/* Create the file-scope field-name strings.  Each call allocates fresh
 * String objects; S_destroy_strings() releases them.
 */
static void
S_init_strings() {
    // Fields that S_add_doc() may populate.
    name_str   = Str_newf("name");
    speed_str  = Str_newf("speed");
    weight_str = Str_newf("weight");
    home_str   = Str_newf("home");
    cat_str    = Str_newf("cat");
    wheels_str = Str_newf("wheels");

    // Declared in the schema but never stored by S_add_doc().
    unused_str = Str_newf("unused");

    // Mapped to the StringType that is not marked sortable.
    nope_str   = Str_newf("nope");
}
/* Release the file-scope field-name strings created by S_init_strings().
 *
 * Each pointer is reset to NULL after its reference is dropped so that the
 * statics never dangle: an accidental use after teardown then sees NULL
 * rather than a pointer to a released object.
 */
static void
S_destroy_strings() {
    DECREF(name_str);
    name_str = NULL;
    DECREF(speed_str);
    speed_str = NULL;
    DECREF(weight_str);
    weight_str = NULL;
    DECREF(home_str);
    home_str = NULL;
    DECREF(cat_str);
    cat_str = NULL;
    DECREF(wheels_str);
    wheels_str = NULL;
    DECREF(unused_str);
    unused_str = NULL;
    DECREF(nope_str);
    nope_str = NULL;
}
static Schema*
S_create_schema() {
Schema *schema = Schema_new();
StandardTokenizer *tokenizer = StandardTokenizer_new();
FullTextType *full_text_type = FullTextType_new((Analyzer*)tokenizer);
FullTextType_Set_Sortable(full_text_type, true);
StringType *string_type = StringType_new();
StringType_Set_Sortable(string_type, true);
StringType *unsortable = StringType_new();
Schema_Spec_Field(schema, name_str, (FieldType*)full_text_type);
Schema_Spec_Field(schema, speed_str, (FieldType*)string_type);
Schema_Spec_Field(schema, weight_str, (FieldType*)string_type);
Schema_Spec_Field(schema, home_str, (FieldType*)string_type);
Schema_Spec_Field(schema, cat_str, (FieldType*)string_type);
Schema_Spec_Field(schema, wheels_str, (FieldType*)string_type);
Schema_Spec_Field(schema, unused_str, (FieldType*)string_type);
Schema_Spec_Field(schema, nope_str, (FieldType*)unsortable);
DECREF(unsortable);
DECREF(string_type);
DECREF(full_text_type);
DECREF(tokenizer);
return schema;
}
/* Store `value` under `field` in `doc`.  A NULL `value` means the field is
 * absent, in which case the document is left untouched.
 */
static void
S_store_field(Doc *doc, String *field, const char *value) {
    if (value == NULL) {
        return;
    }

    // Wrap the C string in a String object without copying it onto the heap.
    String *wrapped = SSTR_WRAP_C(value);
    Doc_Store(doc, field, (Obj*)wrapped);
}
/* Build one document and hand it to `indexer`.
 *
 * Any of `name`, `speed`, `weight`, `home`, `wheels` and `nope` may be NULL,
 * in which case that field is simply not stored.  The "cat" field is always
 * set to "vehicle".  The document is added with a boost of 1.0.
 */
static void
S_add_doc(Indexer *indexer, const char *name, const char *speed,
          const char *weight, const char *home, const char *wheels,
          const char *nope) {
    Doc *entry = Doc_new(NULL, 0);

    S_store_field(entry, name_str,   name);
    S_store_field(entry, speed_str,  speed);
    S_store_field(entry, weight_str, weight);
    S_store_field(entry, home_str,   home);
    S_store_field(entry, cat_str,    "vehicle");
    S_store_field(entry, wheels_str, wheels);
    S_store_field(entry, nope_str,   nope);

    Indexer_Add_Doc(indexer, entry, 1.0f);

    // Drop this function's reference to the document.
    DECREF(entry);
}
/* Check the sort data written for one field of one segment.
 *
 * runner     - receives the test results.
 * folder     - folder holding the index files.
 * seg_reader - reader for the segment under test.
 * gen        - segment generation as text; used to build the expected file
 *              name "seg_<gen>/sort-<field_num>.ord".
 * is_used    - whether sort files are expected to exist for `field`.
 * field      - name of the field to check.
 *
 * When `is_used` is false, the only check is that the .ord file does not
 * exist.  Otherwise the file must exist, and for every doc id from 1 to the
 * segment's doc max the value looked up through the SortCache must equal the
 * value extracted from the stored document.  Two missing (NULL) values count
 * as equal; one missing and one present do not.
 */
static void
S_test_sort_cache(TestBatchRunner *runner, RAMFolder *folder,
                  SegReader *seg_reader, const char *gen, bool is_used,
                  String *field) {
    Segment *segment   = SegReader_Get_Segment(seg_reader);
    int32_t  field_num = Seg_Field_Num(segment, field);
    char    *field_str = Str_To_Utf8(field);

    // Probe for the ordinals file once, then release the temporary name.
    String *filename = Str_newf("seg_%s/sort-%i32.ord", gen, field_num);
    bool    exists   = RAMFolder_Exists(folder, filename);
    DECREF(filename);

    if (!is_used) {
        TEST_TRUE(runner, !exists,
                  "no sort files written for %s", field_str);
        free(field_str);
        return;
    }

    TEST_TRUE(runner, exists,
              "sort files written for %s", field_str);

    SortReader *sort_reader
        = (SortReader*)SegReader_Obtain(seg_reader,
                                        Class_Get_Name(SORTREADER));
    DocReader *doc_reader
        = (DocReader*)SegReader_Obtain(seg_reader, Class_Get_Name(DOCREADER));
    SortCache *sort_cache
        = SortReader_Fetch_Sort_Cache(sort_reader, field);
    int32_t doc_max = SegReader_Doc_Max(seg_reader);

    for (int32_t doc_id = 1; doc_id <= doc_max; ++doc_id) {
        int32_t  ord         = SortCache_Ordinal(sort_cache, doc_id);
        Obj     *cache_value = SortCache_Value(sort_cache, ord);
        HitDoc  *doc         = DocReader_Fetch_Doc(doc_reader, doc_id);
        Obj     *doc_value   = HitDoc_Extract(doc, field);

        bool is_equal;
        if (cache_value == NULL || doc_value == NULL) {
            // A missing value only matches another missing value.
            is_equal = (cache_value == doc_value);
        }
        else {
            is_equal = Obj_Equals(cache_value, doc_value);
        }

        // The message is printf-style: "%d" requires an int argument, so
        // convert the int32_t explicitly rather than relying on int32_t
        // happening to be int on the current platform.
        TEST_TRUE(runner, is_equal, "correct cached value field %s doc %d",
                  field_str, (int)doc_id);

        DECREF(doc_value);
        DECREF(doc);
        DECREF(cache_value);
    }

    free(field_str);
}
/* End-to-end SortWriter test.
 *
 * Indexes three vehicles into a first segment and checks its sort data,
 * adds a second segment using a NonMergingIndexManager, then deletes "bike",
 * adds one more document and optimizes.  Afterwards it checks that no file
 * paths containing "seg_1" or "seg_2" remain and that the sort data of
 * segment "3" is correct.
 */
static void
test_sort_writer(TestBatchRunner *runner) {
    Schema    *schema = S_create_schema();
    RAMFolder *folder = RAMFolder_new(NULL);

    // Fields for which sort files are expected, in the order they are
    // checked.
    String *const expect_sorted[] = {
        name_str, speed_str, weight_str, home_str, cat_str, wheels_str
    };
    const size_t num_expect_sorted
        = sizeof(expect_sorted) / sizeof(expect_sorted[0]);

    // Fields for which no sort files are expected.
    String *const expect_unsorted[] = { unused_str, nope_str };
    const size_t num_expect_unsorted
        = sizeof(expect_unsorted) / sizeof(expect_unsorted[0]);

    {
        // Add vehicles.
        Indexer *writer = Indexer_new(schema, (Obj*)folder, NULL, 0);
        S_add_doc(writer, "airplane", "0200", "8000", "air", "3", "nyet");
        S_add_doc(writer, "bike", "0015", "0025", "land", "2", NULL);
        S_add_doc(writer, "car", "0070", "3000", "land", "4", NULL);
        Indexer_Commit(writer);
        DECREF(writer);
    }

    {
        // Check the sort data of the first segment.
        PolyReader *reader    = PolyReader_open((Obj*)folder, NULL, NULL);
        Vector     *segs      = PolyReader_Get_Seg_Readers(reader);
        SegReader  *first_seg = (SegReader*)Vec_Fetch(segs, 0);

        for (size_t i = 0; i < num_expect_sorted; ++i) {
            S_test_sort_cache(runner, folder, first_seg, "1", true,
                              expect_sorted[i]);
        }
        for (size_t i = 0; i < num_expect_unsorted; ++i) {
            S_test_sort_cache(runner, folder, first_seg, "1", false,
                              expect_unsorted[i]);
        }

        DECREF(reader);
    }

    {
        // Add a second segment.
        NonMergingIndexManager *no_merge = NMIxManager_new();
        Indexer *writer
            = Indexer_new(schema, (Obj*)folder, (IndexManager*)no_merge, 0);
        // no "wheels" field -- test NULL/undef
        S_add_doc(writer, "dirigible", "0040", "0000", "air", NULL, NULL);
        Indexer_Commit(writer);
        DECREF(writer);
        DECREF(no_merge);
    }

    {
        // Consolidate everything, to test merging.
        Indexer *writer  = Indexer_new(schema, (Obj*)folder, NULL, 0);
        String  *doomed  = SSTR_WRAP_C("bike");
        Indexer_Delete_By_Term(writer, name_str, (Obj*)doomed);
        // no "wheels" field -- test NULL/undef
        S_add_doc(writer, "elephant", "0020", "6000", "land", NULL, NULL);
        Indexer_Optimize(writer);
        Indexer_Commit(writer);
        DECREF(writer);
    }

    {
        // Count leftover paths that mention either of the earlier segments.
        Vector *listing = RAMFolder_List_R(folder, NULL);
        size_t  total   = Vec_Get_Size(listing);
        int     stale   = 0;

        for (size_t tick = 0; tick < total; ++tick) {
            String *entry = (String*)Vec_Fetch(listing, tick);
            stale += (Str_Contains_Utf8(entry, "seg_1", 5)
                      || Str_Contains_Utf8(entry, "seg_2", 5))
                     ? 1
                     : 0;
        }

        TEST_INT_EQ(runner, stale, 0,
                    "all files from earlier segments zapped");
        DECREF(listing);
    }

    {
        // Check the sort data of the consolidated segment.
        PolyReader *reader     = PolyReader_open((Obj*)folder, NULL, NULL);
        Vector     *segs       = PolyReader_Get_Seg_Readers(reader);
        SegReader  *merged_seg = (SegReader*)Vec_Fetch(segs, 0);

        for (size_t i = 0; i < num_expect_sorted; ++i) {
            S_test_sort_cache(runner, folder, merged_seg, "3", true,
                              expect_sorted[i]);
        }

        DECREF(reader);
    }

    DECREF(folder);
    DECREF(schema);
}
/* Entry point for the batch: plan the tests, lower the SortWriter memory
 * threshold, then run test_sort_writer() between string setup and teardown.
 */
void
TestSortWriter_Run_IMP(TestSortWriter *self, TestBatchRunner *runner) {
    // Number of individual checks this batch announces to the runner.
    enum { NUM_PLANNED_TESTS = 57 };

    TestBatchRunner_Plan(runner, (TestBatch*)self, NUM_PLANNED_TESTS);

    // Force frequent flushes.
    SortWriter_set_default_mem_thresh(100);

    S_init_strings();
    test_sort_writer(runner);
    S_destroy_strings();
}
/* Allocate a NonMergingIndexManager and run its initializer on it. */
NonMergingIndexManager*
NMIxManager_new() {
    return NMIxManager_init(
        (NonMergingIndexManager*)Class_Make_Obj(NONMERGINGINDEXMANAGER));
}
/* Initialize `self` through the IndexManager initializer, passing NULL for
 * both of its remaining arguments.  Returns `self`.
 */
NonMergingIndexManager*
NMIxManager_init(NonMergingIndexManager *self) {
    IndexManager *as_parent = (IndexManager*)self;
    IxManager_init(as_parent, NULL, NULL);
    return self;
}
/* Recycle implementation that ignores every argument and always returns a
 * new Vector created with Vec_new(0).
 * NOTE(review): presumably an empty result tells the Indexer that no existing
 * segments should be merged -- confirm against IndexManager's Recycle docs.
 */
Vector*
NMIxManager_Recycle_IMP(NonMergingIndexManager *self, PolyReader *reader,
                        lucy_DeletionsWriter *del_writer, int64_t cutoff,
                        bool optimize) {
    // None of the inputs influence the result.
    UNUSED_VAR(self);
    UNUSED_VAR(reader);
    UNUSED_VAR(del_writer);
    UNUSED_VAR(cutoff);
    UNUSED_VAR(optimize);

    Vector *nothing_to_recycle = Vec_new(0);
    return nothing_to_recycle;
}