blob: 54779f446c7dffcea9376ce4f184113b60a3c320 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define TESTLUCY_USE_SHORT_NAMES
#include "Lucy/Util/ToolSet.h"
#include "Clownfish/TestHarness/TestBatchRunner.h"
#include "Lucy/Test.h"
#include "Lucy/Test/Plan/TestFieldMisc.h"
#include "Lucy/Analysis/EasyAnalyzer.h"
#include "Lucy/Analysis/StandardTokenizer.h"
#include "Lucy/Document/Doc.h"
#include "Lucy/Document/HitDoc.h"
#include "Lucy/Index/Indexer.h"
#include "Lucy/Plan/FullTextType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Plan/StringType.h"
#include "Lucy/Search/Hits.h"
#include "Lucy/Search/IndexSearcher.h"
#include "Lucy/Search/TermQuery.h"
#include "Lucy/Store/RAMFolder.h"
static String *analyzed_str;
static String *easy_analyzed_str;
static String *state_str;
static String *states_str;
static String *string_str;
static String *unindexed_but_analyzed_str;
static String *unindexed_unanalyzed_str;
static String *united_states_str;
TestFieldMisc*
TestFieldMisc_new() {
return (TestFieldMisc*)Class_Make_Obj(TESTFIELDMISC);
}
static void
S_init_strings() {
analyzed_str = Str_newf("analyzed");
easy_analyzed_str = Str_newf("easy_analyzed");
state_str = Str_newf("state");
states_str = Str_newf("States");
string_str = Str_newf("string");
unindexed_but_analyzed_str = Str_newf("unindexed_but_analyzed");
unindexed_unanalyzed_str = Str_newf("unindexed_unanalyzed");
united_states_str = Str_newf("United States");
}
static void
S_destroy_strings() {
DECREF(analyzed_str);
DECREF(easy_analyzed_str);
DECREF(state_str);
DECREF(states_str);
DECREF(string_str);
DECREF(unindexed_but_analyzed_str);
DECREF(unindexed_unanalyzed_str);
DECREF(united_states_str);
}
static Schema*
S_create_schema() {
Schema *schema = Schema_new();
StandardTokenizer *tokenizer = StandardTokenizer_new();
String *language = Str_newf("en");
EasyAnalyzer *easy_analyzer = EasyAnalyzer_new(language);
FullTextType *plain = FullTextType_new((Analyzer*)tokenizer);
FullTextType *easy_analyzed = FullTextType_new((Analyzer*)easy_analyzer);
StringType *string_spec = StringType_new();
FullTextType *unindexed_but_analyzed
= FullTextType_new((Analyzer*)tokenizer);
FullTextType_Set_Indexed(unindexed_but_analyzed, false);
StringType *unindexed_unanalyzed = StringType_new();
StringType_Set_Indexed(unindexed_unanalyzed, false);
Schema_Spec_Field(schema, analyzed_str, (FieldType*)plain);
Schema_Spec_Field(schema, easy_analyzed_str, (FieldType*)easy_analyzed);
Schema_Spec_Field(schema, string_str, (FieldType*)string_spec);
Schema_Spec_Field(schema, unindexed_but_analyzed_str,
(FieldType*)unindexed_but_analyzed);
Schema_Spec_Field(schema, unindexed_unanalyzed_str,
(FieldType*)unindexed_unanalyzed);
DECREF(unindexed_unanalyzed);
DECREF(unindexed_but_analyzed);
DECREF(string_spec);
DECREF(easy_analyzed);
DECREF(plain);
DECREF(easy_analyzer);
DECREF(language);
DECREF(tokenizer);
return schema;
}
static void
S_add_doc(Indexer *indexer, String *field_name) {
Doc *doc = Doc_new(NULL, 0);
Doc_Store(doc, field_name, (Obj*)united_states_str);
Indexer_Add_Doc(indexer, doc, 1.0f);
DECREF(doc);
}
static void
S_check(TestBatchRunner *runner, RAMFolder *folder, String *field,
String *query_text, uint32_t expected_num_hits) {
char *field_str = Str_To_Utf8(field);
TermQuery *query = TermQuery_new(field, (Obj*)query_text);
IndexSearcher *searcher = IxSearcher_new((Obj*)folder);
Hits *hits = IxSearcher_Hits(searcher, (Obj*)query, 0, 10, NULL);
TEST_TRUE(runner, Hits_Total_Hits(hits) == expected_num_hits,
"%s correct num hits", field_str);
// Don't check the contents of the hit if there aren't any.
if (expected_num_hits) {
HitDoc *hit = Hits_Next(hits);
String *value = (String*)HitDoc_Extract(hit, field);
TEST_TRUE(runner, Str_Equals(united_states_str, (Obj*)value),
"%s correct doc returned", field_str);
DECREF(value);
DECREF(hit);
}
DECREF(hits);
DECREF(searcher);
DECREF(query);
free(field_str);
}
static void
test_spec_field(TestBatchRunner *runner) {
RAMFolder *folder = RAMFolder_new(NULL);
Schema *schema = S_create_schema();
Indexer *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0);
S_add_doc(indexer, analyzed_str);
S_add_doc(indexer, easy_analyzed_str);
S_add_doc(indexer, string_str);
S_add_doc(indexer, unindexed_but_analyzed_str);
S_add_doc(indexer, unindexed_unanalyzed_str);
Indexer_Commit(indexer);
S_check(runner, folder, analyzed_str, states_str, 1);
S_check(runner, folder, easy_analyzed_str, state_str, 1);
S_check(runner, folder, string_str, united_states_str, 1);
S_check(runner, folder, unindexed_but_analyzed_str, state_str, 0);
S_check(runner, folder, unindexed_but_analyzed_str, united_states_str, 0);
S_check(runner, folder, unindexed_unanalyzed_str, state_str, 0);
S_check(runner, folder, unindexed_unanalyzed_str, united_states_str, 0);
DECREF(indexer);
DECREF(schema);
DECREF(folder);
}
static void
S_add_many_fields_doc(Indexer *indexer, String *content, int num_fields) {
Doc *doc = Doc_new(NULL, 0);
for (int32_t i = 1; i <= num_fields; ++i) {
String *field = Str_newf("field%i32", i);
Doc_Store(doc, field, (Obj*)content);
DECREF(field);
}
Indexer_Add_Doc(indexer, doc, 1.0f);
DECREF(doc);
}
static void
test_many_fields(TestBatchRunner *runner) {
Schema *schema = Schema_new();
StandardTokenizer *tokenizer = StandardTokenizer_new();
FullTextType *type = FullTextType_new((Analyzer*)tokenizer);
String *query = Str_newf("x");
for (int32_t num_fields = 1; num_fields <= 10; ++num_fields) {
// Build an index with num_fields fields, and the same content in each.
String *field = Str_newf("field%i32", num_fields);
Schema_Spec_Field(schema, field, (FieldType*)type);
RAMFolder *folder = RAMFolder_new(NULL);
Indexer *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0);
String *content;
for (int c = 'a'; c <= 'z'; ++c) {
content = Str_new_from_char(c);
S_add_many_fields_doc(indexer, content, num_fields);
DECREF(content);
}
content = Str_newf("x x y");
S_add_many_fields_doc(indexer, content, num_fields);
DECREF(content);
Indexer_Commit(indexer);
// See if our search results match as expected.
IndexSearcher *searcher = IxSearcher_new((Obj*)folder);
Hits *hits = IxSearcher_Hits(searcher, (Obj*)query, 0, 100, NULL);
TEST_TRUE(runner, Hits_Total_Hits(hits) == 2,
"correct number of hits for %d fields", num_fields);
HitDoc *top_hit = Hits_Next(hits);
DECREF(top_hit);
DECREF(hits);
DECREF(searcher);
DECREF(indexer);
DECREF(folder);
DECREF(field);
}
DECREF(query);
DECREF(type);
DECREF(tokenizer);
DECREF(schema);
}
void
TestFieldMisc_Run_IMP(TestFieldMisc *self, TestBatchRunner *runner) {
TestBatchRunner_Plan(runner, (TestBatch*)self, 20);
S_init_strings();
test_spec_field(runner);
test_many_fields(runner);
S_destroy_strings();
}