blob: 1cc4680bc14c7f1991ae0cbda346419805883473 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_TESTLUCY_TESTQUERYPARSERSYNTAX
#define C_TESTLUCY_TESTQUERYPARSER
#define TESTLUCY_USE_SHORT_NAMES
#include "Lucy/Util/ToolSet.h"
#include <string.h>
#include <stdlib.h>
#include "Clownfish/Boolean.h"
#include "Clownfish/TestHarness/TestBatchRunner.h"
#include "Lucy/Test.h"
#include "Lucy/Test/Search/TestQueryParserSyntax.h"
#include "Lucy/Test/Search/TestQueryParser.h"
#include "Lucy/Test/TestUtils.h"
#include "Lucy/Analysis/PolyAnalyzer.h"
#include "Lucy/Analysis/RegexTokenizer.h"
#include "Lucy/Analysis/SnowballStopFilter.h"
#include "Lucy/Document/Doc.h"
#include "Lucy/Index/Indexer.h"
#include "Lucy/Plan/FullTextType.h"
#include "Lucy/Plan/Schema.h"
#include "Lucy/Search/Hits.h"
#include "Lucy/Search/IndexSearcher.h"
#include "Lucy/Search/QueryParser.h"
#include "Lucy/Search/TermQuery.h"
#include "Lucy/Search/PhraseQuery.h"
#include "Lucy/Search/LeafQuery.h"
#include "Lucy/Search/ANDQuery.h"
#include "Lucy/Search/NOTQuery.h"
#include "Lucy/Search/ORQuery.h"
#include "Lucy/Store/Folder.h"
#include "Lucy/Store/RAMFolder.h"
/*
 * Shorthand for the TestUtils query factories.  Each macro places a
 * (Query*) cast in front of the factory name, so `make_term_query(...)`
 * expands to `(Query*)TestUtils_make_term_query(...)` and the result can be
 * assigned straight to a Query* variable.
 */
#define make_term_query (Query*)TestUtils_make_term_query
#define make_phrase_query (Query*)TestUtils_make_phrase_query
#define make_leaf_query (Query*)TestUtils_make_leaf_query
#define make_not_query (Query*)TestUtils_make_not_query
#define make_poly_query (Query*)TestUtils_make_poly_query
/*
 * Constructor: asks the TESTQUERYPARSERSYNTAX class for a fresh object and
 * hands it back typed as TestQueryParserSyntax*.
 */
TestQueryParserSyntax*
TestQPSyntax_new() {
    TestQueryParserSyntax *self
        = (TestQueryParserSyntax*)Class_Make_Obj(TESTQUERYPARSERSYNTAX);
    return self;
}
static Folder*
build_index() {
// Plain type.
String *pattern = Str_newf("\\S+");
RegexTokenizer *tokenizer = RegexTokenizer_new(pattern);
FullTextType *plain = FullTextType_new((Analyzer*)tokenizer);
// Fancy type.
String *word_pattern = Str_newf("\\w+");
RegexTokenizer *word_tokenizer = RegexTokenizer_new(word_pattern);
Hash *stop_list = Hash_new(0);
Hash_Store_Utf8(stop_list, "x", 1, (Obj*)CFISH_TRUE);
SnowballStopFilter *stop_filter = SnowStop_new(NULL, stop_list);
Vector *analyzers = Vec_new(0);
Vec_Push(analyzers, (Obj*)word_tokenizer);
Vec_Push(analyzers, (Obj*)stop_filter);
PolyAnalyzer *fancy_analyzer = PolyAnalyzer_new(NULL, analyzers);
FullTextType *fancy = FullTextType_new((Analyzer*)fancy_analyzer);
// Schema.
Schema *schema = Schema_new();
String *plain_str = Str_newf("plain");
String *fancy_str = Str_newf("fancy");
Schema_Spec_Field(schema, plain_str, (FieldType*)plain);
Schema_Spec_Field(schema, fancy_str, (FieldType*)fancy);
// Indexer.
RAMFolder *folder = RAMFolder_new(NULL);
Indexer *indexer = Indexer_new(schema, (Obj*)folder, NULL, 0);
// Index documents.
Vector *doc_set = TestUtils_doc_set();
for (size_t i = 0; i < Vec_Get_Size(doc_set); ++i) {
String *content_string = (String*)Vec_Fetch(doc_set, i);
Doc *doc = Doc_new(NULL, 0);
Doc_Store(doc, plain_str, (Obj*)content_string);
Doc_Store(doc, fancy_str, (Obj*)content_string);
Indexer_Add_Doc(indexer, doc, 1.0);
DECREF(doc);
}
Indexer_Commit(indexer);
// Clean up.
DECREF(doc_set);
DECREF(indexer);
DECREF(fancy_str);
DECREF(plain_str);
DECREF(schema);
DECREF(fancy);
DECREF(fancy_analyzer);
DECREF(analyzers);
DECREF(stop_list);
DECREF(word_pattern);
DECREF(plain);
DECREF(tokenizer);
DECREF(pattern);
return (Folder*)folder;
}
/*
 * Leaf case: the bare term `a`.
 * Expected tree is a field-less leaf; expected expansion is an OR of term
 * queries on "fancy" and "plain".  Expects 4 hits.
 */
static TestQueryParser*
leaf_test_simple_term() {
    const char *query_string = "a";
    Query *leaf       = make_leaf_query(NULL, query_string);
    Query *plain_term = make_term_query("plain", "a");
    Query *fancy_term = make_term_query("fancy", "a");
    Query *either     = make_poly_query(BOOLOP_OR, fancy_term, plain_term,
                                        NULL);
    return TestQP_new(query_string, leaf, either, 4);
}
/*
 * Leaf case: the quoted phrase `"a b"`.
 * Expected expansion is an OR of two-term phrase queries on "fancy" and
 * "plain".  Expects 3 hits.
 */
static TestQueryParser*
leaf_test_simple_phrase() {
    const char *query_string = "\"a b\"";
    Query *leaf         = make_leaf_query(NULL, query_string);
    Query *plain_phrase = make_phrase_query("plain", "a", "b", NULL);
    Query *fancy_phrase = make_phrase_query("fancy", "a", "b", NULL);
    Query *either       = make_poly_query(BOOLOP_OR, fancy_phrase,
                                          plain_phrase, NULL);
    return TestQP_new(query_string, leaf, either, 3);
}
/*
 * Leaf case: `"a b` with the closing quote missing.
 * The expected expansion is the same pair of two-term phrase queries as for
 * the properly closed phrase.  Expects 3 hits.
 */
static TestQueryParser*
leaf_test_unclosed_quote() {
    const char *query_string = "\"a b";
    Query *leaf         = make_leaf_query(NULL, query_string);
    Query *plain_phrase = make_phrase_query("plain", "a", "b", NULL);
    Query *fancy_phrase = make_phrase_query("fancy", "a", "b", NULL);
    Query *either       = make_poly_query(BOOLOP_OR, fancy_phrase,
                                          plain_phrase, NULL);
    return TestQP_new(query_string, leaf, either, 3);
}
/*
 * Leaf case: a phrase whose content is itself wrapped in backslash-escaped
 * quotes.
 * For "plain" the expected phrase terms keep the quote characters (`"a` and
 * `b"`); for "fancy" the expected terms are just `a` and `b`.
 * Expects 3 hits.
 */
static TestQueryParser*
leaf_test_escaped_quotes_inside() {
    const char *query_string = "\"\\\"a b\\\"\"";
    Query *leaf         = make_leaf_query(NULL, query_string);
    Query *plain_phrase = make_phrase_query("plain", "\"a", "b\"", NULL);
    Query *fancy_phrase = make_phrase_query("fancy", "a", "b", NULL);
    Query *either       = make_poly_query(BOOLOP_OR, fancy_phrase,
                                          plain_phrase, NULL);
    return TestQP_new(query_string, leaf, either, 3);
}
/*
 * Leaf case: a backslash-escaped quote followed by `a`, outside any phrase.
 * For "plain" the expected term keeps the quote character (`"a`); for
 * "fancy" the expected term is just `a`.  Expects 4 hits.
 */
static TestQueryParser*
leaf_test_escaped_quotes_outside() {
    const char *query_string = "\\\"a";
    Query *leaf       = make_leaf_query(NULL, query_string);
    Query *plain_term = make_term_query("plain", "\"a");
    Query *fancy_term = make_term_query("fancy", "a");
    Query *either     = make_poly_query(BOOLOP_OR, fancy_term, plain_term,
                                        NULL);
    return TestQP_new(query_string, leaf, either, 4);
}
/*
 * Leaf case: the quoted single term `"a"`.
 * Expected expansion is an OR of one-term phrase queries on "fancy" and
 * "plain".  Expects 4 hits.
 */
static TestQueryParser*
leaf_test_single_term_phrase() {
    const char *query_string = "\"a\"";
    Query *leaf         = make_leaf_query(NULL, query_string);
    Query *plain_phrase = make_phrase_query("plain", "a", NULL);
    Query *fancy_phrase = make_phrase_query("fancy", "a", NULL);
    Query *either       = make_poly_query(BOOLOP_OR, fancy_phrase,
                                          plain_phrase, NULL);
    return TestQP_new(query_string, leaf, either, 4);
}
/*
 * Leaf case: the three-term phrase `"a b c"`.
 * Expected expansion is an OR of three-term phrase queries on "fancy" and
 * "plain".  Expects 2 hits.
 */
static TestQueryParser*
leaf_test_longer_phrase() {
    const char *query_string = "\"a b c\"";
    Query *leaf         = make_leaf_query(NULL, query_string);
    Query *plain_phrase = make_phrase_query("plain", "a", "b", "c", NULL);
    Query *fancy_phrase = make_phrase_query("fancy", "a", "b", "c", NULL);
    Query *either       = make_poly_query(BOOLOP_OR, fancy_phrase,
                                          plain_phrase, NULL);
    return TestQP_new(query_string, leaf, either, 2);
}
/*
 * Leaf case: an empty pair of quotes.
 * Expected expansion is an OR of phrase queries that carry no terms at all.
 * Expects 0 hits.
 */
static TestQueryParser*
leaf_test_empty_phrase() {
    const char *query_string = "\"\"";
    Query *leaf         = make_leaf_query(NULL, query_string);
    Query *plain_phrase = make_phrase_query("plain", NULL);
    Query *fancy_phrase = make_phrase_query("fancy", NULL);
    Query *either       = make_poly_query(BOOLOP_OR, fancy_phrase,
                                          plain_phrase, NULL);
    return TestQP_new(query_string, leaf, either, 0);
}
/*
 * Leaf case: the phrase `"x a"`, where "x" is the entry in the stop list
 * that build_index() gives the "fancy" analyzer.
 * The expected "plain" phrase has both terms; the expected "fancy" phrase
 * has only `a`.  Expects 4 hits.
 */
static TestQueryParser*
leaf_test_phrase_with_stopwords() {
    const char *query_string = "\"x a\"";
    Query *leaf         = make_leaf_query(NULL, query_string);
    Query *plain_phrase = make_phrase_query("plain", "x", "a", NULL);
    Query *fancy_phrase = make_phrase_query("fancy", "a", NULL);
    Query *either       = make_poly_query(BOOLOP_OR, fancy_phrase,
                                          plain_phrase, NULL);
    return TestQP_new(query_string, leaf, either, 4);
}
/*
 * Leaf case: `a.b`, which the two fields are expected to treat differently.
 * "plain" should yield a single term query for `a.b`; "fancy" should yield
 * a phrase query for `a` followed by `b`.  Expects 3 hits.
 */
static TestQueryParser*
leaf_test_different_tokenization() {
    const char *query_string = "a.b";
    Query *leaf         = make_leaf_query(NULL, query_string);
    Query *plain_term   = make_term_query("plain", "a.b");
    Query *fancy_phrase = make_phrase_query("fancy", "a", "b", NULL);
    Query *either       = make_poly_query(BOOLOP_OR, fancy_phrase,
                                          plain_term, NULL);
    return TestQP_new(query_string, leaf, either, 3);
}
/*
 * Leaf case: a full URL used as the query string.
 * "plain" should yield one term query for the whole URL; "fancy" should
 * yield a six-term phrase query made of its word components.
 * Expects 0 hits.
 */
static TestQueryParser*
leaf_test_http() {
    char url[] = "http://www.foo.com/bar.html";
    Query *leaf         = make_leaf_query(NULL, url);
    Query *plain_term   = make_term_query("plain", url);
    Query *fancy_phrase = make_phrase_query("fancy",
                                            "http", "www", "foo",
                                            "com", "bar", "html",
                                            NULL);
    Query *either = make_poly_query(BOOLOP_OR, fancy_phrase, plain_term,
                                    NULL);
    return TestQP_new(url, leaf, either, 0);
}
/*
 * Leaf case: `plain:b`, a term with an explicit field.
 * Expected tree is a leaf bound to "plain"; expected expansion is a single
 * term query on that field.  Expects 3 hits.
 */
static TestQueryParser*
leaf_test_field() {
    Query *field_leaf = make_leaf_query("plain", "b");
    Query *field_term = make_term_query("plain", "b");
    TestQueryParser *test_case
        = TestQP_new("plain:b", field_leaf, field_term, 3);
    return test_case;
}
/*
 * Leaf case: `bogusfield:b`, naming a field that build_index() never adds
 * to the schema.
 * The expected tree and expansion still carry "bogusfield".
 * Expects 0 hits.
 */
static TestQueryParser*
leaf_test_unrecognized_field() {
    Query *field_leaf = make_leaf_query("bogusfield", "b");
    Query *field_term = make_term_query("bogusfield", "b");
    TestQueryParser *test_case
        = TestQP_new("bogusfield:b", field_leaf, field_term, 0);
    return test_case;
}
/*
 * Leaf case: a "plain"-field term containing a backslash-escaped colon.
 * The expected leaf keeps the backslash; the expected term query has it
 * removed (`a:b`).  Expects 0 hits.
 */
static TestQueryParser*
leaf_test_unescape_colons() {
    Query *escaped_leaf   = make_leaf_query("plain", "a\\:b");
    Query *unescaped_term = make_term_query("plain", "a:b");
    TestQueryParser *test_case
        = TestQP_new("plain:a\\:b", escaped_leaf, unescaped_term, 0);
    return test_case;
}
/*
 * Syntax case: `-+a`.
 * Expected tree is a NOT query wrapping the leaf `a`; no expansion is
 * supplied.  Expects 0 hits.
 */
static TestQueryParser*
syntax_test_minus_plus() {
    Query *negated = make_not_query(make_leaf_query(NULL, "a"));
    return TestQP_new("-+a", negated, NULL, 0);
}
/*
 * Syntax case: `+-a`.
 * Expected tree is a NOT query wrapping the leaf `a`.  Expects 0 hits.
 */
static TestQueryParser*
syntax_test_plus_minus() {
    // The outcome is not ideal, but neither is the query string.
    Query *negated = make_not_query(make_leaf_query(NULL, "a"));
    return TestQP_new("+-a", negated, NULL, 0);
}
/*
 * Syntax case: `--a`.
 * Expected tree is just the leaf `a`, with no NOT wrapper.
 * Expects 4 hits.
 */
static TestQueryParser*
syntax_test_minus_minus() {
    // The outcome is not ideal, but neither is the query string.
    Query *bare_leaf = make_leaf_query(NULL, "a");
    TestQueryParser *test_case = TestQP_new("--a", bare_leaf, NULL, 4);
    return test_case;
}
/*
 * Syntax case: `NOT -a`.
 * Expected tree is just the leaf `a`, with no NOT wrapper.
 * Expects 4 hits.
 */
static TestQueryParser*
syntax_test_not_minus() {
    Query *bare_leaf = make_leaf_query(NULL, "a");
    TestQueryParser *test_case = TestQP_new("NOT -a", bare_leaf, NULL, 4);
    return test_case;
}
/*
 * Syntax case: `NOT +a`.
 * Expected tree is a NOT query wrapping the leaf `a`.  Expects 0 hits.
 */
static TestQueryParser*
syntax_test_not_plus() {
    // The outcome is not ideal, but neither is the query string.
    Query *negated = make_not_query(make_leaf_query(NULL, "a"));
    return TestQP_new("NOT +a", negated, NULL, 0);
}
/*
 * Syntax case: `+ a`, a plus sign separated from its term by a space.
 * Expected tree is an OR of two leaves: the literal `+` and `a`.
 * Expects 4 hits.
 */
static TestQueryParser*
syntax_test_padded_plus() {
    Query *sign_leaf = make_leaf_query(NULL, "+");
    Query *term_leaf = make_leaf_query(NULL, "a");
    Query *either    = make_poly_query(BOOLOP_OR, sign_leaf, term_leaf,
                                       NULL);
    return TestQP_new("+ a", either, NULL, 4);
}
/*
 * Syntax case: `- a`, a minus sign separated from its term by a space.
 * Expected tree is an OR of two leaves: the literal `-` and `a`.
 * Expects 4 hits.
 */
static TestQueryParser*
syntax_test_padded_minus() {
    Query *sign_leaf = make_leaf_query(NULL, "-");
    Query *term_leaf = make_leaf_query(NULL, "a");
    Query *either    = make_poly_query(BOOLOP_OR, sign_leaf, term_leaf,
                                       NULL);
    return TestQP_new("- a", either, NULL, 4);
}
/*
 * Syntax case: `((`, two opening parentheses and nothing else.
 * Expected tree is an OR query whose only child is a childless OR query.
 * Expects 0 hits.
 */
static TestQueryParser*
syntax_test_unclosed_parens() {
    // The outcome is not ideal, but neither is the query string.
    Query *nested = make_poly_query(BOOLOP_OR,
                                    make_poly_query(BOOLOP_OR, NULL),
                                    NULL);
    return TestQP_new("((", nested, NULL, 0);
}
/*
 * Syntax case: `)a)`, closing parentheses with no matching openers.
 * Expected tree is just the leaf `a`.  Expects 4 hits.
 */
static TestQueryParser*
syntax_test_unmatched_parens() {
    Query *bare_leaf = make_leaf_query(NULL, "a");
    TestQueryParser *test_case = TestQP_new(")a)", bare_leaf, NULL, 4);
    return test_case;
}
/*
 * Syntax case: `a` surrounded by backslash-escaped quotes, with no real
 * quotes around it.
 * Expected tree is one leaf whose text equals the whole query string,
 * escapes included.  Expects 4 hits.
 */
static TestQueryParser*
syntax_test_escaped_quotes_outside() {
    const char *query_string = "\\\"a\\\"";
    Query *whole_leaf = make_leaf_query(NULL, query_string);
    return TestQP_new(query_string, whole_leaf, NULL, 4);
}
/*
 * Syntax case: a quoted phrase whose content is `a` wrapped in
 * backslash-escaped quotes.
 * Expected tree is one leaf whose text equals the whole query string, outer
 * quotes and escapes included.  Expects 4 hits.
 */
static TestQueryParser*
syntax_test_escaped_quotes_inside() {
    const char *query_string = "\"\\\"a\\\"\"";
    Query *whole_leaf = make_leaf_query(NULL, query_string);
    return TestQP_new(query_string, whole_leaf, NULL, 4);
}
/*
 * Syntax case: `10:30`.
 * Expected tree is a single field-less leaf holding the entire string, so
 * "10" is not taken as a field name.  Expects 0 hits.
 */
static TestQueryParser*
syntax_test_identifier_field_name() {
    // A field name has to be an identifier, so it may not begin with a
    // digit.
    const char *query_string = "10:30";
    Query *whole_leaf = make_leaf_query(NULL, query_string);
    return TestQP_new(query_string, whole_leaf, NULL, 0);
}
/*
 * Syntax case: `PHP::Interpreter`.
 * Expected tree is a single field-less leaf holding the entire string, so
 * the double colon does not split off a field name.  Expects 0 hits.
 */
static TestQueryParser*
syntax_test_double_colon() {
    const char *query_string = "PHP::Interpreter";
    Query *whole_leaf = make_leaf_query(NULL, query_string);
    return TestQP_new(query_string, whole_leaf, NULL, 0);
}
/***************************************************************************/
/*
 * Signature shared by every test-case factory in this file: no arguments,
 * returns a TestQueryParser describing one case.
 *
 * The parameter list is spelled (void) rather than left empty.  Before C23
 * an empty list declares a function with unspecified arguments, so calls
 * made through the pointer would not be checked against a prototype.
 */
typedef TestQueryParser*
(*LUCY_TestQPSyntax_Test_t)(void);
/*
 * NULL-terminated table of the leaf test-case factories, in the order they
 * are run.  Each case produces three assertions in
 * test_query_parser_syntax().
 *
 * The table is only ever read, so it is declared const.
 */
static const LUCY_TestQPSyntax_Test_t leaf_test_funcs[] = {
    leaf_test_simple_term,
    leaf_test_simple_phrase,
    leaf_test_unclosed_quote,
    leaf_test_escaped_quotes_inside,
    leaf_test_escaped_quotes_outside,
    leaf_test_single_term_phrase,
    leaf_test_longer_phrase,
    leaf_test_empty_phrase,
    leaf_test_different_tokenization,
    leaf_test_phrase_with_stopwords,
    leaf_test_http,
    leaf_test_field,
    leaf_test_unrecognized_field,
    leaf_test_unescape_colons,
    NULL
};
/*
 * NULL-terminated table of the syntax test-case factories, in the order
 * they are run.  Each case produces two assertions in
 * test_query_parser_syntax().
 *
 * The table is only ever read, so it is declared const.
 */
static const LUCY_TestQPSyntax_Test_t syntax_test_funcs[] = {
    syntax_test_minus_plus,
    syntax_test_plus_minus,
    syntax_test_minus_minus,
    syntax_test_not_minus,
    syntax_test_not_plus,
    syntax_test_padded_plus,
    syntax_test_padded_minus,
    syntax_test_unclosed_parens,
    syntax_test_unmatched_parens,
    syntax_test_escaped_quotes_outside,
    syntax_test_escaped_quotes_inside,
    syntax_test_identifier_field_name,
    syntax_test_double_colon,
    NULL
};
static void
test_query_parser_syntax(TestBatchRunner *runner) {
if (!RegexTokenizer_is_available()) {
for (uint32_t i = 0; leaf_test_funcs[i] != NULL; i++) {
SKIP(runner, 3, "RegexTokenizer not available");
}
for (uint32_t i = 0; syntax_test_funcs[i] != NULL; i++) {
SKIP(runner, 2, "RegexTokenizer not available");
}
return;
}
Folder *index = build_index();
IndexSearcher *searcher = IxSearcher_new((Obj*)index);
QueryParser *qparser = QParser_new(IxSearcher_Get_Schema(searcher),
NULL, NULL, NULL);
QParser_Set_Heed_Colons(qparser, true);
for (uint32_t i = 0; leaf_test_funcs[i] != NULL; i++) {
LUCY_TestQPSyntax_Test_t test_func = leaf_test_funcs[i];
TestQueryParser *test_case = test_func();
TestQueryParserIVARS *ivars = TestQP_IVARS(test_case);
Query *tree = QParser_Tree(qparser, ivars->query_string);
Query *expanded = QParser_Expand_Leaf(qparser, ivars->tree);
Query *parsed = QParser_Parse(qparser, ivars->query_string);
Hits *hits = IxSearcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);
char *qstr = Str_To_Utf8(ivars->query_string);
TEST_TRUE(runner, Query_Equals(tree, (Obj*)ivars->tree),
"tree() %s", qstr);
TEST_TRUE(runner, Query_Equals(expanded, (Obj*)ivars->expanded),
"expand_leaf() %s", qstr);
TEST_INT_EQ(runner, Hits_Total_Hits(hits), ivars->num_hits,
"hits: %s", qstr);
free(qstr);
DECREF(hits);
DECREF(parsed);
DECREF(expanded);
DECREF(tree);
DECREF(test_case);
}
for (uint32_t i = 0; syntax_test_funcs[i] != NULL; i++) {
LUCY_TestQPSyntax_Test_t test_func = syntax_test_funcs[i];
TestQueryParser *test_case = test_func();
TestQueryParserIVARS *ivars = TestQP_IVARS(test_case);
Query *tree = QParser_Tree(qparser, ivars->query_string);
Query *parsed = QParser_Parse(qparser, ivars->query_string);
Hits *hits = IxSearcher_Hits(searcher, (Obj*)parsed, 0, 10, NULL);
char *qstr = Str_To_Utf8(ivars->query_string);
TEST_TRUE(runner, Query_Equals(tree, (Obj*)ivars->tree),
"tree() %s", qstr);
TEST_INT_EQ(runner, Hits_Total_Hits(hits), ivars->num_hits,
"hits: %s", qstr);
free(qstr);
DECREF(hits);
DECREF(parsed);
DECREF(tree);
DECREF(test_case);
}
DECREF(searcher);
DECREF(qparser);
DECREF(index);
}
/*
 * Test batch entry point: announces the number of planned assertions to the
 * runner, then runs the query parser syntax tests.
 */
void
TestQPSyntax_Run_IMP(TestQueryParserSyntax *self, TestBatchRunner *runner) {
    // 14 leaf cases x 3 assertions + 13 syntax cases x 2 assertions.
    const uint32_t planned_tests = 68;

    TestBatchRunner_Plan(runner, (TestBatch*)self, planned_tests);
    test_query_parser_syntax(runner);
}