blob: 5a90bbe5cc943a28728c5eedbee950dcd7f874ec [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_TESTNORMALIZER
#define C_LUCY_NORMALIZER
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Test.h"
#include "Lucy/Test/Analysis/TestNormalizer.h"
#include "Lucy/Analysis/Normalizer.h"
#include "Lucy/Store/FSFolder.h"
#include "Lucy/Util/Json.h"
static void
test_Dump_Load_and_Equals(TestBatch *batch) {
Normalizer *normalizer[4];
CharBuf *NFC = (CharBuf*)ZCB_WRAP_STR("NFC", 3);
CharBuf *NFKC = (CharBuf*)ZCB_WRAP_STR("NFKC", 4);
normalizer[0] = Normalizer_new(NFKC, true, false);
normalizer[1] = Normalizer_new(NFC, true, false);
normalizer[2] = Normalizer_new(NFKC, false, false);
normalizer[3] = Normalizer_new(NFKC, true, true);
TEST_FALSE(batch,
Normalizer_Equals(normalizer[0], (Obj*)normalizer[1]),
"Equals() false with different normalization form");
TEST_FALSE(batch,
Normalizer_Equals(normalizer[0], (Obj*)normalizer[2]),
"Equals() false with different case_fold flag");
TEST_FALSE(batch,
Normalizer_Equals(normalizer[0], (Obj*)normalizer[3]),
"Equals() false with different strip_accents flag");
for (int i = 0; i < 4; ++i) {
Obj *dump = (Obj*)Normalizer_Dump(normalizer[i]);
Normalizer *clone = (Normalizer*)Normalizer_Load(normalizer[i], dump);
TEST_TRUE(batch,
Normalizer_Equals(normalizer[i], (Obj*)clone),
"Dump => Load round trip");
DECREF(normalizer[i]);
DECREF(dump);
DECREF(clone);
}
}
static void
test_normalization(TestBatch *batch) {
CharBuf *path = CB_newf("modules");
FSFolder *modules_folder = FSFolder_new(path);
if (!FSFolder_Check(modules_folder)) {
DECREF(modules_folder);
CB_setf(path, "../modules");
modules_folder = FSFolder_new(path);
if (!FSFolder_Check(modules_folder)) {
THROW(ERR, "Can't open modules folder");
}
}
CB_setf(path, "unicode/utf8proc/tests.json");
VArray *tests = (VArray*)Json_slurp_json((Folder*)modules_folder, path);
if (!tests) { RETHROW(Err_get_error()); }
for (uint32_t i = 0, max = VA_Get_Size(tests); i < max; i++) {
Hash *test = (Hash*)VA_Fetch(tests, i);
CharBuf *form = (CharBuf*)Hash_Fetch_Str(
test, "normalization_form", 18);
bool_t case_fold = Bool_Get_Value((BoolNum*)Hash_Fetch_Str(
test, "case_fold", 9));
bool_t strip_accents = Bool_Get_Value((BoolNum*)Hash_Fetch_Str(
test, "strip_accents", 13));
Normalizer *normalizer = Normalizer_new(form, case_fold, strip_accents);
VArray *words = (VArray*)Hash_Fetch_Str(test, "words", 5);
VArray *norms = (VArray*)Hash_Fetch_Str(test, "norms", 5);
for (uint32_t j = 0, max = VA_Get_Size(words); j < max; j++) {
CharBuf *word = (CharBuf*)VA_Fetch(words, j);
VArray *got = Normalizer_Split(normalizer, word);
CharBuf *norm = (CharBuf*)VA_Fetch(got, 0);
TEST_TRUE(batch,
norm
&& CB_Is_A(norm, CHARBUF)
&& CB_Equals(norm, VA_Fetch(norms, j)),
"Normalize %s %d %d: %s", CB_Get_Ptr8(form),
case_fold, strip_accents, CB_Get_Ptr8(word)
);
DECREF(got);
}
DECREF(normalizer);
}
DECREF(tests);
DECREF(modules_folder);
DECREF(path);
}
void
TestNormalizer_run_tests() {
TestBatch *batch = TestBatch_new(20);
TestBatch_Plan(batch);
test_Dump_Load_and_Equals(batch);
test_normalization(batch);
DECREF(batch);
}