import lunr from "lunr";
// The CommonJS `require`s below register the lunr-languages plugins on the
// shared lunr instance; they are needed for the `ja` tests.
// eslint-disable-next-line @typescript-eslint/no-var-requires
require("lunr-languages/lunr.stemmer.support")(lunr);
// eslint-disable-next-line @typescript-eslint/no-var-requires
require("lunr-languages/tinyseg")(lunr);
// eslint-disable-next-line @typescript-eslint/no-var-requires
require("lunr-languages/lunr.ja")(lunr);
import { tokenize } from "./tokenize";
describe("tokenize", () => {
  test.each<[string, string[]]>([
    ["Hello-World", ["hello", "world"]],
    ["Hello World 「世界和平」", ["hello", "world", "世界和平"]],
    [
      "a1b2很好c3_d4更好56也好,不错。",
      ["a1b2", "很好", "c3_d4", "更好", "56", "也好", "不错"],
    ],
    ["…", []],
  ])("tokenize('%s', ['en', 'zh']) should return %j", (text, tokens) => {
    expect(tokenize(text, ["en", "zh"])).toEqual(tokens);
  });
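
  // ["en", "pt"]: Portuguese words with diacritics (e.g. "é") are preserved
  // as single tokens.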
  test.each<[string, string[]]>([
    [
      "População portuguesa é composta",
      ["população", "portuguesa", "é", "composta"],
    ],
  ])("tokenize('%s', ['en', 'pt']) should return %j", (text, tokens) => {
    expect(tokenize(text, ["en", "pt"])).toEqual(tokens);
  });
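
  // ["ja"]: Japanese text is segmented by TinySegmenter (registered above
  // via lunr-languages/tinyseg); note that the full stop "。" is emitted as
  // its own token.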
  test.each<[string, string[]]>([
    ["私は電車が好きです。", ["私", "は", "電車", "が", "好き", "です", "。"]],
  ])("tokenize('%s', ['ja']) should return %j", (text, tokens) => {
    expect(tokenize(text, ["ja"])).toEqual(tokens);
  });
});