blob: ae222b1cef77627f3746f97511407207fd02e1ce [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define C_LUCY_REGEXTOKENIZER
#define C_LUCY_TOKEN
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Analysis/RegexTokenizer.h"
#include "Lucy/Analysis/Token.h"
#include "Lucy/Analysis/Inversion.h"
/* Constructor: obtain a blank object from the REGEXTOKENIZER vtable and
 * pass it, along with `pattern`, to RegexTokenizer_init().  The value
 * returned by the initializer is returned to the caller unchanged.
 */
RegexTokenizer*
RegexTokenizer_new(const CharBuf *pattern) {
    RegexTokenizer *tokenizer
        = (RegexTokenizer*)VTable_Make_Obj(REGEXTOKENIZER);
    tokenizer = RegexTokenizer_init(tokenizer, pattern);
    return tokenizer;
}
/* Build a new Inversion from an existing one.
 *
 * Each Token pulled from `inversion` via Inversion_Next() has its text
 * buffer and length handed to RegexTokenizer_Tokenize_Str(), which is
 * given the new Inversion as its destination.  Iteration stops when
 * Inversion_Next() returns NULL.
 *
 * Returns the newly created Inversion.
 */
Inversion*
RegexTokenizer_transform(RegexTokenizer *self, Inversion *inversion) {
    Inversion *result = Inversion_new(NULL);
    Token     *current;

    for (current = Inversion_Next(inversion);
         current != NULL;
         current = Inversion_Next(inversion)
        ) {
        RegexTokenizer_Tokenize_Str(self, current->text, current->len,
                                    result);
    }

    return result;
}
/* Build a new Inversion from a CharBuf.
 *
 * The byte pointer and size reported by `text` are forwarded in a single
 * call to RegexTokenizer_Tokenize_Str(), with a freshly created Inversion
 * as the destination.
 *
 * Returns the newly created Inversion.
 */
Inversion*
RegexTokenizer_transform_text(RegexTokenizer *self, CharBuf *text) {
    Inversion *result = Inversion_new(NULL);
    char      *bytes  = (char*)CB_Get_Ptr8(text);

    RegexTokenizer_Tokenize_Str(self, bytes, CB_Get_Size(text), result);

    return result;
}
/* Serialize this tokenizer.
 *
 * Invokes the superclass Dump implementation, certifies that its result
 * is a Hash, and then stores the dumped form of self->pattern in that
 * Hash under the key "pattern".
 *
 * Returns the Hash, cast to Obj*.
 */
Obj*
RegexTokenizer_dump(RegexTokenizer *self) {
    RegexTokenizer_dump_t parent_dump
        = (RegexTokenizer_dump_t)SUPER_METHOD(REGEXTOKENIZER, RegexTokenizer, Dump);
    Obj  *parent_result = parent_dump(self);
    Hash *hash          = (Hash*)CERTIFY(parent_result, HASH);

    Hash_Store_Str(hash, "pattern", 7, CB_Dump(self->pattern));

    return (Obj*)hash;
}
/* Deserialize a tokenizer from `dump`.
 *
 * Steps, in order:
 *   1. Certify that `dump` is a Hash.
 *   2. Let the superclass Load implementation produce the object.
 *   3. Fetch the "pattern" entry from the Hash and certify that it is a
 *      CharBuf.
 *   4. Run RegexTokenizer_init() on the loaded object with that pattern.
 *
 * Returns the result of RegexTokenizer_init().
 */
RegexTokenizer*
RegexTokenizer_load(RegexTokenizer *self, Obj *dump) {
    Hash *hash = (Hash*)CERTIFY(dump, HASH);
    RegexTokenizer_load_t parent_load
        = (RegexTokenizer_load_t)SUPER_METHOD(REGEXTOKENIZER, RegexTokenizer, Load);
    RegexTokenizer *fresh = parent_load(self, dump);
    Obj     *stored  = Hash_Fetch_Str(hash, "pattern", 7);
    CharBuf *pattern = (CharBuf*)CERTIFY(stored, CHARBUF);

    return RegexTokenizer_init(fresh, pattern);
}
/* Equality test.
 *
 * Returns true when `other` is the very same object as `self`, or when
 * `other` is a REGEXTOKENIZER whose pattern compares equal (via
 * CB_Equals) to self->pattern.  Returns false otherwise.
 */
bool_t
RegexTokenizer_equals(RegexTokenizer *self, Obj *other) {
    /* Identity short-circuit: checked before any type inspection. */
    if ((RegexTokenizer*)other == self) {
        return true;
    }

    if (Obj_Is_A(other, REGEXTOKENIZER)) {
        RegexTokenizer *const that = (RegexTokenizer*)other;
        return CB_Equals(that->pattern, (Obj*)self->pattern) ? true : false;
    }

    return false;
}