blob: 2e01238a2d74b60bf27e839ac57dd43765c26ed3 [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
parcel Lucy;
/** Multiple Analyzers in series.
*
* A PolyAnalyzer is a series of L<Analyzers|Lucy::Analysis::Analyzer>,
* each of which will be called upon to "analyze" text in turn. You can
* either provide the Analyzers yourself, or you can specify a supported
* language, in which case a PolyAnalyzer consisting of a
* L<CaseFolder|Lucy::Analysis::CaseFolder>, a
* L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>, and a
* L<SnowballStemmer|Lucy::Analysis::SnowballStemmer> will be generated for you.
*
* Supported languages:
*
*     en => English,
*     da => Danish,
*     de => German,
*     es => Spanish,
*     fi => Finnish,
*     fr => French,
*     hu => Hungarian,
*     it => Italian,
*     nl => Dutch,
*     no => Norwegian,
*     pt => Portuguese,
*     ro => Romanian,
*     ru => Russian,
*     sv => Swedish,
*     tr => Turkish,
*/
class Lucy::Analysis::PolyAnalyzer
    inherits Lucy::Analysis::Analyzer : dumpable {

    /* Member variable exposed through Get_Analyzers(). */
    VArray *analyzers;

    /* Inert function returning an incremented PolyAnalyzer.  It is declared
     * with the same optional `language` and `analyzers` arguments as init(),
     * both defaulting to NULL.
     * NOTE(review): presumably allocates an object and hands off to init();
     * confirm against the implementation.
     */
    inert incremented PolyAnalyzer*
    new(const CharBuf *language = NULL, VArray *analyzers = NULL);

    /**
     * @param language An ISO code from the list of supported languages.
     * @param analyzers An array of Analyzers.  The order of the analyzers
     * matters.  Don't put a SnowballStemmer before a RegexTokenizer (can't
     * stem whole documents or paragraphs -- just individual words), or a
     * SnowballStopFilter after a SnowballStemmer (stemmed words, e.g.
     * "themselv", will not appear in a stoplist).  In general, the sequence
     * should be: normalize, tokenize, stopalize, stem.
     */
    public inert PolyAnalyzer*
    init(PolyAnalyzer *self,
         const CharBuf *language = NULL,
         VArray *analyzers = NULL);

    /** Getter for "analyzers" member.
     */
    public VArray*
    Get_Analyzers(PolyAnalyzer *self);

    /* Takes an Inversion and returns an incremented Inversion.
     * NOTE(review): presumably passes the input through each member of
     * `analyzers` in order, per the class description; confirm against the
     * implementation.
     */
    public incremented Inversion*
    Transform(PolyAnalyzer *self, Inversion *inversion);

    /* Takes raw text as a CharBuf and returns an incremented Inversion.
     * NOTE(review): presumably the text-input counterpart of Transform();
     * confirm against the implementation.
     */
    public incremented Inversion*
    Transform_Text(PolyAnalyzer *self, CharBuf *text);

    /* Compares this PolyAnalyzer against an arbitrary Obj and reports the
     * result as a bool_t.
     * NOTE(review): the comparison criteria are not visible in this header.
     */
    public bool_t
    Equals(PolyAnalyzer *self, Obj *other);

    /* Takes a dump (an Obj) and returns an incremented PolyAnalyzer.
     * NOTE(review): presumably the inverse of the dump operation implied by
     * the class's `dumpable` attribute; confirm against the implementation.
     */
    public incremented PolyAnalyzer*
    Load(PolyAnalyzer *self, Obj *dump);

    /* Destructor method.
     * NOTE(review): presumably releases `analyzers`; confirm against the
     * implementation.
     */
    public void
    Destroy(PolyAnalyzer *self);
}