core/Lucy/Analysis/PolyAnalyzer.cfh - lucy - Git at Google

 /* Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 parcel Lucy;

 /** Multiple Analyzers in series.
  *
  * A PolyAnalyzer is a series of L<Analyzers|Lucy::Analysis::Analyzer>,
  * each of which will be called upon to "analyze" text in turn.  You can
  * either provide the Analyzers yourself, or you can specify a supported
  * language, in which case a PolyAnalyzer consisting of a
  * L<CaseFolder|Lucy::Analysis::CaseFolder>, a
  * L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>, and a
  * L<SnowballStemmer|Lucy::Analysis::SnowballStemmer> will be generated for you.
  *
  * Supported languages:
  *
  *     en => English,
  *     da => Danish,
  *     de => German,
  *     es => Spanish,
  *     fi => Finnish,
  *     fr => French,
  *     hu => Hungarian,
  *     it => Italian,
  *     nl => Dutch,
  *     no => Norwegian,
  *     pt => Portuguese,
  *     ro => Romanian,
  *     ru => Russian,
  *     sv => Swedish,
  *     tr => Turkish,
  */
 class Lucy::Analysis::PolyAnalyzer
     inherits Lucy::Analysis::Analyzer : dumpable {

     /* The component Analyzers, in the order supplied to init(); returned
      * by Get_Analyzers(). */
     VArray  *analyzers;

     /** Constructor.  Accepts the same arguments as init(); see init() for
      * their meaning.  Both arguments default to NULL.
      *
      * NOTE(review): presumably allocates a new object and hands it to
      * init() -- confirm against the implementation.
      */
     inert incremented PolyAnalyzer*
     new(const CharBuf *language = NULL, VArray *analyzers = NULL);

     /** Initialize a PolyAnalyzer from either a language code or an explicit
      * list of Analyzers.  Both arguments default to NULL.
      *
      * NOTE(review): what happens when both arguments, or neither, are
      * supplied is not visible in this header -- confirm against the
      * implementation.
      *
      * @param language An ISO code from the list of supported languages.
      * @param analyzers An array of Analyzers.  The order of the analyzers
      * matters.  Don't put a SnowballStemmer before a RegexTokenizer (can't stem whole
      * documents or paragraphs -- just individual words), or a SnowballStopFilter
      * after a SnowballStemmer (stemmed words, e.g. "themselv", will not appear in a
      * stoplist).  In general, the sequence should be: normalize, tokenize,
      * stopalize, stem.
      */
     public inert PolyAnalyzer*
     init(PolyAnalyzer *self, const CharBuf *language = NULL,
          VArray *analyzers = NULL);

     /** Getter for "analyzers" member.
      */
     public VArray*
     Get_Analyzers(PolyAnalyzer *self);

     /** Analyze an existing Inversion and return an Inversion.
      *
      * NOTE(review): per the class description, the component Analyzers are
      * presumably applied one after another in array order -- confirm
      * against the implementation.
      */
     public incremented Inversion*
     Transform(PolyAnalyzer *self, Inversion *inversion);

     /** Analyze raw text and return an Inversion.
      *
      * NOTE(review): presumably the text-input counterpart of Transform() --
      * confirm against the implementation.
      */
     public incremented Inversion*
     Transform_Text(PolyAnalyzer *self, CharBuf *text);

     /** Test this PolyAnalyzer for equality with another object.
      *
      * NOTE(review): the comparison criteria are not visible in this header;
      * presumably the "analyzers" arrays are compared -- confirm.
      */
     public bool_t
     Equals(PolyAnalyzer *self, Obj *other);

     /** Create a PolyAnalyzer from a dumped representation.
      *
      * NOTE(review): the expected structure of `dump` is not visible in
      * this header -- confirm against the implementation.
      */
     public incremented PolyAnalyzer*
     Load(PolyAnalyzer *self, Obj *dump);

     /** Destructor.
      *
      * NOTE(review): presumably releases the "analyzers" member -- confirm
      * against the implementation.
      */
     public void
     Destroy(PolyAnalyzer *self);
 }
	/* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	parcel Lucy;

	/** Multiple Analyzers in series.
	*
	* A PolyAnalyzer is a series of L<Analyzers|Lucy::Analysis::Analyzer>,
	* each of which will be called upon to "analyze" text in turn. You can
	* either provide the Analyzers yourself, or you can specify a supported
	* language, in which case a PolyAnalyzer consisting of a
	* L<CaseFolder|Lucy::Analysis::CaseFolder>, a
	* L<RegexTokenizer|Lucy::Analysis::RegexTokenizer>, and a
	* L<SnowballStemmer|Lucy::Analysis::SnowballStemmer> will be generated for you.
	*
	* Supported languages:
	*
	*     en => English,
	*     da => Danish,
	*     de => German,
	*     es => Spanish,
	*     fi => Finnish,
	*     fr => French,
	*     hu => Hungarian,
	*     it => Italian,
	*     nl => Dutch,
	*     no => Norwegian,
	*     pt => Portuguese,
	*     ro => Romanian,
	*     ru => Russian,
	*     sv => Swedish,
	*     tr => Turkish,
	*/
	class Lucy::Analysis::PolyAnalyzer
	    inherits Lucy::Analysis::Analyzer : dumpable {

	    /* The component Analyzers, in the order supplied to init(); returned
	     * by Get_Analyzers(). */
	    VArray *analyzers;

	    /** Constructor.  Accepts the same arguments as init(); see init() for
	     * their meaning.  Both arguments default to NULL.
	     *
	     * NOTE(review): presumably allocates a new object and hands it to
	     * init() -- confirm against the implementation.
	     */
	    inert incremented PolyAnalyzer*
	    new(const CharBuf *language = NULL, VArray *analyzers = NULL);

	    /** Initialize a PolyAnalyzer from either a language code or an explicit
	     * list of Analyzers.  Both arguments default to NULL.
	     *
	     * NOTE(review): what happens when both arguments, or neither, are
	     * supplied is not visible in this header -- confirm against the
	     * implementation.
	     *
	     * @param language An ISO code from the list of supported languages.
	     * @param analyzers An array of Analyzers. The order of the analyzers
	     * matters. Don't put a SnowballStemmer before a RegexTokenizer (can't stem whole
	     * documents or paragraphs -- just individual words), or a SnowballStopFilter
	     * after a SnowballStemmer (stemmed words, e.g. "themselv", will not appear in a
	     * stoplist). In general, the sequence should be: normalize, tokenize,
	     * stopalize, stem.
	     */
	    public inert PolyAnalyzer*
	    init(PolyAnalyzer *self, const CharBuf *language = NULL,
	         VArray *analyzers = NULL);

	    /** Getter for "analyzers" member.
	     */
	    public VArray*
	    Get_Analyzers(PolyAnalyzer *self);

	    /** Analyze an existing Inversion and return an Inversion.
	     *
	     * NOTE(review): per the class description, the component Analyzers are
	     * presumably applied one after another in array order -- confirm
	     * against the implementation.
	     */
	    public incremented Inversion*
	    Transform(PolyAnalyzer *self, Inversion *inversion);

	    /** Analyze raw text and return an Inversion.
	     *
	     * NOTE(review): presumably the text-input counterpart of Transform() --
	     * confirm against the implementation.
	     */
	    public incremented Inversion*
	    Transform_Text(PolyAnalyzer *self, CharBuf *text);

	    /** Test this PolyAnalyzer for equality with another object.
	     *
	     * NOTE(review): the comparison criteria are not visible in this header;
	     * presumably the "analyzers" arrays are compared -- confirm.
	     */
	    public bool_t
	    Equals(PolyAnalyzer *self, Obj *other);

	    /** Create a PolyAnalyzer from a dumped representation.
	     *
	     * NOTE(review): the expected structure of `dump` is not visible in
	     * this header -- confirm against the implementation.
	     */
	    public incremented PolyAnalyzer*
	    Load(PolyAnalyzer *self, Obj *dump);

	    /** Destructor.
	     *
	     * NOTE(review): presumably releases the "analyzers" member -- confirm
	     * against the implementation.
	     */
	    public void
	    Destroy(PolyAnalyzer *self);
	}