blob: b04a0c4db36c08101e8686520d4e4a66e481c70f [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
parcel Lucy;
/** Tokenize/modify/filter text.
*
* An Analyzer is a filter which processes text, transforming it from one form
* into another. For instance, an analyzer might break up a long text into
* smaller pieces ([](cfish:RegexTokenizer)), or it
* might perform case folding to facilitate case-insensitive search
* ([](cfish:Normalizer)).
*
* Subclasses of Analyzer must override the [](.Equals) method.
*/
public abstract class Lucy::Analysis::Analyzer inherits Clownfish::Obj {
/** Abstract initializer.
*/
public inert Analyzer*
init(Analyzer *self);
/** Take a single [](cfish:Inversion) as input
* and return an Inversion, either the same one (presumably transformed
* in some way), or a new one.
*
* @param inversion An inversion.
* @return An Inversion: either `inversion` itself or a new one.
*/
public abstract incremented Inversion*
Transform(Analyzer *self, Inversion *inversion);
/** Kick off an analysis chain, creating an Inversion from string input.
* The default implementation simply creates an initial Inversion with a
* single Token, then calls [](cfish:.Transform), but occasionally subclasses will
* provide an optimized implementation which minimizes string copies.
*
* @param text A string.
* @return An Inversion created from `text`.
*/
public incremented Inversion*
Transform_Text(Analyzer *self, String *text);
/** Analyze text and return an array of token texts.
*
* @param text A string.
* @return A Vector of token texts.
*/
public incremented Vector*
Split(Analyzer *self, String *text);
/** Dump the analyzer as a hash.
*
* Subclasses should call [](.Dump) on the superclass. The returned
* object is a hash which should be populated with parameters of
* the analyzer.
*
* @return A hash containing a description of the analyzer.
*/
public incremented Obj*
Dump(Analyzer *self);
/** Reconstruct an analyzer from a dump.
*
* Subclasses should first call [](.Load) on the superclass. The
* returned object is an analyzer which should be reconstructed by
* setting the dumped parameters from the hash contained in `dump`.
*
* Note that the invocant analyzer is unused.
*
* @param dump A hash.
* @return An analyzer.
*/
public incremented Obj*
Load(Analyzer *self, Obj *dump);
}