| #ifndef H_LIBSTEMMER |
| #define H_LIBSTEMMER |
| |
| |
| /* Make header file work when included from C++ */ |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| struct sb_stemmer; /* Forward declaration only: the structure's members are not defined in this header. */ |
| typedef unsigned char sb_symbol; /* Element type of the words passed to and returned by sb_stemmer_stem(). */ |
| |
| /* FIXME - should be able to get a version number for each stemming |
| * algorithm (which will be incremented each time the output changes). */ |
| |
| /** Returns an array of the names of the available stemming algorithms. |
| * Note that these are the canonical names - aliases (i.e., other names for |
| * the same algorithm) will not be included in the list. |
| * The list is terminated with a null pointer. |
| * |
| * The list must not be modified in any way. |
| * |
| * @return A null-pointer-terminated array of canonical algorithm names. |
| */ |
| const char ** sb_stemmer_list(void); |
| |
| /** Create a new stemmer object, using the specified algorithm, for the |
| * specified character encoding. |
| * |
| * All algorithms will usually be available in UTF-8, but may also be |
| * available in other character encodings. |
| * |
| * @param algorithm The algorithm name. This is either the English |
| * name of the algorithm, or the 2 or 3 letter ISO 639 code for the |
| * language. Note that case is significant in this parameter - the |
| * value should be supplied in lower case. |
| * |
| * @param charenc The character encoding. NULL may be passed as |
| * this value, in which case UTF-8 encoding will be assumed. Otherwise, |
| * the argument may be one of "UTF_8", "ISO_8859_1" (i.e., Latin 1), |
| * "CP850" (i.e., MS-DOS Latin 1) or "KOI8_R" (Russian). Note that |
| * case is significant in this parameter. |
| * |
| * @return A pointer to a newly created stemmer for the requested |
| * algorithm, or NULL on failure. NULL is returned if the specified |
| * algorithm is not recognised, if the algorithm is not available for the |
| * requested encoding, or if an out of memory error occurs; these cases |
| * cannot be told apart from the return value alone. |
| * A non-NULL returned pointer must be deleted by calling |
| * sb_stemmer_delete(). |
| */ |
| struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc); |
| |
| /** Delete a stemmer object. |
| * |
| * This frees all resources allocated for the stemmer. After calling |
| * this function, the supplied stemmer may no longer be used in any way. |
| * |
| * It is safe to pass a null pointer to this function - this will have |
| * no effect. |
| * |
| * @param stemmer The stemmer to delete, as returned by sb_stemmer_new(), |
| * or NULL. |
| */ |
| void sb_stemmer_delete(struct sb_stemmer * stemmer); |
| |
| /** Stem a word. |
| * |
| * The return value is owned by the stemmer - it must not be freed or |
| * modified, and it will become invalid when the stemmer is called again, |
| * or if the stemmer is freed. Copy the result first if it needs to |
| * outlive the next call on the same stemmer. |
| * |
| * The length of the return value can be obtained using sb_stemmer_length(). |
| * |
| * @param stemmer The stemmer to use, as created by sb_stemmer_new(). |
| * |
| * @param word The word to stem. It is expected to be in the character |
| * encoding that the stemmer was created for. |
| * |
| * @param size The length of word. |
| * NOTE(review): presumably counted in sb_symbol elements (i.e., bytes) |
| * rather than characters - confirm against the implementation. |
| * |
| * @return A pointer to the stemmed word, or NULL if an out-of-memory |
| * error occurs. |
| */ |
| const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, |
| const sb_symbol * word, int size); |
| |
| /** Get the length of the result of the last stemmed word. |
| * This should not be called before sb_stemmer_stem() has been called. |
| * |
| * @param stemmer The stemmer on which sb_stemmer_stem() was last called. |
| * |
| * @return The length of the most recent result returned by |
| * sb_stemmer_stem() for this stemmer. |
| * NOTE(review): presumably counted in sb_symbol elements - confirm |
| * against the implementation. |
| */ |
| int sb_stemmer_length(struct sb_stemmer * stemmer); |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| |
| |
| |
| #endif /* H_LIBSTEMMER */ |
| |