| using System; |
| using System.Globalization; |
| using System.Text; |
| |
| namespace Lucene.Net.Analysis.Fr |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /// <summary> |
| /// A stemmer for French words. |
| /// <para/> |
| /// The algorithm is based on the work of |
| /// Dr Martin Porter on his snowball project<para/> |
| /// refer to http://snowball.sourceforge.net/french/stemmer.html |
| /// (French stemming algorithm) for details |
| /// </summary> |
| /// @deprecated Use <see cref="Tartarus.Snowball.Ext.FrenchStemmer"/> instead, |
| /// which has the same functionality. This filter will be removed in Lucene 4.0 |
| [Obsolete("Use FrenchStemmer instead, which has the same functionality.")] |
| public class FrenchStemmer |
| { |
| private static readonly CultureInfo locale = new CultureInfo("fr-FR"); |
| |
| /// <summary> |
| /// Buffer for the terms while stemming them. |
| /// </summary> |
| private StringBuilder sb = new StringBuilder(); |
| |
| /// <summary> |
| /// A temporary buffer, used to reconstruct R2 |
| /// </summary> |
| private readonly StringBuilder tb = new StringBuilder(); |
| |
| /// <summary> |
| /// Region R0 is equal to the whole buffer |
| /// </summary> |
| private string R0; |
| |
| /// <summary> |
| /// Region RV |
| /// "If the word begins with two vowels, RV is the region after the third letter, |
| /// otherwise the region after the first vowel not at the beginning of the word, |
| /// or the end of the word if these positions cannot be found." |
| /// </summary> |
| private string RV; |
| |
| /// <summary> |
| /// Region R1 |
| /// "R1 is the region after the first non-vowel following a vowel |
| /// or is the null region at the end of the word if there is no such non-vowel" |
| /// </summary> |
| private string R1; |
| |
| /// <summary> |
| /// Region R2 |
| /// "R2 is the region after the first non-vowel in R1 following a vowel |
| /// or is the null region at the end of the word if there is no such non-vowel" |
| /// </summary> |
| private string R2; |
| |
| |
| /// <summary> |
| /// Set to true if we need to perform step 2 |
| /// </summary> |
| private bool suite; |
| |
| /// <summary> |
| /// Set to true if the buffer was modified |
| /// </summary> |
| private bool modified; |
| |
| |
/// <summary>
/// Stems the given term to a unique <c>discriminator</c>.
/// <para/>
/// Terms rejected by <c>IsStemmable</c> are handed back untouched; every other term is
/// lower-cased with the French culture and run through steps 1 to 6 on the shared buffer.
/// </summary>
/// <param name="term"> The term that should be stemmed </param>
/// <returns> Discriminator for <paramref name="term"/> </returns>
protected internal virtual string Stem(string term)
{
    if (!IsStemmable(term))
    {
        return term;
    }

    // Load the lower-cased term into the reusable working buffer.
    sb.Length = 0;
    sb.Append(locale.TextInfo.ToLower(term));

    // Every run starts with clean flags.
    suite = false;
    modified = false;

    // Protect the u/i/y letters first, then compute the search regions from the result.
    sb = TreatVowels(sb);
    SetStrings();

    Step1();

    // Step 2 runs when step 1 changed nothing, or when step 1 explicitly asked for it,
    // and only if the word has an RV region.
    if ((!modified || suite) && RV != null)
    {
        suite = Step2a();
        if (!suite)
        {
            Step2b();
        }
    }

    // Step 3 when something was removed so far, step 4 otherwise.
    if (modified || suite)
    {
        Step3();
    }
    else
    {
        Step4();
    }

    Step5();
    Step6();

    return sb.ToString();
}
| |
/// <summary>
/// Recomputes the search regions (R0, RV, R1, R2) from the current buffer content.
/// Has to be called after every modification of the buffer.
/// </summary>
private void SetStrings()
{
    R0 = sb.ToString();
    RV = RetrieveRV(sb);
    R1 = RetrieveR(sb);

    // R2 is obtained by applying the R rule a second time, to R1 itself.
    R2 = null;
    if (R1 != null)
    {
        tb.Length = 0;
        tb.Append(R1);
        R2 = RetrieveR(tb);
    }
}
| |
/// <summary>
/// First step of the Porter Algorithm: standard suffix removal<para/>
/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
/// <para/>
/// The calls below are order-sensitive: each helper may rewrite the buffer and refresh
/// the region fields, and the following call reads the refreshed regions.
/// </summary>
private void Step1()
{
    // Suffix groups that are tested several times below.
    string[] ementSuffixes = { "ements", "ement" };
    string[] iteSuffixes = { "ités", "ité" };
    string[] ifSuffixes = { "ifs", "ives", "if", "ive" };

    DeleteFrom(R2, new string[] { "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" });

    ReplaceFrom(R2, new string[] { "logies", "logie" }, "log");
    ReplaceFrom(R2, new string[] { "usions", "utions", "usion", "ution" }, "u");
    ReplaceFrom(R2, new string[] { "ences", "ence" }, "ent");

    DeleteButSuffixFromElseReplace(R2, new string[] { "atrices", "ateurs", "ations", "atrice", "ateur", "ation" }, "ic", true, R0, "iqU");

    DeleteButSuffixFromElseReplace(R2, ementSuffixes, "eus", false, R0, "eux");
    foreach (string precedingStem in new string[] { "ativ", "iv", "abl", "iqU" })
    {
        DeleteButSuffixFrom(R2, ementSuffixes, precedingStem, false);
    }

    DeleteFromIfTestVowelBeforeIn(R1, new string[] { "issements", "issement" }, false, R0);
    DeleteFrom(RV, ementSuffixes);

    DeleteButSuffixFromElseReplace(R2, iteSuffixes, "abil", false, R0, "abl");
    DeleteButSuffixFromElseReplace(R2, iteSuffixes, "ic", false, R0, "iqU");
    DeleteButSuffixFrom(R2, iteSuffixes, "iv", true);

    DeleteButSuffixFromElseReplace(R2, ifSuffixes, "icat", false, R0, "iqU");
    DeleteButSuffixFromElseReplace(R2, ifSuffixes, "at", true, R2, "iqU");

    ReplaceFrom(R0, new string[] { "eaux" }, "eau");

    ReplaceFrom(R1, new string[] { "aux" }, "al");

    DeleteButSuffixFromElseReplace(R2, new string[] { "euses", "euse" }, "", true, R1, "eux");

    DeleteFrom(R2, new string[] { "eux" });

    // If any of the following rules fires, step 2a has to be performed afterwards.
    if (ReplaceFrom(RV, new string[] { "amment" }, "ant"))
    {
        suite = true;
    }
    if (ReplaceFrom(RV, new string[] { "emment" }, "ent"))
    {
        suite = true;
    }
    if (DeleteFromIfTestVowelBeforeIn(RV, new string[] { "ments", "ment" }, true, RV))
    {
        suite = true;
    }
}
| |
/// <summary>
/// Second step (A) of the Porter Algorithm<para/>
/// Performed when the first step changed nothing, or when its changes concerned the
/// amment, emment, ments or ment suffixes<para/>
/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
/// </summary>
/// <returns> true if something changed in the <see cref="StringBuilder"/> </returns>
private bool Step2a()
{
    // Verb endings in "i"; one is removed from RV only when the letter before it is not a vowel.
    string[] iVerbEndings =
    {
        "îmes", "îtes", "iraIent", "irait", "irais", "irai", "iras", "ira", "irent",
        "iriez", "irez", "irions", "irons", "iront", "issaIent", "issais", "issantes",
        "issante", "issants", "issant", "issait", "issais", "issions", "issons",
        "issiez", "issez", "issent", "isses", "isse", "ir", "is", "ît", "it", "ies",
        "ie", "i"
    };

    bool changed = DeleteFromIfTestVowelBeforeIn(RV, iVerbEndings, false, RV);
    return changed;
}
| |
/// <summary>
/// Second step (B) of the Porter Algorithm<para/>
/// Performed when step 2 A did not change anything<para/>
/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
/// </summary>
private void Step2b()
{
    // Endings that are simply dropped when found at the end of RV.
    string[] plainEndings =
    {
        "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez", "erons",
        "eront", "erez", "èrent", "era", "ées", "iez", "ée", "és", "er", "ez", "é"
    };
    DeleteFrom(RV, plainEndings);

    // Endings that are dropped from RV together with a directly preceding "e", if there is one.
    string[] endingsAfterOptionalE =
    {
        "assions", "assiez", "assent", "asses", "asse", "aIent", "antes", "aIent",
        "Aient", "ante", "âmes", "âtes", "ants", "ant", "ait", "aît", "ais", "Ait",
        "Aît", "Ais", "ât", "as", "ai", "Ai", "a"
    };
    DeleteButSuffixFrom(RV, endingsAfterOptionalE, "e", true);

    DeleteFrom(R2, new string[] { "ions" });
}
| |
/// <summary>
/// Third step of the Porter Algorithm: a final <c>Y</c> becomes <c>i</c> and a final
/// <c>ç</c> becomes <c>c</c><para/>
/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
/// </summary>
private void Step3()
{
    if (sb.Length == 0)
    {
        return;
    }

    int lastIndex = sb.Length - 1;
    switch (sb[lastIndex])
    {
        case 'Y':
            sb[lastIndex] = 'i';
            SetStrings();
            break;

        case 'ç':
            sb[lastIndex] = 'c';
            SetStrings();
            break;
    }
}
| |
/// <summary>
/// Fourth step of the Porter Algorithm: residual suffixes<para/>
/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
/// </summary>
private void Step4()
{
    // Drop a final "s" unless the letter before it is one of a, i, o, u, è or s.
    int length = sb.Length;
    if (length > 1 && sb[length - 1] == 's')
    {
        char beforeS = sb[length - 2];
        bool keepFinalS = beforeS == 'a' || beforeS == 'i' || beforeS == 'o'
            || beforeS == 'u' || beforeS == 'è' || beforeS == 's';
        if (!keepFinalS)
        {
            sb.Remove(length - 1, 1);
            SetStrings();
        }
    }

    // "ion" is removed when R2 ends with it and RV ends with "sion"; "tion" is only
    // tried when the "sion" attempt did not remove anything.
    string[] ion = { "ion" };
    if (!DeleteFromIfPrecededIn(R2, ion, RV, "s"))
    {
        DeleteFromIfPrecededIn(R2, ion, RV, "t");
    }

    ReplaceFrom(RV, new string[] { "Ière", "ière", "Ier", "ier" }, "i");
    DeleteFrom(RV, new string[] { "e" });
    DeleteFromIfPrecededIn(RV, new string[] { "ë" }, R0, "gu");
}
| |
/// <summary>
/// Fifth step of the Porter Algorithm: when the word ends with enn, onn, ett, ell or
/// eill, its last letter is removed<para/>
/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
/// </summary>
private void Step5()
{
    if (R0 == null)
    {
        return;
    }

    string[] doubledEndings = { "enn", "onn", "ett", "ell", "eill" };
    foreach (string ending in doubledEndings)
    {
        if (R0.EndsWith(ending, StringComparison.Ordinal))
        {
            // Only one letter is ever removed, whichever ending matched.
            sb.Remove(sb.Length - 1, 1);
            SetStrings();
            return;
        }
    }
}
| |
/// <summary>
/// Sixth (and last!) step of the Porter Algorithm: an <c>é</c> or <c>è</c> that is
/// followed only by one or more non-vowels up to the end of the word becomes <c>e</c><para/>
/// refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
/// </summary>
private void Step6()
{
    if (R0 == null || R0.Length == 0)
    {
        return;
    }

    // Walk backwards over the trailing non-vowels to reach the last vowel of the word.
    int lastIndex = R0.Length - 1;
    int vowelIndex = lastIndex;
    while (vowelIndex >= 0 && !IsVowel(R0[vowelIndex]))
    {
        vowelIndex--;
    }

    bool hasVowel = vowelIndex >= 0;
    bool hasTrailingNonVowel = vowelIndex < lastIndex;
    if (hasVowel && hasTrailingNonVowel)
    {
        char lastVowel = R0[vowelIndex];
        if (lastVowel == 'é' || lastVowel == 'è')
        {
            // Only the buffer is patched here; the region strings are not recomputed.
            sb[vowelIndex] = 'e';
        }
    }
}
| |
/// <summary>
/// Deletes the first suffix of <paramref name="search"/> that ends zone
/// <paramref name="source"/>, provided zone <paramref name="from"/> ends with
/// <paramref name="prefix"/> immediately followed by that suffix.
/// Only the suffix is removed from the buffer; the prefix stays.
/// </summary>
/// <param name="source"> the primary source zone for search </param>
/// <param name="search"> the strings to search for suppression </param>
/// <param name="from"> the secondary source zone for search </param>
/// <param name="prefix"> the prefix to add to the search string to test </param>
/// <returns> true if modified </returns>
private bool DeleteFromIfPrecededIn(string source, string[] search, string from, string prefix)
{
    if (source == null)
    {
        return false;
    }

    foreach (string candidate in search)
    {
        bool endsSource = source.EndsWith(candidate, StringComparison.Ordinal);
        if (endsSource && from != null && from.EndsWith(prefix + candidate, StringComparison.Ordinal))
        {
            sb.Remove(sb.Length - candidate.Length, candidate.Length);
            SetStrings();
            return true;
        }
    }

    return false;
}
| |
/// <summary>
/// Deletes the first suffix of <paramref name="search"/> that ends zone
/// <paramref name="source"/> when the letter right before it is (or is not) a vowel.
/// </summary>
/// <param name="source"> the primary source zone for search </param>
/// <param name="search"> the strings to search for suppression </param>
/// <param name="vowel"> true if we need a vowel before the search string </param>
/// <param name="from"> the secondary source zone for search (where vowel could be) </param>
/// <returns> true if modified </returns>
private bool DeleteFromIfTestVowelBeforeIn(string source, string[] search, bool vowel, string from)
{
    if (source == null || from == null)
    {
        return false;
    }

    foreach (string candidate in search)
    {
        if (!source.EndsWith(candidate, StringComparison.Ordinal))
        {
            continue;
        }

        // The tested letter has to lie inside the "from" zone, so that zone must be
        // at least one character longer than the suffix.
        int suffixPlusOne = candidate.Length + 1;
        if (suffixPlusOne > from.Length)
        {
            continue;
        }

        bool precededByVowel = IsVowel(sb[sb.Length - suffixPlusOne]);
        if (precededByVowel != vowel)
        {
            continue;
        }

        sb.Remove(sb.Length - candidate.Length, candidate.Length);
        modified = true;
        SetStrings();
        return true;
    }

    return false;
}
| |
/// <summary>
/// Deletes a suffix found at the end of zone <paramref name="source"/>. When the suffix
/// is preceded by <paramref name="prefix"/>, the prefix is deleted along with it.
/// Without the prefix the suffix is deleted only if <paramref name="without"/> is set.
/// Processing stops at the first suffix that leads to a deletion.
/// </summary>
/// <param name="source"> the primary source zone for search </param>
/// <param name="search"> the strings to search for suppression </param>
/// <param name="prefix"> the prefix to add to the search string to test </param>
/// <param name="without"> true if it will be deleted even without prefix found </param>
private void DeleteButSuffixFrom(string source, string[] search, string prefix, bool without)
{
    if (source == null)
    {
        return;
    }

    foreach (string candidate in search)
    {
        int charsToDrop;
        if (source.EndsWith(prefix + candidate, StringComparison.Ordinal))
        {
            charsToDrop = prefix.Length + candidate.Length;
        }
        else if (without && source.EndsWith(candidate, StringComparison.Ordinal))
        {
            charsToDrop = candidate.Length;
        }
        else
        {
            continue;
        }

        sb.Remove(sb.Length - charsToDrop, charsToDrop);
        modified = true;
        SetStrings();
        return;
    }
}
| |
/// <summary>
/// For the first entry of <paramref name="search"/> that applies, does one of the
/// following, tested in this order, then stops:
/// <para/>
/// 1. zone <paramref name="source"/> ends with prefix + suffix: both are deleted;<para/>
/// 2. zone <paramref name="from"/> ends with prefix + suffix: both are replaced by
///    <paramref name="replace"/>;<para/>
/// 3. <paramref name="without"/> is set and zone <paramref name="source"/> ends with the
///    suffix alone: the suffix is deleted.
/// <para/>
/// Any change sets the <c>modified</c> flag and refreshes the region strings.
/// </summary>
/// <param name="source"> the primary source zone for search </param>
/// <param name="search"> the strings to search for suppression </param>
/// <param name="prefix"> the prefix to add to the search string to test </param>
/// <param name="without"> true if it will be deleted even without prefix found </param>
/// <param name="from"> the secondary source zone for search </param>
/// <param name="replace"> the replacement string </param>
private void DeleteButSuffixFromElseReplace(string source, string[] search, string prefix, bool without, string from, string replace)
{
    if (source == null)
    {
        return;
    }

    for (int i = 0; i < search.Length; i++)
    {
        string suffix = search[i];
        string prefixedSuffix = prefix + suffix;

        int charsToDrop;
        bool appendReplacement = false;
        if (source.EndsWith(prefixedSuffix, StringComparison.Ordinal))
        {
            charsToDrop = prefixedSuffix.Length;
        }
        else if (from != null && from.EndsWith(prefixedSuffix, StringComparison.Ordinal))
        {
            charsToDrop = prefixedSuffix.Length;
            appendReplacement = true;
        }
        else if (without && source.EndsWith(suffix, StringComparison.Ordinal))
        {
            charsToDrop = suffix.Length;
        }
        else
        {
            continue;
        }

        // The matched text sits at the very end of the buffer (the zones are computed as
        // tails of it in SetStrings). StringBuilder.Remove takes (startIndex, length):
        // the length must be the positive number of characters to drop. The previous
        // "sb.Length - sb.Length - n" expression evaluated to -n and made Remove throw.
        sb.Remove(sb.Length - charsToDrop, charsToDrop);
        if (appendReplacement)
        {
            // The removed text was the tail, so the replacement goes at the new end of
            // the buffer. Recomputing "sb.Length - n" after the removal, as the previous
            // chained Insert did, would point n characters too early.
            sb.Append(replace);
        }

        modified = true;
        SetStrings();
        return;
    }
}
| |
/// <summary>
/// Replaces the first entry of <paramref name="search"/> that ends zone
/// <paramref name="source"/> with <paramref name="replace"/> in the working buffer.
/// A replacement sets the <c>modified</c> flag and refreshes the region strings.
/// </summary>
/// <param name="source"> the source zone for search </param>
/// <param name="search"> the strings to search for replacement </param>
/// <param name="replace"> the replacement string </param>
/// <returns> true if a replacement was made </returns>
private bool ReplaceFrom(string source, string[] search, string replace)
{
    if (source == null)
    {
        return false;
    }

    for (int i = 0; i < search.Length; i++)
    {
        string suffix = search[i];
        if (!source.EndsWith(suffix, StringComparison.Ordinal))
        {
            continue;
        }

        // StringBuilder.Remove takes (startIndex, length). The previous length expression,
        // "sb.Length - sb.Length - n", evaluated to -n and made Remove throw.
        sb.Remove(sb.Length - suffix.Length, suffix.Length);

        // The suffix was the tail of the buffer (the zones are computed as tails of it in
        // SetStrings), so the replacement belongs at the new end. The previous chained
        // Insert recomputed "sb.Length - n" after the removal and would have inserted
        // n characters too early.
        sb.Append(replace);

        modified = true;
        SetStrings();
        return true;
    }

    return false;
}
| |
/// <summary>
/// Deletes from the working buffer the first entry of <paramref name="suffix"/> that
/// ends zone <paramref name="source"/>.
/// </summary>
/// <param name="source"> the source zone for search </param>
/// <param name="suffix"> the strings to search for suppression </param>
private void DeleteFrom(string source, string[] suffix)
{
    if (source == null)
    {
        return;
    }

    foreach (string candidate in suffix)
    {
        if (source.EndsWith(candidate, StringComparison.Ordinal))
        {
            sb.Remove(sb.Length - candidate.Length, candidate.Length);
            modified = true;
            SetStrings();
            return;
        }
    }
}
| |
/// <summary>
/// Tests whether a char is a French vowel, accented ones included.
/// Only lower-case letters are recognised.
/// </summary>
/// <param name="ch"> the char to test </param>
/// <returns> true if the char is a vowel </returns>
private bool IsVowel(char ch)
{
    const string vowels = "aeiouyâàëéêèïîôüùû";
    return vowels.IndexOf(ch) >= 0;
}
| |
/// <summary>
/// Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<para/>
/// "R is the region after the first non-vowel following a vowel
/// or is the null region at the end of the word if there is no such non-vowel"
/// </summary>
/// <param name="buffer"> the in buffer </param>
/// <returns> the resulting string, or <c>null</c> when the region is empty or does not exist </returns>
private string RetrieveR(StringBuilder buffer)
{
    int length = buffer.Length;
    int index = 0;

    // Skip ahead to the first vowel...
    while (index < length && !IsVowel(buffer[index]))
    {
        index++;
    }

    // ...then to the first non-vowel after it.
    while (index < length && IsVowel(buffer[index]))
    {
        index++;
    }

    // The region starts right after that non-vowel. If either scan ran off the end,
    // index == length and the test below fails.
    int regionStart = index + 1;
    if (regionStart < length)
    {
        return buffer.ToString(regionStart, length - regionStart);
    }

    return null;
}
| |
/// <summary>
/// Retrieve the "RV zone" from a buffer and return the corresponding string<para/>
/// "If the word begins with two vowels, RV is the region after the third letter,
/// otherwise the region after the first vowel not at the beginning of the word,
/// or the end of the word if these positions cannot be found."
/// </summary>
/// <param name="buffer"> the in buffer </param>
/// <returns> the resulting string, or <c>null</c> when the buffer has three characters or
/// fewer or the region would be empty </returns>
private string RetrieveRV(StringBuilder buffer)
{
    int length = buffer.Length;
    if (length <= 3)
    {
        return null;
    }

    if (IsVowel(buffer[0]) && IsVowel(buffer[1]))
    {
        return buffer.ToString(3, length - 3);
    }

    // Look for the first vowel that is not the first letter.
    int scan = 1;
    while (scan < length && !IsVowel(buffer[scan]))
    {
        scan++;
    }

    // When no such vowel exists the position falls back to 0, so the region then
    // starts at the second letter.
    int vowelIndex = scan < length ? scan : 0;
    int regionStart = vowelIndex + 1;
    if (regionStart < length)
    {
        return buffer.ToString(regionStart, length - regionStart);
    }

    return null;
}
| |
| |
| |
/// <summary>
/// Turns u and i preceded AND followed by a vowel to UpperCase<para/>
/// Turns y preceded OR followed by a vowel to UpperCase<para/>
/// Turns u preceded by q to UpperCase
/// </summary>
/// <param name="buffer"> the buffer to treat (modified in place) </param>
/// <returns> the treated buffer (the same instance that was passed in) </returns>
private StringBuilder TreatVowels(StringBuilder buffer)
{
    int lastIndex = buffer.Length - 1;
    for (int index = 0; index <= lastIndex; index++)
    {
        bool hasPrevious = index > 0;
        bool hasNext = index < lastIndex;

        // Neighbours are read from the buffer as it is right now. A previous letter that
        // this pass already upper-cased is no longer a vowel for IsVowel, which only
        // knows lower-case letters.
        switch (buffer[index])
        {
            case 'u':
                if (hasPrevious)
                {
                    bool afterQ = buffer[index - 1] == 'q';
                    if (afterQ || (hasNext && IsVowel(buffer[index - 1]) && IsVowel(buffer[index + 1])))
                    {
                        buffer[index] = 'U';
                    }
                }
                break;

            case 'i':
                if (hasPrevious && hasNext && IsVowel(buffer[index - 1]) && IsVowel(buffer[index + 1]))
                {
                    buffer[index] = 'I';
                }
                break;

            case 'y':
                if ((hasPrevious && IsVowel(buffer[index - 1])) || (hasNext && IsVowel(buffer[index + 1])))
                {
                    buffer[index] = 'Y';
                }
                break;
        }
    }

    return buffer;
}
| |
/// <summary>
/// Checks a term if it can be processed correctly.
/// </summary>
/// <returns> true if, and only if, the given term consists only of letters and contains
/// no upper-case letter other than, possibly, its first one. </returns>
private bool IsStemmable(string term)
{
    for (int index = 0; index < term.Length; index++)
    {
        char current = term[index];

        // Any non-letter character disqualifies the term.
        if (!char.IsLetter(current))
        {
            return false;
        }

        // An upper-case letter is tolerated only as the very first character. This also
        // rules out terms with more than one upper-case letter.
        if (index > 0 && char.IsUpper(current))
        {
            return false;
        }
    }

    return true;
}
| } |
| } |