| using System; |
| using System.Globalization; |
| |
| namespace Lucene.Net.Analysis.Br |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /// <summary> |
| /// A stemmer for Brazilian Portuguese words. |
| /// </summary> |
| public class BrazilianStemmer |
| { |
// Culture whose casing rules ChangeTerm uses to lowercase incoming terms.
private static readonly CultureInfo locale = new CultureInfo("pt-BR");

/// <summary>
/// Debug record written by Stem: the raw input term, a ';' and the
/// normalized term as it was before any stemming step ran. Only read by Log.
/// </summary>
private string TERM;
// Changed term: the lowercased, accent-folded, punctuation-trimmed working
// copy of the input. The StepN methods rewrite it and Stem returns it.
private string CT;
// R1 region of CT as computed by GetR1(CT) at the start of Stem; may be null.
private string R1;
// R2 region, computed as GetR1(R1); may be null.
private string R2;
// RV region of CT as computed by GetRV(CT); may be null.
// NOTE: R1, R2 and RV are computed once per Stem call and are not refreshed
// when a step shortens CT.
private string RV;


// Creates a stemmer; all working state is filled in by Stem.
public BrazilianStemmer()
{
}
| |
/// <summary>
/// Stems the given term to a unique <c>discriminator</c>.
/// </summary>
/// <param name="term"> The term that should be stemmed. May be <c>null</c>. </param>
/// <returns>
/// The stem of <paramref name="term"/>; the normalized term itself when it
/// contains a non-letter character; or <c>null</c> when
/// <paramref name="term"/> is <c>null</c> or its normalized form is not
/// between 3 and 29 characters long.
/// </returns>
protected internal virtual string Stem(string term)
{
    // A null term cannot be normalized (CreateCT would dereference a null
    // changed term), so report it the same way as a non-indexable term.
    if (term == null)
    {
        return null;
    }

    // creates CT
    CreateCT(term);

    if (!IsIndexable(CT))
    {
        return null;
    }
    if (!IsStemmable(CT))
    {
        return CT;
    }

    // The regions are derived once from the normalized term; the steps
    // below test suffixes against them and strip the match from CT.
    R1 = GetR1(CT);
    R2 = GetR1(R1);
    RV = GetRV(CT);
    TERM = term + ";" + CT;

    // Step2 (verb suffixes) only runs when Step1 removed nothing.
    bool altered = Step1() || Step2();

    if (altered)
    {
        Step3();
    }
    else
    {
        Step4();
    }

    Step5();

    return CT;
}
| |
/// <summary>
/// Tells whether a term is made of letters only and can therefore be stemmed.
/// </summary>
/// <param name="term"> The normalized term to inspect; must not be null. </param>
/// <returns> true if, and only if, every character of the term is a letter. </returns>
private bool IsStemmable(string term)
{
    foreach (char ch in term)
    {
        // A single digit, punctuation mark, etc. disqualifies the whole term.
        if (!char.IsLetter(ch))
        {
            return false;
        }
    }
    return true;
}
| |
/// <summary>
/// Tells whether a term has a length worth indexing.
/// </summary>
/// <param name="term"> The normalized term to inspect; must not be null. </param>
/// <returns> true if the term is longer than 2 and shorter than 30 characters. </returns>
private bool IsIndexable(string term)
{
    int length = term.Length;
    return length > 2 && length < 30;
}
| |
/// <summary>
/// Tells whether a character is one of the plain lowercase vowels
/// 'a', 'e', 'i', 'o' or 'u'. Uppercase and accented vowels do not count.
/// </summary>
/// <param name="value"> The character to classify. </param>
/// <returns> true if the character is a vowel </returns>
private bool IsVowel(char value)
{
    switch (value)
    {
        case 'a':
        case 'e':
        case 'i':
        case 'o':
        case 'u':
            return true;
        default:
            return false;
    }
}
| |
/// <summary>
/// Gets R1
///
/// R1 - is the region after the first non-vowel following a vowel.
/// The last character of the input is never examined, so the returned
/// region always holds at least one character; when no such region
/// exists the method returns null instead of an empty string.
/// </summary>
/// <param name="value"> The string to take the region from; may be null. </param>
/// <returns> null or a string representing R1 </returns>
private string GetR1(string value)
{
    if (value == null)
    {
        return null;
    }

    int last = value.Length - 1;
    int pos = 0;

    // Skip leading consonants up to the first vowel.
    while (pos < last && !IsVowel(value[pos]))
    {
        pos++;
    }
    if (pos >= last)
    {
        return null;
    }

    // Skip the vowel run up to the first non-vowel.
    while (pos < last && IsVowel(value[pos]))
    {
        pos++;
    }
    if (pos >= last)
    {
        return null;
    }

    // The region starts right after that non-vowel.
    return value.Substring(pos + 1);
}
| |
/// <summary>
/// Gets RV
///
/// RV - IF the second letter is a consonant, RV is the region after
///      the next following vowel,
///
///      OR if the first two letters are vowels, RV is the region
///      after the next consonant,
///
///      AND otherwise (consonant-vowel case), or when the position
///      looked for above is not found before the last letter, RV is
///      the region after the third letter.
///
///      When the word has three letters or fewer and none of the above
///      yields a region, null is returned.
/// </summary>
/// <param name="value"> The string to take the region from; may be null. </param>
/// <returns> null or a string representing RV </returns>
private string GetRV(string value)
{
    if (value == null)
    {
        return null;
    }

    int last = value.Length - 1;

    // The two scanning cases are mutually exclusive: they disagree on
    // whether the second letter is a vowel.
    bool secondIsConsonant = (last > 0) && !IsVowel(value[1]);
    bool startsWithTwoVowels = (last > 1) && IsVowel(value[0]) && IsVowel(value[1]);

    if (secondIsConsonant || startsWithTwoVowels)
    {
        // Second letter consonant -> look for the next vowel;
        // two leading vowels      -> look for the next consonant.
        bool wantVowel = secondIsConsonant;

        int pos = 2;
        while (pos < last && IsVowel(value[pos]) != wantVowel)
        {
            pos++;
        }

        // The last letter is never accepted as the boundary.
        if (pos < last)
        {
            return value.Substring(pos + 1);
        }
    }

    // Fallback: the region after the third letter, when there is one.
    return (last > 2) ? value.Substring(3) : null;
}
| |
/// <summary>
/// Normalizes a term before stemming:
/// 1) Turn to lowercase (pt-BR casing rules)
/// 2) Remove accents: á â ã -> a ; é ê -> e ; í -> i ; ó ô õ -> o ; ú ü -> u
/// 3) ç -> c ; ñ -> n
/// Any other character is kept as it is.
/// </summary>
/// <param name="value"> The term to normalize; may be null. </param>
/// <returns> null if <paramref name="value"/> is null, otherwise the transformed string </returns>
private string ChangeTerm(string value)
{
    // be-safe !!!
    if (value == null)
    {
        return null;
    }

    // Each folding maps exactly one char to one char, so the lowercased
    // copy is edited in place instead of growing a string by repeated
    // concatenation (which is quadratic in the term length).
    char[] folded = locale.TextInfo.ToLower(value).ToCharArray();
    for (int k = 0; k < folded.Length; k++)
    {
        switch (folded[k])
        {
            case 'á':
            case 'â':
            case 'ã':
                folded[k] = 'a';
                break;
            case 'é':
            case 'ê':
                folded[k] = 'e';
                break;
            case 'í':
                folded[k] = 'i';
                break;
            case 'ó':
            case 'ô':
            case 'õ':
                folded[k] = 'o';
                break;
            case 'ú':
            case 'ü':
                folded[k] = 'u';
                break;
            case 'ç':
                folded[k] = 'c';
                break;
            case 'ñ':
                folded[k] = 'n';
                break;
        }
    }

    return new string(folded);
}
| |
/// <summary>
/// Checks whether a string ends with a suffix, comparing characters
/// ordinally (case-sensitive, culture-independent).
/// </summary>
/// <param name="value"> The string to test; null never matches. </param>
/// <param name="suffix"> The ending to look for; null never matches. </param>
/// <returns> true if the string ends with the specified suffix </returns>
private bool Suffix(string value, string suffix)
{
    if (value == null || suffix == null)
    {
        return false;
    }

    // EndsWith already answers false when the suffix is longer than the value.
    return value.EndsWith(suffix, StringComparison.Ordinal);
}
| |
/// <summary>
/// Replaces a <see cref="string"/> suffix by another.
/// </summary>
/// <param name="value"> The string to change. </param>
/// <param name="toReplace"> The ending to strip. </param>
/// <param name="changeTo"> The text appended in place of the stripped ending. </param>
/// <returns>
/// <paramref name="value"/> with the ending swapped; <paramref name="value"/>
/// unchanged when any argument is null or when stripping
/// <paramref name="toReplace"/> leaves it as it was.
/// </returns>
private string ReplaceSuffix(string value, string toReplace, string changeTo)
{
    bool anyMissing = (value == null) || (toReplace == null) || (changeTo == null);
    if (anyMissing)
    {
        return value;
    }

    string stripped = RemoveSuffix(value, toReplace);

    // Nothing was stripped, so there is nothing to append the replacement to.
    bool unchanged = string.Equals(value, stripped, StringComparison.Ordinal);
    return unchanged ? value : stripped + changeTo;
}
| |
/// <summary>
/// Removes a <see cref="string"/> suffix.
/// </summary>
/// <param name="value"> The string to shorten. </param>
/// <param name="toRemove"> The ending to cut off. </param>
/// <returns>
/// the <see cref="string"/> without the suffix, or <paramref name="value"/>
/// itself when an argument is null or the value does not end with
/// <paramref name="toRemove"/>
/// </returns>
private string RemoveSuffix(string value, string toRemove)
{
    if ((value != null) && (toRemove != null) && Suffix(value, toRemove))
    {
        int keep = value.Length - toRemove.Length;
        return value.Substring(0, keep);
    }

    return value;
}
| |
/// <summary>
/// Checks whether a string ends with <paramref name="suffix"/> and the
/// text right before that suffix ends with <paramref name="preceded"/>.
/// </summary>
/// <param name="value"> The string to test. </param>
/// <param name="suffix"> The ending the string must have. </param>
/// <param name="preceded"> The text that must come directly before the ending. </param>
/// <returns> true if the suffix is present and preceded as requested; false otherwise, including when any argument is null </returns>
private bool SuffixPreceded(string value, string suffix, string preceded)
{
    bool allPresent = (value != null) && (suffix != null) && (preceded != null);
    if (!allPresent || !Suffix(value, suffix))
    {
        return false;
    }

    // The suffix is known to be there, so cut it off and look at what remains.
    string head = value.Substring(0, value.Length - suffix.Length);
    return Suffix(head, preceded);
}
| |
/// <summary>
/// Creates CT (changed term): the term normalized by ChangeTerm, with at
/// most one leading and one trailing punctuation character
/// (" ' - , ; . ? !) stripped. A character is only stripped while the
/// term is at least two characters long.
/// </summary>
/// <param name="term"> The raw term; must not be null. </param>
private void CreateCT(string term)
{
    const string punctuation = "\"'-,;.?!";

    CT = ChangeTerm(term);

    // if the first character is punctuation, remove it
    if (CT.Length >= 2 && punctuation.IndexOf(CT[0]) >= 0)
    {
        CT = CT.Substring(1);
    }

    // if the last character is punctuation, remove it
    if (CT.Length >= 2 && punctuation.IndexOf(CT[CT.Length - 1]) >= 0)
    {
        CT = CT.Substring(0, CT.Length - 1);
    }
}
| |
| |
/// <summary>
/// Standard suffix removal.
/// Tries the suffixes below from the longest to the shortest and applies
/// the first one that CT ends with and that lies in the required region
/// (R2 for most, R1 for "amente", RV preceded by 'e' for "iras"/"ira").
/// The suffix is deleted, or replaced where a replacement is given.
/// </summary>
/// <returns> false if no ending was removed </returns>
private bool Step1()
{
    if (CT == null)
    {
        return false;
    }

    // suffix length = 7
    if (Step1Rule(R2, "uciones", "u")
        || Step1Rule(R2, "imentos", "")
        || Step1Rule(R2, "amentos", ""))
    {
        return true;
    }

    // suffix length = 6
    // "logias" now really becomes "log": the result of the replacement used
    // to be discarded, so the step reported a change without making one.
    if (Step1Rule(R2, "adores", "")
        || Step1Rule(R2, "adoras", "")
        || Step1Rule(R2, "logias", "log")
        || Step1Rule(R2, "encias", "ente")
        || Step1Rule(R1, "amente", "")
        || Step1Rule(R2, "idades", "")
        || Step1Rule(R2, "imento", "")
        || Step1Rule(R2, "amento", ""))
    {
        return true;
    }

    // suffix length = 5
    // NOTE: "acoes" is tested here, after "imento"/"amento", which differs
    // from the original listing order only between suffixes that can never
    // match the same word.
    if (Step1Rule(R2, "acoes", "")
        || Step1Rule(R2, "adora", "")
        || Step1Rule(R2, "ismos", "")
        || Step1Rule(R2, "istas", "")
        || Step1Rule(R2, "logia", "log")
        || Step1Rule(R2, "ucion", "u")
        || Step1Rule(R2, "encia", "ente")
        || Step1Rule(R2, "mente", "")
        || Step1Rule(R2, "idade", ""))
    {
        return true;
    }

    // suffix length = 4
    if (Step1Rule(R2, "acao", "")
        || Step1Rule(R2, "ezas", "")
        || Step1Rule(R2, "icos", "")
        || Step1Rule(R2, "icas", "")
        || Step1Rule(R2, "ismo", "")
        || Step1Rule(R2, "avel", "")
        || Step1Rule(R2, "ivel", "")
        || Step1Rule(R2, "ista", "")
        || Step1Rule(R2, "osos", "")
        || Step1Rule(R2, "osas", "")
        || Step1Rule(R2, "ador", "")
        || Step1Rule(R2, "ivas", "")
        || Step1Rule(R2, "ivos", "")
        || (SuffixPreceded(CT, "iras", "e") && Step1Rule(RV, "iras", "ir")))
    {
        return true;
    }

    // suffix length = 3
    if (Step1Rule(R2, "eza", "")
        || Step1Rule(R2, "ico", "")
        || Step1Rule(R2, "ica", "")
        || Step1Rule(R2, "oso", "")
        || Step1Rule(R2, "osa", "")
        || Step1Rule(R2, "iva", "")
        || Step1Rule(R2, "ivo", "")
        || (SuffixPreceded(CT, "ira", "e") && Step1Rule(RV, "ira", "ir")))
    {
        return true;
    }

    // no ending was removed by step1
    return false;
}

/// <summary>
/// Applies one Step1 rule: when both CT and <paramref name="region"/> end
/// with <paramref name="suffix"/>, swaps that ending of CT for
/// <paramref name="changeTo"/> (an empty string deletes it).
/// </summary>
/// <returns> true if the rule matched and CT was rewritten </returns>
private bool Step1Rule(string region, string suffix, string changeTo)
{
    if (!Suffix(CT, suffix) || !Suffix(region, suffix))
    {
        return false;
    }

    CT = RemoveSuffix(CT, suffix) + changeTo;
    return true;
}
| |
| |
/// <summary>
/// Verb endings tried by Step2, in match order: longest first, and within
/// one length in the order the rules were originally written.
/// </summary>
private static readonly string[] Step2VerbSuffixes = new string[]
{
    // suffix length = 7
    "issemos", "essemos", "assemos", "ariamos", "eriamos", "iriamos",

    // suffix length = 6
    "iremos", "eremos", "aremos", "avamos", "iramos", "eramos", "aramos",
    "asseis", "esseis", "isseis", "arieis", "erieis", "irieis",

    // suffix length = 5
    "irmos", "iamos", "armos", "ermos", "areis", "ereis", "ireis",
    "asses", "esses", "isses", "astes", "assem", "essem", "issem",
    "ardes", "erdes", "irdes", "ariam", "eriam", "iriam",
    "arias", "erias", "irias", "estes", "istes", "aveis",

    // suffix length = 4
    "aria", "eria", "iria", "asse", "esse", "isse", "aste", "este", "iste",
    "arei", "erei", "irei", "aram", "eram", "iram", "avam",
    "arem", "erem", "irem", "ando", "endo", "indo",
    "arao", "erao", "irao", "adas", "idas", "aras", "eras", "iras", "avas",
    "ares", "eres", "ires", "ados", "idos", "amos", "emos", "imos", "ieis",

    // suffix length = 3
    // "ava" sits where a mismatched rule used to test "ira" but strip "ava":
    // that rule never removed anything and shadowed the real "ira" rule below.
    "ada", "ida", "ara", "era", "ava", "iam", "ado", "ido",
    "ias", "ais", "eis", "ira", "ear",

    // suffix length = 2
    "ia", "ei", "am", "em", "ar", "er", "ir", "as", "es", "is",
    "eu", "iu", "ou"
};

/// <summary>
/// Verb suffixes.
///
/// Search for the longest among the verb suffixes in RV,
/// and if found, delete it from CT.
/// </summary>
/// <returns> false if no ending was removed </returns>
private bool Step2()
{
    if (RV == null)
    {
        return false;
    }

    foreach (string ending in Step2VerbSuffixes)
    {
        // Suffix is false for endings longer than RV, so no separate
        // length check is needed before each group.
        if (Suffix(RV, ending))
        {
            CT = RemoveSuffix(CT, ending);
            return true;
        }
    }

    // no ending was removed by step2
    return false;
}
| |
/// <summary>
/// Deletes a final 'i' from CT when RV ends with 'i' preceded by 'c'.
/// Does nothing when RV is null.
/// </summary>
private void Step3()
{
    // SuffixPreceded already requires RV to end with "i".
    if (RV != null && SuffixPreceded(RV, "i", "c"))
    {
        CT = RemoveSuffix(CT, "i");
    }
}
| |
/// <summary>
/// Residual suffix
///
/// If RV ends with one of the suffixes "os", "a", "i" or "o" (tried in
/// that order), delete the first one found from CT. Accented variants
/// need no separate handling because the term has already been
/// accent-folded. Does nothing when RV is null.
/// </summary>
private void Step4()
{
    if (RV == null)
    {
        return;
    }

    string[] residualEndings = new string[] { "os", "a", "i", "o" };
    foreach (string ending in residualEndings)
    {
        if (Suffix(RV, ending))
        {
            CT = RemoveSuffix(CT, ending);
            // Only one residual ending is ever removed.
            return;
        }
    }
}
| |
/// <summary>
/// If RV ends with 'e', delete the final 'e' from CT; and if that 'e'
/// is preceded in RV by "gu" (or "ci"), also delete the 'u' (or 'i')
/// that CT then ends with. Does nothing when RV is null or does not
/// end with 'e'.
/// </summary>
private void Step5()
{
    if (RV == null || !Suffix(RV, "e"))
    {
        return;
    }

    // The final 'e' goes in every case.
    CT = RemoveSuffix(CT, "e");

    // RV itself is not modified, so it still shows what preceded the 'e'.
    if (SuffixPreceded(RV, "e", "gu"))
    {
        CT = RemoveSuffix(CT, "u");
    }
    else if (SuffixPreceded(RV, "e", "ci"))
    {
        CT = RemoveSuffix(CT, "i");
    }
}
| |
/// <summary>
/// Describes the stemmer's current working state, for log and debug purposes.
/// Fields that are null show up as empty text.
/// </summary>
/// <returns> a single line holding TERM, CT, RV, R1 and R2 </returns>
public virtual string Log()
{
    return string.Concat(
        " (TERM = ", TERM, ")",
        " (CT = ", CT, ")",
        " (RV = ", RV, ")",
        " (R1 = ", R1, ")",
        " (R2 = ", R2, ")");
}
| } |
| } |