// blob: 774b64c6674c860ec76c079fcf2b9cb415777bd7
using System.Diagnostics;
using System.Collections.Generic;
namespace Lucene.Net.Analysis.Pt
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Portuguese stemmer implementing the RSLP (Removedor de Sufixos da Lingua Portuguesa)
/// algorithm. This is sometimes also referred to as the Orengo stemmer.
/// </summary>
/// <seealso cref="RSLPStemmerBase"/>
public class PortugueseStemmer : RSLPStemmerBase
{
    // Rule steps, looked up by name once from the parsed "portuguese.rslp" description.
    private static readonly Step plural, feminine, adverb, augmentative, noun, verb, vowel;

    static PortugueseStemmer()
    {
        // The indexer throws if a step name is missing, so an incomplete rule file
        // surfaces at type initialization rather than during stemming.
        IDictionary<string, Step> steps = Parse(typeof(PortugueseStemmer), "portuguese.rslp");
        plural = steps["Plural"];
        feminine = steps["Feminine"];
        adverb = steps["Adverb"];
        augmentative = steps["Augmentative"];
        noun = steps["Noun"];
        verb = steps["Verb"];
        vowel = steps["Vowel"];
    }

    /// <summary>
    /// Stems the word stored in the first <paramref name="len"/> characters of
    /// <paramref name="s"/>, modifying the buffer in place. The plural, adverb,
    /// feminine and augmentative steps are always applied, in that order. The verb
    /// step runs only if the noun step left the length unchanged, and the vowel step
    /// runs only if the verb step also left it unchanged. Accented characters in the
    /// resulting stem are then replaced by their unaccented base letters.
    /// </summary>
    /// <param name="s"> buffer, oversized to at least <c>len+1</c> </param>
    /// <param name="len"> initial valid length of buffer </param>
    /// <returns> new valid length, stemmed </returns>
    public virtual int Stem(char[] s, int len)
    {
        Debug.Assert(s.Length >= len + 1, "this stemmer requires an oversized array of at least 1");

        len = plural.Apply(s, len);
        len = adverb.Apply(s, len);
        len = feminine.Apply(s, len);
        len = augmentative.Apply(s, len);

        int lengthBeforeNoun = len;
        len = noun.Apply(s, len);
        if (len == lengthBeforeNoun) // noun suffix not removed
        {
            // len still equals lengthBeforeNoun here, so the same value is the
            // baseline for detecting whether the verb step removed anything.
            len = verb.Apply(s, len);
            if (len == lengthBeforeNoun) // verb suffix not removed either
            {
                len = vowel.Apply(s, len);
            }
        }

        RemoveAccents(s, len);
        return len;
    }

    /// <summary>
    /// RSLP accent removal: rewrites, in place, each accented lowercase letter within
    /// the first <paramref name="len"/> characters of <paramref name="s"/> to its
    /// unaccented base letter. Characters not listed are left untouched.
    /// </summary>
    private static void RemoveAccents(char[] s, int len)
    {
        for (int i = 0; i < len; i++)
        {
            switch (s[i])
            {
                case 'à':
                case 'á':
                case 'â':
                case 'ã':
                case 'ä':
                case 'å':
                    s[i] = 'a';
                    break;
                case 'ç':
                    s[i] = 'c';
                    break;
                case 'è':
                case 'é':
                case 'ê':
                case 'ë':
                    s[i] = 'e';
                    break;
                case 'ì':
                case 'í':
                case 'î':
                case 'ï':
                    s[i] = 'i';
                    break;
                case 'ñ':
                    s[i] = 'n';
                    break;
                case 'ò':
                case 'ó':
                case 'ô':
                case 'õ':
                case 'ö':
                    s[i] = 'o';
                    break;
                case 'ù':
                case 'ú':
                case 'û':
                case 'ü':
                    s[i] = 'u';
                    break;
                case 'ý':
                case 'ÿ':
                    s[i] = 'y';
                    break;
            }
        }
    }
}
}