docs/attachments/LUCENE-2842/LUCENE-2842.patch - lucene-jira-archive - Git at Google

 Index: solr/src/test/org/apache/solr/analysis/TestGalicianStemFilterFactory.java
 ===================================================================
 --- solr/src/test/org/apache/solr/analysis/TestGalicianStemFilterFactory.java	(revision 0)
 +++ solr/src/test/org/apache/solr/analysis/TestGalicianStemFilterFactory.java	(revision 0)
 @@ -0,0 +1,36 @@
 +package org.apache.solr.analysis;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.Reader;
 +import java.io.StringReader;
 +
 +import org.apache.lucene.analysis.TokenStream;
 +import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 +
 +/**
 + * Simple test that {@link GalicianStemFilterFactory} creates a working stem filter ("cariñosa" must stem to "cariñ").
 + */
 +public class TestGalicianStemFilterFactory extends BaseTokenTestCase {
 +  public void testStemming() throws Exception {
 +    Reader reader = new StringReader("cariñosa");
 +    GalicianStemFilterFactory factory = new GalicianStemFilterFactory();
 +    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
 +    assertTokenStreamContents(stream, new String[] { "cariñ" });
 +  }
 +}

 Property changes on: solr\src\test\org\apache\solr\analysis\TestGalicianStemFilterFactory.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: solr/src/test/org/apache/solr/analysis/TestPortugueseStemFilterFactory.java
 ===================================================================
 --- solr/src/test/org/apache/solr/analysis/TestPortugueseStemFilterFactory.java	(revision 0)
 +++ solr/src/test/org/apache/solr/analysis/TestPortugueseStemFilterFactory.java	(revision 0)
 @@ -0,0 +1,36 @@
 +package org.apache.solr.analysis;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.Reader;
 +import java.io.StringReader;
 +
 +import org.apache.lucene.analysis.TokenStream;
 +import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 +
 +/**
 + * Simple test that {@link PortugueseStemFilterFactory} creates a working stem filter ("maluquice" must stem to "maluc").
 + */
 +public class TestPortugueseStemFilterFactory extends BaseTokenTestCase {
 +  public void testStemming() throws Exception {
 +    Reader reader = new StringReader("maluquice");
 +    PortugueseStemFilterFactory factory = new PortugueseStemFilterFactory();
 +    TokenStream stream = factory.create(new WhitespaceTokenizer(DEFAULT_VERSION, reader));
 +    assertTokenStreamContents(stream, new String[] { "maluc" });
 +  }
 +}

 Property changes on: solr\src\test\org\apache\solr\analysis\TestPortugueseStemFilterFactory.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java
 ===================================================================
 --- solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java	(revision 0)
 +++ solr/src/java/org/apache/solr/analysis/PortugueseStemFilterFactory.java	(revision 0)
 @@ -0,0 +1,28 @@
 +package org.apache.solr.analysis;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import org.apache.lucene.analysis.TokenStream;
 +import org.apache.lucene.analysis.pt.PortugueseStemFilter;
 +
 +/** Factory for {@link PortugueseStemFilter}: {@link #create(TokenStream)} wraps its input in a new filter. */
 +public class PortugueseStemFilterFactory extends BaseTokenFilterFactory {
 +  public TokenStream create(TokenStream input) {
 +    return new PortugueseStemFilter(input);
 +  }
 +}

 Property changes on: solr\src\java\org\apache\solr\analysis\PortugueseStemFilterFactory.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java
 ===================================================================
 --- solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java	(revision 0)
 +++ solr/src/java/org/apache/solr/analysis/GalicianStemFilterFactory.java	(revision 0)
 @@ -0,0 +1,28 @@
 +package org.apache.solr.analysis;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import org.apache.lucene.analysis.TokenStream;
 +import org.apache.lucene.analysis.gl.GalicianStemFilter;
 +
 +/** Factory for {@link GalicianStemFilter}: {@link #create(TokenStream)} wraps its input in a new filter. */
 +public class GalicianStemFilterFactory extends BaseTokenFilterFactory {
 +  public TokenStream create(TokenStream input) {
 +    return new GalicianStemFilter(input);
 +  }
 +}

 Property changes on: solr\src\java\org\apache\solr\analysis\GalicianStemFilterFactory.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/test/org/apache/lucene/analysis/pt/ptrslptestdata.zip
 ===================================================================
 Cannot display: file marked as a binary type.
 svn:mime-type = application/octet-stream

 Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\pt\ptrslptestdata.zip
 ___________________________________________________________________
 Added: svn:mime-type
    + application/octet-stream

 Index: modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
 ===================================================================
 --- modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java	(revision 0)
 +++ modules/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java	(revision 0)
 @@ -0,0 +1,69 @@
 +package org.apache.lucene.analysis.pt;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import static org.apache.lucene.analysis.util.VocabularyAssert.assertVocabulary;
 +
 +import java.io.IOException;
 +import java.io.Reader;
 +
 +import org.apache.lucene.analysis.Analyzer;
 +import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 +import org.apache.lucene.analysis.TokenStream;
 +import org.apache.lucene.analysis.Tokenizer;
 +import org.apache.lucene.analysis.core.LowerCaseFilter;
 +import org.apache.lucene.analysis.standard.StandardTokenizer;
 +import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 +
 +/**
 + * Simple tests for {@link PortugueseStemFilter}, applied after StandardTokenizer and LowerCaseFilter.
 + */
 +public class TestPortugueseStemFilter extends BaseTokenStreamTestCase {
 +  private Analyzer analyzer = new ReusableAnalyzerBase() {
 +    @Override
 +    protected TokenStreamComponents createComponents(String fieldName,
 +        Reader reader) {
 +      Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
 +      TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
 +      return new TokenStreamComponents(source, new PortugueseStemFilter(result));
 +    }
 +  };
 +
 +  /**
 +   * Test the example from the paper "Assessing the impact of stemming accuracy
 +   * on information retrieval": expected tokens are lowercased, stemmed and accent-free.
 +   */
 +  public void testExamples() throws IOException {
 +    assertAnalyzesTo(
 +        analyzer,
 +    "O debate político, pelo menos o que vem a público, parece, de modo nada "
 +    + "surpreendente, restrito a temas menores. Mas há, evidentemente, "
 +    + "grandes questões em jogo nas eleições que se aproximam.",
 +    new String[] {
 +      "o", "debat", "politic", "pel", "menos", "o", "que", "vem", "a",
 +      "public", "parec", "de", "mod", "nad", "surpreend", "restrit",
 +      "a", "tem", "men", "mas", "ha", "evid", "grand", "quest",
 +      "em", "jog", "na", "eleic", "que", "se", "aproxim"
 +    });
 +  }
 +
 +  /** Test against a vocabulary from the reference impl (data: ptrslptestdata.zip / ptrslp.txt) */
 +  public void testVocabulary() throws IOException {
 +    assertVocabulary(analyzer, getDataFile("ptrslptestdata.zip"), "ptrslp.txt");
 +  }
 +}

 Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\pt\TestPortugueseStemFilter.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java
 ===================================================================
 --- modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java	(revision 0)
 +++ modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianStemFilter.java	(revision 0)
 @@ -0,0 +1,52 @@
 +package org.apache.lucene.analysis.gl;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import static org.apache.lucene.analysis.util.VocabularyAssert.assertVocabulary;
 +
 +import java.io.IOException;
 +import java.io.Reader;
 +
 +import org.apache.lucene.analysis.Analyzer;
 +import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 +import org.apache.lucene.analysis.TokenStream;
 +import org.apache.lucene.analysis.Tokenizer;
 +import org.apache.lucene.analysis.core.LowerCaseFilter;
 +import org.apache.lucene.analysis.standard.StandardTokenizer;
 +import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 +
 +/**
 + * Simple tests for {@link GalicianStemFilter}, applied after StandardTokenizer and LowerCaseFilter.
 + */
 +public class TestGalicianStemFilter extends BaseTokenStreamTestCase {
 +  private Analyzer analyzer = new ReusableAnalyzerBase() {
 +    @Override
 +    protected TokenStreamComponents createComponents(String fieldName,
 +        Reader reader) {
 +      Tokenizer source = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
 +      TokenStream result = new LowerCaseFilter(TEST_VERSION_CURRENT, source);
 +      return new TokenStreamComponents(source, new GalicianStemFilter(result));
 +    }
 +  };
 +
 +
 +  /** Test against a vocabulary from the reference impl (data: gltestdata.zip / gl.txt) */
 +  public void testVocabulary() throws IOException {
 +    assertVocabulary(analyzer, getDataFile("gltestdata.zip"), "gl.txt");
 +  }
 +}

 Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\gl\TestGalicianStemFilter.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/test/org/apache/lucene/analysis/gl/gltestdata.zip
 ===================================================================
 Cannot display: file marked as a binary type.
 svn:mime-type = application/octet-stream

 Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\gl\gltestdata.zip
 ___________________________________________________________________
 Added: svn:mime-type
    + application/octet-stream

 Index: modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
 ===================================================================
 --- modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java	(revision 0)
 +++ modules/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java	(revision 0)
 @@ -0,0 +1,53 @@
 +package org.apache.lucene.analysis.gl;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +import java.util.HashSet;
 +import java.util.Set;
 +
 +import org.apache.lucene.analysis.Analyzer;
 +import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 +
 +public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
 +  /** Only constructs the analyzer; this test fails with NPE when the
 +   * stopwords file is missing in classpath */
 +  public void testResourcesAvailable() {
 +    new GalicianAnalyzer(TEST_VERSION_CURRENT);
 +  }
 +
 +  /** test stopwords and stemming with the default analyzer configuration */
 +  public void testBasics() throws IOException {
 +    Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT);
 +    // stemming: both inflected forms must reduce to the same stem
 +    checkOneTermReuse(a, "correspondente", "correspond");
 +    checkOneTermReuse(a, "corresponderá", "correspond");
 +    // stopword: "e" must produce no tokens at all
 +    assertAnalyzesTo(a, "e", new String[] {});
 +  }
 +
 +  /** test use of exclusion set: an excluded word stays unstemmed, other words are still stemmed */
 +  public void testExclude() throws IOException {
 +    Set<String> exclusionSet = new HashSet<String>();
 +    exclusionSet.add("correspondente");
 +    Analyzer a = new GalicianAnalyzer(TEST_VERSION_CURRENT,
 +        GalicianAnalyzer.getDefaultStopSet(), exclusionSet);
 +    checkOneTermReuse(a, "correspondente", "correspondente");
 +    checkOneTermReuse(a, "corresponderá", "correspond");
 +  }
 +}

 Property changes on: modules\analysis\common\src\test\org\apache\lucene\analysis\gl\TestGalicianAnalyzer.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java	(revision 0)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemmer.java	(revision 0)
 @@ -0,0 +1,102 @@
 +package org.apache.lucene.analysis.pt;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.util.Map;
 +
 +/**
 + * Portuguese stemmer implementing the RSLP (Removedor de Sufixos da Lingua Portuguesa)
 + * algorithm. This is sometimes also referred to as the Orengo stemmer.
 + * <p>The rule steps are loaded once, in the static initializer, from <code>portuguese.rslp</code>.
 + * @see RSLPStemmerBase
 + */
 +public class PortugueseStemmer extends RSLPStemmerBase {
 +  private static final Step plural, feminine, adverb, augmentative, noun, verb, vowel;
 +
 +  static {
 +    Map<String,Step> steps = parse(PortugueseStemmer.class, "portuguese.rslp");
 +    plural = steps.get("Plural");
 +    feminine = steps.get("Feminine");
 +    adverb = steps.get("Adverb");
 +    augmentative = steps.get("Augmentative");
 +    noun = steps.get("Noun");
 +    verb = steps.get("Verb");
 +    vowel = steps.get("Vowel");
 +  }
 +
 +  /** Stems <code>s</code> in place: applies the RSLP suffix steps, then removes accents from the result.
 +   * @param s buffer, oversized to at least <code>len+1</code>
 +   * @param len initial valid length of buffer
 +   * @return new valid length, stemmed
 +   */
 +  public int stem(char s[], int len) {
 +    assert s.length >= len + 1 : "this stemmer requires an oversized array of at least 1";
 +
 +    len = plural.apply(s, len);
 +    len = adverb.apply(s, len);
 +    len = feminine.apply(s, len);
 +    len = augmentative.apply(s, len);
 +
 +    int oldlen = len;
 +    len = noun.apply(s, len);
 +
 +    if (len == oldlen) { /* no noun suffix removed: try the verb step */
 +      oldlen = len;
 +
 +      len = verb.apply(s, len);
 +
 +      if (len == oldlen) { /* no verb suffix removed either: fall back to the vowel step */
 +        len = vowel.apply(s, len);
 +      }
 +    }
 +
 +    // rslp accent removal: fold each accented letter of the stem to its unaccented base letter
 +    for (int i = 0; i < len; i++) {
 +      switch(s[i]) {
 +        case 'à':
 +        case 'á':
 +        case 'â':
 +        case 'ã':
 +        case 'ä':
 +        case 'å': s[i] = 'a'; break;
 +        case 'ç': s[i] = 'c'; break;
 +        case 'è':
 +        case 'é':
 +        case 'ê':
 +        case 'ë': s[i] = 'e'; break;
 +        case 'ì':
 +        case 'í':
 +        case 'î':
 +        case 'ï': s[i] = 'i'; break;
 +        case 'ñ': s[i] = 'n'; break;
 +        case 'ò':
 +        case 'ó':
 +        case 'ô':
 +        case 'õ':
 +        case 'ö': s[i] = 'o'; break;
 +        case 'ù':
 +        case 'ú':
 +        case 'û':
 +        case 'ü': s[i] = 'u'; break;
 +        case 'ý':
 +        case 'ÿ': s[i] = 'y'; break;
 +      }
 +    }
 +    return len;
 +  }
 +}

 Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\pt\PortugueseStemmer.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java	(revision 0)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseStemFilter.java	(revision 0)
 @@ -0,0 +1,60 @@
 +package org.apache.lucene.analysis.pt;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +
 +import org.apache.lucene.analysis.TokenFilter;
 +import org.apache.lucene.analysis.TokenStream;
 +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 +
 +/**
 + * A {@link TokenFilter} that applies {@link PortugueseStemmer} to stem
 + * Portuguese words.
 + * <p>
 + * To prevent terms from being stemmed use an instance of
 + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
 + * the {@link KeywordAttribute} before this {@link TokenStream}.
 + * </p>
 + */
 +public final class PortugueseStemFilter extends TokenFilter {
 +  private final PortugueseStemmer stemmer = new PortugueseStemmer();
 +  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 +  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 +
 +  public PortugueseStemFilter(TokenStream input) {
 +    super(input);
 +  }
 +
 +  @Override
 +  public boolean incrementToken() throws IOException {
 +    if (input.incrementToken()) {
 +      if (!keywordAttr.isKeyword()) {
 +        // the stemmer can grow a word by 1 (worst case '*ã' -> '*ão'), so hand it a buffer of len+1 to stem in place
 +        final int len = termAtt.length();
 +        final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len);
 +        termAtt.setLength(newlen);
 +      }
 +      return true;
 +    } else {
 +      return false;
 +    }
 +  }
 +}

 Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\pt\PortugueseStemFilter.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java	(revision 1054344)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseMinimalStemmer.java	(working copy)
 @@ -1,10 +1,5 @@
  package org.apache.lucene.analysis.pt;

 -import java.util.Arrays;
 -
 -import org.apache.lucene.analysis.util.CharArraySet;
 -import org.apache.lucene.util.Version;
 -
  /**
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements.  See the NOTICE file distributed with
 @@ -31,89 +26,14 @@
   * which is just the plural reduction step of the RSLP
   * algorithm from <i>A Stemming Algorithmm for the Portuguese Language</i>,
   * Orengo et al.
 + * @see RSLPStemmerBase
   */
 -public class PortugueseMinimalStemmer {
 +public class PortugueseMinimalStemmer extends RSLPStemmerBase {

 -  private static final CharArraySet excIS = new CharArraySet(Version.LUCENE_31,
 -      Arrays.asList("lápis", "cais", "mais", "crúcis", "biquínis", "pois",
 -          "depois","dois","leis"),
 -      false);
 +  private static final Step pluralStep =
 +    parse(PortugueseMinimalStemmer.class, "portuguese.rslp").get("Plural");

 -  private static final CharArraySet excS = new CharArraySet(Version.LUCENE_31,
 -      Arrays.asList("aliás", "pires", "lápis", "cais", "mais", "mas", "menos",
 -          "férias", "fezes", "pêsames", "crúcis", "gás", "atrás", "moisés",
 -          "através", "convés", "ês", "país", "após", "ambas", "ambos",
 -          "messias", "depois"),
 -      false);
 -
    public int stem(char s[], int len) {
 -    if (len < 3 || s[len-1] != 's')
 -      return len;
 -
 -    if (s[len-2] == 'n') {
 -      len--;
 -      s[len-1] = 'm';
 -      return len;
 -    }
 -
 -    if (len >= 6 && s[len-3] == 'õ' && s[len-2] == 'e') {
 -      len--;
 -      s[len-2] = 'ã';
 -      s[len-1] = 'o';
 -      return len;
 -    }
 -
 -    if (len >= 4 && s[len-3] == 'ã' && s[len-2] == 'e')
 -      if (!(len == 4 && s[0] == 'm')) {
 -        len--;
 -        s[len-1] = 'o';
 -        return len;
 -      }
 -
 -    if (len >= 4 && s[len-2] == 'i') {
 -      if (s[len-3] == 'a')
 -        if (!(len == 4 && (s[0] == 'c' || s[0] == 'm'))) {
 -          len--;
 -          s[len-1] = 'l';
 -          return len;
 -        }
 -
 -      if (len >= 5 && s[len-3] == 'é') {
 -        len--;
 -        s[len-2] = 'e';
 -        s[len-1] = 'l';
 -        return len;
 -      }
 -
 -      if (len >= 5 && s[len-3] == 'e') {
 -        len--;
 -        s[len-1] = 'l';
 -        return len;
 -      }
 -
 -      if (len >= 5 && s[len-3] == 'ó') {
 -        len--;
 -        s[len-2] = 'o';
 -        s[len-1] = 'l';
 -        return len;
 -      }
 -
 -      if (!excIS.contains(s, 0, len)) {
 -        s[len-1] = 'l';
 -        return len;
 -      }
 -    }
 -
 -    if (len >= 6 && s[len-3] == 'l' && s[len-2] == 'e')
 -      return len - 2;
 -
 -    if (len >= 6 && s[len-3] == 'r' && s[len-2] == 'e')
 -      if (!(len == 7 && s[0] == 'á' && s[1] == 'r' && s[2] == 'v' && s[3] == 'o'))
 -        return len - 2;
 -
 -    if (excS.contains(s, 0, len))
 -      return len;
 -    else
 -      return len-1;
 +    return pluralStep.apply(s, len);
    }
  }
 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java	(revision 0)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java	(revision 0)
 @@ -0,0 +1,345 @@
 +package org.apache.lucene.analysis.pt;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +import java.io.InputStream;
 +import java.io.InputStreamReader;
 +import java.io.LineNumberReader;
 +import java.util.ArrayList;
 +import java.util.Arrays;
 +import java.util.HashMap;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.regex.Matcher;
 +import java.util.regex.Pattern;
 +
 +import org.apache.lucene.analysis.util.CharArraySet;
 +import org.apache.lucene.util.Version;
 +
 +import static org.apache.lucene.analysis.util.StemmerUtil.*;
 +
 +/**
 + * Base class for stemmers that use a set of RSLP-like stemming steps.
 + * <p>
 + * RSLP (Removedor de Sufixos da Lingua Portuguesa) is an algorithm designed
 + * originally for stemming the Portuguese language, described in the paper
 + * <i>A Stemming Algorithm for the Portuguese Language</i>, Orengo et al.
 + * <p>
 + * Since this time a plural-only modification (RSLP-S) as well as a modification
 + * for the Galician language have been implemented. This class parses a configuration
 + * file that describes {@link Step}s, where each Step contains a set of {@link Rule}s.
 + * <p>
 + * The general rule format is:
 + * <blockquote>{ "suffix", N, "replacement", { "exception1", "exception2", ...}}</blockquote>
 + * where:
 + * <ul>
 + *   <li><code>suffix</code> is the suffix to be removed (such as "inho").
 + *   <li><code>N</code> is the min stem size, where stem is defined as the candidate stem
 + *       after removing the suffix (but before appending the replacement!)
 + *   <li><code>replacement</code> is an optional string to append after removing the suffix.
 + *       This can be the empty string.
 + *   <li><code>exceptions</code> is an optional list of exceptions, patterns that should
 + *       not be stemmed. These patterns can be specified as whole word or suffix (ends-with)
 + *       patterns, depending upon the exceptions format flag in the step header.
 + * </ul>
 + * <p>
 + * A step is an ordered list of rules, with a structure in this format:
 + * <blockquote>{ "name", N, B, { "cond1", "cond2", ... }
 + *               ... rules ... };
 + * </blockquote>
 + * where:
 + * <ul>
 + *   <li><code>name</code> is a name for the step (such as "Plural").
 + *   <li><code>N</code> is the min word size. Words that are less than this length bypass
 + *       the step completely, as an optimization. Note: N can be zero, in this case this
 + *       implementation will automatically calculate the appropriate value from the underlying
 + *       rules.
 + *   <li><code>B</code> is a "boolean" flag specifying how exceptions in the rules are matched.
 + *       A value of 1 indicates whole-word pattern matching, a value of 0 indicates that
 + *       exceptions are actually suffixes and should be matched with ends-with.
 + *   <li><code>conds</code> are an optional list of conditions to enter the step at all. If
 + *       the list is non-empty, then a word must end with one of these conditions or it will
 + *       bypass the step completely as an optimization.
 + * </ul>
 + * <p>
 + * @see <a href="http://www.inf.ufrgs.br/~viviane/rslp/index.htm">RSLP description</a>
 + * @lucene.internal
 + */
 +public abstract class RSLPStemmerBase {
 +
 +  /**
 +   * A basic rule, with no exceptions.
 +   */
 +  protected static class Rule {
 +    protected final char suffix[];
 +    protected final char replacement[];
 +    protected final int min;
 +
 +    /**
 +     * Create a rule.
 +     * @param suffix suffix to remove
 +     * @param min minimum stem length
 +     * @param replacement replacement string
 +     */
 +    public Rule(String suffix, int min, String replacement) {
 +      this.suffix = suffix.toCharArray();
 +      this.replacement = replacement.toCharArray();
 +      this.min = min;
 +    }
 +
 +    /**
 +     * @return true if the word matches this rule.
 +     */
 +    public boolean matches(char s[], int len) {
 +      return (len - suffix.length >= min && endsWith(s, len, suffix));
 +    }
 +
 +    /**
 +     * @return new valid length of the string after firing this rule.
 +     */
 +    public int replace(char s[], int len) {
 +      if (replacement.length > 0) {
 +        System.arraycopy(replacement, 0, s, len - suffix.length, replacement.length);
 +      }
 +      return len - suffix.length + replacement.length;
 +    }
 +  }
 +
 +  /**
 +   * A rule with a set of whole-word exceptions.
 +   */
 +  protected static class RuleWithSetExceptions extends Rule {
 +    protected final CharArraySet exceptions;
 +
 +    public RuleWithSetExceptions(String suffix, int min, String replacement,
 +        String[] exceptions) {
 +      super(suffix, min, replacement);
 +      for (int i = 0; i < exceptions.length; i++) {
 +        if (!exceptions[i].endsWith(suffix))
 +          System.err.println("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
 +      }
 +      this.exceptions = new CharArraySet(Version.LUCENE_31,
 +           Arrays.asList(exceptions), false);
 +    }
 +
 +    @Override
 +    public boolean matches(char s[], int len) {
 +      return super.matches(s, len) && !exceptions.contains(s, 0, len);
 +    }
 +  }
 +
 +  /**
 +   * A rule with a set of exceptional suffixes.
 +   */
 +  protected static class RuleWithSuffixExceptions extends Rule {
 +    // TODO: use a more efficient datastructure: automaton?
 +    protected final char[][] exceptions;
 +
 +    public RuleWithSuffixExceptions(String suffix, int min, String replacement,
 +        String[] exceptions) {
 +      super(suffix, min, replacement);
 +      for (int i = 0; i < exceptions.length; i++) {
 +        if (!exceptions[i].endsWith(suffix))
 +          System.err.println("warning: useless exception '" + exceptions[i] + "' does not end with '" + suffix + "'");
 +      }
 +      this.exceptions = new char[exceptions.length][];
 +      for (int i = 0; i < exceptions.length; i++)
 +        this.exceptions[i] = exceptions[i].toCharArray();
 +    }
 +
 +    @Override
 +    public boolean matches(char s[], int len) {
 +      if (!super.matches(s, len))
 +        return false;
 +
 +      for (int i = 0; i < exceptions.length; i++)
 +        if (endsWith(s, len, exceptions[i]))
 +          return false;
 +
 +      return true;
 +    }
 +  }
 +
 +  /**
 +   * A step containing a list of rules.
 +   */
 +  protected static class Step {
 +    protected final String name;
 +    protected final Rule rules[];
 +    protected final int min;
 +    protected final char[][] suffixes;
 +
 +    /**
 +     * Create a new step
 +     * @param name Step's name.
 +     * @param rules an ordered list of rules.
 +     * @param min minimum word size. if this is 0 it is automatically calculated.
 +     * @param suffixes optional list of conditional suffixes. may be null.
 +     */
 +    public Step(String name, Rule rules[], int min, String suffixes[]) {
 +      this.name = name;
 +      this.rules = rules;
 +      if (min == 0) {
 +        min = Integer.MAX_VALUE;
 +        for (Rule r : rules)
 +          min = Math.min(min, r.min + r.suffix.length);
 +      }
 +      this.min = min;
 +
 +      if (suffixes == null || suffixes.length == 0) {
 +        this.suffixes = null;
 +      } else {
 +        this.suffixes = new char[suffixes.length][];
 +        for (int i = 0; i < suffixes.length; i++)
 +          this.suffixes[i] = suffixes[i].toCharArray();
 +      }
 +    }
 +
 +    /**
 +     * @return new valid length of the string after applying the entire step.
 +     */
 +    public int apply(char s[], int len) {
 +      if (len < min)
 +        return len;
 +
 +      if (suffixes != null) {
 +        boolean found = false;
 +
 +        for (int i = 0; i < suffixes.length; i++)
 +          if (endsWith(s, len, suffixes[i])) {
 +            found = true;
 +            break;
 +          }
 +
 +        if (!found) return len;
 +      }
 +
 +      for (int i = 0; i < rules.length; i++) {
 +        if (rules[i].matches(s, len))
 +          return rules[i].replace(s, len);
 +      }
 +
 +      return len;
 +    }
 +  }
 +
 +  /**
 +   * Parse a UTF-8 classpath resource (resolved via <code>clazz</code>) into an RSLP stemmer description.
 +   * @return a Map containing the named Steps in this description; RuntimeException if missing/unreadable.
 +   */
 +  protected static Map<String,Step> parse(Class<? extends RSLPStemmerBase> clazz, String resource) {
 +    InputStream is = clazz.getResourceAsStream(resource);
 +    if (is == null) throw new RuntimeException("RSLP resource not found: " + resource);
 +    try { // TODO: this parser is ugly, but works. use a jflex grammar instead.
 +      LineNumberReader r = new LineNumberReader(new InputStreamReader(is, "UTF-8"));
 +      Map<String,Step> steps = new HashMap<String,Step>();
 +      for (String step = readLine(r); step != null; step = readLine(r)) {
 +        Step s = parseStep(r, step); // consumes the header's rule lines as well
 +        steps.put(s.name, s);
 +      }
 +      return steps;
 +    } catch (IOException e) {
 +      throw new RuntimeException(e);
 +    } finally { // release the stream even when a malformed file aborts parsing
 +      try { is.close(); } catch (IOException ignored) { /* best-effort close of a read-only stream */ }
 +    }
 +  }
 +
 +  private static final Pattern headerPattern =
 +    Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*(0|1),\\s*\\{(.*)\\},\\s*$");
 +  private static final Pattern stripPattern =
 +    Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+)\\s*\\}\\s*(,|(\\}\\s*;))$");
 +  private static final Pattern repPattern =
 +    Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\"\\}\\s*(,|(\\}\\s*;))$");
 +  private static final Pattern excPattern =
 +    Pattern.compile("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\",\\s*\\{(.*)\\}\\s*\\}\\s*(,|(\\}\\s*;))$");
 +
 +  private static Step parseStep(LineNumberReader r, String header) throws IOException {
 +    Matcher matcher = headerPattern.matcher(header);
 +    if (!matcher.find()) {
 +      throw new RuntimeException("Illegal Step header specified at line " + r.getLineNumber());
 +    }
 +    assert matcher.groupCount() == 4;
 +    String name = matcher.group(1);
 +    int min = Integer.parseInt(matcher.group(2));
 +    int type = Integer.parseInt(matcher.group(3));
 +    String suffixes[] = parseList(matcher.group(4));
 +    Rule rules[] = parseRules(r, type);
 +    return new Step(name, rules, min, suffixes);
 +  }
 +
 +  private static Rule[] parseRules(LineNumberReader r, int type) throws IOException { // reads rules up to the line ending in ";"
 +    List<Rule> rules = new ArrayList<Rule>();
 +    String line;
 +    while ((line = readLine(r)) != null) {
 +      Matcher matcher = stripPattern.matcher(line); // { "suffix", N }
 +      if (matcher.matches()) {
 +        rules.add(new Rule(matcher.group(1), Integer.parseInt(matcher.group(2)), ""));
 +      } else {
 +        matcher = repPattern.matcher(line); // { "suffix", N, "replacement" }
 +        if (matcher.matches()) {
 +          rules.add(new Rule(matcher.group(1), Integer.parseInt(matcher.group(2)), matcher.group(3)));
 +        } else {
 +          matcher = excPattern.matcher(line); // { "suffix", N, "replacement", { exceptions } }
 +          if (matcher.matches()) {
 +            if (type == 0) { // step flag 0: exceptions are matched as suffixes
 +              rules.add(new RuleWithSuffixExceptions(matcher.group(1),
 +                        Integer.parseInt(matcher.group(2)),
 +                        matcher.group(3),
 +                        parseList(matcher.group(4))));
 +            } else { // otherwise exceptions are matched as whole words
 +              rules.add(new RuleWithSetExceptions(matcher.group(1),
 +                        Integer.parseInt(matcher.group(2)),
 +                        matcher.group(3),
 +                        parseList(matcher.group(4))));
 +            }
 +          } else {
 +            throw new RuntimeException("Illegal Step rule specified at line " + r.getLineNumber());
 +          }
 +        }
 +      }
 +      if (line.endsWith(";")) // the last rule of a step is terminated by "};"
 +        return rules.toArray(new Rule[rules.size()]);
 +    }
 +    throw new RuntimeException("Unterminated Step: end of file reached before closing \"};\" at line " + r.getLineNumber());
 +  }
 +
 +  private static String[] parseList(String s) { // splits the inside of a { "a", "b", ... } list into unquoted entries
 +    if (s.trim().isEmpty()) // "{}" or "{ }": nothing between the braces
 +      return new String[0]; // empty, never null, so callers can read .length safely
 +    String list[] = s.split(",");
 +    for (int i = 0; i < list.length; i++)
 +      list[i] = parseString(list[i].trim()); // drop the surrounding quotes
 +    return list;
 +  }
 +
 +  private static String parseString(String s) {
 +    return s.substring(1, s.length()-1);
 +  }
 +
 +  private static String readLine(LineNumberReader r) throws IOException {
 +    String line = null;
 +    while ((line = r.readLine()) != null) {
 +      line = line.trim();
 +      if (!line.isEmpty() && line.charAt(0) != '#')
 +        return line;
 +    }
 +    return line;
 +  }
 +}

 Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\pt\RSLPStemmerBase.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemmer.java
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemmer.java	(revision 0)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemmer.java	(revision 0)
 @@ -0,0 +1,83 @@
 +package org.apache.lucene.analysis.gl;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.util.Map;
 +
 +import org.apache.lucene.analysis.pt.RSLPStemmerBase;
 +
 +/**
 + * Galician stemmer implementing "Regras do lematizador para o galego".
 + *
 + * @see RSLPStemmerBase
 + * @see <a href="http://bvg.udc.es/recursos_lingua/stemming.jsp">Description of rules</a>
 + */
 +public class GalicianStemmer extends RSLPStemmerBase {
 +  private static final Step plural, unification, adverb, augmentative, noun, verb, vowel;
 +
 +  static {
 +    Map<String,Step> steps = parse(GalicianStemmer.class, "galician.rslp");
 +    plural = steps.get("Plural");
 +    unification = steps.get("Unification");
 +    adverb = steps.get("Adverb");
 +    augmentative = steps.get("Augmentative");
 +    noun = steps.get("Noun");
 +    verb = steps.get("Verb");
 +    vowel = steps.get("Vowel");
 +  }
 +
 +  /**
 +   * @param s buffer, oversized to at least <code>len+1</code>
 +   * @param len initial valid length of buffer
 +   * @return new valid length, stemmed
 +   */
 +  public int stem(char s[], int len) {
 +    assert s.length >= len + 1 : "this stemmer requires an oversized array of at least 1";
 +
 +    len = plural.apply(s, len);
 +    len = unification.apply(s, len);
 +    len = adverb.apply(s, len);
 +
 +    int oldlen;
 +    do {
 +      oldlen = len;
 +      len = augmentative.apply(s, len);
 +    } while (len != oldlen);
 +
 +    oldlen = len;
 +    len = noun.apply(s, len);
 +    if (len == oldlen) { /* suffix not removed */
 +      len = verb.apply(s, len);
 +    }
 +
 +    len = vowel.apply(s, len);
 +
 +    // RSLG accent removal
 +    for (int i = 0; i < len; i++)
 +      switch(s[i]) {
 +        case 'á': s[i] = 'a'; break;
 +        case 'é':
 +        case 'ê': s[i] = 'e'; break;
 +        case 'í': s[i] = 'i'; break;
 +        case 'ó': s[i] = 'o'; break;
 +        case 'ú': s[i] = 'u'; break;
 +      }
 +
 +    return len;
 +  }
 +}

 Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\gl\GalicianStemmer.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java	(revision 0)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianStemFilter.java	(revision 0)
 @@ -0,0 +1,60 @@
 +package org.apache.lucene.analysis.gl;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +
 +import org.apache.lucene.analysis.TokenFilter;
 +import org.apache.lucene.analysis.TokenStream;
 +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 +import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 +
 +/**
 + * A {@link TokenFilter} that applies {@link GalicianStemmer} to stem
 + * Galician words.
 + * <p>
 + * To prevent terms from being stemmed use an instance of
 + * {@link KeywordMarkerFilter} or a custom {@link TokenFilter} that sets
 + * the {@link KeywordAttribute} before this {@link TokenStream}.
 + * </p>
 + */
 +public final class GalicianStemFilter extends TokenFilter {
 +  private final GalicianStemmer stemmer = new GalicianStemmer();
 +  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 +  private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
 +
 +  public GalicianStemFilter(TokenStream input) {
 +    super(input);
 +  }
 +
 +  @Override
 +  public boolean incrementToken() throws IOException {
 +    if (input.incrementToken()) {
 +      if (!keywordAttr.isKeyword()) {
 +        // this stemmer increases word length by 1: worst case '*çom' -> '*ción'
 +        final int len = termAtt.length();
 +        final int newlen = stemmer.stem(termAtt.resizeBuffer(len+1), len);
 +        termAtt.setLength(newlen);
 +      }
 +      return true;
 +    } else {
 +      return false;
 +    }
 +  }
 +}

 Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\gl\GalicianStemFilter.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java	(revision 0)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java	(revision 0)
 @@ -0,0 +1,129 @@
 +package org.apache.lucene.analysis.gl;
 +
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one or more
 + * contributor license agreements.  See the NOTICE file distributed with
 + * this work for additional information regarding copyright ownership.
 + * The ASF licenses this file to You under the Apache License, Version 2.0
 + * (the "License"); you may not use this file except in compliance with
 + * the License.  You may obtain a copy of the License at
 + *
 + *     http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +import java.io.IOException;
 +import java.io.Reader;
 +import java.util.Set;
 +
 +import org.apache.lucene.analysis.Analyzer;
 +import org.apache.lucene.analysis.core.LowerCaseFilter;
 +import org.apache.lucene.analysis.core.StopFilter;
 +import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 +import org.apache.lucene.analysis.TokenStream;
 +import org.apache.lucene.analysis.Tokenizer;
 +import org.apache.lucene.analysis.standard.StandardFilter;
 +import org.apache.lucene.analysis.standard.StandardTokenizer;
 +import org.apache.lucene.analysis.util.CharArraySet;
 +import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 +import org.apache.lucene.analysis.util.WordlistLoader;
 +import org.apache.lucene.util.Version;
 +
 +/**
 + * {@link Analyzer} for Galician.
 + */
 +public final class GalicianAnalyzer extends StopwordAnalyzerBase {
 +  private final Set<?> stemExclusionSet;
 +
 +  /** File containing default Galician stopwords. */
 +  public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
 +
 +  /**
 +   * Returns an unmodifiable instance of the default stop words set.
 +   * @return default stop words set.
 +   */
 +  public static Set<?> getDefaultStopSet(){
 +    return DefaultSetHolder.DEFAULT_STOP_SET;
 +  }
 +
 +  /**
 +   * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
 +   * accesses the static final set the first time.
 +   */
 +  private static class DefaultSetHolder {
 +    static final Set<?> DEFAULT_STOP_SET;
 +
 +    static {
 +      try {
 +        DEFAULT_STOP_SET = WordlistLoader.getWordSet(GalicianAnalyzer.class,
 +            DEFAULT_STOPWORD_FILE);
 +      } catch (IOException ex) {
 +        // default set should always be present as it is part of the
 +        // distribution (JAR); keep the cause so a broken JAR is diagnosable
 +        throw new RuntimeException("Unable to load default stopword set", ex);
 +      }
 +    }
 +  }
 +
 +  /**
 +   * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
 +   */
 +  public GalicianAnalyzer(Version matchVersion) {
 +    this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
 +  }
 +
 +  /**
 +   * Builds an analyzer with the given stop words.
 +   *
 +   * @param matchVersion lucene compatibility version
 +   * @param stopwords a stopword set
 +   */
 +  public GalicianAnalyzer(Version matchVersion, Set<?> stopwords) {
 +    this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
 +  }
 +
 +  /**
 +   * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 +   * provided this analyzer will add a {@link KeywordMarkerFilter} before
 +   * stemming.
 +   *
 +   * @param matchVersion lucene compatibility version
 +   * @param stopwords a stopword set
 +   * @param stemExclusionSet a set of terms not to be stemmed
 +   */
 +  public GalicianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
 +    super(matchVersion, stopwords);
 +    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
 +        matchVersion, stemExclusionSet));
 +  }
 +
 +  /**
 +   * Creates a
 +   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
 +   * which tokenizes all the text in the provided {@link Reader}.
 +   *
 +   * @return A
 +   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
 +   *         built from an {@link StandardTokenizer} filtered with
 +   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
 +   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
 +   *         provided and {@link GalicianStemFilter}.
 +   */
 +  @Override
 +  protected TokenStreamComponents createComponents(String fieldName,
 +      Reader reader) {
 +    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
 +    TokenStream result = new StandardFilter(matchVersion, source);
 +    result = new LowerCaseFilter(matchVersion, result);
 +    result = new StopFilter(matchVersion, result, stopwords);
 +    if(!stemExclusionSet.isEmpty())
 +      result = new KeywordMarkerFilter(result, stemExclusionSet);
 +    result = new GalicianStemFilter(result);
 +    return new TokenStreamComponents(source, result);
 +  }
 +}

 Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\gl\GalicianAnalyzer.java
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/gl/package.html
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/gl/package.html	(revision 0)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/gl/package.html	(revision 0)
 @@ -0,0 +1,22 @@
 +<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
 +<!--
 + Licensed to the Apache Software Foundation (ASF) under one or more
 + contributor license agreements.  See the NOTICE file distributed with
 + this work for additional information regarding copyright ownership.
 + The ASF licenses this file to You under the Apache License, Version 2.0
 + (the "License"); you may not use this file except in compliance with
 + the License.  You may obtain a copy of the License at
 +
 +     http://www.apache.org/licenses/LICENSE-2.0
 +
 + Unless required by applicable law or agreed to in writing, software
 + distributed under the License is distributed on an "AS IS" BASIS,
 + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + See the License for the specific language governing permissions and
 + limitations under the License.
 +-->
 +<html><head></head>
 +<body>
 +Analyzer for Galician.
 +</body>
 +</html>

 Property changes on: modules\analysis\common\src\java\org\apache\lucene\analysis\gl\package.html
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java	(revision 1054344)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/util/StemmerUtil.java	(working copy)
 @@ -57,6 +57,25 @@
    }

    /**
 +   * Returns true if the character array ends with the suffix.
 +   *
 +   * @param s Input Buffer
 +   * @param len length of input buffer
 +   * @param suffix Suffix string to test
 +   * @return true if <code>s</code> ends with <code>suffix</code>
 +   */
 +  public static boolean endsWith(char s[], int len, char suffix[]) {
 +    final int suffixLen = suffix.length;
 +    if (suffixLen > len)
 +      return false;
 +    for (int i = suffixLen - 1; i >= 0; i--)
 +      if (s[len -(suffixLen - i)] != suffix[i])
 +        return false;
 +
 +    return true;
 +  }
 +
 +  /**
     * Delete a character in-place
     *
     * @param s Input Buffer
 Index: modules/analysis/common/src/resources/org/apache/lucene/analysis/pt/portuguese.rslp
 ===================================================================
 --- modules/analysis/common/src/resources/org/apache/lucene/analysis/pt/portuguese.rslp	(revision 0)
 +++ modules/analysis/common/src/resources/org/apache/lucene/analysis/pt/portuguese.rslp	(revision 0)
 @@ -0,0 +1,456 @@
 +#  Steps file for the RSLP stemmer.
 +
 +# Step 1: Plural Reduction
 +{  "Plural", 3, 1, {"s"},
 +  # bons -> bom
 +  {"ns",1,"m"},
 +  # balões -> balão
 +  {"ões",3,"ão"},
 +  # capitães -> capitão
 +  {"ães",1,"ão",{"mães"}},
 +  # normais -> normal
 +  {"ais",1,"al",{"cais","mais"}},
 +  # papéis -> papel
 +  {"éis",2,"el"},
 +  # amáveis -> amável
 +  {"eis",2,"el"},
 +  # lençóis -> lençol
 +  {"óis",2,"ol"},
 +  # barris -> barril
 +  {"is",2,"il",{"lápis","cais","mais","crúcis","biquínis","pois","depois","dois","leis"}},
 +  # males -> mal
 +  {"les",3,"l"},
 +  # mares -> mar
 +  {"res",3,"r", {"árvores"}},
 +  # casas -> casa
 +  {"s",2,"",{"aliás","pires","lápis","cais","mais","mas","menos","férias","fezes","pêsames","crúcis","gás","atrás","moisés","através","convés","ês","país","após","ambas","ambos","messias", "depois"}}};
 +
 +# Step 2: Adverb Reduction
 +{  "Adverb", 0, 0, {},
 +  # felizmente -> feliz
 +  {"mente",4,"",{"experimente"}}};
 +
 +# Step 3: Feminine Reduction
 +{  "Feminine", 3, 1, {"a","ã"},
 +  # chefona -> chefão
 +  {"ona",3,"ão",{"abandona","lona","iona","cortisona","monótona","maratona","acetona","detona","carona"}},
 +  # vilã -> vilão
 +  {"ã",2,"ão",{"amanhã","arapuã","fã","divã"}},
 +  # professora -> professor
 +  {"ora",3,"or"},
 +  # americana -> americano
 +  {"na",4,"no",{"carona","abandona","lona","iona","cortisona","monótona","maratona","acetona","detona","guiana","campana","grana","caravana","banana","paisana"}},
 +  # sozinha -> sozinho
 +  {"inha",3,"inho",{"rainha","linha","minha"}},
 +  # inglesa -> inglês
 +  {"esa",3,"ês",{"mesa","obesa","princesa","turquesa","ilesa","pesa","presa"}},
 +  # famosa -> famoso
 +  {"osa",3,"oso",{"mucosa","prosa"}},
 +  # maníaca -> maníaco
 +  {"íaca",3,"íaco"},
 +  # prática -> prático
 +  {"ica",3,"ico",{"dica"}},
 +  # cansada -> cansado
 +  {"ada",2,"ado",{"pitada"}},
 +  # mantida -> mantido
 +  {"ida",3,"ido",{"vida","dúvida"}},
 +  {"ída",3,"ido",{"recaída","saída"}},
 +  # prima -> primo
 +  {"ima",3,"imo",{"vítima"}},
 +  # passiva -> passivo
 +  {"iva",3,"ivo",{"saliva","oliva"}},
 +  # primeira -> primeiro
 +  {"eira",3,"eiro",{"beira","cadeira","frigideira","bandeira","feira","capoeira","barreira","fronteira","besteira","poeira"}}};
 +
 +# Step 4: Augmentative/Diminutive Reduction
 +{  "Augmentative", 0, 1, {},
 +  # cansadíssimo -> cansad
 +  {"díssimo",5},
 +  # amabilíssimo -> ama
 +  {"abilíssimo",5},
 +  # fortíssimo -> fort
 +  {"íssimo",3},
 +  {"ésimo",3},
 +  # chiquérrimo -> chiqu
 +  {"érrimo",4},
 +  # pezinho -> pe
 +  {"zinho",2},
 +  # maluquinho -> maluc
 +  {"quinho",4,"c"},
 +  # amiguinho -> amig
 +  {"uinho",4},
 +  # cansadinho -> cansad
 +  {"adinho",3},
 +  # carrinho -> carr
 +  {"inho",3,"",{"caminho","cominho"}},
 +  # grandalhão -> grand
 +  {"alhão",4},
 +  # dentuça -> dent
 +  {"uça",4},
 +  # ricaço -> ric
 +  {"aço",4,"",{"antebraço"}},
 +  {"aça",4},
 +  # cansadão -> cans
 +  {"adão",4},
 +  {"idão",4},
 +  # corpázio -> corp
 +  {"ázio",3,"",{"topázio"}},
 +  # pratarraz -> prat
 +  {"arraz",4},
 +  {"zarrão",3},
 +  {"arrão",4},
 +  # bocarra -> boc
 +  {"arra",3},
 +  # calorzão -> calor
 +  {"zão",2,"",{"coalizão"}},
 +  # meninão -> menin
 +  {"ão",3,"",{"camarão","chimarrão","canção","coração","embrião","grotão","glutão","ficção","fogão","feição","furacão","gamão","lampião","leão","macacão","nação","órfão","orgão","patrão","portão","quinhão","rincão","tração","falcão","espião","mamão","folião","cordão","aptidão","campeão","colchão","limão","leilão","melão","barão","milhão","bilhão","fusão","cristão","ilusão","capitão","estação","senão"}}};
 +
 +# Step 5: Noun Suffix Reduction
 +{  "Noun", 0, 0, {},
 +  # existencialista -> exist
 +  {"encialista",4},
 +  # minimalista -> minim
 +  {"alista",5},
 +  # contagem -> cont
 +  {"agem",3,"",{"coragem","chantagem","vantagem","carruagem"}},
 +  # gerenciamento -> gerenc
 +  {"iamento",4},
 +  # monitoramento -> monitor
 +  {"amento",3,"",{"firmamento","fundamento","departamento"}},
 +  # nascimento -> nasc
 +  {"imento",3},
 +  {"mento",6,"",{"firmamento","elemento","complemento","instrumento","departamento"}},
 +  # comercializado -> comerci
 +  {"alizado",4},
 +  # traumatizado -> traum
 +  {"atizado",4},
 +  {"tizado",4,"",{"alfabetizado"}},
 +  # alfabetizado -> alfabet
 +  {"izado",5,"",{"organizado","pulverizado"}},
 +  # associativo -> associ
 +  {"ativo",4,"",{"pejorativo","relativo"}},
 +  # contraceptivo -> contracep
 +  {"tivo",4,"",{"relativo"}},
 +  # esportivo -> esport
 +  {"ivo",4,"",{"passivo","possessivo","pejorativo","positivo"}},
 +  # abalado -> abal
 +  {"ado",2,"",{"grado"}},
 +  # impedido -> imped
 +  {"ido",3,"",{"cândido","consolido","rápido","decido","tímido","duvido","marido"}},
 +  # ralador -> ral
 +  {"ador",3},
 +  # entendedor -> entend
 +  {"edor",3},
 +  # cumpridor -> cumpr
 +  {"idor",4,"",{"ouvidor"}},
 +  {"dor",4,"",{"ouvidor"}},
 +  {"sor",4,"",{"assessor"}},
 +  {"atoria",5},
 +  {"tor",3,"",{"benfeitor","leitor","editor","pastor","produtor","promotor","consultor"}},
 +  {"or",2,"",{"motor","melhor","redor","rigor","sensor","tambor","tumor","assessor","benfeitor","pastor","terior","favor","autor"}},
 +  # comparabilidade -> compar
 +  {"abilidade",5},
 +  # abolicionista -> abol
 +  {"icionista",4},
 +  # intervencionista -> interven
 +  {"cionista",5},
 +  {"ionista",5},
 +  {"ionar",5},
 +  # profissional -> profiss
 +  {"ional",4},
 +  # referência -> refer
 +  {"ência",3},
 +  # repugnância -> repugn
 +  {"ância",4,"",{"ambulância"}},
 +  # abatedouro -> abat
 +  {"edouro",3},
 +  # fofoqueiro -> fofoc
 +  {"queiro",3,"c"},
 +  {"adeiro",4,"",{"desfiladeiro"}},
 +  # brasileiro -> brasil
 +  {"eiro",3,"",{"desfiladeiro","pioneiro","mosteiro"}},
 +  {"uoso",3},
 +  # gostoso -> gost
 +  {"oso",3,"",{"precioso"}},
 +  # comercializaç -> comerci
 +  {"alizaç",5},
 +  {"atizaç",5},
 +  {"tizaç",5},
 +  {"izaç",5,"",{"organizaç"}},
 +  # alegaç -> aleg
 +  {"aç",3,"",{"equaç","relaç"}},
 +  # aboliç -> abol
 +  {"iç",3,"",{"eleiç"}},
 +  # anedotário -> anedot
 +  {"ário",3,"",{"voluntário","salário","aniversário","diário","lionário","armário"}},
 +  {"atório",3},
 +  {"rio",5,"",{"voluntário","salário","aniversário","diário","compulsório","lionário","próprio","stério","armário"}},
 +  # ministério -> minist
 +  {"ério",6},
 +  # chinês -> chin
 +  {"ês",4},
 +  # beleza -> bel
 +  {"eza",3},
 +  # rigidez -> rigid
 +  {"ez",4},
 +  # parentesco -> parent
 +  {"esco",4},
 +  # ocupante -> ocup
 +  {"ante",2,"",{"gigante","elefante","adiante","possante","instante","restaurante"}},
 +  # bombástico -> bomb
 +  {"ástico",4,"",{"eclesiástico"}},
 +  {"alístico",3},
 +  {"áutico",4},
 +  {"êutico",4},
 +  {"tico",3,"",{"político","eclesiástico","diagnostico","prático","doméstico","diagnóstico","idêntico","alopático","artístico","autêntico","eclético","crítico","critico"}},
 +  # polêmico -> polêm
 +  {"ico",4,"",{"tico","público","explico"}},
 +  # produtividade -> produt
 +  {"ividade",5},
 +  # profundidade -> profund
 +  {"idade",4,"",{"autoridade","comunidade"}},
 +  # aposentadoria -> aposentad
 +  {"oria",4,"",{"categoria"}},
 +  # existencial -> exist
 +  {"encial",5},
 +  # artista -> art
 +  {"ista",4},
 +  {"auta",5},
 +  # maluquice -> maluc
 +  {"quice",4,"c"},
 +  # chatice -> chat
 +  {"ice",4,"",{"cúmplice"}},
 +  # demoníaco -> demon
 +  {"íaco",3},
 +  # decorrente -> decorr
 +  {"ente",4,"",{"freqüente","alimente","acrescente","permanente","oriente","aparente"}},
 +  {"ense",5},
 +  # criminal -> crim
 +  {"inal",3},
 +  # americano -> americ
 +  {"ano",4},
 +  # amável -> am
 +  {"ável",2,"",{"afável","razoável","potável","vulnerável"}},
 +  # combustível -> combust
 +  {"ível",3,"",{"possível"}},
 +  {"vel",5,"",{"possível","vulnerável","solúvel"}},
 +  {"bil",3,"vel"},
 +  # cobertura -> cobert
 +  {"ura",4,"",{"imatura","acupuntura","costura"}},
 +  {"ural",4},
 +  # consensual -> consens
 +  {"ual",3,"",{"bissexual","virtual","visual","pontual"}},
 +  # mundial -> mund
 +  {"ial",3},
 +  # experimental -> experiment
 +  {"al",4,"",{"afinal","animal","estatal","bissexual","desleal","fiscal","formal","pessoal","liberal","postal","virtual","visual","pontual","sideral","sucursal"}},
 +  {"alismo",4},
 +  {"ivismo",4},
 +  {"ismo",3,"",{"cinismo"}}};
 +
 +# Step 6: Verb Suffix Reduction
 +{  "Verb", 0, 0, {},
 +  # cantaríamo -> cant
 +  {"aríamo",2},
 +  # cantássemo -> cant
 +  {"ássemo",2},
 +  # beberíamo -> beb
 +  {"eríamo",2},
 +  # bebêssemo -> beb
 +  {"êssemo",2},
 +  # partiríamo -> part
 +  {"iríamo",3},
 +  # partíssemo -> part
 +  {"íssemo",3},
 +  # cantáramo -> cant
 +  {"áramo",2},
 +  # cantárei -> cant
 +  {"árei",2},
 +  # cantaremo -> cant
 +  {"aremo",2},
 +  # cantariam -> cant
 +  {"ariam",2},
 +  # cantaríei -> cant
 +  {"aríei",2},
 +  # cantássei -> cant
 +  {"ássei",2},
 +  # cantassem -> cant
 +  {"assem",2},
 +  # cantávamo -> cant
 +  {"ávamo",2},
 +  # bebêramo -> beb
 +  {"êramo",3},
 +  # beberemo -> beb
 +  {"eremo",3},
 +  # beberiam -> beb
 +  {"eriam",3},
 +  # beberíei -> beb
 +  {"eríei",3},
 +  # bebêssei -> beb
 +  {"êssei",3},
 +  # bebessem -> beb
 +  {"essem",3},
 +  # partíramo -> part
 +  {"íramo",3},
 +  # partiremo -> part
 +  {"iremo",3},
 +  # partiriam -> part
 +  {"iriam",3},
 +  # partiríei -> part
 +  {"iríei",3},
 +  # partíssei -> part
 +  {"íssei",3},
 +  # partissem -> part
 +  {"issem",3},
 +  # cantando -> cant
 +  {"ando",2},
 +  # bebendo -> beb
 +  {"endo",3},
 +  # partindo -> part
 +  {"indo",3},
 +  # propondo -> prop
 +  {"ondo",3},
 +  # cantaram -> cant
 +  {"aram",2},
 +  {"arão",2},
 +  # cantarde -> cant
 +  {"arde",2},
 +  # cantarei -> cant
 +  {"arei",2},
 +  # cantarem -> cant
 +  {"arem",2},
 +  # cantaria -> cant
 +  {"aria",2},
 +  # cantarmo -> cant
 +  {"armo",2},
 +  # cantasse -> cant
 +  {"asse",2},
 +  # cantaste -> cant
 +  {"aste",2},
 +  # cantavam -> cant
 +  {"avam",2,"",{"agravam"}},
 +  # cantávei -> cant
 +  {"ávei",2},
 +  # beberam -> beb
 +  {"eram",3},
 +  {"erão",3},
 +  # beberde -> beb
 +  {"erde",3},
 +  # beberei -> beb
 +  {"erei",3},
 +  # bebêrei -> beb
 +  {"êrei",3},
 +  # beberem -> beb
 +  {"erem",3},
 +  # beberia -> beb
 +  {"eria",3},
 +  # bebermo -> beb
 +  {"ermo",3},
 +  # bebesse -> beb
 +  {"esse",3},
 +  # bebeste -> beb
 +  {"este",3,"",{"faroeste","agreste"}},
 +  # bebíamo -> beb
 +  {"íamo",3},
 +  # partiram -> part
 +  {"iram",3},
 +  # concluíram -> conclu
 +  {"íram",3},
 +  {"irão",2},
 +  # partirde -> part
 +  {"irde",2},
 +  # partirei -> part
 +  {"irei",3,"",{"admirei"}},
 +  # partirem -> part
 +  {"irem",3,"",{"adquirem"}},
 +  # partiria -> part
 +  {"iria",3},
 +  # partirmo -> part
 +  {"irmo",3},
 +  # partisse -> part
 +  {"isse",3},
 +  # partiste -> part
 +  {"iste",4},
 +  {"iava",4,"",{"ampliava"}},
 +  # cantamo -> cant
 +  {"amo",2},
 +  {"iona",3},
 +  # cantara -> cant
 +  {"ara",2,"",{"arara","prepara"}},
 +  # cantará -> cant
 +  {"ará",2,"",{"alvará"}},
 +  # cantare -> cant
 +  {"are",2,"",{"prepare"}},
 +  # cantava -> cant
 +  {"ava",2,"",{"agrava"}},
 +  # cantemo -> cant
 +  {"emo",2},
 +  # bebera -> beb
 +  {"era",3,"",{"acelera","espera"}},
 +  # beberá -> beb
 +  {"erá",3},
 +  # bebere -> beb
 +  {"ere",3,"",{"espere"}},
 +  # bebiam -> beb
 +  {"iam",3,"",{"enfiam","ampliam","elogiam","ensaiam"}},
 +  # bebíei -> beb
 +  {"íei",3},
 +  # partimo -> part
 +  {"imo",3,"",{"reprimo","intimo","íntimo","nimo","queimo","ximo"}},
 +  # partira -> part
 +  {"ira",3,"",{"fronteira","sátira"}},
 +  {"ído",3},
 +  # partirá -> part
 +  {"irá",3},
 +  {"tizar",4,"",{"alfabetizar"}},
 +  {"izar",5,"",{"organizar"}},
 +  {"itar",5,"",{"acreditar","explicitar","estreitar"}},
 +  # partire -> part
 +  {"ire",3,"",{"adquire"}},
 +  # compomo -> comp
 +  {"omo",3},
 +  # cantai -> cant
 +  {"ai",2},
 +  # cantam -> cant
 +  {"am",2},
 +  # barbear -> barb
 +  {"ear",4,"",{"alardear","nuclear"}},
 +  # cantar -> cant
 +  {"ar",2,"",{"azar","bazaar","patamar"}},
 +  # cheguei -> cheg
 +  {"uei",3},
 +  {"uía",5,"u"},
 +  # cantei -> cant
 +  {"ei",3},
 +  {"guem",3,"g"},
 +  # cantem -> cant
 +  {"em",2,"",{"alem","virgem"}},
 +  # beber -> beb
 +  {"er",2,"",{"éter","pier"}},
 +  # bebeu -> beb
 +  {"eu",3,"",{"chapeu"}},
 +  # bebia -> beb
 +  {"ia",3,"",{"estória","fatia","acia","praia","elogia","mania","lábia","aprecia","polícia","arredia","cheia","ásia"}},
 +  # partir -> part
 +  {"ir",3,"",{"freir"}},
 +  # partiu -> part
 +  {"iu",3},
 +  {"eou",5},
 +  # chegou -> cheg
 +  {"ou",3},
 +  # bebi -> beb
 +  {"i",3}};
 +
 +# Step 7: Vowel Removal
 +{  "Vowel", 0, 0, {},
 +  {"bil",2,"vel"},
 +  {"gue",2,"g",{"gangue","jegue"}},
 +  {"á",3},
 +  {"ê",3,"",{"bebê"}},
 +  # menina -> menin
 +  {"a",3,"",{"ásia"}},
 +  # grande -> grand
 +  {"e",3},
 +  # menino -> menin
 +  {"o",3,"",{"ão"}}};
 Index: modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt
 ===================================================================
 --- modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt	(revision 0)
 +++ modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/stopwords.txt	(revision 0)
 @@ -0,0 +1,161 @@
 +# Galician stopwords
 +a
 +aínda
 +alí
 +aquel
 +aquela
 +aquelas
 +aqueles
 +aquilo
 +aquí
 +ao
 +aos
 +as
 +así
 +á
 +ben
 +cando
 +che
 +co
 +coa
 +comigo
 +con
 +connosco
 +contigo
 +convosco
 +coas
 +cos
 +cun
 +cuns
 +cunha
 +cunhas
 +da
 +dalgunha
 +dalgunhas
 +dalgún
 +dalgúns
 +das
 +de
 +del
 +dela
 +delas
 +deles
 +desde
 +deste
 +do
 +dos
 +dun
 +duns
 +dunha
 +dunhas
 +e
 +el
 +ela
 +elas
 +eles
 +en
 +era
 +eran
 +esa
 +esas
 +ese
 +eses
 +esta
 +estar
 +estaba
 +está
 +están
 +este
 +estes
 +estiven
 +estou
 +eu
 +é
 +facer
 +foi
 +foron
 +fun
 +había
 +hai
 +iso
 +isto
 +la
 +las
 +lle
 +lles
 +lo
 +los
 +mais
 +me
 +meu
 +meus
 +min
 +miña
 +miñas
 +moi
 +na
 +nas
 +neste
 +nin
 +no
 +non
 +nos
 +nosa
 +nosas
 +noso
 +nosos
 +nós
 +nun
 +nunha
 +nuns
 +nunhas
 +o
 +os
 +ou
 +ó
 +ós
 +para
 +pero
 +pode
 +pois
 +pola
 +polas
 +polo
 +polos
 +por
 +que
 +se
 +senón
 +ser
 +seu
 +seus
 +sexa
 +sido
 +sobre
 +súa
 +súas
 +tamén
 +tan
 +te
 +ten
 +teñen
 +teño
 +ter
 +teu
 +teus
 +ti
 +tido
 +tiña
 +tiven
 +túa
 +túas
 +un
 +unha
 +unhas
 +uns
 +vos
 +vosa
 +vosas
 +voso
 +vosos
 +vós

 Property changes on: modules\analysis\common\src\resources\org\apache\lucene\analysis\gl\stopwords.txt
 ___________________________________________________________________
 Added: svn:eol-style
    + native

 Index: modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/galician.rslp
 ===================================================================
 --- modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/galician.rslp	(revision 0)
 +++ modules/analysis/common/src/resources/org/apache/lucene/analysis/gl/galician.rslp	(revision 0)
 @@ -0,0 +1,647 @@
 +#	Steps file for the RSLP stemmer.
 +
 +# Step 1: Plural Reduction
 +{  "Plural", 3, 1, {"s"},
 +  # bons -> bon
 +  {"ns",1,"n",{"luns","furatapóns","furatapons"}},
 +  # xamós -> xamón
 +  {"ós",3,"ón"},
 +  # balões -> balón
 +  {"ões",3,"ón"},
 +  # capitães -> capitão
 +  {"ães",1,"ão",{"mães","magalhães"}},
 +  # normais -> normal
 +  {"ais",2,"al",{"cais","tais","mais","pais","ademais"}},
 +  {"áis",2,"al",{"cáis","táis", "máis", "páis", "ademáis"}},
 +  # papéis -> papel
 +  {"éis",2,"el"},
 +  # posíbeis -> posíbel
 +  {"eis",2,"el"},
 +  # espanhóis -> espanhol
 +  {"óis",2,"ol",{"escornabóis"}},
 +  # caracois -> caracol
 +  {"ois",2,"ol",{"escornabois"}},
 +  # cadrís -> cadril
 +  {"ís",2,"il",{"país"}},
 +  # cadris -> cadril
 +  {"is",2,"il",{"menfis","pais","kinguis"}},
 +  # males -> mal
 +  {"les",2,"l",{"ingles","marselles","montreales","senegales","manizales","móstoles","nápoles"}},
 +  # mares -> mar
 +  {"res",3,"r",{"petres","henares","cáceres","baleares","linares","londres","mieres","miraflores","mércores","venres", "pires"}},
 +  # luces -> luz
 +  {"ces",2,"z"},
 +  # luzes -> luz
 +  {"zes",2,"z"},
 +  # leises -> lei
 +  {"ises",3,"z"},
 +  # animás -> animal
 +  {"ás",1,"al",{"más"}},
 +  # gases -> gas
 +  {"ses",2,"s"},
 +  # casas -> casa
 +  {"s",2,"",{"barbadés","barcelonés","cantonés","gabonés","llanés","medinés","escocés","escocês","francês","barcelonês","cantonês","macramés","reves","barcelones","cantones","gabones","llanes","magallanes","medines","escoces","frances","xoves","martes","aliás","pires","lápis","cais","mais","mas","menos","férias","pêsames","crúcis","país","cangas","atenas","asturias","canarias","filipinas","honduras","molucas","caldas","mascareñas","micenas","covarrubias","psoas","óculos","nupcias","xoves","martes","llanes"}}};
 +
 +{  "Unification", 0, 0, {},
 +  # cansadíssimo -> cansadísimo
 +  {"íssimo",5,"ísimo"},
 +  # cansadíssima -> cansadísima
 +  {"íssima",5,"ísima"},
 +  # homaço -> homazo
 +  {"aço",4,"azo"},
 +  # mulheraça -> mulheraza
 +  {"aça",4,"aza"},
 +  # xentuça -> xentuza
 +  {"uça",4,"uza"},
 +  # manilhar -> manillar
 +  {"lhar",2,"llar"},
 +  # colher -> coller
 +  {"lher",2,"ller"},
 +  # melhor -> mellor
 +  {"lhor",2,"llor"},
 +  # alho -> allo
 +  {"lho",1,"llo"},
 +  # linhar -> liñar
 +  {"nhar",2,"ñar"},
 +  # penhor -> peñor
 +  {"nhor",2,"ñor"},
 +  # anho -> año
 +  {"nho",1,"ño"},
 +  # cunha -> cuña
 +  {"nha",1,"ña"},
 +  # hospitalário -> hospitalario
 +  {"ário",3,"ario"},
 +  # bibliotecária -> bibliotecaria
 +  {"ária",3,"aria"},
 +  # agradable -> agradábel
 +  {"able",2,"ábel"},
 +  # agradável -> agradábel
 +  {"ável",2,"ábel"},
 +  # imposible -> imposíbel
 +  {"ible",2,"íbel"},
 +  # imposível -> imposíbel
 +  {"ível",2,"íbel"},
 +  # imposiçom -> imposición
 +  {"çom",2,"ción"},
 +  # garagem -> garaxe
 +  {"agem",2,"axe"},
 +  # garage -> garaxe
 +  {"age",2,"axe"},
 +  # impressão -> impressón
 +  {"ão",3,"ón"},
 +  # irmao -> irmán
 +  {"ao",1,"án"},
 +  # irmau -> irmán
 +  {"au",1,"án"},
 +  # garrafom -> garrafón
 +  {"om",3,"ón"},
 +  # cantem -> canten
 +  {"m",2,"n"}};
 +
 +{  "Adverb", 0, 0, {},
 +  # felizmente -> feliz
 +  {"mente",4,"",{"experimente","vehemente","sedimente"}}};
 +
 +{  "Augmentative", 0, 1, {},
 +  # cansadísimo -> cansad
 +  {"dísimo",5},
 +  # cansadísima -> cansad
 +  {"dísima",5},
 +  # amabilísimo -> ama
 +  {"bilísimo",3},
 +  # amabilísima -> ama
 +  {"bilísima",3},
 +  # fortísimo -> fort
 +  {"ísimo",3},
 +  # fortísima -> fort
 +  {"ísima",3},
 +  # centésimo -> cent
 +  {"ésimo",3},
 +  # centésima -> cent
 +  {"ésima",3},
 +  # paupérrimo -> paup
 +  {"érrimo",4},
 +  # paupérrima -> paup
 +  {"érrima",4},
 +  # charlatana -> charlat
 +  {"ana",2,"",{"argana","banana","choupana","espadana","faciana","iguana","lantana","macana","membrana","mesana","nirvana","obsidiana","palangana","pavana","persiana","pestana","porcelana","pseudomembrana","roldana","sábana","salangana","saragana","ventana"}},
 +  # charlatán -> charlat
 +  {"án",3,"",{"ademán","bardán","barregán","corricán","curricán","faisán","furacán","fustán","gabán","gabián","galán","gañán","lavacán","mazán","mourán","rabadán","serán","serrán","tabán","titán","tobogán","verán","volcán","volován"}},
 +  # homazo -> hom
 +  {"azo",4,"",{"abrazo","espazo","andazo","bagazo","balazo","bandazo","cachazo","carazo","denazo","engazo","famazo","lampreazo","pantocazo","pedazo","preñazo","regazo","ribazo","sobrazo","terrazo","trompazo"}},
 +  # mulleraza -> muller
 +  {"aza",3,"",{"alcarraza","ameaza","baraza","broucaza","burgaza","cabaza","cachaza","calaza","carpaza","carraza","coiraza","colmaza","fogaza","famaza","labaza","liñaza","melaza","mordaza","paraza","pinaza","rabaza","rapaza","trancaza"}},
 +  # cascallo -> casc
 +  {"allo",4,"",{"traballo"}},
 +  # xentalla -> xent
 +  {"alla",4},
 +  # bocarra -> boc
 +  {"arra",3,"",{"cigarra","cinzarra"}},
 +  # medicastro -> medic
 +  {"astro",3,"",{"balastro","bimbastro","canastro","retropilastro"}},
 +  # poetastra -> poet
 +  {"astra",3,"",{"banastra","canastra","contrapilastra","piastra","pilastra"}},
 +  # corpázio -> corp
 +  {"ázio",3,"",{"topázio"}},
 +  # soutelo -> sout
 +  {"elo",4,"",{"bacelo","barrelo","bicarelo","biquelo","boquelo","botelo","bouquelo","cacarelo","cachelo","cadrelo","campelo","candelo","cantelo","carabelo","carambelo","caramelo","cercelo","cerebelo","chocarelo","coitelo","conchelo","corbelo","cotobelo","couselo","destelo","desvelo","esfácelo","fandelo","fardelo","farelo","farnelo","flabelo","ganchelo","garfelo","involucelo","mantelo","montelo","outerelo","padicelo","pesadelo","pinguelo","piquelo","rampelo","rastrelo","restelo","tornecelo","trabelo","restrelo","portelo","ourelo","zarapelo"}},
 +  # avioneta -> avion
 +  {"eta",3,"",{"arqueta","atleta","avoceta","baioneta","baldeta","banqueta","barraganeta","barreta","borleta","buceta","caceta","calceta","caldeta","cambeta","canaleta","caneta","carreta","cerceta","chaparreta","chapeta","chareta","chincheta","colcheta","cometa","corbeta","corveta","cuneta","desteta","espeta","espoleta","estafeta","esteta","faceta","falanxeta","frasqueta","gaceta","gabeta","galleta","garabeta","gaveta","glorieta","lagareta","lambeta","lanceta","libreta","maceta","macheta","maleta","malleta","mareta","marreta","meseta","mofeta","muleta","peseta","planeta","raqueta","regreta","saqueta","veleta","vendeta","viñeta"}},
 +  # guapete -> guap
 +  {"ete",3,"",{"alfinete","ariete","bacinete","banquete","barallete","barrete","billete","binguelete","birrete","bonete","bosquete","bufete","burlete","cabalete","cacahuete","cavinete","capacete","carrete","casarete","casete","chupete","clarinete","colchete","colete","capete","curupete","disquete","estilete","falsete","ferrete","filete","gallardete","gobelete","inglete","machete","miquelete","molete","mosquete","piquete","ribete","rodete","rolete","roquete","sorvete","vedete","vendete"}},
 +  # práctica -> práct
 +  {"ica",3,"",{"andarica","botánica","botica","dialéctica","dinámica","física","formica","gráfica","marica","túnica"}},
 +  # práctico -> práct
 +  {"ico",3,"",{"conico","acetifico","acidifico"}},
 +  # trapexo -> trap
 +  {"exo",3,"",{"arpexo","arquexo","asexo","axexo","azulexo","badexo","bafexo","bocexo","bosquexo","boubexo","cacarexo","carrexo","cascarexo","castrexo","convexo","cotexo","desexo","despexo","forcexo","gabexo","gargarexo","gorgolexo","inconexo","manexo","merexo","narnexo","padexo","patexo","sopexo","varexo"}},
 +  {"exa",3,"",{"airexa","bandexa","carrexa","envexa","igrexa","larexa","patexa","presexa","sobexa"}},
 +  # multidão -> mult
 +  {"idão",3},
 +  # pequeniño -> pequeno
 +  {"iño",3,"o",{"camiño","cariño","comiño","golfiño","padriño","sobriño","viciño","veciño"}},
 +  # pequeniña -> pequena
 +  {"iña",3,"a",{"camariña","campiña","entreliña","espiña","fariña","moriña","valiña"}},
 +  # grandito -> grand
 +  {"ito",3,""},
 +  # grandita -> grand
 +  {"ita",3,""},
 +  # anomaloide -> anomal
 +  {"oide",3,"",{"anaroide","aneroide","asteroide","axoide","cardioide","celuloide","coronoide","discoide","espermatozoide","espiroide","esquizoide","esteroide","glenoide","linfoide","hemorroide","melaloide","sacaroide","tetraploide","varioloide"}},
 +  # cazola -> caz
 +  {"ola",3,"",{"aixola","ampola","argola","arola","arteríola","bandola","bítola","bractéola","cachola","carambola","carapola","carola","carrandiola","catrapola","cebola","centola","champola","chatola","cirola","cítola","consola","corola","empola","escarola","esmola","estola","fitola","florícola","garañola","gárgola","garxola","glicocola","góndola","mariola","marola","michola","pirola","rebola","rupícola","saxícola","sémola","tachola","tómbola"}},
 +  # pedrolo -> pedr
 +  {"olo",3,"",{"arrolo","babiolo","cacharolo","caixarolo","carolo","carramolo","cascarolo","cirolo","codrolo","correolo","cotrolo","desconsolo","rebolo","repolo","subsolo","tixolo","tómbolo","torolo","trémolo","vacúolo","xermolo","zócolo"}},
 +  # vellote -> vell
 +  {"ote",3,"",{"aigote","alcaiote","barbarote","balote","billote","cachote","camarote","capote","cebote","chichote","citote","cocorote","escote","gañote","garrote","gavote","lamote","lapote","larapote","lingote","lítote","magote","marrote","matalote","pandote","paparote","rebote","tagarote","zarrote"}},
 +  # mozota -> moz
 +  {"ota",3,"",{"asíntota","caiota","cambota","chacota","compota","creosota","curota","derrota","díspota","gamota","maniota","pelota","picota","pillota","pixota","queirota","remota"}},
 +  # gordocho -> gord
 +  {"cho",3,"",{"abrocho","arrocho","carocho","falucho","bombacho","borracho","mostacho"}},
 +  # gordecha -> gord
 +  {"cha",3,"",{"borracha","carracha","estacha","garnacha","limacha","remolacha","abrocha"}},
 +  # baratuco -> barat
 +  {"uco",4,"",{"caduco","estuco","fachuco","malluco","saluco","trabuco"}},
 +  # borrachuzo -> borrach
 +  {"uzo",3,"",{"carriñouzo","fachuzo","mañuzo","mestruzo","tapuzo"}},
 +  # xentuza -> xent
 +  {"uza",3,"",{"barruza","chamuza","chapuza","charamuza","conduza","deduza","desluza","entreluza","induza","reluza","seduza","traduza","trasluza"}},
 +  # babuxa -> bab
 +  {"uxa",3,"",{"caramuxa","carrabouxa","cartuxa","coruxa","curuxa","gaturuxa","maruxa","meruxa","miruxa","moruxa","muruxa","papuxa","rabuxa","trouxa"}},
 +  {"uxo",3,"",{"caramuxo","carouxo","carrabouxo","curuxo","debuxo","ganduxo","influxo","negouxo","pertuxo","refluxo"}},
 +  # grupello -> grup
 +  {"ello",3,"",{"alborello","artello","botello","cachafello","calello","casarello","cazabello","cercello","cocerello","concello","consello","desparello","escaravello","espello","fedello","fervello","gagafello","gorrobello","nortello","pendello","troupello","trebello"}},
 +  # pontella -> pont
 +  {"ella",3,"",{"alborella","bertorella","bocatella","botella","calella","cercella","gadella","grosella","lentella","movella","nocella","noitevella","parella","pelella","percebella","segorella","sabella"}}};
 +
 +{  "Noun", 0, 0, {},
 +  # lealdade -> leal
 +  {"dade",3,"",{"acridade","calidade"}},
 +  # clarificar -> clar
 +  {"ificar",2},
 +  # brasileiro->brasil
 +  {"eiro",3,"",{"agoireiro","bardalleiro","braseiro","barreiro","canteiro","capoeiro","carneiro","carteiro","cinceiro","faroleiro","mareiro","preguiceiro","quinteiro","raposeiro","retranqueiro","regueiro","sineiro","troleiro","ventureiro"}},
 +  # marisqueira -> marisqu
 +  {"eira",3,"",{"cabeleira","canteira","cocheira","folleira","milleira"}},
 +  # hospitalario -> hospital
 +  {"ario",3,"",{"armario","calcario","lionario","salario"}},
 +  # bibliotecaria -> bibliotec
 +  {"aria",3,"",{"cetaria","coronaria","fumaria","linaria","lunaria","parietaria","saponaria","serpentaria"}},
 +  # humorístico -> humor
 +  {"ístico",3,"",{"balístico", "ensaístico"}},
 +  # castrista -> castr
 +  {"ista",3,"",{"batista","ciclista","fadista","operista","tenista","verista"}},
 +  # lavado -> lav
 +  {"ado",2,"",{"grado","agrado"}},
 +  # decanato -> decan
 +  {"ato",2,"",{"agnato"}},
 +  # xemido -> xem
 +  {"ido",3,"",{"cándido","cândido","consolido","decidido","duvido","marido","rápido"}},
 +  # mantida -> mant
 +  {"ida",3,"",{"bastida","dúbida","dubida","duvida","ermida","éxida","guarida","lapicida","medida","morida"}},
 +  {"ída",3},
 +  # mantído -> mant
 +  {"ido",3},
 +  # orelludo -> orell
 +  {"udo",3,"",{"estudo","escudo"}},
 +  # orelluda -> orell
 +  {"uda",3},
 +  {"ada",3,"",{"abada","alhada","allada","pitada"}},
 +  # comedela -> come
 +  {"dela",3,"",{"cambadela","cavadela","forcadela","erisipidela","mortadela","espadela","fondedela","picadela","arandela","candela","cordela","escudela","pardela"}},
 +  # fontela -> font
 +  {"ela",3,"",{"canela","capela","cotela","cubela","curupela","escarapela","esparrela","estela","fardela","flanela","fornela","franela","gabela","gamela","gavela","glumela","granicela","lamela","lapela","malvela","manela","manganela","mexarela","micela","mistela","novela","ourela","panela","parcela","pasarela","patamela","patela","paxarela","pipela","pitela","postela","pubela","restela","sabela","salmonela","secuela","sentinela","soldanela","subela","temoncela","tesela","tixela","tramela","trapela","varela","vitela","xanela","xestela"}},
 +  # agradábel -> agrad
 +  {"ábel",2,"",{"afábel","fiábel"}},
 +  # combustíbel -> combust
 +  {"íbel",2,"",{"críbel","imposíbel","posíbel","fisíbel","falíbel"}},
 +  # fabricante -> fabrica
 +  {"nte",3,"",{"alimente","adiante","acrescente","elefante","frequente","freqüente","gigante","instante","oriente","permanente","posante","possante","restaurante"}},
 +  # ignorancia -> ignora
 +  {"ncia",3},
 +  # temperanza -> tempera
 +  {"nza",3},
 +  {"acia",3,"",{"acracia","audacia","falacia","farmacia"}},
 +  # inmundicia -> inmund
 +  {"icia",3,"",{"caricia","delicia","ledicia","malicia","milicia","noticia","pericia","presbicia","primicia","regalicia","sevicia","tiricia"}},
 +  # xustiza -> xust
 +  {"iza",3,"",{"alvariza","baliza","cachiza","caniza","cañiza","carbaliza","carriza","chamariza","chapiza","fraguiza","latiza","longaniza","mañiza","nabiza","peliza","preguiza","rabiza"}},
 +  # clarexar -> clar
 +  {"exar",3,"",{"palmexar"}},
 +  # administración -> administr
 +  {"ación",2,"",{"aeración"}},
 +  # expedición -> exped
 +  {"ición",3,"",{"condición","gornición","monición","nutrición","petición","posición","sedición","volición"}},
 +  # excepción -> except
 +  {"ción",3,"t"},
 +  # comprensión -> comprens
 +  {"sión",3,"s",{"abrasión", "alusión"}},
 +  # doazón -> do
 +  {"azón",2,"",{"armazón"}},
 +  # garrafón -> garraf
 +  {"ón",3,"",{"abalón","acordeón","alción","aldrabón","alerón","aliñón","ambón","bombón","calzón","campón","canalón","cantón","capitón","cañón","centón","ciclón","collón","colofón","copón","cotón","cupón","petón","tirón","tourón","turón","unción","versión","zubón","zurrón"}},
 +  # lambona -> lamb
 +  {"ona",3,"",{"abandona","acetona","aleurona","amazona","anémona","bombona","cambona","carona","chacona","charamona","cincona","condona","cortisona","cretona","cretona","detona","estona","fitohormona","fregona","gerona","hidroquinona","hormona","lesiona","madona","maratona","matrona","metadona","monótona","neurona","pamplona","peptona","poltrona","proxesterona","quinona","quinona","silicona","sulfona"}},
 +  # bretoa -> bret
 +  {"oa",3,"",{"abandoa","madroa","barbacoa","estoa","airoa","eiroa","amalloa","ámboa","améndoa","anchoa","antinéboa","avéntoa","avoa","bágoa","balboa","bisavoa","boroa","canoa","caroa","comadroa","coroa","éngoa","espácoa","filloa","fírgoa","grañoa","lagoa","lanzoa","magoa","mámoa","morzoa","noiteboa","noraboa","parañoa","persoa","queiroa","rañoa","táboa","tataravoa","teiroa"}},
 +  # demoníaco -> demoní
 +  {"aco",3},
 +  # demoníaca -> demoní
 +  {"aca",3,"",{"alpaca","barraca","bullaca","buraca","carraca","casaca","cavaca","cloaca","entresaca","ervellaca","espinaca","estaca","farraca","millaca","pastinaca","pataca","resaca","urraca","purraca"}},
 +  # carballal -> carball
 +  {"al",4,"",{"afinal","animal","estatal","bisexual","bissexual","desleal","fiscal","formal","pessoal","persoal","liberal","postal","virtual","visual","pontual","puntual","homosexual","heterosexual"}},
 +  # nadador -> nada
 +  {"dor",2,"",{"abaixador"}},
 +  # benfeitor -> benfei
 +  {"tor",3,"",{"autor","motor","pastor","pintor"}},
 +  # produtor -> produt
 +  {"or",2,"",{"asesor","assessor","favor","mellor","melhor","redor","rigor","sensor","tambor","tumor"}},
 +  # profesora -> profes
 +  {"ora",3,"",{"albacora","anáfora","áncora","apisoadora","ardora","ascospora","aurora","avéspora","bitácora","canéfora","cantimplora","catáfora","cepilladora","demora","descalcificadora","diáspora","empacadora","epífora","ecavadora","escora","eslora","espora","fotocompoñedora","fotocopiadora","grampadora","isícora","lavadora","lixadora","macrospora","madrépora","madrágora","masora","mellora","metáfora","microspora","milépora","milpéndora","nécora","oospora","padeadora","pasiflora","pécora","píldora","pólvora","ratinadora","rémora","retroescavadora","sófora","torradora","trémbora","uredospora","víbora","víncora","zoospora"}},
 +  # zapataría -> zapat
 +  {"aría",3,"",{"libraría"}},
 +  # etiquetaxe -> etiquet
 +  {"axe",3,"",{"aluaxe","amaraxe","amperaxe","bagaxe","balaxe","barcaxe","borraxe","bescaxe","cabotaxe","carraxe","cartilaxe","chantaxe","colaxe","coraxe","carruaxe","dragaxe","embalaxe","ensilaxe","epistaxe","fagundaxe","fichaxe","fogaxe","forraxe","fretaxe","friaxe","garaxe","homenaxe","leitaxe","liñaxe","listaxe","maraxe","marcaxe","maridaxe","masaxe","miraxe","montaxe","pasaxe","peaxe","portaxe","ramaxe","rebelaxe","rodaxe","romaxe","sintaxe","sondaxe","tiraxe","vantaxe","vendaxe","viraxe"}},
 +  # movedizo -> move
 +  {"dizo",3},
 +  # limpeza -> limp
 +  {"eza",3,"",{"alteza","beleza","fereza","fineza","vasteza","vileza"}},
 +  # rixidez -> rixid
 +  {"ez",3,"",{"acidez","adultez","adustez","avidez","candidez","mudez","nenez","nudez","pomez"}},
 +  # mullerengo -> muller
 +  {"engo",3},
 +  # chairego -> chair
 +  {"ego",3,"",{"corego","derrego","entrego","lamego","sarego","sartego"}},
 +  # cariñoso -> cariñ
 +  {"oso",3,"",{"afanoso","algoso","caldoso","caloso","cocoso","ditoso","favoso","fogoso","lamoso","mecoso","mocoso","precioso","rixoso","venoso","viroso","xesoso"}},
 +  # cariñosa -> cariñ
 +  {"osa",3,"",{"mucosa","glicosa","baldosa","celulosa","isoglosa","nitrocelulosa","levulosa","ortosa","pectosa","preciosa","sacarosa","serosa","ventosa"}},
 +  # negrume -> negr
 +  {"ume",3,"",{"agrume","albume","alcume","batume","cacume","cerrume","chorume","churume","costume","curtume","estrume","gafume","legume","perfume","queixume","zarrume"}},
 +  # altura -> alt
 +  {"ura",3,"",{"albura","armadura","imatura","costura"}},
 +  # cuspiñar -> cusp
 +  {"iñar",3},
 +  # febril -> febr
 +  {"il",3,"",{"abril","alfil","anil","atril","badil","baril","barril","brasil","cadril","candil","cantil","carril","chamil","chancil","civil","cubil","dátil","difícil","dócil","edil","estéril","fácil","fráxil","funil","fusil","grácil","gradil","hábil","hostil","marfil"}},
 +  # principesco -> princip
 +  {"esco",4},
 +  # mourisco -> mour
 +  {"isco",4},
 +  # esportivo -> esport
 +  {"ivo",3,"",{"pasivo","positivo","passivo","possessivo","posesivo","pexotarivo","relativo"}}};
 +
 +{  "Verb", 0, 0, {},
 +  # amaba -> am
 +  {"aba",2},
 +  # andabade -> and
 +  {"abade",2},
 +  # andábade -> and
 +  {"ábade",2},
 +  # chorabamo -> chor
 +  {"abamo",2},
 +  # chorábamo -> chor
 +  {"ábamo",2},
 +  # moraban -> mor
 +  {"aban",2},
 +  # andache -> and
 +  {"ache",2},
 +  # andade -> and
 +  {"ade",2},
 +  {"an",2},
 +  # cantando -> cant
 +  {"ando",2},
 +  # cantar -> cant
 +  {"ar",2,"",{"azar","bazar","patamar"}},
 +  # lembrarade -> lembr
 +  {"arade",2},
 +  {"aramo",2},
 +  {"arán",2},
 +  # cantaran -> cant
 +  {"aran",2},
 +  # convidárade -> convid
 +  {"árade",2},
 +  # convidaría -> convid
 +  {"aría",2},
 +  # cantariade -> cant
 +  {"ariade",2},
 +  # cantaríade -> cant
 +  {"aríade",2},
 +  # cantarian -> cant
 +  {"arian",2},
 +  # cantariamo -> cant
 +  {"ariamo",2},
 +  # pescaron -> pesc
 +  {"aron",2},
 +  # cantase -> cant
 +  {"ase",2},
 +  # cantasede -> cant
 +  {"asede",2},
 +  # cantásede -> cant
 +  {"ásede",2},
 +  # cantasemo -> cant
 +  {"asemo",2},
 +  # cantásemo -> cant
 +  {"ásemo",2},
 +  # cantasen -> cant
 +  {"asen",2},
 +  # loitavan -> loit
 +  {"avan",2},
 +  # cantaríamo -> cant
 +  {"aríamo",2},
 +  # cantassen -> cant
 +  {"assen",2},
 +  # cantássemo -> cant
 +  {"ássemo",2},
 +  # beberíamo -> beb
 +  {"eríamo",2},
 +  # bebêssemo -> beb
 +  {"êssemo",2},
 +  # partiríamo -> part
 +  {"iríamo",3},
 +  # partíssemo -> part
 +  {"íssemo",3},
 +  # cantáramo -> cant
 +  {"áramo",2},
 +  # cantárei -> cant
 +  {"árei",2},
 +  # cantaren -> cant
 +  {"aren",2},
 +  # cantaremo -> cant
 +  {"aremo",2},
 +  # cantaríei -> cant
 +  {"aríei",2},
 +  {"ássei",2},
 +  # cantávamo-> cant
 +  {"ávamo",2},
 +  # bebêramo -> beb
 +  {"êramo",1},
 +  # beberemo -> beb
 +  {"eremo",1},
 +  # beberíei -> beb
 +  {"eríei",1},
 +  # bebêssei -> beb
 +  {"êssei",1},
 +  # partíramo -> part
 +  {"íramo",3},
 +  # partiremo -> part
 +  {"iremo",3},
 +  # partiríei -> part
 +  {"iríei",3},
 +  # partíssei -> part
 +  {"íssei",3},
 +  # partissen -> part
 +  {"issen",3},
 +  # bebendo -> beb
 +  {"endo",1},
 +  # partindo -> part
 +  {"indo",3},
 +  # propondo -> prop
 +  {"ondo",3},
 +  # cantarde -> cant
 +  {"arde",2},
 +  # cantarei -> cant
 +  {"arei",2},
 +  # cantaria -> cant
 +  {"aria",2},
 +  # cantarmo -> cant
 +  {"armo",2},
 +  # cantasse -> cant
 +  {"asse",2},
 +  {"aste",2},
 +  # cantávei -> cant
 +  {"ávei",2},
 +  # perderão -> perd
 +  {"erão",1},
 +  # beberde -> beb
 +  {"erde",1},
 +  # beberei -> beb
 +  {"erei",1},
 +  # bebêrei -> beb
 +  {"êrei",1},
 +  # beberen -> beb
 +  {"eren",2},
 +  # beberia -> beb
 +  {"eria",1},
 +  # bebermo -> beb
 +  {"ermo",1},
 +  # bebeste -> beb
 +  {"este",1,"",{"faroeste","agreste"}},
 +  # bebíamo -> beb
 +  {"íamo",1},
 +  # fuxian -> fux
 +  {"ian",2,"",{"enfian","eloxian","ensaian"}},
 +  # partirde -> part
 +  {"irde",2},
 +  # partirei -> part
 +  {"irei",3,"",{"admirei"}},
 +  # partiren -> part
 +  {"iren",3},
 +  # partiria -> part
 +  {"iria",3},
 +  # partirmo -> part
 +  {"irmo",3},
 +  # partisse -> part
 +  {"isse",3},
 +  # partiste -> part
 +  {"iste",4},
 +  {"iava",1,"",{"ampliava"}},
 +  # cantamo -> cant
 +  {"amo",2},
 +  # funciona -> func
 +  {"iona",3},
 +  # cantara -> cant
 +  {"ara",2,"",{"arara","prepara"}},
 +  # enviará -> envi
 +  {"ará",2,"",{"alvará","bacará"}},
 +  # cantare -> cant
 +  {"are",2,"",{"prepare"}},
 +  # cantava -> cant
 +  {"ava",2,"",{"agrava"}},
 +  # cantemo -> cant
 +  {"emo",2},
 +  # bebera -> beb
 +  {"era",1,"",{"acelera","espera"}},
 +  # beberá -> beb
 +  {"erá",1},
 +  # bebere -> beb
 +  {"ere",1,"",{"espere"}},
 +  # bebíei -> beb
 +  {"íei",1},
 +  # metin -> met
 +  {"in",3},
 +  # partimo -> part
 +  {"imo",3,"",{"reprimo","intimo","íntimo","nimo","queimo","ximo"}},
 +  # partira -> part
 +  {"ira",3,"",{"fronteira","sátira"}},
 +  {"ído",3},
 +  # partirá -> part
 +  {"irá",3},
 +  # concretizar -> concret
 +  {"tizar",4,"",{"alfabetizar"}},
 +  {"izar",3,"",{"organizar"}},
 +  # saltitar -> salt
 +  {"itar",5,"",{"acreditar","explicitar","estreitar"}},
 +  # partire -> part
 +  {"ire",3,"",{"adquire"}},
 +  # compomo -> comp
 +  {"omo",3},
 +  {"ai",2},
 +  # barbear -> barb
 +  {"ear",4,"",{"alardear","nuclear"}},
 +  # cheguei -> cheg
 +  {"uei",3},
 +  {"uía",5,"u"},
 +  # cantei -> cant
 +  {"ei",3},
 +  # beber -> beb
 +  {"er",1,"",{"éter","pier"}},
 +  # bebeu -> beb
 +  {"eu",1,"",{"chapeu"}},
 +  # bebia -> beb
 +  {"ia",1,"",{"estória","fatia","acia","praia","elogia","mania","lábia","aprecia","polícia","arredia","cheia","ásia"}},
 +  # partir -> part
 +  {"ir",3},
 +  # partiu -> part
 +  {"iu",3},
 +  # fraqueou -> fraqu
 +  {"eou",5},
 +  # chegou -> cheg
 +  {"ou",3},
 +  # bebi -> beb
 +  {"i",1},
 +  # varrede -> varr
 +  {"ede",1,"",{"rede","bípede","céspede","parede","palmípede","vostede","hóspede","adrede"}},
 +  # cantei -> cant
 +  {"ei",3},
 +  # anden -> and
 +  {"en",2},
 +  # descerade -> desc
 +  {"erade",1},
 +  # vivérade -> viv
 +  {"érade",1},
 +  # beberan -> beb
 +  {"eran",2},
 +  # colleramo -> coll
 +  {"eramo",1},
 +  # bebéramo -> beb
 +  {"éramo",1},
 +  # perderán -> perd
 +  {"erán",1},
 +  # varrería -> varr
 +  {"ería",1},
 +  # beberiade -> beb
 +  {"eriade",1},
 +  # beberíade -> beb
 +  {"eríade",1},
 +  # beberiamo -> beb
 +  {"eriamo",1},
 +  # beberian -> beb
 +  {"erian",1},
 +  # beberían -> beb
 +  {"erían",1},
 +  # perderon -> perd
 +  {"eron",1},
 +  # bebese -> beb
 +  {"ese",1},
 +  # bebesedes -> beb
 +  {"esedes",1},
 +  # bebésedes -> beb
 +  {"ésedes",1},
 +  # bebesemo -> beb
 +  {"esemo",1},
 +  # bebésemo -> beb
 +  {"ésemo",1},
 +  # bebesen -> beb
 +  {"esen",1},
 +  # bebêssede -> beb
 +  {"êssede",1},
 +  # chovía -> chov
 +  {"ía",1},
 +  # faciade -> fac
 +  {"iade",1},
 +  # facíade -> fac
 +  {"íade",1},
 +  # perdiamo -> perd
 +  {"iamo",1},
 +  # fuxían -> fux
 +  {"ían",1},
 +  # corriche -> corr
 +  {"iche",1},
 +  # partide -> part
 +  {"ide",1},
 +  # escribirade -> escrib
 +  {"irade",3},
 +  # parírade -> par
 +  {"írade",3},
 +  # partiramo -> part
 +  {"iramo",3},
 +  # fugirán -> fug
 +  {"irán",3},
 +  # viviría -> viv
 +  {"iría",3},
 +  # partiriade -> part
 +  {"iriade",3},
 +  # partiríade -> part
 +  {"iríade",3},
 +  # partiriamo -> part
 +  {"iriamo",3},
 +  # partirian -> part
 +  {"irian",3},
 +  # partirían -> part
 +  {"irían",3},
 +  # reflectiron -> reflect
 +  {"iron",3},
 +  # partise -> part
 +  {"ise",3},
 +  # partisede -> part
 +  {"isede",3},
 +  # partísede -> part
 +  {"ísede",3},
 +  # partisemo -> part
 +  {"isemo",3},
 +  # partísemo -> part
 +  {"ísemo",3},
 +  # partisen -> part
 +  {"isen",3},
 +  # partíssede -> part
 +  {"íssede",3},
 +  {"tizar",3,"",{"alfabetizar"}},
 +  {"ondo",3}};
 +
 +# Vowel step: removes a single trailing vowel (plain or accented) and rewrites
 +# a final "gue"/"que" to "g"/"c".
 +# Each rule reads {suffix, N, replacement, {exceptions}}; replacement and
 +# exceptions may be omitted. Judging by the examples, N is the minimum length
 +# of what remains once the suffix is removed (segue -> "se" + "g", N = 2).
 +# NOTE(review): the header fields are presumably step name, minimum word
 +# length, exception-matching mode and required word endings -- confirm against
 +# the steps-file parser before relying on this.
 +{  "Vowel", 0, 0, {},
 +  # segue -> seg
 +  {"gue",2,"g",{"azougue","dengue","merengue","nurague","rengue"}},
 +  {"que",2,"c",{"alambique","albaricoque","abaroque","alcrique","almadraque","almanaque","arenque","arinque","baduloque","ballestrinque","betoque","bivaque","bloque","bodaque","bosque","breque","buque","cacique","cheque","claque","contradique","coque","croque","dique","duque","enroque","espeque","estoque","estoraque","estraloque","estrinque","milicroque","monicreque","orinque","palenque","parque","penique","picabeque","pique","psique","raque","remolque","xeque","repenique","roque","sotobosque","tabique","tanque","toque","traque","truque","vivaque","xaque"}},
 +  {"a",3,"",{"amasadela","cerva"}},
 +  {"e",3,"",{"marte"}},
 +  {"o",3,"",{"barro","fado","cabo","libro","cervo"}},
 +  {"â",3},
 +  {"ã",3,"",{"amanhã","arapuã","fã","divã","manhã"}},
 +  {"ê",3},
 +  {"ô",3},
 +  {"á",3},
 +  {"é",3},
 +  {"ó",3},
 +  # munxi -> munx
 +  {"i",3}};