blob: d406e593ed8c9f6eea818d363c87f020dca130da [file] [log] [blame]
Index: NOTICE.txt
===================================================================
--- NOTICE.txt (revision 906571)
+++ NOTICE.txt (working copy)
@@ -23,6 +23,11 @@
contrib/analyzers/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt.
See http://members.unine.ch/jacques.savoy/clef/index.html.
+The Romanian analyzer (contrib/analyzers) comes with a default
+stopword list that is BSD-licensed created by Jacques Savoy. The file resides in
+contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt.
+See http://members.unine.ch/jacques.savoy/clef/index.html.
+
The Bulgarian analyzer (contrib/analyzers) comes with a default
stopword list that is BSD-licensed created by Jacques Savoy. The file resides in
contrib/analyzers/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt.
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.da;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new DanishAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "undersøg", "undersøg");
+ checkOneTermReuse(a, "undersøgelse", "undersøg");
+ // stopword
+ assertAnalyzesTo(a, "på", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("undersøgelse");
+ Analyzer a = new DanishAnalyzer(Version.LUCENE_CURRENT,
+ DanishAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "undersøgelse", "undersøgelse");
+ checkOneTermReuse(a, "undersøg", "undersøg");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\da\TestDanishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java (revision 0)
@@ -0,0 +1,93 @@
+package org.apache.lucene.analysis.de;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.util.Version;
+
+public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
+ public void testReusableTokenStream() throws Exception {
+ Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
+ checkOneTermReuse(a, "Tisch", "tisch");
+ checkOneTermReuse(a, "Tische", "tisch");
+ checkOneTermReuse(a, "Tischen", "tisch");
+ }
+
+ public void testExclusionTableBWCompat() throws IOException {
+ GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT,
+ new StringReader("Fischen Trinken")));
+ CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+ set.add("fischen");
+ filter.setExclusionSet(set);
+ assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
+ }
+
+ public void testWithKeywordAttribute() throws IOException {
+ CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+ set.add("fischen");
+ GermanStemFilter filter = new GermanStemFilter(
+ new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
+ "Fischen Trinken")), set));
+ assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
+ }
+
+ public void testWithKeywordAttributeAndExclusionTable() throws IOException {
+ CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+ set.add("fischen");
+ CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
+ set1.add("trinken");
+ set1.add("fischen");
+ GermanStemFilter filter = new GermanStemFilter(
+ new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
+ "Fischen Trinken")), set));
+ filter.setExclusionSet(set1);
+ assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
+ }
+
+ /*
+ * Test that changes to the exclusion table are applied immediately
+ * when using reusable token streams.
+ */
+ public void testExclusionTableReuse() throws Exception {
+ GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
+ checkOneTermReuse(a, "tischen", "tisch");
+ a.setStemExclusionTable(new String[] { "tischen" });
+ checkOneTermReuse(a, "tischen", "tischen");
+ }
+
+ /** test some features of the new snowball filter
+ * these only pass with LUCENE_CURRENT, not if you use o.a.l.a.de.GermanStemmer
+ */
+ public void testGermanSpecials() throws Exception {
+ GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
+ // a/o/u + e is equivalent to the umlaut form
+ checkOneTermReuse(a, "Schaltflächen", "schaltflach");
+ checkOneTermReuse(a, "Schaltflaechen", "schaltflach");
+ // here they are with the old stemmer
+ a = new GermanAnalyzer(Version.LUCENE_30);
+ checkOneTermReuse(a, "Schaltflächen", "schaltflach");
+ checkOneTermReuse(a, "Schaltflaechen", "schaltflaech");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\de\TestGermanAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java (working copy)
@@ -20,15 +20,14 @@
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
-import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;
/**
@@ -40,6 +39,8 @@
public class TestGermanStemFilter extends BaseTokenStreamTestCase {
public void testStemming() throws Exception {
+ Tokenizer tokenizer = new KeywordTokenizer(new StringReader(""));
+ TokenFilter filter = new GermanStemFilter(new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer));
// read test cases from external file:
File dataDir = new File(System.getProperty("dataDir", "./bin"));
File testFile = new File(dataDir, "org/apache/lucene/analysis/de/data.txt");
@@ -55,68 +56,12 @@
continue; // ignore comments and empty lines
String[] parts = line.split(";");
//System.out.println(parts[0] + " -- " + parts[1]);
- check(parts[0], parts[1]);
+ tokenizer.reset(new StringReader(parts[0]));
+ filter.reset();
+ assertTokenStreamContents(filter, new String[] { parts[1] });
}
breader.close();
isr.close();
fis.close();
}
-
- public void testReusableTokenStream() throws Exception {
- Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
- checkReuse(a, "Tisch", "tisch");
- checkReuse(a, "Tische", "tisch");
- checkReuse(a, "Tischen", "tisch");
- }
-
- public void testExclusionTableBWCompat() throws IOException {
- GermanStemFilter filter = new GermanStemFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT,
- new StringReader("Fischen Trinken")));
- CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
- set.add("fischen");
- filter.setExclusionSet(set);
- assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
- }
-
- public void testWithKeywordAttribute() throws IOException {
- CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
- set.add("fischen");
- GermanStemFilter filter = new GermanStemFilter(
- new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
- "Fischen Trinken")), set));
- assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
- }
-
- public void testWithKeywordAttributeAndExclusionTable() throws IOException {
- CharArraySet set = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
- set.add("fischen");
- CharArraySet set1 = new CharArraySet(Version.LUCENE_CURRENT, 1, true);
- set1.add("trinken");
- set1.add("fischen");
- GermanStemFilter filter = new GermanStemFilter(
- new KeywordMarkerTokenFilter(new LowerCaseTokenizer(Version.LUCENE_CURRENT, new StringReader(
- "Fischen Trinken")), set));
- filter.setExclusionSet(set1);
- assertTokenStreamContents(filter, new String[] { "fischen", "trinken" });
- }
-
- /*
- * Test that changes to the exclusion table are applied immediately
- * when using reusable token streams.
- */
- public void testExclusionTableReuse() throws Exception {
- GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
- checkReuse(a, "tischen", "tisch");
- a.setStemExclusionTable(new String[] { "tischen" });
- checkReuse(a, "tischen", "tischen");
- }
-
-
- private void check(final String input, final String expected) throws Exception {
- checkOneTerm(new GermanAnalyzer(Version.LUCENE_CURRENT), input, expected);
- }
-
- private void checkReuse(Analyzer a, String input, String expected) throws Exception {
- checkOneTermReuse(a, input, expected);
- }
}
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.sv;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new SwedishAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "jaktkarlarne", "jaktkarl");
+ checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
+ // stopword
+ assertAnalyzesTo(a, "och", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("jaktkarlarne");
+ Analyzer a = new SwedishAnalyzer(Version.LUCENE_CURRENT,
+ SwedishAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "jaktkarlarne", "jaktkarlarne");
+ checkOneTermReuse(a, "jaktkarlens", "jaktkarl");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\sv\TestSwedishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.fi;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new FinnishAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
+ checkOneTermReuse(a, "edeltäjistään", "edeltäj");
+ // stopword
+ assertAnalyzesTo(a, "olla", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("edeltäjistään");
+ Analyzer a = new FinnishAnalyzer(Version.LUCENE_CURRENT,
+ FinnishAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "edeltäjiinsä", "edeltäj");
+ checkOneTermReuse(a, "edeltäjistään", "edeltäjistään");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\fi\TestFinnishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java (revision 0)
@@ -0,0 +1,44 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.PorterStemFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
+ public void testOverride() throws IOException {
+ // lets make booked stem to books
+ // the override filter will convert "booked" to "books",
+ // but also mark it with KeywordAttribute so Porter will not change it.
+ Map<String,String> dictionary = new HashMap<String,String>();
+ dictionary.put("booked", "books");
+ Tokenizer tokenizer = new KeywordTokenizer(new StringReader("booked"));
+ TokenStream stream = new PorterStemFilter(
+ new StemmerOverrideFilter(Version.LUCENE_CURRENT, tokenizer, dictionary));
+ assertTokenStreamContents(stream, new String[] { "books" });
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\miscellaneous\TestStemmerOverrideFilter.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenizerTest.java (working copy)
@@ -18,10 +18,8 @@
*/
-import java.io.IOException;
import java.io.StringReader;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
/**
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ngram/NGramTokenFilterTest.java (working copy)
@@ -22,7 +22,6 @@
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
-import java.io.IOException;
import java.io.StringReader;
/**
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java (working copy)
@@ -17,6 +17,8 @@
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
@@ -113,6 +115,94 @@
}
+ /**
+ * @deprecated remove this test for Lucene 4.0
+ */
+ @Deprecated
+ public void testAnalyzer30() throws Exception {
+ FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_30);
+
+ assertAnalyzesTo(fa, "", new String[] {
+ });
+
+ assertAnalyzesTo(
+ fa,
+ "chien chat cheval",
+ new String[] { "chien", "chat", "cheval" });
+
+ assertAnalyzesTo(
+ fa,
+ "chien CHAT CHEVAL",
+ new String[] { "chien", "chat", "cheval" });
+
+ assertAnalyzesTo(
+ fa,
+ " chien ,? + = - CHAT /: > CHEVAL",
+ new String[] { "chien", "chat", "cheval" });
+
+ assertAnalyzesTo(fa, "chien++", new String[] { "chien" });
+
+ assertAnalyzesTo(
+ fa,
+ "mot \"entreguillemet\"",
+ new String[] { "mot", "entreguillemet" });
+
+ // let's do some french specific tests now
+
+ /* 1. couldn't resist
+ I would expect this to stay one term as in French the minus
+ sign is often used for composing words */
+ assertAnalyzesTo(
+ fa,
+ "Jean-François",
+ new String[] { "jean", "françois" });
+
+ // 2. stopwords
+ assertAnalyzesTo(
+ fa,
+ "le la chien les aux chat du des à cheval",
+ new String[] { "chien", "chat", "cheval" });
+
+ // some nouns and adjectives
+ assertAnalyzesTo(
+ fa,
+ "lances chismes habitable chiste éléments captifs",
+ new String[] {
+ "lanc",
+ "chism",
+ "habit",
+ "chist",
+ "élément",
+ "captif" });
+
+ // some verbs
+ assertAnalyzesTo(
+ fa,
+ "finissions souffrirent rugissante",
+ new String[] { "fin", "souffr", "rug" });
+
+ // some everything else
+ // aujourd'hui stays one term which is OK
+ assertAnalyzesTo(
+ fa,
+ "C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ ",
+ new String[] {
+ "c3po",
+ "aujourd'hui",
+ "oeuf",
+ "ïâöûàä",
+ "anticonstitutionnel",
+ "jav" });
+
+ // some more everything else
+ // here 1940-1945 stays as one term, 1940:1945 not ?
+ assertAnalyzesTo(
+ fa,
+ "33Bis 1940-1945 1940:1945 (---i+++)*",
+ new String[] { "33bis", "1940-1945", "1940", "1945", "i" });
+
+ }
+
public void testReusableTokenStream() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
// stopwords
@@ -157,4 +247,28 @@
assertAnalyzesTo(fa, "habitable chiste", new String[] { "habitable",
"chist" });
}
+
+ public void testElision() throws Exception {
+ FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
+ assertAnalyzesTo(fa, "voir l'embrouille", new String[] { "voir", "embrouill" });
+ }
+
+ /**
+ * Prior to 3.1, this analyzer had no lowercase filter.
+ * stopwords were case sensitive. Preserve this for back compat.
+ * @deprecated Remove this test in Lucene 4.0
+ */
+ @Deprecated
+ public void testBuggyStopwordsCasing() throws IOException {
+ FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_30);
+ assertAnalyzesTo(a, "Votre", new String[] { "votr" });
+ }
+
+ /**
+ * Test that stopwords are not case sensitive
+ */
+ public void testStopwordsCasing() throws IOException {
+ FrenchAnalyzer a = new FrenchAnalyzer(Version.LUCENE_31);
+ assertAnalyzesTo(a, "Votre", new String[] { });
+ }
}
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/nl/TestDutchStemmer.java (working copy)
@@ -100,9 +100,6 @@
check("ophalend", "ophal");
check("ophalers", "ophaler");
check("ophef", "ophef");
- check("opheffen", "ophef"); // versus snowball 'opheff'
- check("opheffende", "ophef"); // versus snowball 'opheff'
- check("opheffing", "ophef"); // versus snowball 'opheff'
check("opheldering", "ophelder");
check("ophemelde", "ophemeld");
check("ophemelen", "ophemel");
@@ -118,6 +115,24 @@
check("ophouden", "ophoud");
}
+ /**
+ * @deprecated remove this test in Lucene 4.0
+ */
+ @Deprecated
+ public void testOldBuggyStemmer() throws Exception {
+ Analyzer a = new DutchAnalyzer(Version.LUCENE_30);
+ checkOneTermReuse(a, "opheffen", "ophef"); // versus snowball 'opheff'
+ checkOneTermReuse(a, "opheffende", "ophef"); // versus snowball 'opheff'
+ checkOneTermReuse(a, "opheffing", "ophef"); // versus snowball 'opheff'
+ }
+
+ public void testSnowballCorrectness() throws Exception {
+ Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
+ checkOneTermReuse(a, "opheffen", "opheff");
+ checkOneTermReuse(a, "opheffende", "opheff");
+ checkOneTermReuse(a, "opheffing", "opheff");
+ }
+
public void testReusableTokenStream() throws Exception {
Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
@@ -161,6 +176,25 @@
checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
}
+ /**
+ * Prior to 3.1, this analyzer had no lowercase filter.
+ * stopwords were case sensitive. Preserve this for back compat.
+ * @deprecated Remove this test in Lucene 4.0
+ */
+ @Deprecated
+ public void testBuggyStopwordsCasing() throws IOException {
+ DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_30);
+ assertAnalyzesTo(a, "Zelf", new String[] { "zelf" });
+ }
+
+ /**
+ * Test that stopwords are not case sensitive
+ */
+ public void testStopwordsCasing() throws IOException {
+ DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_31);
+ assertAnalyzesTo(a, "Zelf", new String[] { });
+ }
+
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected);
}
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.hu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new HungarianAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "babakocsi", "babakocs");
+ checkOneTermReuse(a, "babakocsijáért", "babakocs");
+ // stopword
+ assertAnalyzesTo(a, "által", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("babakocsi");
+ Analyzer a = new HungarianAnalyzer(Version.LUCENE_CURRENT,
+ HungarianAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "babakocsi", "babakocsi");
+ checkOneTermReuse(a, "babakocsijáért", "babakocs");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\hu\TestHungarianAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.no;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing in classpath */
+ public void testResourcesAvailable() {
+ new NorwegianAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "havnedistriktene", "havnedistrikt");
+ checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
+ // stopword
+ assertAnalyzesTo(a, "det", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("havnedistriktene");
+ Analyzer a = new NorwegianAnalyzer(Version.LUCENE_CURRENT,
+ NorwegianAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "havnedistriktene", "havnedistriktene");
+ checkOneTermReuse(a, "havnedistrikter", "havnedistrikt");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\no\TestNorwegianAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.ro;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing from the classpath */
+ public void testResourcesAvailable() {
+ new RomanianAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "absenţa", "absenţ");
+ checkOneTermReuse(a, "absenţi", "absenţ");
+ // stopword
+ assertAnalyzesTo(a, "îl", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("absenţa");
+ Analyzer a = new RomanianAnalyzer(Version.LUCENE_CURRENT,
+ RomanianAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "absenţa", "absenţa");
+ checkOneTermReuse(a, "absenţi", "absenţ");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\ro\TestRomanianAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing from the classpath */
+ public void testResourcesAvailable() {
+ new PortugueseAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "quilométricas", "quilométr");
+ checkOneTermReuse(a, "quilométricos", "quilométr");
+ // stopword
+ assertAnalyzesTo(a, "não", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("quilométricas");
+ Analyzer a = new PortugueseAnalyzer(Version.LUCENE_CURRENT,
+ PortugueseAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "quilométricas", "quilométricas");
+ checkOneTermReuse(a, "quilométricos", "quilométr");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\pt\TestPortugueseAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.tr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing from the classpath */
+ public void testResourcesAvailable() {
+ new TurkishAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "ağacı", "ağaç");
+ checkOneTermReuse(a, "ağaç", "ağaç");
+ // stopword
+ assertAnalyzesTo(a, "dolayı", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("ağacı");
+ Analyzer a = new TurkishAnalyzer(Version.LUCENE_CURRENT,
+ TurkishAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "ağacı", "ağacı");
+ checkOneTermReuse(a, "ağaç", "ağaç");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\tr\TestTurkishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianLetterTokenizer.java (working copy)
@@ -25,7 +25,9 @@
/**
* Testcase for {@link RussianLetterTokenizer}
+ * @deprecated Remove this test class in Lucene 4.0
*/
+@Deprecated
public class TestRussianLetterTokenizer extends BaseTokenStreamTestCase {
public void testRussianLetterTokenizer() throws IOException {
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java (working copy)
@@ -50,9 +50,14 @@
dataDir = new File(System.getProperty("dataDir", "./bin"));
}
- public void testUnicode() throws IOException
+ /**
+ * @deprecated remove this test and its datafiles in Lucene 4.0
+ * the Snowball version has its own data tests.
+ */
+ @Deprecated
+ public void testUnicode30() throws IOException
{
- RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
+ RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_30);
inWords =
new InputStreamReader(
new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUTF8.txt")),
@@ -110,12 +115,22 @@
}
}
+ /** @deprecated remove this test in Lucene 4.0: stopwords changed */
+ @Deprecated
+ public void testReusableTokenStream30() throws Exception {
+ Analyzer a = new RussianAnalyzer(Version.LUCENE_30);
+ assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
+ new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
+ assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
+ new String[] { "знан", "хран", "тайн" });
+ }
+
public void testReusableTokenStream() throws Exception {
Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
- new String[] { "знан", "хран", "тайн" });
+ new String[] { "знан", "эт", "хран", "тайн" });
}
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ru/TestRussianStem.java (working copy)
@@ -24,6 +24,10 @@
import java.io.FileInputStream;
import java.util.ArrayList;
+/**
+ * @deprecated Remove this test class (and its datafiles!) in Lucene 4.0
+ */
+@Deprecated
public class TestRussianStem extends LuceneTestCase
{
private ArrayList words = new ArrayList();
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java (working copy)
@@ -22,11 +22,8 @@
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
-import java.util.HashSet;
-import java.util.Arrays;
import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.miscellaneous.PrefixAndSuffixAwareTokenFilter;
import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/el/GreekAnalyzerTest.java (working copy)
@@ -18,7 +18,6 @@
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/**
@@ -63,4 +62,23 @@
assertAnalyzesToReuse(a, "\u03a0\u03a1\u039f\u03ab\u03a0\u039f\u0398\u0395\u03a3\u0395\u0399\u03a3 \u0386\u03c8\u03bf\u03b3\u03bf\u03c2, \u03bf \u03bc\u03b5\u03c3\u03c4\u03cc\u03c2 \u03ba\u03b1\u03b9 \u03bf\u03b9 \u03ac\u03bb\u03bb\u03bf\u03b9",
new String[] { "\u03c0\u03c1\u03bf\u03c5\u03c0\u03bf\u03b8\u03b5\u03c3\u03b5\u03b9\u03c3", "\u03b1\u03c8\u03bf\u03b3\u03bf\u03c3", "\u03bc\u03b5\u03c3\u03c4\u03bf\u03c3", "\u03b1\u03bb\u03bb\u03bf\u03b9" });
}
+
+ /**
+ * Greek Analyzer didn't call standardFilter, so no normalization of acronyms.
+ * Check that this behavior is preserved.
+ * @deprecated remove this test in Lucene 4.0
+ */
+ @Deprecated
+ public void testAcronymBWCompat() throws Exception {
+ Analyzer a = new GreekAnalyzer(Version.LUCENE_30);
+ assertAnalyzesTo(a, "Α.Π.Τ.", new String[] { "α.π.τ." });
+ }
+
+ /**
+ * test that acronym normalization works
+ */
+ public void testAcronym() throws Exception {
+ Analyzer a = new GreekAnalyzer(Version.LUCENE_31);
+ assertAnalyzesTo(a, "Α.Π.Τ.", new String[] { "απτ" });
+ }
}
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.en;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing from the classpath */
+ public void testResourcesAvailable() {
+ new EnglishAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "books", "book");
+ checkOneTermReuse(a, "book", "book");
+ // stopword
+ assertAnalyzesTo(a, "the", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("books");
+ Analyzer a = new EnglishAnalyzer(Version.LUCENE_CURRENT,
+ EnglishAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "books", "books");
+ checkOneTermReuse(a, "book", "book");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\en\TestEnglishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java (working copy)
@@ -22,8 +22,6 @@
import java.util.HashSet;
import java.util.Set;
-import javax.print.DocFlavor.CHAR_ARRAY;
-
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java (revision 906571)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/ar/TestArabicNormalizationFilter.java (working copy)
@@ -21,7 +21,6 @@
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
/**
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.es;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing from the classpath */
+ public void testResourcesAvailable() {
+ new SpanishAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "chicana", "chican");
+ checkOneTermReuse(a, "chicano", "chican");
+ // stopword
+ assertAnalyzesTo(a, "los", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("chicano");
+ Analyzer a = new SpanishAnalyzer(Version.LUCENE_CURRENT,
+ SpanishAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "chicana", "chican");
+ checkOneTermReuse(a, "chicano", "chicano");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\es\TestSpanishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java (revision 0)
@@ -0,0 +1,54 @@
+package org.apache.lucene.analysis.it;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;
+
+public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
+ /** This test fails with NPE when the
+ * stopwords file is missing from the classpath */
+ public void testResourcesAvailable() {
+ new ItalianAnalyzer(Version.LUCENE_CURRENT);
+ }
+
+ /** test stopwords and stemming */
+ public void testBasics() throws IOException {
+ Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT);
+ // stemming
+ checkOneTermReuse(a, "abbandonata", "abbandon");
+ checkOneTermReuse(a, "abbandonati", "abbandon");
+ // stopword
+ assertAnalyzesTo(a, "dallo", new String[] {});
+ }
+
+ /** test use of exclusion set */
+ public void testExclude() throws IOException {
+ Set<String> exclusionSet = new HashSet<String>();
+ exclusionSet.add("abbandonata");
+ Analyzer a = new ItalianAnalyzer(Version.LUCENE_CURRENT,
+ ItalianAnalyzer.getDefaultStopSet(), exclusionSet);
+ checkOneTermReuse(a, "abbandonata", "abbandonata");
+ checkOneTermReuse(a, "abbandonati", "abbandon");
+ }
+}
Property changes on: contrib\analyzers\common\src\test\org\apache\lucene\analysis\it\TestItalianAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java (revision 0)
@@ -0,0 +1,129 @@
+package org.apache.lucene.analysis.da;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.DanishStemmer;
+
+/**
+ * {@link Analyzer} for Danish.
+ */
+public final class DanishAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Danish stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "danish_stop.txt";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public DanishAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public DanishAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public DanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided, and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new DanishStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\da\DanishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/da/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Danish.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\da\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java (working copy)
@@ -36,10 +36,12 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.German2Stemmer;
/**
* {@link Analyzer} for German language.
@@ -60,7 +62,7 @@
* List of typical german stopwords.
* @deprecated use {@link #getDefaultStopSet()} instead
*/
- //TODO make this private in 3.1
+ //TODO make this private in 3.1, remove in 4.0
@Deprecated
public final static String[] GERMAN_STOP_WORDS = {
"einer", "eine", "eines", "einem", "einen",
@@ -77,6 +79,9 @@
"durch", "wegen", "wird"
};
+ /** File containing default German stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "german_stop.txt";
+
/**
* Returns a set of default German-stopwords
* @return a set of default German-stopwords
@@ -86,8 +91,21 @@
}
private static class DefaultSetHolder {
- private static final Set<?> DEFAULT_SET = CharArraySet.unmodifiableSet(new CharArraySet(
+ /** @deprecated remove in Lucene 4.0 */
+ @Deprecated
+ private static final Set<?> DEFAULT_SET_30 = CharArraySet.unmodifiableSet(new CharArraySet(
Version.LUCENE_CURRENT, Arrays.asList(GERMAN_STOP_WORDS), false));
+ private static final Set<?> DEFAULT_SET;
+ static {
+ try {
+ DEFAULT_SET =
+ WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
}
/**
@@ -105,7 +123,9 @@
* {@link #getDefaultStopSet()}.
*/
public GermanAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_SET);
+ this(matchVersion,
+ matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_SET
+ : DefaultSetHolder.DEFAULT_SET_30);
}
/**
@@ -199,8 +219,9 @@
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with {@link StandardFilter},
- * {@link LowerCaseFilter}, {@link StopFilter}, and
- * {@link GermanStemFilter}
+ * {@link LowerCaseFilter}, {@link StopFilter},
+ * {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided, and
+ * {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
@@ -210,6 +231,10 @@
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter( matchVersion, result, stopwords);
result = new KeywordMarkerTokenFilter(result, exclusionSet);
- return new TokenStreamComponents(source, new GermanStemFilter(result));
+ if (matchVersion.onOrAfter(Version.LUCENE_31))
+ result = new SnowballFilter(result, new German2Stemmer());
+ else
+ result = new GermanStemFilter(result);
+ return new TokenStreamComponents(source, result);
}
}
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java (revision 0)
@@ -0,0 +1,129 @@
+package org.apache.lucene.analysis.sv;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.SwedishStemmer;
+
+/**
+ * {@link Analyzer} for Swedish.
+ */
+public final class SwedishAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Swedish stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "swedish_stop.txt";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public SwedishAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public SwedishAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public SwedishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new SwedishStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\sv\SwedishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/sv/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Swedish.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\sv\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java (revision 0)
@@ -0,0 +1,129 @@
+package org.apache.lucene.analysis.fi;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.FinnishStemmer;
+
+/**
+ * {@link Analyzer} for Finnish.
+ */
+public final class FinnishAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Finnish stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "finnish_stop.txt";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public FinnishAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public FinnishAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public FinnishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new FinnishStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\fi\FinnishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fi/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Finnish.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\fi\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java (revision 0)
@@ -0,0 +1,70 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.CharArrayMap;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
+
+/**
+ * Provides the ability to override any {@link KeywordAttribute} aware stemmer
+ * with custom dictionary-based stemming.
+ */
+public final class StemmerOverrideFilter extends TokenFilter {
+ private final CharArrayMap<String> dictionary;
+
+ private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
+
+ /**
+ * Create a new StemmerOverrideFilter, performing dictionary-based stemming
+ * with the provided <code>dictionary</code>.
+ * <p>
+ * Any dictionary-stemmed terms will be marked with {@link KeywordAttribute}
+ * so that they will not be stemmed with stemmers down the chain.
+ * </p>
+ */
+ public StemmerOverrideFilter(Version matchVersion, TokenStream input,
+ Map<?,String> dictionary) {
+ super(input);
+ this.dictionary = dictionary instanceof CharArrayMap ?
+ (CharArrayMap<String>) dictionary : CharArrayMap.copy(matchVersion, dictionary);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
+ String stem = dictionary.get(termAtt.termBuffer(), 0, termAtt.termLength());
+ if (stem != null) {
+ termAtt.setTermBuffer(stem);
+ keywordAtt.setKeyword(true);
+ }
+ }
+ return true;
+ } else {
+ return false;
+ }
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\miscellaneous\StemmerOverrideFilter.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/miscellaneous/EmptyTokenStream.java (working copy)
@@ -18,7 +18,6 @@
*/
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
import java.io.IOException;
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java (working copy)
@@ -17,7 +17,6 @@
* limitations under the License.
*/
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java (working copy)
@@ -19,7 +19,6 @@
import java.io.IOException;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemmer.java (working copy)
@@ -25,8 +25,10 @@
* refer to http://snowball.sourceforge.net/french/stemmer.html<br>
* (French stemming algorithm) for details
* </p>
+ * @deprecated Use {@link org.tartarus.snowball.ext.FrenchStemmer} instead,
+ * which has the same functionality. This filter will be removed in Lucene 4.0
*/
-
+@Deprecated
public class FrenchStemmer {
/**
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java (working copy)
@@ -20,6 +20,7 @@
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@@ -40,7 +41,11 @@
* the {@link KeywordAttribute} before this {@link TokenStream}.
* </p>
* @see KeywordMarkerTokenFilter
+ * @deprecated Use {@link SnowballFilter} with
+ * {@link org.tartarus.snowball.ext.FrenchStemmer} instead, which has the
+ * same functionality. This filter will be removed in Lucene 4.0
*/
+@Deprecated
public final class FrenchStemFilter extends TokenFilter {
/**
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java (working copy)
@@ -68,7 +68,7 @@
/**
* Constructs an elision filter with standard stop words
*/
- protected ElisionFilter(Version matchVersion, TokenStream input) {
+ public ElisionFilter(Version matchVersion, TokenStream input) {
this(matchVersion, input, DEFAULT_ARTICLES);
}
@@ -77,7 +77,7 @@
* @deprecated use {@link #ElisionFilter(Version, TokenStream)} instead
*/
@Deprecated
- protected ElisionFilter(TokenStream input) {
+ public ElisionFilter(TokenStream input) {
this(Version.LUCENE_30, input);
}
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java (working copy)
@@ -27,6 +27,7 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
@@ -68,7 +69,7 @@
* Extended list of typical French stopwords.
* @deprecated use {@link #getDefaultStopSet()} instead
*/
- // TODO make this private in 3.1
+ // TODO make this private in 3.1, remove in 4.0
@Deprecated
public final static String[] FRENCH_STOP_WORDS = {
"a", "afin", "ai", "ainsi", "après", "attendu", "au", "aujourd", "auquel", "aussi",
@@ -95,6 +96,9 @@
"été", "être", "ô"
};
+ /** File containing default French stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "french_stop.txt";
+
/**
* Contains words that should be indexed but not stemmed.
*/
@@ -110,16 +114,31 @@
}
private static class DefaultSetHolder {
- static final Set<?> DEFAULT_STOP_SET = CharArraySet
+ /** @deprecated remove this in Lucene 4.0 */
+ @Deprecated
+ static final Set<?> DEFAULT_STOP_SET_30 = CharArraySet
.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(FRENCH_STOP_WORDS),
false));
+ static final Set<?> DEFAULT_STOP_SET;
+ static {
+ try {
+ DEFAULT_STOP_SET =
+ WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
}
/**
- * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
+ * Builds an analyzer with the default stop words ({@link #getDefaultStopSet}).
*/
public FrenchAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ this(matchVersion,
+ matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET
+ : DefaultSetHolder.DEFAULT_STOP_SET_30);
}
/**
@@ -207,20 +226,34 @@
* {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a {@link StandardTokenizer}
- * filtered with {@link StandardFilter}, {@link StopFilter},
- * {@link FrenchStemFilter} and {@link LowerCaseFilter}
+ * filtered with {@link StandardFilter}, {@link ElisionFilter},
+ * {@link LowerCaseFilter}, {@link StopFilter},
+ * {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
+ * and {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- TokenStream result = new StandardFilter(source);
- result = new StopFilter(matchVersion, result, stopwords);
- if(!excltable.isEmpty())
- result = new KeywordMarkerTokenFilter(result, excltable);
- result = new FrenchStemFilter(result);
- // Convert to lowercase after stemming!
- return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
+ if (matchVersion.onOrAfter(Version.LUCENE_31)) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new ElisionFilter(matchVersion, result);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!excltable.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, excltable);
+ result = new SnowballFilter(result, new org.tartarus.snowball.ext.FrenchStemmer());
+ return new TokenStreamComponents(source, result);
+ } else {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!excltable.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, excltable);
+ result = new FrenchStemFilter(result);
+ // Convert to lowercase after stemming!
+ return new TokenStreamComponents(source, new LowerCaseFilter(matchVersion, result));
+ }
}
}
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemmer.java (working copy)
@@ -26,8 +26,10 @@
* the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
* algorithm in Martin Porter's snowball project.
* </p>
+ * @deprecated Use {@link org.tartarus.snowball.ext.DutchStemmer} instead,
+ * which has the same functionality. This filter will be removed in Lucene 4.0
*/
-
+@Deprecated
public class DutchStemmer {
/**
* Buffer for the terms while stemming them.
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java (working copy)
@@ -26,6 +26,7 @@
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;// for javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@@ -42,7 +43,11 @@
* the {@link KeywordAttribute} before this {@link TokenStream}.
* </p>
* @see KeywordMarkerTokenFilter
+ * @deprecated Use {@link SnowballFilter} with
+ * {@link org.tartarus.snowball.ext.DutchStemmer} instead, which has the
+ * same functionality. This filter will be removed in Lucene 4.0
*/
+@Deprecated
public final class DutchStemFilter extends TokenFilter {
/**
* The actual token in the input stream.
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (working copy)
@@ -20,11 +20,14 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
+import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
@@ -33,7 +36,6 @@
import java.io.File;
import java.io.IOException;
import java.io.Reader;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@@ -60,19 +62,11 @@
* @deprecated use {@link #getDefaultStopSet()} instead
*/
@Deprecated
- public final static String[] DUTCH_STOP_WORDS =
- {
- "de", "en", "van", "ik", "te", "dat", "die", "in", "een",
- "hij", "het", "niet", "zijn", "is", "was", "op", "aan", "met", "als", "voor", "had",
- "er", "maar", "om", "hem", "dan", "zou", "of", "wat", "mijn", "men", "dit", "zo",
- "door", "over", "ze", "zich", "bij", "ook", "tot", "je", "mij", "uit", "der", "daar",
- "haar", "naar", "heb", "hoe", "heeft", "hebben", "deze", "u", "want", "nog", "zal",
- "me", "zij", "nu", "ge", "geen", "omdat", "iets", "worden", "toch", "al", "waren",
- "veel", "meer", "doen", "toen", "moet", "ben", "zonder", "kan", "hun", "dus",
- "alles", "onder", "ja", "eens", "hier", "wie", "werd", "altijd", "doch", "wordt",
- "wezen", "kunnen", "ons", "zelf", "tegen", "na", "reeds", "wil", "kon", "niets",
- "uw", "iemand", "geweest", "andere"
- };
+ public final static String[] DUTCH_STOP_WORDS = getDefaultStopSet().toArray(new String[0]);
+
+ /** File containing default Dutch stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "dutch_stop.txt";
+
/**
* Returns an unmodifiable instance of the default stop-words set.
* @return an unmodifiable instance of the default stop-words set.
@@ -82,9 +76,18 @@
}
private static class DefaultSetHolder {
- static final Set<?> DEFAULT_STOP_SET = CharArraySet
- .unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
- Arrays.asList(DUTCH_STOP_WORDS), false));
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
}
@@ -223,18 +226,32 @@
* text in the provided {@link Reader}.
*
* @return A {@link TokenStream} built from a {@link StandardTokenizer}
- * filtered with {@link StandardFilter}, {@link StopFilter},
- * and {@link DutchStemFilter}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
+ * {@link StemmerOverrideFilter}, and {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader aReader) {
- final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
- TokenStream result = new StandardFilter(source);
- result = new StopFilter(matchVersion, result, stoptable);
- if (!excltable.isEmpty())
- result = new KeywordMarkerTokenFilter(result, excltable);
- result = new DutchStemFilter(result, stemdict);
- return new TokenStreamComponents(source, result);
+ if (matchVersion.onOrAfter(Version.LUCENE_31)) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stoptable);
+ if (!excltable.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, excltable);
+ if (!stemdict.isEmpty())
+ result = new StemmerOverrideFilter(matchVersion, result, stemdict);
+ result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
+ return new TokenStreamComponents(source, result);
+ } else {
+ final Tokenizer source = new StandardTokenizer(matchVersion, aReader);
+ TokenStream result = new StandardFilter(source);
+ result = new StopFilter(matchVersion, result, stoptable);
+ if (!excltable.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, excltable);
+ result = new DutchStemFilter(result, stemdict);
+ return new TokenStreamComponents(source, result);
+ }
}
}
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/th/ThaiWordFilter.java (working copy)
@@ -19,7 +19,6 @@
import java.io.IOException;
import java.util.Locale;
import java.lang.Character.UnicodeBlock;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java (revision 0)
@@ -0,0 +1,129 @@
+package org.apache.lucene.analysis.hu;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.HungarianStemmer;
+
+/**
+ * {@link Analyzer} for Hungarian.
+ */
+public final class HungarianAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Hungarian stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "hungarian_stop.txt";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public HungarianAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public HungarianAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public HungarianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new HungarianStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\hu\HungarianAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/hu/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Hungarian.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\hu\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java (revision 0)
@@ -0,0 +1,130 @@
+package org.apache.lucene.analysis.no;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.NorwegianStemmer;
+
+/**
+ * {@link Analyzer} for Norwegian.
+ */
+public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Norwegian stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "norwegian_stop.txt";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public NorwegianAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public NorwegianAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public NorwegianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new NorwegianStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
+
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\no\NorwegianAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/no/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Norwegian.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\no\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java (working copy)
@@ -17,7 +17,6 @@
*/
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java (working copy)
@@ -19,7 +19,6 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java (working copy)
@@ -19,7 +19,6 @@
import java.io.IOException;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java (revision 0)
@@ -0,0 +1,133 @@
+package org.apache.lucene.analysis.ro;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.RomanianStemmer;
+
+/**
+ * {@link Analyzer} for Romanian.
+ */
+public final class RomanianAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Romanian stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
+ /**
+ * The comment character in the stopwords file.
+ * All lines prefixed with this will be ignored.
+ */
+ private static final String STOPWORDS_COMMENT = "#";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = loadStopwordSet(false, RomanianAnalyzer.class,
+ DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public RomanianAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public RomanianAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public RomanianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new RomanianStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\ro\RomanianAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ro/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Romanian.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\ro\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (working copy)
@@ -39,7 +39,10 @@
* <li> As of 3.1, uses {@link TurkishLowerCaseFilter} for Turkish language.
* </ul>
* </p>
+ * @deprecated Use the language-specific analyzer in contrib/analyzers instead.
+ * This analyzer will be removed in Lucene 4.0
*/
+@Deprecated
public final class SnowballAnalyzer extends Analyzer {
private String name;
private Set<?> stopSet;
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (working copy)
@@ -21,6 +21,7 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
import org.apache.lucene.analysis.LowerCaseFilter; // javadoc @link
@@ -39,14 +40,14 @@
*/
public final class SnowballFilter extends TokenFilter {
- private SnowballProgram stemmer;
+ private final SnowballProgram stemmer;
- private TermAttribute termAtt;
+ private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
super(input);
this.stemmer = stemmer;
- termAtt = addAttribute(TermAttribute.class);
}
/**
@@ -67,23 +68,24 @@
} catch (Exception e) {
throw new RuntimeException(e.toString());
}
- termAtt = addAttribute(TermAttribute.class);
}
/** Returns the next input Token, after being stemmed */
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- char termBuffer[] = termAtt.termBuffer();
- final int length = termAtt.termLength();
- stemmer.setCurrent(termBuffer, length);
- stemmer.stem();
- final char finalTerm[] = stemmer.getCurrentBuffer();
- final int newLength = stemmer.getCurrentBufferLength();
- if (finalTerm != termBuffer)
- termAtt.setTermBuffer(finalTerm, 0, newLength);
- else
- termAtt.setTermLength(newLength);
+ if (!keywordAttr.isKeyword()) {
+ char termBuffer[] = termAtt.termBuffer();
+ final int length = termAtt.termLength();
+ stemmer.setCurrent(termBuffer, length);
+ stemmer.stem();
+ final char finalTerm[] = stemmer.getCurrentBuffer();
+ final int newLength = stemmer.getCurrentBufferLength();
+ if (finalTerm != termBuffer)
+ termAtt.setTermBuffer(finalTerm, 0, newLength);
+ else
+ termAtt.setTermLength(newLength);
+ }
return true;
} else {
return false;
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java (revision 0)
@@ -0,0 +1,129 @@
+package org.apache.lucene.analysis.pt;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.PortugueseStemmer;
+
+/**
+ * {@link Analyzer} for Portuguese.
+ */
+public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Portuguese stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "portuguese_stop.txt";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public PortugueseAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public PortugueseAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public PortugueseAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new PortugueseStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\pt\PortugueseAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/pt/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Portuguese.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\pt\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java (revision 0)
@@ -0,0 +1,132 @@
+package org.apache.lucene.analysis.tr;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.TurkishStemmer;
+
+/**
+ * {@link Analyzer} for Turkish.
+ */
+public final class TurkishAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Turkish stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "stopwords.txt";
+ /**
+ * The comment character in the stopwords file.
+ * All lines prefixed with this will be ignored.
+ */
+ private static final String STOPWORDS_COMMENT = "#";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = loadStopwordSet(false, TurkishAnalyzer.class,
+ DEFAULT_STOPWORD_FILE, STOPWORDS_COMMENT);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set", ex);
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public TurkishAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public TurkishAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public TurkishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link TurkishLowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new TurkishLowerCaseFilter(result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new TurkishStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\tr\TurkishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/package.html (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/tr/package.html (working copy)
@@ -17,15 +17,6 @@
-->
<html><head></head>
<body>
-Support for Turkish.
-<p>
-This package contains just the TokenStream for handling turkish casing,
-for a stemmer please see the snowball package.
-</p>
-<p>
-WARNING: SnowballAnalyzer uses LowerCaseFilter by default, even when the
-language is set to Turkish, so you will need to construct your own
-analyzer that combines TurkishLowerCaseFilter and SnowballFilter.
-</p>
+Analyzer for Turkish.
</body>
-</html>
\ No newline at end of file
+</html>
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianLetterTokenizer.java (working copy)
@@ -21,6 +21,7 @@
import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer; // for javadocs
import org.apache.lucene.analysis.LetterTokenizer; // for javadocs
+import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@@ -35,8 +36,11 @@
* <li>As of 3.1, {@link CharTokenizer} uses an int based API to normalize and
* detect token characters. See {@link CharTokenizer#isTokenChar(int)} and
* {@link CharTokenizer#normalize(int)} for details.</li>
- * </ul>
+ * </ul>
+ * @deprecated Use {@link StandardTokenizer} instead, which has the same functionality.
+ * This filter will be removed in Lucene 4.0
*/
+@Deprecated
public class RussianLetterTokenizer extends CharTokenizer
{
private static final int DIGIT_0 = '0';
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemmer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemmer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemmer.java (working copy)
@@ -19,7 +19,10 @@
/**
* Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
+ * @deprecated Use {@link org.tartarus.snowball.ext.RussianStemmer} instead,
+ * which has the same functionality. This filter will be removed in Lucene 4.0
*/
+@Deprecated
class RussianStemmer
{
// positions of RV, R1 and R2 respectively
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.ru.RussianStemmer;//javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter; // javadoc @link
import java.io.IOException;
@@ -40,7 +41,11 @@
* the {@link KeywordAttribute} before this {@link TokenStream}.
* </p>
* @see KeywordMarkerTokenFilter
+ * @deprecated Use {@link SnowballFilter} with
+ * {@link org.tartarus.snowball.ext.RussianStemmer} instead, which has the
+ * same functionality. This filter will be removed in Lucene 4.0
*/
+@Deprecated
public final class RussianStemFilter extends TokenFilter
{
/**
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java (working copy)
@@ -17,6 +17,7 @@
* limitations under the License.
*/
+import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Map;
@@ -26,11 +27,15 @@
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.Version;
/**
@@ -44,9 +49,11 @@
public final class RussianAnalyzer extends StopwordAnalyzerBase
{
/**
- * List of typical Russian stopwords.
+ * List of typical Russian stopwords. (for backwards compatibility)
+ * @deprecated Remove this for LUCENE 4.0
*/
- private static final String[] RUSSIAN_STOP_WORDS = {
+ @Deprecated
+ private static final String[] RUSSIAN_STOP_WORDS_30 = {
"а", "без", "более", "бы", "был", "была", "были", "было", "быть", "в",
"вам", "вас", "весь", "во", "вот", "все", "всего", "всех", "вы", "где",
"да", "даже", "для", "до", "его", "ее", "ей", "ею", "если", "есть",
@@ -59,10 +66,27 @@
"чем", "что", "чтобы", "чье", "чья", "эта", "эти", "это", "я"
};
+ /** File containing default Russian stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "russian_stop.txt";
+
private static class DefaultSetHolder {
- static final Set<?> DEFAULT_STOP_SET = CharArraySet
+ /** @deprecated remove this for Lucene 4.0 */
+ @Deprecated
+ static final Set<?> DEFAULT_STOP_SET_30 = CharArraySet
.unmodifiableSet(new CharArraySet(Version.LUCENE_CURRENT,
- Arrays.asList(RUSSIAN_STOP_WORDS), false));
+ Arrays.asList(RUSSIAN_STOP_WORDS_30), false));
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET =
+ WordlistLoader.getSnowballWordSet(SnowballFilter.class, DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set", ex);
+ }
+ }
}
private final Set<?> stemExclusionSet;
@@ -77,7 +101,9 @@
}
public RussianAnalyzer(Version matchVersion) {
- this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ this(matchVersion,
+ matchVersion.onOrAfter(Version.LUCENE_31) ? DefaultSetHolder.DEFAULT_STOP_SET
+ : DefaultSetHolder.DEFAULT_STOP_SET_30);
}
/**
@@ -132,19 +158,30 @@
* provided {@link Reader}.
*
* @return {@link TokenStreamComponents} built from a
- * {@link RussianLetterTokenizer} filtered with
+ * {@link StandardTokenizer} filtered with {@link StandardFilter},
* {@link LowerCaseFilter}, {@link StopFilter},
- * and {@link RussianStemFilter}
+ * {@link KeywordMarkerTokenFilter} if a stem exclusion set is provided,
+ * and {@link SnowballFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
- final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
- TokenStream result = new LowerCaseFilter(matchVersion, source);
- result = new StopFilter(matchVersion, result, stopwords);
- if(!stemExclusionSet.isEmpty())
- result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
- return new TokenStreamComponents(source, new RussianStemFilter(result));
-
+ if (matchVersion.onOrAfter(Version.LUCENE_31)) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
+ result, stemExclusionSet);
+ result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
+ return new TokenStreamComponents(source, result);
+ } else {
+ final Tokenizer source = new RussianLetterTokenizer(matchVersion, reader);
+ TokenStream result = new LowerCaseFilter(matchVersion, source);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if (!stemExclusionSet.isEmpty()) result = new KeywordMarkerTokenFilter(
+ result, stemExclusionSet);
+ return new TokenStreamComponents(source, new RussianStemFilter(result));
+ }
}
}
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.util.Version;
@@ -117,13 +118,15 @@
*
* @return {@link TokenStreamComponents} built from a
* {@link StandardTokenizer} filtered with
- * {@link GreekLowerCaseFilter} and {@link StopFilter}
+ * {@link GreekLowerCaseFilter}, {@link StandardFilter} and {@link StopFilter}
*/
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
final Tokenizer source = new StandardTokenizer(matchVersion, reader);
- final TokenStream result = new GreekLowerCaseFilter(source);
+ TokenStream result = new GreekLowerCaseFilter(source);
+ if (matchVersion.onOrAfter(Version.LUCENE_31))
+ result = new StandardFilter(result);
return new TokenStreamComponents(source, new StopFilter(matchVersion, result, stopwords));
}
}
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (revision 0)
@@ -0,0 +1,113 @@
+package org.apache.lucene.analysis.en;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.PorterStemFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+
+/**
+ * {@link Analyzer} for English.
+ */
+public final class EnglishAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET = StandardAnalyzer.STOP_WORDS_SET;
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #getDefaultStopSet}.
+ */
+ public EnglishAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public EnglishAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public EnglishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link PorterStemFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new PorterStemFilter(result);
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\en\EnglishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for English.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\en\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (revision 906571)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java (working copy)
@@ -21,7 +21,6 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/** Set the positionIncrement of all tokens to the "positionIncrement",
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java (revision 0)
@@ -0,0 +1,129 @@
+package org.apache.lucene.analysis.es;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.SpanishStemmer;
+
+/**
+ * {@link Analyzer} for Spanish.
+ */
+public final class SpanishAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Spanish stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "spanish_stop.txt";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set", ex);
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public SpanishAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public SpanishAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public SpanishAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new SpanishStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\es\SpanishAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/es/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Spanish.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\es\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java (revision 0)
@@ -0,0 +1,129 @@
+package org.apache.lucene.analysis.it;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.KeywordMarkerTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
+import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;
+import org.tartarus.snowball.ext.ItalianStemmer;
+
+/**
+ * {@link Analyzer} for Italian.
+ */
+public final class ItalianAnalyzer extends StopwordAnalyzerBase {
+ private final Set<?> stemExclusionSet;
+
+ /** File containing default Italian stopwords. */
+ public final static String DEFAULT_STOPWORD_FILE = "italian_stop.txt";
+
+ /**
+ * Returns an unmodifiable instance of the default stop words set.
+ * @return default stop words set.
+ */
+ public static Set<?> getDefaultStopSet(){
+ return DefaultSetHolder.DEFAULT_STOP_SET;
+ }
+
+ /**
+ * Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
+ * accesses the static final set the first time.
+ */
+ private static class DefaultSetHolder {
+ static final Set<?> DEFAULT_STOP_SET;
+
+ static {
+ try {
+ DEFAULT_STOP_SET = WordlistLoader.getSnowballWordSet(SnowballFilter.class,
+ DEFAULT_STOPWORD_FILE);
+ } catch (IOException ex) {
+ // default set should always be present as it is part of the
+ // distribution (JAR)
+ throw new RuntimeException("Unable to load default stopword set");
+ }
+ }
+ }
+
+ /**
+ * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
+ */
+ public ItalianAnalyzer(Version matchVersion) {
+ this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ */
+ public ItalianAnalyzer(Version matchVersion, Set<?> stopwords) {
+ this(matchVersion, stopwords, CharArraySet.EMPTY_SET);
+ }
+
+ /**
+ * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
+ * provided this analyzer will add a {@link KeywordMarkerTokenFilter} before
+ * stemming.
+ *
+ * @param matchVersion lucene compatibility version
+ * @param stopwords a stopword set
+ * @param stemExclusionSet a set of terms not to be stemmed
+ */
+ public ItalianAnalyzer(Version matchVersion, Set<?> stopwords, Set<?> stemExclusionSet) {
+ super(matchVersion, stopwords);
+ this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(
+ matchVersion, stemExclusionSet));
+ }
+
+ /**
+ * Creates a {@link TokenStreamComponents} which tokenizes all the text in the provided
+ * {@link Reader}.
+ *
+ * @return A {@link TokenStreamComponents} built from a {@link StandardTokenizer}
+ * filtered with {@link StandardFilter}, {@link LowerCaseFilter},
+ * {@link StopFilter}, {@link KeywordMarkerTokenFilter} if a stem
+ * exclusion set is provided and {@link SnowballFilter}.
+ */
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName,
+ Reader reader) {
+ final Tokenizer source = new StandardTokenizer(matchVersion, reader);
+ TokenStream result = new StandardFilter(source);
+ result = new LowerCaseFilter(matchVersion, result);
+ result = new StopFilter(matchVersion, result, stopwords);
+ if(!stemExclusionSet.isEmpty())
+ result = new KeywordMarkerTokenFilter(result, stemExclusionSet);
+ result = new SnowballFilter(result, new ItalianStemmer());
+ return new TokenStreamComponents(source, result);
+ }
+}
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\it\ItalianAnalyzer.java
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/package.html
===================================================================
--- contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/package.html (revision 0)
+++ contrib/analyzers/common/src/java/org/apache/lucene/analysis/it/package.html (revision 0)
@@ -0,0 +1,22 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html><head></head>
+<body>
+Analyzer for Italian.
+</body>
+</html>
Property changes on: contrib\analyzers\common\src\java\org\apache\lucene\analysis\it\package.html
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt
===================================================================
--- contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt (revision 0)
+++ contrib/analyzers/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt (revision 0)
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceşti
+aceştia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aş
+aşadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deşi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eşti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+nişte
+noastră
+noastre
+noi
+noştri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+şi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+ţi
+ţie
+tine
+toată
+toate
+tot
+toţi
+totuşi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voştri
+vostru
+vouă
+vreo
+vreun
Property changes on: contrib\analyzers\common\src\resources\org\apache\lucene\analysis\ro\stopwords.txt
___________________________________________________________________
Added: svn:eol-style
+ native
Index: contrib/analyzers/common/src/resources/org/apache/lucene/analysis/tr/stopwords.txt
===================================================================
--- contrib/analyzers/common/src/resources/org/apache/lucene/analysis/tr/stopwords.txt (revision 0)
+++ contrib/analyzers/common/src/resources/org/apache/lucene/analysis/tr/stopwords.txt (revision 0)
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beş
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birşey
+birşeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+değil
+diğer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eğer
+elli
+en
+etmesi
+etti
+ettiği
+ettiğini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+işte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduğu
+olduğunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+rağmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+şey
+şeyden
+şeyi
+şeyler
+şöyle
+şu
+şuna
+şunda
+şundan
+şunları
+şunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiş
+yine
+yirmi
+yoksa
+yüz
+zaten
Property changes on: contrib\analyzers\common\src\resources\org\apache\lucene\analysis\tr\stopwords.txt
___________________________________________________________________
Added: svn:eol-style
+ native