| /* |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| */ |
| |
| using System; |
| using System.Collections.Generic; |
| using System.IO; |
| using System.Linq; |
| using Lucene.Net.Analysis.Standard; |
| using Version = Lucene.Net.Util.Version; |
| |
| namespace Lucene.Net.Analysis.El |
| { |
| /* |
| * {@link Analyzer} for the Greek language. |
| * <p> |
| * Supports an external list of stopwords (words |
| * that will not be indexed at all). |
| * A default set of stopwords is used unless an alternative list is specified. |
| * </p> |
| * |
| * <p><b>NOTE</b>: This class uses the same {@link Version} |
| * dependent settings as {@link StandardAnalyzer}.</p> |
| */ |
| public sealed class GreekAnalyzer : Analyzer |
| { |
| /* |
| * List of typical Greek stopwords. |
| */ |
| |
| private static readonly String[] GREEK_STOP_WORDS = { |
| "ο", "η", "το", "οι", "τα", "του", "τησ", "των", "τον", |
| "την", "και", |
| "κι", "κ", "ειμαι", "εισαι", "ειναι", "ειμαστε", "ειστε" |
| , "στο", "στον", |
| "στη", "στην", "μα", "αλλα", "απο", "για", "προσ", "με", |
| "σε", "ωσ", |
| "παρα", "αντι", "κατα", "μετα", "θα", "να", "δε", "δεν", |
| "μη", "μην", |
| "επι", "ενω", "εαν", "αν", "τοτε", "που", "πωσ", "ποιοσ" |
| , "ποια", "ποιο", |
| "ποιοι", "ποιεσ", "ποιων", "ποιουσ", "αυτοσ", "αυτη", |
| "αυτο", "αυτοι", |
| "αυτων", "αυτουσ", "αυτεσ", "αυτα", "εκεινοσ", "εκεινη", |
| "εκεινο", |
| "εκεινοι", "εκεινεσ", "εκεινα", "εκεινων", "εκεινουσ", |
| "οπωσ", "ομωσ", |
| "ισωσ", "οσο", "οτι" |
| }; |
| |
| /* |
| * Returns a set of default Greek-stopwords |
| * @return a set of default Greek-stopwords |
| */ |
| public static ISet<string> GetDefaultStopSet() |
| { |
| return DefaultSetHolder.DEFAULT_SET; |
| } |
| |
| private static class DefaultSetHolder |
| { |
| internal static ISet<string> DEFAULT_SET = CharArraySet.UnmodifiableSet(new CharArraySet((IEnumerable<string>)GREEK_STOP_WORDS, false)); |
| } |
| |
| /* |
| * Contains the stopwords used with the {@link StopFilter}. |
| */ |
| private readonly ISet<string> stopSet; |
| |
| private readonly Version matchVersion; |
| |
| public GreekAnalyzer(Version matchVersion) |
| : this(matchVersion, DefaultSetHolder.DEFAULT_SET) |
| { |
| } |
| |
| /* |
| * Builds an analyzer with the given stop words |
| * |
| * @param matchVersion |
| * lucene compatibility version |
| * @param stopwords |
| * a stopword set |
| */ |
| public GreekAnalyzer(Version matchVersion, ISet<string> stopwords) |
| { |
| stopSet = CharArraySet.UnmodifiableSet(CharArraySet.Copy(stopwords)); |
| this.matchVersion = matchVersion; |
| } |
| |
| /* |
| * Builds an analyzer with the given stop words. |
| * @param stopwords Array of stopwords to use. |
| * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead |
| */ |
| public GreekAnalyzer(Version matchVersion, params string[] stopwords) |
| : this(matchVersion, StopFilter.MakeStopSet(stopwords)) |
| { |
| } |
| |
| /* |
| * Builds an analyzer with the given stop words. |
| * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead |
| */ |
| public GreekAnalyzer(Version matchVersion, IDictionary<string, string> stopwords) |
| : this(matchVersion, stopwords.Keys.ToArray()) |
| { |
| } |
| |
| /* |
| * Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. |
| * |
| * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with |
| * {@link GreekLowerCaseFilter} and {@link StopFilter} |
| */ |
| public override TokenStream TokenStream(String fieldName, TextReader reader) |
| { |
| TokenStream result = new StandardTokenizer(matchVersion, reader); |
| result = new GreekLowerCaseFilter(result); |
| result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), |
| result, stopSet); |
| return result; |
| } |
| |
| private class SavedStreams |
| { |
| protected internal Tokenizer source; |
| protected internal TokenStream result; |
| }; |
| |
| /* |
| * Returns a (possibly reused) {@link TokenStream} which tokenizes all the text |
| * in the provided {@link Reader}. |
| * |
| * @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with |
| * {@link GreekLowerCaseFilter} and {@link StopFilter} |
| */ |
| public override TokenStream ReusableTokenStream(String fieldName, TextReader reader) |
| { |
| SavedStreams streams = (SavedStreams)PreviousTokenStream; |
| if (streams == null) |
| { |
| streams = new SavedStreams(); |
| streams.source = new StandardTokenizer(matchVersion, reader); |
| streams.result = new GreekLowerCaseFilter(streams.source); |
| streams.result = new StopFilter(StopFilter.GetEnablePositionIncrementsVersionDefault(matchVersion), |
| streams.result, stopSet); |
| PreviousTokenStream = streams; |
| } |
| else |
| { |
| streams.source.Reset(reader); |
| } |
| return streams.result; |
| } |
| } |
| } |