// blob: 8e0a86c49f2be0b798991ac5e4a4609764547a7e [file] [log] [blame]
using J2N;
using Lucene.Net.Util;
using System;
using System.IO;
using System.Reflection;
using System.Text;
namespace Lucene.Net.Analysis.Util
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Base class for <see cref="Analyzer"/>s that work with a stopword set. It stores the
/// set and the Lucene version given at construction time and offers static helpers that
/// load a <see cref="CharArraySet"/> from an embedded resource, a file or a reader.
/// </summary>
public abstract class StopwordAnalyzerBase : Analyzer
{
    /// <summary>
    /// The stopword set held by this analyzer. The constructor assigns either
    /// <see cref="CharArraySet.EMPTY_SET"/> or the result of
    /// <c>CharArraySet.UnmodifiableSet</c> applied to a copy of the supplied set.
    /// </summary>
    protected readonly CharArraySet m_stopwords;

    /// <summary>
    /// The Lucene version that was passed to the constructor.
    /// </summary>
    protected readonly LuceneVersion m_matchVersion;

    /// <summary>
    /// Gets the analyzer's stopword set, or an empty set if the analyzer has no stopwords.
    /// </summary>
    /// <value> the set stored in <see cref="m_stopwords"/> </value>
    public virtual CharArraySet StopwordSet
    {
        get { return m_stopwords; }
    }

    /// <summary>
    /// Creates a new instance initialized with the given stopword set.
    /// </summary>
    /// <param name="version">
    /// the Lucene version for cross version compatibility </param>
    /// <param name="stopwords">
    /// the analyzer's stopword set; <c>null</c> selects <see cref="CharArraySet.EMPTY_SET"/> </param>
    protected StopwordAnalyzerBase(LuceneVersion version, CharArraySet stopwords)
    {
        m_matchVersion = version;

        if (stopwords == null)
        {
            // No stopwords supplied: use the shared empty set.
            m_stopwords = CharArraySet.EMPTY_SET;
        }
        else
        {
            // Keep an unmodifiable wrapper around a copy of the caller's set
            // rather than the caller's own instance.
            var copied = CharArraySet.Copy(version, stopwords);
            m_stopwords = CharArraySet.UnmodifiableSet(copied);
        }
    }

    /// <summary>
    /// Creates a new <see cref="Analyzer"/> with an empty stopword set by delegating to
    /// the two-argument constructor with a <c>null</c> stopword set.
    /// </summary>
    /// <param name="version">
    /// the Lucene version for cross version compatibility </param>
    protected StopwordAnalyzerBase(LuceneVersion version)
        : this(version, stopwords: null)
    {
    }

    /// <summary>
    /// Creates a <see cref="CharArraySet"/> from an embedded resource associated with a class. (See
    /// <see cref="Assembly.GetManifestResourceStream(string)"/>). The resource is decoded as UTF-8.
    /// </summary>
    /// <param name="ignoreCase">
    /// <c>true</c> if the set should ignore the case of the
    /// stopwords, otherwise <c>false</c> </param>
    /// <param name="aClass">
    /// a class that is associated with the given resource </param>
    /// <param name="resource">
    /// name of the resource file associated with the given class </param>
    /// <param name="comment">
    /// comment string to ignore in the stopword file </param>
    /// <returns> a <see cref="CharArraySet"/> containing the distinct stopwords from the given
    /// resource </returns>
    /// <exception cref="IOException">
    /// if loading the stopwords throws an <see cref="IOException"/> </exception>
    protected static CharArraySet LoadStopwordSet(bool ignoreCase, Type aClass, string resource, string comment)
    {
        TextReader input = null;
        try
        {
            input = IOUtils.GetDecodingReader(aClass.FindAndGetManifestResourceStream(resource), Encoding.UTF8);

            // LUCENE_CURRENT is used on purpose here; CS0612/CS0618 (obsolete member)
            // warnings are switched off only around this one statement.
#pragma warning disable 612, 618
            var target = new CharArraySet(LuceneVersion.LUCENE_CURRENT, 16, ignoreCase);
#pragma warning restore 612, 618

            return WordlistLoader.GetWordSet(input, comment, target);
        }
        finally
        {
            // Runs on success and on failure; input is still null if opening the reader threw.
            IOUtils.Dispose(input);
        }
    }

    /// <summary>
    /// Creates a <see cref="CharArraySet"/> from a file. The file is decoded as UTF-8.
    /// </summary>
    /// <param name="stopwords">
    /// the stopwords file to load
    /// </param>
    /// <param name="matchVersion">
    /// the Lucene version for cross version compatibility </param>
    /// <returns> a <see cref="CharArraySet"/> containing the distinct stopwords from the given
    /// file </returns>
    /// <exception cref="IOException">
    /// if loading the stopwords throws an <see cref="IOException"/> </exception>
    protected static CharArraySet LoadStopwordSet(FileInfo stopwords, LuceneVersion matchVersion)
    {
        TextReader input = null;
        try
        {
            input = IOUtils.GetDecodingReader(stopwords, Encoding.UTF8);
            var words = WordlistLoader.GetWordSet(input, matchVersion);
            return words;
        }
        finally
        {
            // Runs on success and on failure; input is still null if opening the reader threw.
            IOUtils.Dispose(input);
        }
    }

    /// <summary>
    /// Creates a <see cref="CharArraySet"/> from a reader. The reader is handed to
    /// <c>IOUtils.Dispose</c> when this method exits, whether loading succeeded or threw.
    /// </summary>
    /// <param name="stopwords">
    /// the stopwords reader to load
    /// </param>
    /// <param name="matchVersion">
    /// the Lucene version for cross version compatibility </param>
    /// <returns> a <see cref="CharArraySet"/> containing the distinct stopwords from the given
    /// reader </returns>
    /// <exception cref="IOException">
    /// if loading the stopwords throws an <see cref="IOException"/> </exception>
    protected static CharArraySet LoadStopwordSet(TextReader stopwords, LuceneVersion matchVersion)
    {
        try
        {
            var words = WordlistLoader.GetWordSet(stopwords, matchVersion);
            return words;
        }
        finally
        {
            IOUtils.Dispose(stopwords);
        }
    }
}
}