// blob: 1b05100cea41922baaac05156c3a9d9bb7522698 [file] [log] [blame]
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
namespace Lucene.Net.Analysis.Synonym
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Factory that creates a <see cref="SynonymFilter"/> backed by a
    /// <see cref="SynonymMap"/> which is loaded from the configured
    /// <c>synonyms</c> resource(s) when <see cref="Inform(IResourceLoader)"/> is called.
    /// <para/>
    /// Recognized arguments: <c>synonyms</c> (required), <c>ignoreCase</c>
    /// (default <c>false</c>), <c>expand</c> (default <c>true</c>), <c>format</c>
    /// (<c>solr</c>, <c>wordnet</c>, or a parser type name) and <c>tokenizerFactory</c>.
    /// When <c>tokenizerFactory</c> is given, every remaining argument is handed to
    /// that tokenizer factory, with a leading <c>tokenizerFactory.</c> prefix stripped.
    /// </summary>
    internal sealed class FSTSynonymFilterFactory : TokenFilterFactory, IResourceLoaderAware
    {
        private readonly bool ignoreCase;
        private readonly string tokenizerFactory;
        private readonly string synonyms;
        private readonly string format;
        private readonly bool expand;

        // Arguments forwarded to the tokenizer factory named by "tokenizerFactory".
        private readonly IDictionary<string, string> tokArgs = new Dictionary<string, string>();

        // Built by Inform(); Create() reads it.
        private SynonymMap map;

        // LUCENENET: Optimized by pre-compiling regex and lazy-loading
        private static class Holder
        {
            public static readonly Regex TOKENIZER_FACTORY_REPLACEMENT_PATTERN = new Regex("^tokenizerFactory\\.", RegexOptions.Compiled);
        }

        /// <summary>
        /// Creates the factory from its configuration arguments.
        /// </summary>
        /// <param name="args">Configuration arguments; see the class summary for the recognized keys.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="args"/> still contains entries after all
        /// recognized keys have been consumed.
        /// </exception>
        [Obsolete(@"(3.4) use SynonymFilterFactory instead. this is only a backwards compatibility")]
        public FSTSynonymFilterFactory(IDictionary<string, string> args)
            : base(args)
        {
            ignoreCase = GetBoolean(args, "ignoreCase", false);
            synonyms = Require(args, "synonyms");
            format = Get(args, "format");
            expand = GetBoolean(args, "expand", true);

            tokenizerFactory = Get(args, "tokenizerFactory");
            if (tokenizerFactory != null)
            {
                tokArgs["luceneMatchVersion"] = LuceneMatchVersion.ToString();

                // Iterate over a snapshot of the keys because args is modified inside the loop.
                var keys = new List<string>(args.Keys);
                foreach (string key in keys)
                {
                    tokArgs[Holder.TOKENIZER_FACTORY_REPLACEMENT_PATTERN.Replace(key, "")] = args[key];
                    // The argument now belongs to the tokenizer factory; drop it so the
                    // "Unknown parameters" check below does not reject it.
                    args.Remove(key);
                }
            }

            // NOTE(review): this check presumes the Get/GetBoolean/Require helpers remove
            // the keys they consume from args - confirm against the base class.
            if (args.Count > 0)
            {
                throw new ArgumentException(string.Format(J2N.Text.StringFormatter.CurrentCulture, "Unknown parameters: {0}", args));
            }
        }

        /// <summary>
        /// Wraps <paramref name="input"/> in a <see cref="SynonymFilter"/>, or returns it
        /// unchanged when the loaded map has no FST.
        /// </summary>
        /// <param name="input">The stream to filter.</param>
        /// <returns>The filtered stream, or <paramref name="input"/> itself.</returns>
        public override TokenStream Create(TokenStream input)
        {
            // if the fst is null, it means there's actually no synonyms... just return the original stream
            // as there is nothing to do here.
            return map.Fst == null ? input : new SynonymFilter(input, map, ignoreCase);
        }

        /// <summary>
        /// Loads the synonym map using <paramref name="loader"/>. The synonym entries are
        /// analyzed with the configured tokenizer factory (a <see cref="WhitespaceTokenizer"/>
        /// when none is configured), lower-cased when <c>ignoreCase</c> is set.
        /// </summary>
        /// <param name="loader">Resolves types and opens the synonym resources.</param>
        /// <exception cref="IOException">Wraps any exception raised while loading or parsing the synonyms.</exception>
        public void Inform(IResourceLoader loader)
        {
            TokenizerFactory factory = tokenizerFactory == null ? null : LoadTokenizerFactory(loader, tokenizerFactory);

            Analyzer analyzer = Analyzer.NewAnonymous(createComponents: (fieldName, reader) =>
            {
#pragma warning disable 612, 618
                Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(LuceneVersion.LUCENE_CURRENT, reader) : factory.Create(reader);
                TokenStream stream = ignoreCase ? (TokenStream)new LowerCaseFilter(LuceneVersion.LUCENE_CURRENT, tokenizer) : tokenizer;
#pragma warning restore 612, 618
                return new TokenStreamComponents(tokenizer, stream);
            });

            try
            {
                // Any format other than "solr"/"wordnet" is passed through as a parser type name.
                string formatClass = format;
                if (format == null || format.Equals("solr", StringComparison.Ordinal))
                {
                    formatClass = typeof(SolrSynonymParser).AssemblyQualifiedName;
                }
                else if (format.Equals("wordnet", StringComparison.Ordinal))
                {
                    formatClass = typeof(WordnetSynonymParser).AssemblyQualifiedName;
                }
                // TODO: expose dedup as a parameter?
                map = LoadSynonyms(loader, formatClass, true, analyzer);
            }
            catch (Exception e)
            {
                throw new IOException("Error parsing synonyms file:", e);
            }
        }

        /// <summary>
        /// Load synonyms with the given <see cref="SynonymMap.Parser"/> class.
        /// </summary>
        /// <param name="loader">Resolves <paramref name="cname"/> and opens the synonym resources.</param>
        /// <param name="cname">Type name of the parser to instantiate.</param>
        /// <param name="dedup">Passed to the parser constructor.</param>
        /// <param name="analyzer">Passed to the parser constructor.</param>
        /// <returns>The map built by the parser after all resources have been parsed.</returns>
        private SynonymMap LoadSynonyms(IResourceLoader loader, string cname, bool dedup, Analyzer analyzer)
        {
            Encoding decoder = Encoding.UTF8;

            // Construction failures propagate to the caller unchanged.
            Type clazz = loader.FindType(cname /*, typeof(SynonymMap.Parser) */);
            SynonymMap.Parser parser = (SynonymMap.Parser)Activator.CreateInstance(clazz, new object[] { dedup, expand, analyzer });

            if (File.Exists(synonyms))
            {
                // The whole value names a single existing file.
                parser.Parse(new StreamReader(loader.OpenResource(synonyms), decoder));
            }
            else
            {
                // Otherwise treat the value as a list of resource names and parse each one.
                IList<string> files = SplitFileNames(synonyms);
                foreach (string file in files)
                {
                    parser.Parse(new StreamReader(loader.OpenResource(file), decoder));
                }
            }
            return parser.Build();
        }

        // (there are no tests for this functionality)
        /// <summary>
        /// Instantiates the tokenizer factory type <paramref name="cname"/> with the
        /// forwarded tokenizer arguments and, when it implements
        /// <see cref="IResourceLoaderAware"/>, informs it of <paramref name="loader"/>.
        /// </summary>
        /// <param name="loader">Resolves <paramref name="cname"/>.</param>
        /// <param name="cname">Type name of the tokenizer factory.</param>
        /// <returns>The ready-to-use tokenizer factory.</returns>
        private TokenizerFactory LoadTokenizerFactory(IResourceLoader loader, string cname)
        {
            // Construction failures propagate to the caller unchanged.
            Type clazz = loader.FindType(cname /*, typeof(TokenizerFactory) */);
            TokenizerFactory tokFactory = (TokenizerFactory)Activator.CreateInstance(clazz, new object[] { tokArgs });
            if (tokFactory is IResourceLoaderAware resourceLoaderAware)
            {
                resourceLoaderAware.Inform(loader);
            }
            return tokFactory;
        }
    }
}