using J2N.Collections.Generic.Extensions;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Text;
namespace Lucene.Net.Analysis.Synonym
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Mapping rules for use with <see cref="SlowSynonymFilter"/>
/// </summary>
/// @deprecated (3.4) Use <see cref="SynonymFilterFactory"/> instead. Only for precise index backwards compatibility. This factory will be removed in Lucene 5.0.
[Obsolete("(3.4) Use SynonymFilterFactory instead. Only for precise index backwards compatibility. This factory will be removed in Lucene 5.0.")]
internal class SlowSynonymMap
{
/// <summary>
/// @lucene.internal </summary>
public CharArrayMap<SlowSynonymMap> Submap // recursive: Map<String, SynonymMap>
{
get => submap;
set => submap = value;
}
private CharArrayMap<SlowSynonymMap> submap;
/// <summary>
/// @lucene.internal </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public Token[] Synonyms
{
get => synonyms;
set => synonyms = value;
}
private Token[] synonyms;
internal int flags;
internal const int INCLUDE_ORIG = 0x01; // emit the original matched tokens in addition to the synonyms
internal const int IGNORE_CASE = 0x02; // match input case-insensitively
public SlowSynonymMap()
{
}
public SlowSynonymMap(bool ignoreCase)
{
if (ignoreCase)
{
flags |= IGNORE_CASE;
}
}
public virtual bool IncludeOrig => (flags & INCLUDE_ORIG) != 0;
public virtual bool IgnoreCase => (flags & IGNORE_CASE) != 0;
/// <param name="singleMatch"> <see cref="IList{String}"/>, the sequence of strings to match </param>
/// <param name="replacement"> <see cref="IList{Token}"/> the list of tokens to use on a match </param>
/// <param name="includeOrig"> sets a flag on this mapping signaling the generation of matched tokens in addition to the replacement tokens </param>
/// <param name="mergeExisting"> merge the replacement tokens with any other mappings that exist </param>
public virtual void Add(IList<string> singleMatch, IList<Token> replacement, bool includeOrig, bool mergeExisting)
{
var currMap = this;
foreach (string str in singleMatch)
{
if (currMap.submap == null)
{
// For now, hardcode at 4.0, as it's what the old code did.
// It would be nice to fix, but we shouldn't store a version in each submap!!!
currMap.submap = new CharArrayMap<SlowSynonymMap>(LuceneVersion.LUCENE_CURRENT, 1, IgnoreCase);
}
var map = currMap.submap.Get(str);
if (map == null)
{
map = new SlowSynonymMap();
map.flags |= flags & IGNORE_CASE;
currMap.submap.Put(str, map);
}
currMap = map;
}
if (currMap.synonyms != null && !mergeExisting)
{
throw new ArgumentException("SynonymFilter: there is already a mapping for [" + string.Join(", ", singleMatch) + "]");
}
IList<Token> superset = currMap.synonyms == null ? replacement : MergeTokens(currMap.synonyms, replacement);
currMap.synonyms = superset.ToArray();
if (includeOrig)
{
currMap.flags |= INCLUDE_ORIG;
}
}
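// A rough usage sketch (the rule below is hypothetical, not taken from this file):
// registering the two-word rule "big apple => new york" with merging enabled would
// look something like this:
//
// var map = new SlowSynonymMap(ignoreCase: true);
// map.Add(new List<string> { "big", "apple" },
// SlowSynonymMap.MakeTokens(new List<string> { "new", "york" }),
// includeOrig: false, mergeExisting: true);
//
// Each matched word becomes one level of nesting, so the replacement tokens end up
// on map.Submap.Get("big").Submap.Get("apple").Synonyms.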
public override string ToString()
{
var sb = new StringBuilder("<");
if (synonyms != null)
{
sb.Append("[");
for (int i = 0; i < synonyms.Length; i++)
{
if (i != 0)
{
sb.Append(',');
}
sb.Append(synonyms[i]);
}
if ((flags & INCLUDE_ORIG) != 0)
{
sb.Append(",ORIG");
}
sb.Append("],");
}
sb.Append(submap);
sb.Append(">");
return sb.ToString();
}
/// <summary>
/// Produces a <see cref="IList{Token}"/> from a <see cref="IList{String}"/>
/// </summary>
public static IList<Token> MakeTokens(IList<string> strings)
{
IList<Token> ret = new List<Token>(strings.Count);
foreach (string str in strings)
{
Token newTok = new Token(str, 0, 0, "SYNONYM");
ret.Add(newTok);
}
return ret;
}
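// For example, MakeTokens(new List<string> { "new", "york" }) yields two tokens of
// type "SYNONYM" with zero start/end offsets and the default position increment of 1,
// so they occupy consecutive positions when replayed by the filter.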
/// <summary>
/// Merge two lists of tokens, producing a single list with manipulated positionIncrements so that
/// the tokens end up at the same position.
///
/// Example: [a b] merged with [c d] produces [a/c b/d] ('/' denotes tokens in the same position)
/// Example: [a,5 b,2] merged with [c d,4 e,4] produces [c d,4/a b,2 e,2] (a,n means a has posInc=n)
/// </summary>
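/// <remarks>
/// Tracing the second example against the code below: lst1 = [a,5 b,2] places a at
/// absolute position 5 and b at 7; lst2 = [c d,4 e,4] places c at 1, d at 5, and e at 9.
/// The merged stream is therefore c (inc 1), then d (inc 4) and a (inc 0) sharing
/// position 5, then b (inc 2), then e (inc 2).
/// </remarks>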
public static IList<Token> MergeTokens(IList<Token> lst1, IList<Token> lst2)
{
var result = new List<Token>();
if (lst1 == null || lst2 == null)
{
if (lst2 != null)
{
result.AddRange(lst2);
}
if (lst1 != null)
{
result.AddRange(lst1);
}
return result;
}
int pos = 0;
using (var iter1 = lst1.GetEnumerator())
using (var iter2 = lst2.GetEnumerator())
{
var tok1 = iter1.MoveNext() ? iter1.Current : null;
var tok2 = iter2.MoveNext() ? iter2.Current : null;
int pos1 = tok1 != null ? tok1.PositionIncrement : 0;
int pos2 = tok2 != null ? tok2.PositionIncrement : 0;
while (tok1 != null || tok2 != null)
{
while (tok1 != null && (pos1 <= pos2 || tok2 == null))
{
var tok = new Token(tok1.StartOffset, tok1.EndOffset, tok1.Type);
tok.CopyBuffer(tok1.Buffer, 0, tok1.Length);
tok.PositionIncrement = pos1 - pos;
result.Add(tok);
pos = pos1;
tok1 = iter1.MoveNext() ? iter1.Current : null;
pos1 += tok1 != null ? tok1.PositionIncrement : 0;
}
while (tok2 != null && (pos2 <= pos1 || tok1 == null))
{
var tok = new Token(tok2.StartOffset, tok2.EndOffset, tok2.Type);
tok.CopyBuffer(tok2.Buffer, 0, tok2.Length);
tok.PositionIncrement = pos2 - pos;
result.Add(tok);
pos = pos2;
tok2 = iter2.MoveNext() ? iter2.Current : null;
pos2 += tok2 != null ? tok2.PositionIncrement : 0;
}
}
return result;
}
}
}
}