blob: 12019d1fdd48e943380392e506f650970bcc5893 [file] [log] [blame]
using J2N;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
/*
* dk.brics.automaton
*
* Copyright (c) 2001-2009 Anders Moeller
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* this SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* this SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
namespace Lucene.Net.Util.Automaton
{
/// <summary>
/// Construction of basic automata.
/// <para/>
/// @lucene.experimental
/// </summary>
public sealed class BasicAutomata
{
private BasicAutomata()
{
}
/// <summary>
/// Returns a new (deterministic) automaton with the empty language.
/// </summary>
public static Automaton MakeEmpty()
{
Automaton a = new Automaton();
State s = new State();
a.initial = s;
a.deterministic = true;
return a;
}
/// <summary>
/// Returns a new (deterministic) automaton that accepts only the empty string.
/// </summary>
public static Automaton MakeEmptyString()
{
Automaton a = new Automaton();
a.singleton = "";
a.deterministic = true;
return a;
}
/// <summary>
/// Returns a new (deterministic) automaton that accepts all strings.
/// </summary>
public static Automaton MakeAnyString()
{
Automaton a = new Automaton();
State s = new State();
a.initial = s;
s.accept = true;
s.AddTransition(new Transition(Character.MinCodePoint, Character.MaxCodePoint, s));
a.deterministic = true;
return a;
}
/// <summary>
/// Returns a new (deterministic) automaton that accepts any single codepoint.
/// </summary>
public static Automaton MakeAnyChar()
{
return MakeCharRange(Character.MinCodePoint, Character.MaxCodePoint);
}
/// <summary>
/// Returns a new (deterministic) automaton that accepts a single codepoint of
/// the given value.
/// </summary>
public static Automaton MakeChar(int c)
{
Automaton a = new Automaton();
a.singleton = new string(Character.ToChars(c));
a.deterministic = true;
return a;
}
/// <summary>
/// Returns a new (deterministic) automaton that accepts a single codepoint whose
/// value is in the given interval (including both end points).
/// </summary>
public static Automaton MakeCharRange(int min, int max)
{
if (min == max)
{
return MakeChar(min);
}
Automaton a = new Automaton();
State s1 = new State();
State s2 = new State();
a.initial = s1;
s2.accept = true;
if (min <= max)
{
s1.AddTransition(new Transition(min, max, s2));
}
a.deterministic = true;
return a;
}
/// <summary>
/// Constructs sub-automaton corresponding to decimal numbers of length
/// <c>x.Substring(n).Length</c>.
/// </summary>
private static State AnyOfRightLength(string x, int n)
{
State s = new State();
if (x.Length == n)
{
s.Accept = true;
}
else
{
s.AddTransition(new Transition('0', '9', AnyOfRightLength(x, n + 1)));
}
return s;
}
/// <summary>
/// Constructs sub-automaton corresponding to decimal numbers of value at least
/// <c>x.Substring(n)</c> and length <c>x.Substring(n).Length</c>.
/// </summary>
private static State AtLeast(string x, int n, ICollection<State> initials, bool zeros)
{
State s = new State();
if (x.Length == n)
{
s.Accept = true;
}
else
{
if (zeros)
{
initials.Add(s);
}
char c = x[n];
s.AddTransition(new Transition(c, AtLeast(x, n + 1, initials, zeros && c == '0')));
if (c < '9')
{
s.AddTransition(new Transition((char)(c + 1), '9', AnyOfRightLength(x, n + 1)));
}
}
return s;
}
/// <summary>
/// Constructs sub-automaton corresponding to decimal numbers of value at most
/// <c>x.Substring(n)</c> and length <c>x.Substring(n).Length</c>.
/// </summary>
private static State AtMost(string x, int n)
{
State s = new State();
if (x.Length == n)
{
s.Accept = true;
}
else
{
char c = x[n];
s.AddTransition(new Transition(c, AtMost(x, (char)n + 1)));
if (c > '0')
{
s.AddTransition(new Transition('0', (char)(c - 1), AnyOfRightLength(x, n + 1)));
}
}
return s;
}
/// <summary>
/// Constructs sub-automaton corresponding to decimal numbers of value between
/// <c>x.Substring(n)</c> and <c>y.Substring(n)</c> and of length <c>x.Substring(n).Length</c>
/// (which must be equal to <c>y.Substring(n).Length</c>).
/// </summary>
private static State Between(string x, string y, int n, ICollection<State> initials, bool zeros)
{
State s = new State();
if (x.Length == n)
{
s.Accept = true;
}
else
{
if (zeros)
{
initials.Add(s);
}
char cx = x[n];
char cy = y[n];
if (cx == cy)
{
s.AddTransition(new Transition(cx, Between(x, y, n + 1, initials, zeros && cx == '0')));
}
else // cx<cy
{
s.AddTransition(new Transition(cx, AtLeast(x, n + 1, initials, zeros && cx == '0')));
s.AddTransition(new Transition(cy, AtMost(y, n + 1)));
if (cx + 1 < cy)
{
s.AddTransition(new Transition((char)(cx + 1), (char)(cy - 1), AnyOfRightLength(x, n + 1)));
}
}
}
return s;
}
/// <summary>
/// Returns a new automaton that accepts strings representing decimal
/// non-negative integers in the given interval.
/// </summary>
/// <param name="min"> Minimal value of interval. </param>
/// <param name="max"> Maximal value of interval (both end points are included in the
/// interval). </param>
/// <param name="digits"> If &gt; 0, use fixed number of digits (strings must be prefixed
/// by 0's to obtain the right length) - otherwise, the number of
/// digits is not fixed. </param>
/// <exception cref="ArgumentException"> If min &gt; max or if numbers in the
/// interval cannot be expressed with the given fixed number of
/// digits. </exception>
public static Automaton MakeInterval(int min, int max, int digits)
{
Automaton a = new Automaton();
string x = Convert.ToString(min, CultureInfo.InvariantCulture);
string y = Convert.ToString(max, CultureInfo.InvariantCulture);
if (min > max || (digits > 0 && y.Length > digits))
{
throw new System.ArgumentException();
}
int d;
if (digits > 0)
{
d = digits;
}
else
{
d = y.Length;
}
StringBuilder bx = new StringBuilder();
for (int i = x.Length; i < d; i++)
{
bx.Append('0');
}
bx.Append(x);
x = bx.ToString();
StringBuilder by = new StringBuilder();
for (int i = y.Length; i < d; i++)
{
by.Append('0');
}
by.Append(y);
y = by.ToString();
ICollection<State> initials = new List<State>();
a.initial = Between(x, y, 0, initials, digits <= 0);
if (digits <= 0)
{
List<StatePair> pairs = new List<StatePair>();
foreach (State p in initials)
{
if (a.initial != p)
{
pairs.Add(new StatePair(a.initial, p));
}
}
BasicOperations.AddEpsilons(a, pairs);
a.initial.AddTransition(new Transition('0', a.initial));
a.deterministic = false;
}
else
{
a.deterministic = true;
}
a.CheckMinimizeAlways();
return a;
}
/// <summary>
/// Returns a new (deterministic) automaton that accepts the single given
/// string.
/// </summary>
public static Automaton MakeString(string s)
{
Automaton a = new Automaton();
a.singleton = s;
a.deterministic = true;
return a;
}
public static Automaton MakeString(int[] word, int offset, int length)
{
Automaton a = new Automaton();
a.IsDeterministic = true;
State s = new State();
a.initial = s;
for (int i = offset; i < offset + length; i++)
{
State s2 = new State();
s.AddTransition(new Transition(word[i], s2));
s = s2;
}
s.accept = true;
return a;
}
/// <summary>
/// Returns a new (deterministic and minimal) automaton that accepts the union
/// of the given collection of <see cref="BytesRef"/>s representing UTF-8 encoded
/// strings.
/// </summary>
/// <param name="utf8Strings">
/// The input strings, UTF-8 encoded. The collection must be in sorted
/// order.
/// </param>
/// <returns> An <see cref="Automaton"/> accepting all input strings. The resulting
/// automaton is codepoint based (full unicode codepoints on
/// transitions). </returns>
public static Automaton MakeStringUnion(ICollection<BytesRef> utf8Strings)
{
if (utf8Strings.Count == 0)
{
return MakeEmpty();
}
else
{
return DaciukMihovAutomatonBuilder.Build(utf8Strings);
}
}
}
}