blob: 96dc55a9b5c7aa4fc32b8a7ffe74870105dfe83f [file] [log] [blame]
using J2N;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using JCG = J2N.Collections.Generic;
/*
* dk.brics.automaton
*
* Copyright (c) 2001-2009 Anders Moeller
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* this SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* this SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
namespace Lucene.Net.Util.Automaton
{
/// <summary>
/// Basic automata operations.
/// <para/>
/// @lucene.experimental
/// </summary>
public static class BasicOperations // LUCENENET specific - made static since all members are static
{
/// <summary>
/// Returns an automaton that accepts the concatenation of the languages of the
/// given automata.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
public static Automaton Concatenate(Automaton a1, Automaton a2)
{
if (a1.IsSingleton && a2.IsSingleton)
{
return BasicAutomata.MakeString(a1.singleton + a2.singleton);
}
if (IsEmpty(a1) || IsEmpty(a2))
{
return BasicAutomata.MakeEmpty();
}
// adding epsilon transitions with the NFA concatenation algorithm
// in this case always produces a resulting DFA, preventing expensive
// redundant determinize() calls for this common case.
bool deterministic = a1.IsSingleton && a2.IsDeterministic;
if (a1 == a2)
{
a1 = a1.CloneExpanded();
a2 = a2.CloneExpanded();
}
else
{
a1 = a1.CloneExpandedIfRequired();
a2 = a2.CloneExpandedIfRequired();
}
foreach (State s in a1.GetAcceptStates())
{
s.accept = false;
s.AddEpsilon(a2.initial);
}
a1.deterministic = deterministic;
//a1.clearHashCode();
a1.ClearNumberedStates();
a1.CheckMinimizeAlways();
return a1;
}
/// <summary>
/// Returns an automaton that accepts the concatenation of the languages of the
/// given automata.
/// <para/>
/// Complexity: linear in total number of states.
/// </summary>
public static Automaton Concatenate(IList<Automaton> l)
{
if (l.Count == 0)
{
return BasicAutomata.MakeEmptyString();
}
bool all_singleton = true;
foreach (Automaton a in l)
{
if (!a.IsSingleton)
{
all_singleton = false;
break;
}
}
if (all_singleton)
{
StringBuilder b = new StringBuilder();
foreach (Automaton a in l)
{
b.Append(a.singleton);
}
return BasicAutomata.MakeString(b.ToString());
}
else
{
foreach (Automaton a in l)
{
if (BasicOperations.IsEmpty(a))
{
return BasicAutomata.MakeEmpty();
}
}
JCG.HashSet<int> ids = new JCG.HashSet<int>();
foreach (Automaton a in l)
{
ids.Add(a.GetHashCode());
}
bool has_aliases = ids.Count != l.Count;
Automaton b = l[0];
if (has_aliases)
{
b = b.CloneExpanded();
}
else
{
b = b.CloneExpandedIfRequired();
}
ISet<State> ac = b.GetAcceptStates();
bool first = true;
foreach (Automaton a in l)
{
if (first)
{
first = false;
}
else
{
if (a.IsEmptyString)
{
continue;
}
Automaton aa = a;
if (has_aliases)
{
aa = aa.CloneExpanded();
}
else
{
aa = aa.CloneExpandedIfRequired();
}
ISet<State> ns = aa.GetAcceptStates();
foreach (State s in ac)
{
s.accept = false;
s.AddEpsilon(aa.initial);
if (s.accept)
{
ns.Add(s);
}
}
ac = ns;
}
}
b.deterministic = false;
//b.clearHashCode();
b.ClearNumberedStates();
b.CheckMinimizeAlways();
return b;
}
}
/// <summary>
/// Returns an automaton that accepts the union of the empty string and the
/// language of the given automaton.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
public static Automaton Optional(Automaton a)
{
a = a.CloneExpandedIfRequired();
State s = new State();
s.AddEpsilon(a.initial);
s.accept = true;
a.initial = s;
a.deterministic = false;
//a.clearHashCode();
a.ClearNumberedStates();
a.CheckMinimizeAlways();
return a;
}
/// <summary>
/// Returns an automaton that accepts the Kleene star (zero or more
/// concatenated repetitions) of the language of the given automaton. Never
/// modifies the input automaton language.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
public static Automaton Repeat(Automaton a)
{
a = a.CloneExpanded();
State s = new State();
s.accept = true;
s.AddEpsilon(a.initial);
foreach (State p in a.GetAcceptStates())
{
p.AddEpsilon(s);
}
a.initial = s;
a.deterministic = false;
//a.clearHashCode();
a.ClearNumberedStates();
a.CheckMinimizeAlways();
return a;
}
/// <summary>
/// Returns an automaton that accepts <paramref name="min"/> or more concatenated
/// repetitions of the language of the given automaton.
/// <para/>
/// Complexity: linear in number of states and in <paramref name="min"/>.
/// </summary>
public static Automaton Repeat(Automaton a, int min)
{
if (min == 0)
{
return Repeat(a);
}
IList<Automaton> @as = new List<Automaton>();
while (min-- > 0)
{
@as.Add(a);
}
@as.Add(Repeat(a));
return Concatenate(@as);
}
/// <summary>
/// Returns an automaton that accepts between <paramref name="min"/> and
/// <paramref name="max"/> (including both) concatenated repetitions of the language
/// of the given automaton.
/// <para/>
/// Complexity: linear in number of states and in <paramref name="min"/> and
/// <paramref name="max"/>.
/// </summary>
public static Automaton Repeat(Automaton a, int min, int max)
{
if (min > max)
{
return BasicAutomata.MakeEmpty();
}
max -= min;
a.ExpandSingleton();
Automaton b;
if (min == 0)
{
b = BasicAutomata.MakeEmptyString();
}
else if (min == 1)
{
b = (Automaton)a.Clone();
}
else
{
IList<Automaton> @as = new List<Automaton>();
while (min-- > 0)
{
@as.Add(a);
}
b = Concatenate(@as);
}
if (max > 0)
{
Automaton d = (Automaton)a.Clone();
while (--max > 0)
{
Automaton c = (Automaton)a.Clone();
foreach (State p in c.GetAcceptStates())
{
p.AddEpsilon(d.initial);
}
d = c;
}
foreach (State p in b.GetAcceptStates())
{
p.AddEpsilon(d.initial);
}
b.deterministic = false;
//b.clearHashCode();
b.ClearNumberedStates();
b.CheckMinimizeAlways();
}
return b;
}
/// <summary>
/// Returns a (deterministic) automaton that accepts the complement of the
/// language of the given automaton.
/// <para/>
/// Complexity: linear in number of states (if already deterministic).
/// </summary>
public static Automaton Complement(Automaton a)
{
a = a.CloneExpandedIfRequired();
a.Determinize();
a.Totalize();
foreach (State p in a.GetNumberedStates())
{
p.accept = !p.accept;
}
a.RemoveDeadTransitions();
return a;
}
/// <summary>
/// Returns a (deterministic) automaton that accepts the intersection of the
/// language of <paramref name="a1"/> and the complement of the language of
/// <paramref name="a2"/>. As a side-effect, the automata may be determinized, if not
/// already deterministic.
/// <para/>
/// Complexity: quadratic in number of states (if already deterministic).
/// </summary>
public static Automaton Minus(Automaton a1, Automaton a2)
{
if (BasicOperations.IsEmpty(a1) || a1 == a2)
{
return BasicAutomata.MakeEmpty();
}
if (BasicOperations.IsEmpty(a2))
{
return a1.CloneIfRequired();
}
if (a1.IsSingleton)
{
if (BasicOperations.Run(a2, a1.singleton))
{
return BasicAutomata.MakeEmpty();
}
else
{
return a1.CloneIfRequired();
}
}
return Intersection(a1, a2.Complement());
}
/// <summary>
/// Returns an automaton that accepts the intersection of the languages of the
/// given automata. Never modifies the input automata languages.
/// <para/>
/// Complexity: quadratic in number of states.
/// </summary>
public static Automaton Intersection(Automaton a1, Automaton a2)
{
if (a1.IsSingleton)
{
if (BasicOperations.Run(a2, a1.singleton))
{
return a1.CloneIfRequired();
}
else
{
return BasicAutomata.MakeEmpty();
}
}
if (a2.IsSingleton)
{
if (BasicOperations.Run(a1, a2.singleton))
{
return a2.CloneIfRequired();
}
else
{
return BasicAutomata.MakeEmpty();
}
}
if (a1 == a2)
{
return a1.CloneIfRequired();
}
Transition[][] transitions1 = a1.GetSortedTransitions();
Transition[][] transitions2 = a2.GetSortedTransitions();
Automaton c = new Automaton();
LinkedList<StatePair> worklist = new LinkedList<StatePair>();
Dictionary<StatePair, StatePair> newstates = new Dictionary<StatePair, StatePair>();
StatePair p = new StatePair(c.initial, a1.initial, a2.initial);
worklist.AddLast(p);
newstates[p] = p;
while (worklist.Count > 0)
{
p = worklist.First.Value;
worklist.Remove(p);
p.s.accept = p.S1.accept && p.S2.accept;
Transition[] t1 = transitions1[p.S1.number];
Transition[] t2 = transitions2[p.S2.number];
for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
{
while (b2 < t2.Length && t2[b2].max < t1[n1].min)
{
b2++;
}
for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
{
if (t2[n2].max >= t1[n1].min)
{
StatePair q = new StatePair(t1[n1].to, t2[n2].to);
StatePair r;
newstates.TryGetValue(q, out r);
if (r == null)
{
q.s = new State();
worklist.AddLast(q);
newstates[q] = q;
r = q;
}
int min = t1[n1].min > t2[n2].min ? t1[n1].min : t2[n2].min;
int max = t1[n1].max < t2[n2].max ? t1[n1].max : t2[n2].max;
p.s.AddTransition(new Transition(min, max, r.s));
}
}
}
}
c.deterministic = a1.deterministic && a2.deterministic;
c.RemoveDeadTransitions();
c.CheckMinimizeAlways();
return c;
}
/// <summary>
/// Returns <c>true</c> if these two automata accept exactly the
/// same language. This is a costly computation! Note
/// also that <paramref name="a1"/> and <paramref name="a2"/> will be determinized as a side
/// effect.
/// </summary>
public static bool SameLanguage(Automaton a1, Automaton a2)
{
if (a1 == a2)
{
return true;
}
if (a1.IsSingleton && a2.IsSingleton)
{
return a1.singleton.Equals(a2.singleton, StringComparison.Ordinal);
}
else if (a1.IsSingleton)
{
// subsetOf is faster if the first automaton is a singleton
return SubsetOf(a1, a2) && SubsetOf(a2, a1);
}
else
{
return SubsetOf(a2, a1) && SubsetOf(a1, a2);
}
}
/// <summary>
/// Returns true if the language of <paramref name="a1"/> is a subset of the language
/// of <paramref name="a2"/>. As a side-effect, <paramref name="a2"/> is determinized if
/// not already marked as deterministic.
/// <para/>
/// Complexity: quadratic in number of states.
/// </summary>
public static bool SubsetOf(Automaton a1, Automaton a2)
{
if (a1 == a2)
{
return true;
}
if (a1.IsSingleton)
{
if (a2.IsSingleton)
{
return a1.singleton.Equals(a2.singleton, StringComparison.Ordinal);
}
return BasicOperations.Run(a2, a1.singleton);
}
a2.Determinize();
Transition[][] transitions1 = a1.GetSortedTransitions();
Transition[][] transitions2 = a2.GetSortedTransitions();
LinkedList<StatePair> worklist = new LinkedList<StatePair>();
JCG.HashSet<StatePair> visited = new JCG.HashSet<StatePair>();
StatePair p = new StatePair(a1.initial, a2.initial);
worklist.AddLast(p);
visited.Add(p);
while (worklist.Count > 0)
{
p = worklist.First.Value;
worklist.Remove(p);
if (p.S1.accept && !p.S2.accept)
{
return false;
}
Transition[] t1 = transitions1[p.S1.number];
Transition[] t2 = transitions2[p.S2.number];
for (int n1 = 0, b2 = 0; n1 < t1.Length; n1++)
{
while (b2 < t2.Length && t2[b2].max < t1[n1].min)
{
b2++;
}
int min1 = t1[n1].min, max1 = t1[n1].max;
for (int n2 = b2; n2 < t2.Length && t1[n1].max >= t2[n2].min; n2++)
{
if (t2[n2].min > min1)
{
return false;
}
if (t2[n2].max < Character.MaxCodePoint)
{
min1 = t2[n2].max + 1;
}
else
{
min1 = Character.MaxCodePoint;
max1 = Character.MinCodePoint;
}
StatePair q = new StatePair(t1[n1].to, t2[n2].to);
if (!visited.Contains(q))
{
worklist.AddLast(q);
visited.Add(q);
}
}
if (min1 <= max1)
{
return false;
}
}
}
return true;
}
/// <summary>
/// Returns an automaton that accepts the union of the languages of the given
/// automata.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
public static Automaton Union(Automaton a1, Automaton a2)
{
if ((a1.IsSingleton && a2.IsSingleton && a1.singleton.Equals(a2.singleton, StringComparison.Ordinal)) || a1 == a2)
{
return a1.CloneIfRequired();
}
if (a1 == a2)
{
a1 = a1.CloneExpanded();
a2 = a2.CloneExpanded();
}
else
{
a1 = a1.CloneExpandedIfRequired();
a2 = a2.CloneExpandedIfRequired();
}
State s = new State();
s.AddEpsilon(a1.initial);
s.AddEpsilon(a2.initial);
a1.initial = s;
a1.deterministic = false;
//a1.clearHashCode();
a1.ClearNumberedStates();
a1.CheckMinimizeAlways();
return a1;
}
/// <summary>
/// Returns an automaton that accepts the union of the languages of the given
/// automata.
/// <para/>
/// Complexity: linear in number of states.
/// </summary>
public static Automaton Union(ICollection<Automaton> l)
{
JCG.HashSet<int> ids = new JCG.HashSet<int>();
foreach (Automaton a in l)
{
ids.Add(a.GetHashCode());
}
bool has_aliases = ids.Count != l.Count;
State s = new State();
foreach (Automaton b in l)
{
if (BasicOperations.IsEmpty(b))
{
continue;
}
Automaton bb = b;
if (has_aliases)
{
bb = bb.CloneExpanded();
}
else
{
bb = bb.CloneExpandedIfRequired();
}
s.AddEpsilon(bb.initial);
}
Automaton a_ = new Automaton();
a_.initial = s;
a_.deterministic = false;
//a.clearHashCode();
a_.ClearNumberedStates();
a_.CheckMinimizeAlways();
return a_;
}
// Simple custom ArrayList<Transition>
private sealed class TransitionList
{
internal Transition[] transitions = new Transition[2];
internal int count;
public void Add(Transition t)
{
if (transitions.Length == count)
{
Transition[] newArray = new Transition[ArrayUtil.Oversize(1 + count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
Array.Copy(transitions, 0, newArray, 0, count);
transitions = newArray;
}
transitions[count++] = t;
}
}
// Holds all transitions that start on this int point, or
// end at this point-1
private sealed class PointTransitions : IComparable<PointTransitions>
{
internal int point;
internal readonly TransitionList ends = new TransitionList();
internal readonly TransitionList starts = new TransitionList();
public int CompareTo(PointTransitions other)
{
return point - other.point;
}
public void Reset(int point)
{
this.point = point;
ends.count = 0;
starts.count = 0;
}
public override bool Equals(object other)
{
return ((PointTransitions)other).point == point;
}
public override int GetHashCode()
{
return point;
}
}
private sealed class PointTransitionSet
{
internal int count;
internal PointTransitions[] points = new PointTransitions[5];
private const int HASHMAP_CUTOVER = 30;
private readonly Dictionary<int?, PointTransitions> map = new Dictionary<int?, PointTransitions>();
private bool useHash = false;
private PointTransitions Next(int point)
{
// 1st time we are seeing this point
if (count == points.Length)
{
PointTransitions[] newArray = new PointTransitions[ArrayUtil.Oversize(1 + count, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
Array.Copy(points, 0, newArray, 0, count);
points = newArray;
}
PointTransitions points0 = points[count];
if (points0 == null)
{
points0 = points[count] = new PointTransitions();
}
points0.Reset(point);
count++;
return points0;
}
private PointTransitions Find(int point)
{
if (useHash)
{
int? pi = point;
PointTransitions p;
if (!map.TryGetValue(pi, out p))
{
p = Next(point);
map[pi] = p;
}
return p;
}
else
{
for (int i = 0; i < count; i++)
{
if (points[i].point == point)
{
return points[i];
}
}
PointTransitions p = Next(point);
if (count == HASHMAP_CUTOVER)
{
// switch to HashMap on the fly
Debug.Assert(map.Count == 0);
for (int i = 0; i < count; i++)
{
map[points[i].point] = points[i];
}
useHash = true;
}
return p;
}
}
public void Reset()
{
if (useHash)
{
map.Clear();
useHash = false;
}
count = 0;
}
public void Sort()
{
// Tim sort performs well on already sorted arrays:
if (count > 1)
{
ArrayUtil.TimSort(points, 0, count);
}
}
public void Add(Transition t)
{
Find(t.min).starts.Add(t);
Find(1 + t.max).ends.Add(t);
}
public override string ToString()
{
StringBuilder s = new StringBuilder();
for (int i = 0; i < count; i++)
{
if (i > 0)
{
s.Append(' ');
}
s.Append(points[i].point).Append(':').Append(points[i].starts.count).Append(',').Append(points[i].ends.count);
}
return s.ToString();
}
}
/// <summary>
/// Determinizes the given automaton.
/// <para/>
/// Worst case complexity: exponential in number of states.
/// </summary>
public static void Determinize(Automaton a)
{
if (a.IsDeterministic || a.IsSingleton)
{
return;
}
State[] allStates = a.GetNumberedStates();
// subset construction
bool initAccept = a.initial.accept;
int initNumber = a.initial.number;
a.initial = new State();
SortedInt32Set.FrozenInt32Set initialset = new SortedInt32Set.FrozenInt32Set(initNumber, a.initial);
LinkedList<SortedInt32Set.FrozenInt32Set> worklist = new LinkedList<SortedInt32Set.FrozenInt32Set>();
IDictionary<SortedInt32Set.FrozenInt32Set, State> newstate = new Dictionary<SortedInt32Set.FrozenInt32Set, State>();
worklist.AddLast(initialset);
a.initial.accept = initAccept;
newstate[initialset] = a.initial;
int newStateUpto = 0;
State[] newStatesArray = new State[5];
newStatesArray[newStateUpto] = a.initial;
a.initial.number = newStateUpto;
newStateUpto++;
// like Set<Integer,PointTransitions>
PointTransitionSet points = new PointTransitionSet();
// like SortedMap<Integer,Integer>
SortedInt32Set statesSet = new SortedInt32Set(5);
// LUCENENET NOTE: The problem here is almost certainly
// due to the conversion to FrozenIntSet along with its
// differing equality checking.
while (worklist.Count > 0)
{
SortedInt32Set.FrozenInt32Set s = worklist.First.Value;
worklist.Remove(s);
// Collate all outgoing transitions by min/1+max:
for (int i = 0; i < s.values.Length; i++)
{
State s0 = allStates[s.values[i]];
for (int j = 0; j < s0.numTransitions; j++)
{
points.Add(s0.TransitionsArray[j]);
}
}
if (points.count == 0)
{
// No outgoing transitions -- skip it
continue;
}
points.Sort();
int lastPoint = -1;
int accCount = 0;
State r = s.state;
for (int i = 0; i < points.count; i++)
{
int point = points.points[i].point;
if (statesSet.upto > 0)
{
Debug.Assert(lastPoint != -1);
statesSet.ComputeHash();
State q;
if (!newstate.TryGetValue(statesSet.ToFrozenInt32Set(), out q) || q == null)
{
q = new State();
SortedInt32Set.FrozenInt32Set p = statesSet.Freeze(q);
worklist.AddLast(p);
if (newStateUpto == newStatesArray.Length)
{
State[] newArray = new State[ArrayUtil.Oversize(1 + newStateUpto, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
Array.Copy(newStatesArray, 0, newArray, 0, newStateUpto);
newStatesArray = newArray;
}
newStatesArray[newStateUpto] = q;
q.number = newStateUpto;
newStateUpto++;
q.accept = accCount > 0;
newstate[p] = q;
}
else
{
Debug.Assert((accCount > 0) == q.accept, "accCount=" + accCount + " vs existing accept=" + q.accept + " states=" + statesSet);
}
r.AddTransition(new Transition(lastPoint, point - 1, q));
}
// process transitions that end on this point
// (closes an overlapping interval)
Transition[] transitions = points.points[i].ends.transitions;
int limit = points.points[i].ends.count;
for (int j = 0; j < limit; j++)
{
Transition t = transitions[j];
int num = t.to.number;
statesSet.Decr(num);
accCount -= t.to.accept ? 1 : 0;
}
points.points[i].ends.count = 0;
// process transitions that start on this point
// (opens a new interval)
transitions = points.points[i].starts.transitions;
limit = points.points[i].starts.count;
for (int j = 0; j < limit; j++)
{
Transition t = transitions[j];
int num = t.to.number;
statesSet.Incr(num);
accCount += t.to.accept ? 1 : 0;
}
lastPoint = point;
points.points[i].starts.count = 0;
}
points.Reset();
Debug.Assert(statesSet.upto == 0, "upto=" + statesSet.upto);
}
a.deterministic = true;
a.SetNumberedStates(newStatesArray, newStateUpto);
}
/// <summary>
/// Adds epsilon transitions to the given automaton. This method adds extra
/// character interval transitions that are equivalent to the given set of
/// epsilon transitions.
/// </summary>
/// <param name="a"> Automaton. </param>
/// <param name="pairs"> Collection of <see cref="StatePair"/> objects representing pairs of
/// source/destination states where epsilon transitions should be
/// added. </param>
public static void AddEpsilons(Automaton a, ICollection<StatePair> pairs)
{
a.ExpandSingleton();
Dictionary<State, JCG.HashSet<State>> forward = new Dictionary<State, JCG.HashSet<State>>();
Dictionary<State, JCG.HashSet<State>> back = new Dictionary<State, JCG.HashSet<State>>();
foreach (StatePair p in pairs)
{
if (!forward.TryGetValue(p.S1, out JCG.HashSet<State> to))
{
to = new JCG.HashSet<State>();
forward[p.S1] = to;
}
to.Add(p.S2);
JCG.HashSet<State> from;
if (!back.TryGetValue(p.S2, out from))
{
from = new JCG.HashSet<State>();
back[p.S2] = from;
}
from.Add(p.S1);
}
// calculate epsilon closure
LinkedList<StatePair> worklist = new LinkedList<StatePair>(pairs);
JCG.HashSet<StatePair> workset = new JCG.HashSet<StatePair>(pairs);
while (worklist.Count > 0)
{
StatePair p = worklist.First.Value;
worklist.Remove(p);
workset.Remove(p);
JCG.HashSet<State> to;
JCG.HashSet<State> from;
if (forward.TryGetValue(p.S2, out to))
{
foreach (State s in to)
{
StatePair pp = new StatePair(p.S1, s);
if (!pairs.Contains(pp))
{
pairs.Add(pp);
forward[p.S1].Add(s);
back[s].Add(p.S1);
worklist.AddLast(pp);
workset.Add(pp);
if (back.TryGetValue(p.S1, out from))
{
foreach (State q in from)
{
StatePair qq = new StatePair(q, p.S1);
if (!workset.Contains(qq))
{
worklist.AddLast(qq);
workset.Add(qq);
}
}
}
}
}
}
}
// add transitions
foreach (StatePair p in pairs)
{
p.S1.AddEpsilon(p.S2);
}
a.deterministic = false;
//a.clearHashCode();
a.ClearNumberedStates();
a.CheckMinimizeAlways();
}
/// <summary>
/// Returns <c>true</c> if the given automaton accepts the empty string and nothing
/// else.
/// </summary>
public static bool IsEmptyString(Automaton a)
{
if (a.IsSingleton)
{
return a.singleton.Length == 0;
}
else
{
return a.initial.accept && a.initial.NumTransitions == 0;
}
}
/// <summary>
/// Returns <c>true</c> if the given automaton accepts no strings.
/// </summary>
public static bool IsEmpty(Automaton a)
{
if (a.IsSingleton)
{
return false;
}
return !a.initial.accept && a.initial.NumTransitions == 0;
}
/// <summary>
/// Returns <c>true</c> if the given automaton accepts all strings.
/// </summary>
public static bool IsTotal(Automaton a)
{
if (a.IsSingleton)
{
return false;
}
if (a.initial.accept && a.initial.NumTransitions == 1)
{
Transition t = a.initial.GetTransitions().First();
return t.to == a.initial && t.min == Character.MinCodePoint && t.max == Character.MaxCodePoint;
}
return false;
}
/// <summary>
/// Returns <c>true</c> if the given string is accepted by the automaton.
/// <para/>
/// Complexity: linear in the length of the string.
/// <para/>
/// <b>Note:</b> for full performance, use the <see cref="RunAutomaton"/> class.
/// </summary>
public static bool Run(Automaton a, string s)
{
if (a.IsSingleton)
{
return s.Equals(a.singleton, StringComparison.Ordinal);
}
if (a.deterministic)
{
State p = a.initial;
for (int i = 0, cp = 0; i < s.Length; i += Character.CharCount(cp))
{
State q = p.Step(cp = Character.CodePointAt(s, i));
if (q == null)
{
return false;
}
p = q;
}
return p.accept;
}
else
{
State[] states = a.GetNumberedStates();
LinkedList<State> pp = new LinkedList<State>();
LinkedList<State> pp_other = new LinkedList<State>();
OpenBitSet bb = new OpenBitSet(states.Length);
OpenBitSet bb_other = new OpenBitSet(states.Length);
pp.AddLast(a.initial);
List<State> dest = new List<State>();
bool accept = a.initial.accept;
for (int i = 0, c = 0; i < s.Length; i += Character.CharCount(c))
{
c = Character.CodePointAt(s, i);
accept = false;
pp_other.Clear();
bb_other.Clear(0, bb_other.Length - 1);
foreach (State p in pp)
{
dest.Clear();
p.Step(c, dest);
foreach (State q in dest)
{
if (q.accept)
{
accept = true;
}
if (!bb_other.Get(q.number))
{
bb_other.Set(q.number);
pp_other.AddLast(q);
}
}
}
LinkedList<State> tp = pp;
pp = pp_other;
pp_other = tp;
OpenBitSet tb = bb;
bb = bb_other;
bb_other = tb;
}
return accept;
}
}
}
}