blob: 62774bfac7f2df0bfacba84678b7f594b551072a [file] [log] [blame]
/*
* dk.brics.automaton
*
* Copyright (c) 2001-2009 Anders Moeller
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.apache.lucene.util.automaton;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashSet;
import java.util.LinkedList;
/**
* Operations for minimizing automata.
*
* @lucene.experimental
*/
final public class MinimizationOperations {
/** Not instantiable: this class only exposes static operations. */
private MinimizationOperations() {}
/**
 * Minimizes (and determinizes if not already deterministic) the given
 * automaton using Hopcroft's algorithm.
 *
 * @param a automaton to minimize; it is not modified in place by this method (the local
 *   reference is re-assigned to the determinized and then totalized automaton)
 * @param determinizeWorkLimit maximum effort to spend determinizing the automaton. Set higher to
 *   allow more complex queries and lower to prevent memory exhaustion. Use {@link
 *   Operations#DEFAULT_DETERMINIZE_WORK_LIMIT} as a decent default if you don't otherwise know
 *   what to specify.
 * @return a new empty automaton when {@code a} has no states or its state 0 is non-accepting
 *   and has no transitions; the determinized input when its state 0 is accepting and its only
 *   transition loops back to state 0 over the whole code point range; otherwise the automaton
 *   built from the computed equivalence classes, passed through
 *   {@link Operations#removeDeadStates}
 */
public static Automaton minimize(Automaton a, int determinizeWorkLimit) {
  if (a.getNumStates() == 0 || (a.isAccept(0) == false && a.getNumTransitions(0) == 0)) {
    // Fastmatch for common case
    return new Automaton();
  }
  a = Operations.determinize(a, determinizeWorkLimit);
  // The shortcut is only valid when state 0 is accepting: a non-accepting state 0 with a
  // full-range self-loop accepts nothing and must go through the general path so that the
  // dead state gets removed.
  if (a.isAccept(0) && a.getNumTransitions(0) == 1) {
    Transition t = new Transition();
    a.getTransition(0, 0, t);
    if (t.dest == 0 && t.min == Character.MIN_CODE_POINT
        && t.max == Character.MAX_CODE_POINT) {
      // Accepts all strings
      return a;
    }
  }
  // The loops below use a.next(...) as an array index for every state and every start point,
  // so every state needs a transition for each of them -- presumably what totalize guarantees.
  a = Operations.totalize(a);

  // initialize data structures
  final int[] sigma = a.getStartPoints();
  final int sigmaLen = sigma.length, statesLen = a.getNumStates();
  // reverse[q][x]: states whose transition on sigma[x] leads to q (null when there are none)
  @SuppressWarnings({"rawtypes","unchecked"}) final ArrayList<Integer>[][] reverse =
      (ArrayList<Integer>[][]) new ArrayList[statesLen][sigmaLen];
  // partition[j]: the states currently in block j
  @SuppressWarnings({"rawtypes","unchecked"}) final HashSet<Integer>[] partition =
      (HashSet<Integer>[]) new HashSet[statesLen];
  // splitblock[j]: states of block j marked during the current round
  @SuppressWarnings({"rawtypes","unchecked"}) final ArrayList<Integer>[] splitblock =
      (ArrayList<Integer>[]) new ArrayList[statesLen];
  // block[q]: index of the block that currently contains state q
  final int[] block = new int[statesLen];
  // active[j][x]: states of block j that have a non-null reverse[q][x];
  // active2[q][x]: the list node of q inside such a list, or null
  final StateList[][] active = new StateList[statesLen][sigmaLen];
  final StateListNode[][] active2 = new StateListNode[statesLen][sigmaLen];
  // pending: queue of (block, symbol index) pairs still to process;
  // pending2: membership bits for the queue, indexed by x*statesLen + block
  final LinkedList<IntPair> pending = new LinkedList<>();
  final BitSet pending2 = new BitSet(sigmaLen*statesLen);
  // split: states already marked this round; refine/refine2: blocks touched this round
  final BitSet split = new BitSet(statesLen),
      refine = new BitSet(statesLen), refine2 = new BitSet(statesLen);
  for (int q = 0; q < statesLen; q++) {
    splitblock[q] = new ArrayList<>();
    partition[q] = new HashSet<>();
    for (int x = 0; x < sigmaLen; x++) {
      active[q][x] = new StateList();
    }
  }

  // find initial partition and reverse edges
  Transition transition = new Transition();
  for (int q = 0; q < statesLen; q++) {
    // block 0 holds the accepting states, block 1 the non-accepting ones
    final int j = a.isAccept(q) ? 0 : 1;
    partition[j].add(q);
    block[q] = j;
    // NOTE(review): presumably re-positions the lookup cursor used by a.next for state q
    transition.source = q;
    transition.transitionUpto = -1;
    for (int x = 0; x < sigmaLen; x++) {
      final ArrayList<Integer>[] r = reverse[a.next(transition, sigma[x])];
      if (r[x] == null) {
        r[x] = new ArrayList<>();
      }
      r[x].add(q);
    }
  }

  // initialize active sets
  for (int j = 0; j <= 1; j++) {
    for (int x = 0; x < sigmaLen; x++) {
      for (int q : partition[j]) {
        if (reverse[q][x] != null) {
          active2[q][x] = active[j][x].add(q);
        }
      }
    }
  }

  // initialize pending: for each symbol, enqueue whichever initial block has fewer active states
  for (int x = 0; x < sigmaLen; x++) {
    final int j = (active[0][x].size <= active[1][x].size) ? 0 : 1;
    pending.add(new IntPair(j, x));
    pending2.set(x*statesLen + j);
  }

  // process pending until fixed point
  int k = 2; // index of the next block to create
  while (!pending.isEmpty()) {
    final IntPair ip = pending.removeFirst();
    final int p = ip.n1;
    final int x = ip.n2;
    pending2.clear(x*statesLen + p);

    // find states that need to be split off their blocks
    for (StateListNode m = active[p][x].first; m != null; m = m.next) {
      final ArrayList<Integer> r = reverse[m.q][x];
      if (r != null) {
        for (int i : r) {
          if (!split.get(i)) {
            split.set(i);
            final int j = block[i];
            splitblock[j].add(i);
            if (!refine2.get(j)) {
              refine2.set(j);
              refine.set(j);
            }
          }
        }
      }
    }

    // refine blocks
    for (int j = refine.nextSetBit(0); j >= 0; j = refine.nextSetBit(j+1)) {
      final ArrayList<Integer> sb = splitblock[j];
      // only split when a proper subset of block j was marked
      if (sb.size() < partition[j].size()) {
        final HashSet<Integer> b1 = partition[j];
        final HashSet<Integer> b2 = partition[k];
        for (int s : sb) {
          // move s from block j to the new block k, together with its active-list nodes
          b1.remove(s);
          b2.add(s);
          block[s] = k;
          for (int c = 0; c < sigmaLen; c++) {
            final StateListNode sn = active2[s][c];
            if (sn != null && sn.sl == active[j][c]) {
              sn.remove();
              active2[s][c] = active[k][c].add(s);
            }
          }
        }
        // update pending
        for (int c = 0; c < sigmaLen; c++) {
          final int aj = active[j][c].size,
              ak = active[k][c].size,
              ofs = c*statesLen;
          // enqueue (j, c) when it is not already pending and j has a non-empty active list
          // no larger than k's; otherwise enqueue (k, c)
          if (!pending2.get(ofs + j) && 0 < aj && aj <= ak) {
            pending2.set(ofs + j);
            pending.add(new IntPair(j, c));
          } else {
            pending2.set(ofs + k);
            pending.add(new IntPair(k, c));
          }
        }
        k++;
      }
      // reset the per-round marks for block j
      refine2.clear(j);
      for (int s : sb) {
        split.clear(s);
      }
      sb.clear();
    }
    refine.clear();
  }

  return buildFromPartition(a, partition, k);
}

/**
 * Builds the output automaton with one state per block {@code partition[0..k-1]} of
 * {@code a}'s states, copies the transitions of one representative per block, and returns the
 * result of {@link Operations#removeDeadStates} on it.
 */
private static Automaton buildFromPartition(
    Automaton a, HashSet<Integer>[] partition, int k) {
  final Automaton result = new Automaton();
  final Transition t = new Transition();

  // stateMap[q]: result state for original state q; stateRep[s]: original state whose
  // transitions are copied for result state s
  final int[] stateMap = new int[a.getNumStates()];
  final int[] stateRep = new int[k];

  // result state 0 is reserved for the block that contains the original state 0
  result.createState();

  for (int n = 0; n < k; n++) {
    final HashSet<Integer> members = partition[n];
    final int newState = members.contains(0) ? 0 : result.createState();
    int rep = -1;
    for (int q : members) {
      stateMap[q] = newState;
      rep = q; // the last member iterated becomes the representative
    }
    if (rep >= 0) {
      // Members of one block share acceptance: the initial split is by isAccept and
      // refinement only subdivides blocks, so the representative decides for the block.
      result.setAccept(newState, a.isAccept(rep));
      stateRep[newState] = rep;
    }
  }

  // build transitions
  for (int n = 0; n < k; n++) {
    final int numTransitions = a.initTransition(stateRep[n], t);
    for (int i = 0; i < numTransitions; i++) {
      a.getNextTransition(t);
      result.addTransition(n, stateMap[t.dest], t.min, t.max);
    }
  }
  result.finishState();

  return Operations.removeDeadStates(result);
}
/** Immutable pair of two ints. */
static final class IntPair {
  /** First component. */
  final int n1;
  /** Second component. */
  final int n2;

  /** Creates the pair {@code (n1, n2)}. */
  IntPair(int n1, int n2) {
    this.n1 = n1;
    this.n2 = n2;
  }
}
/** Head of a doubly-linked list of {@link StateListNode}s, tracking its element count. */
static final class StateList {
  /** Number of nodes currently linked into this list. */
  int size;
  /** First node, or null when nothing has been added. */
  StateListNode first;
  /** Last node, or null when nothing has been added. */
  StateListNode last;

  /**
   * Creates a node for state {@code q} attached to this list; the node's constructor does
   * the linking and the size update.
   *
   * @return the newly created node
   */
  StateListNode add(int q) {
    final StateListNode node = new StateListNode(q, this);
    return node;
  }
}
/** Node of a {@link StateList}; holds one state number and its neighbours in the list. */
static final class StateListNode {
  /** State number stored in this node. */
  final int q;
  /** Neighbouring nodes in the owning list (null at the respective end). */
  StateListNode next, prev;
  /** The list this node was created for. */
  final StateList sl;

  /** Creates the node and appends it to the end of {@code sl}, incrementing its size. */
  StateListNode(int q, StateList sl) {
    this.q = q;
    this.sl = sl;
    final boolean wasEmpty = sl.size == 0;
    sl.size++;
    if (wasEmpty) {
      sl.first = this;
      sl.last = this;
    } else {
      final StateListNode tail = sl.last;
      tail.next = this;
      prev = tail;
      sl.last = this;
    }
  }

  /**
   * Unlinks this node from its list and decrements the list size. The node's own
   * {@code next}/{@code prev} fields are left as they were.
   */
  void remove() {
    sl.size--;
    if (sl.first == this) {
      sl.first = next;
    } else {
      prev.next = next;
    }
    if (sl.last == this) {
      sl.last = prev;
    } else {
      next.prev = prev;
    }
  }
}
}