blob: 97ef9a62ea10b0cf98ebf30c931ba6377bd56390 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.suggest.analyzing;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
// TODO: move to core? nobody else uses it yet though...
/**
* Exposes a utility method to enumerate all paths
* intersecting an {@link Automaton} with an {@link FST}.
*/
public class FSTUtil {
private FSTUtil() {
}
/** Holds a pair (automaton, fst) of states and accumulated output in the intersected machine. */
public static final class Path<T> {
/** Node in the automaton where path ends: */
public final int state;
/** Node in the FST where path ends: */
public final FST.Arc<T> fstNode;
/** Output of the path so far: */
public final T output;
/** Input of the path so far: */
public final IntsRefBuilder input;
/** Sole constructor. */
public Path(int state, FST.Arc<T> fstNode, T output, IntsRefBuilder input) {
this.state = state;
this.fstNode = fstNode;
this.output = output;
this.input = input;
}
}
/**
* Enumerates all minimal prefix paths in the automaton that also intersect the FST,
* accumulating the FST end node and output for each path.
*/
public static <T> List<Path<T>> intersectPrefixPaths(Automaton a, FST<T> fst)
throws IOException {
assert a.isDeterministic();
final List<Path<T>> queue = new ArrayList<>();
final List<Path<T>> endNodes = new ArrayList<>();
if (a.getNumStates() == 0) {
return endNodes;
}
queue.add(new Path<>(0, fst
.getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(),
new IntsRefBuilder()));
final FST.Arc<T> scratchArc = new FST.Arc<>();
final FST.BytesReader fstReader = fst.getBytesReader();
Transition t = new Transition();
while (queue.size() != 0) {
final Path<T> path = queue.remove(queue.size() - 1);
if (a.isAccept(path.state)) {
endNodes.add(path);
// we can stop here if we accept this path,
// we accept all further paths too
continue;
}
IntsRefBuilder currentInput = path.input;
int count = a.initTransition(path.state, t);
for (int i=0;i<count;i++) {
a.getNextTransition(t);
final int min = t.min;
final int max = t.max;
if (min == max) {
final FST.Arc<T> nextArc = fst.findTargetArc(t.min,
path.fstNode, scratchArc, fstReader);
if (nextArc != null) {
final IntsRefBuilder newInput = new IntsRefBuilder();
newInput.copyInts(currentInput.get());
newInput.append(t.min);
queue.add(new Path<>(t.dest, new FST.Arc<T>()
.copyFrom(nextArc), fst.outputs
.add(path.output, nextArc.output()), newInput));
}
} else {
// TODO: if this transition's TO state is accepting, and
// it accepts the entire range possible in the FST (ie. 0 to 255),
// we can simply use the prefix as the accepted state instead of
// looking up all the ranges and terminate early
// here. This just shifts the work from one queue
// (this one) to another (the completion search
// done in AnalyzingSuggester).
FST.Arc<T> nextArc = Util.readCeilArc(min, fst, path.fstNode,
scratchArc, fstReader);
while (nextArc != null && nextArc.label() <= max) {
assert nextArc.label() <= max;
assert nextArc.label() >= min : nextArc.label() + " "
+ min;
final IntsRefBuilder newInput = new IntsRefBuilder();
newInput.copyInts(currentInput.get());
newInput.append(nextArc.label());
queue.add(new Path<>(t.dest, new FST.Arc<T>()
.copyFrom(nextArc), fst.outputs
.add(path.output, nextArc.output()), newInput));
final int label = nextArc.label(); // used in assert
nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc,
fstReader);
assert nextArc == null || label < nextArc.label() : "last: " + label
+ " next: " + nextArc.label();
}
}
}
}
return endNodes;
}
}