| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.suggest.analyzing; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| import org.apache.lucene.util.IntsRefBuilder; |
| import org.apache.lucene.util.automaton.Automaton; |
| import org.apache.lucene.util.automaton.Transition; |
| import org.apache.lucene.util.fst.FST; |
| import org.apache.lucene.util.fst.Util; |
| |
| // TODO: move to core? nobody else uses it yet though... |
| |
| /** |
| * Exposes a utility method to enumerate all paths |
| * intersecting an {@link Automaton} with an {@link FST}. |
| */ |
| public class FSTUtil { |
| |
| private FSTUtil() { |
| } |
| |
| /** Holds a pair (automaton, fst) of states and accumulated output in the intersected machine. */ |
| public static final class Path<T> { |
| |
| /** Node in the automaton where path ends: */ |
| public final int state; |
| |
| /** Node in the FST where path ends: */ |
| public final FST.Arc<T> fstNode; |
| |
| /** Output of the path so far: */ |
| public final T output; |
| |
| /** Input of the path so far: */ |
| public final IntsRefBuilder input; |
| |
| /** Sole constructor. */ |
| public Path(int state, FST.Arc<T> fstNode, T output, IntsRefBuilder input) { |
| this.state = state; |
| this.fstNode = fstNode; |
| this.output = output; |
| this.input = input; |
| } |
| } |
| |
| /** |
| * Enumerates all minimal prefix paths in the automaton that also intersect the FST, |
| * accumulating the FST end node and output for each path. |
| */ |
| public static <T> List<Path<T>> intersectPrefixPaths(Automaton a, FST<T> fst) |
| throws IOException { |
| assert a.isDeterministic(); |
| final List<Path<T>> queue = new ArrayList<>(); |
| final List<Path<T>> endNodes = new ArrayList<>(); |
| if (a.getNumStates() == 0) { |
| return endNodes; |
| } |
| |
| queue.add(new Path<>(0, fst |
| .getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(), |
| new IntsRefBuilder())); |
| |
| final FST.Arc<T> scratchArc = new FST.Arc<>(); |
| final FST.BytesReader fstReader = fst.getBytesReader(); |
| |
| Transition t = new Transition(); |
| |
| while (queue.size() != 0) { |
| final Path<T> path = queue.remove(queue.size() - 1); |
| if (a.isAccept(path.state)) { |
| endNodes.add(path); |
| // we can stop here if we accept this path, |
| // we accept all further paths too |
| continue; |
| } |
| |
| IntsRefBuilder currentInput = path.input; |
| int count = a.initTransition(path.state, t); |
| for (int i=0;i<count;i++) { |
| a.getNextTransition(t); |
| final int min = t.min; |
| final int max = t.max; |
| if (min == max) { |
| final FST.Arc<T> nextArc = fst.findTargetArc(t.min, |
| path.fstNode, scratchArc, fstReader); |
| if (nextArc != null) { |
| final IntsRefBuilder newInput = new IntsRefBuilder(); |
| newInput.copyInts(currentInput.get()); |
| newInput.append(t.min); |
| queue.add(new Path<>(t.dest, new FST.Arc<T>() |
| .copyFrom(nextArc), fst.outputs |
| .add(path.output, nextArc.output()), newInput)); |
| } |
| } else { |
| // TODO: if this transition's TO state is accepting, and |
| // it accepts the entire range possible in the FST (ie. 0 to 255), |
| // we can simply use the prefix as the accepted state instead of |
| // looking up all the ranges and terminate early |
| // here. This just shifts the work from one queue |
| // (this one) to another (the completion search |
| // done in AnalyzingSuggester). |
| FST.Arc<T> nextArc = Util.readCeilArc(min, fst, path.fstNode, |
| scratchArc, fstReader); |
| while (nextArc != null && nextArc.label() <= max) { |
| assert nextArc.label() <= max; |
| assert nextArc.label() >= min : nextArc.label() + " " |
| + min; |
| final IntsRefBuilder newInput = new IntsRefBuilder(); |
| newInput.copyInts(currentInput.get()); |
| newInput.append(nextArc.label()); |
| queue.add(new Path<>(t.dest, new FST.Arc<T>() |
| .copyFrom(nextArc), fst.outputs |
| .add(path.output, nextArc.output()), newInput)); |
| final int label = nextArc.label(); // used in assert |
| nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc, |
| fstReader); |
| assert nextArc == null || label < nextArc.label() : "last: " + label |
| + " next: " + nextArc.label(); |
| } |
| } |
| } |
| } |
| return endNodes; |
| } |
| |
| } |