| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.suggest.analyzing; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.List; |
| import org.apache.lucene.util.IntsRefBuilder; |
| import org.apache.lucene.util.automaton.Automaton; |
| import org.apache.lucene.util.automaton.Transition; |
| import org.apache.lucene.util.fst.FST; |
| import org.apache.lucene.util.fst.Util; |
| |
| // TODO: move to core? nobody else uses it yet though... |
| |
| /** |
| * Exposes a utility method to enumerate all paths intersecting an {@link Automaton} with an {@link |
| * FST}. |
| */ |
| public class FSTUtil { |
| |
| private FSTUtil() {} |
| |
| /** Holds a pair (automaton, fst) of states and accumulated output in the intersected machine. */ |
| public static final class Path<T> { |
| |
| /** Node in the automaton where path ends: */ |
| public final int state; |
| |
| /** Node in the FST where path ends: */ |
| public final FST.Arc<T> fstNode; |
| |
| /** Output of the path so far: */ |
| public final T output; |
| |
| /** Input of the path so far: */ |
| public final IntsRefBuilder input; |
| |
| /** Sole constructor. */ |
| public Path(int state, FST.Arc<T> fstNode, T output, IntsRefBuilder input) { |
| this.state = state; |
| this.fstNode = fstNode; |
| this.output = output; |
| this.input = input; |
| } |
| } |
| |
| /** |
| * Enumerates all minimal prefix paths in the automaton that also intersect the FST, accumulating |
| * the FST end node and output for each path. |
| */ |
| public static <T> List<Path<T>> intersectPrefixPaths(Automaton a, FST<T> fst) throws IOException { |
| assert a.isDeterministic(); |
| final List<Path<T>> queue = new ArrayList<>(); |
| final List<Path<T>> endNodes = new ArrayList<>(); |
| if (a.getNumStates() == 0) { |
| return endNodes; |
| } |
| |
| queue.add( |
| new Path<>( |
| 0, fst.getFirstArc(new FST.Arc<T>()), fst.outputs.getNoOutput(), new IntsRefBuilder())); |
| |
| final FST.Arc<T> scratchArc = new FST.Arc<>(); |
| final FST.BytesReader fstReader = fst.getBytesReader(); |
| |
| Transition t = new Transition(); |
| |
| while (queue.size() != 0) { |
| final Path<T> path = queue.remove(queue.size() - 1); |
| if (a.isAccept(path.state)) { |
| endNodes.add(path); |
| // we can stop here if we accept this path, |
| // we accept all further paths too |
| continue; |
| } |
| |
| IntsRefBuilder currentInput = path.input; |
| int count = a.initTransition(path.state, t); |
| for (int i = 0; i < count; i++) { |
| a.getNextTransition(t); |
| final int min = t.min; |
| final int max = t.max; |
| if (min == max) { |
| final FST.Arc<T> nextArc = fst.findTargetArc(t.min, path.fstNode, scratchArc, fstReader); |
| if (nextArc != null) { |
| final IntsRefBuilder newInput = new IntsRefBuilder(); |
| newInput.copyInts(currentInput.get()); |
| newInput.append(t.min); |
| queue.add( |
| new Path<>( |
| t.dest, |
| new FST.Arc<T>().copyFrom(nextArc), |
| fst.outputs.add(path.output, nextArc.output()), |
| newInput)); |
| } |
| } else { |
| // TODO: if this transition's TO state is accepting, and |
| // it accepts the entire range possible in the FST (ie. 0 to 255), |
| // we can simply use the prefix as the accepted state instead of |
| // looking up all the ranges and terminate early |
| // here. This just shifts the work from one queue |
| // (this one) to another (the completion search |
| // done in AnalyzingSuggester). |
| FST.Arc<T> nextArc = Util.readCeilArc(min, fst, path.fstNode, scratchArc, fstReader); |
| while (nextArc != null && nextArc.label() <= max) { |
| assert nextArc.label() <= max; |
| assert nextArc.label() >= min : nextArc.label() + " " + min; |
| final IntsRefBuilder newInput = new IntsRefBuilder(); |
| newInput.copyInts(currentInput.get()); |
| newInput.append(nextArc.label()); |
| queue.add( |
| new Path<>( |
| t.dest, |
| new FST.Arc<T>().copyFrom(nextArc), |
| fst.outputs.add(path.output, nextArc.output()), |
| newInput)); |
| final int label = nextArc.label(); // used in assert |
| nextArc = nextArc.isLast() ? null : fst.readNextRealArc(nextArc, fstReader); |
| assert nextArc == null || label < nextArc.label() |
| : "last: " + label + " next: " + nextArc.label(); |
| } |
| } |
| } |
| } |
| return endNodes; |
| } |
| } |