lucene/core/src/java/org/apache/lucene/util/fst/Builder.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.util.fst;


 import java.io.IOException;

 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc

 // TODO: could we somehow stream an FST to disk while we
 // build it?

 /**
  * Builds a minimal FST (maps an IntsRef term to an arbitrary
  * output) from pre-sorted terms with outputs.  The FST
  * becomes an FSA if you use NoOutputs.  The FST is written
  * on-the-fly into a compact serialized format byte array, which can
  * be saved to / loaded from a Directory or used directly
  * for traversal.  The FST is always finite (no cycles).
  *
  * <p>NOTE: The algorithm is described at
  * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698</p>
  *
  * <p>The parameterized type T is the output type.  See the
  * subclasses of {@link Outputs}.
  *
  * <p>FSTs larger than 2.1GB are now possible (as of Lucene
  * 4.2).  FSTs containing more than 2.1B nodes are also now
  * possible, however they cannot be packed.
  *
  * @lucene.experimental
  */

 public class Builder<T> {

   /**
    * Default oversizing factor used to decide whether to encode a node with direct addressing or binary search.
    * Default is 1: ensure no oversizing on average.
    * <p>
    * This factor does not determine whether to encode a node with a list of variable length arcs or with
    * fixed length arcs. It only determines the effective encoding of a node that is already known to be
    * encoded with fixed length arcs.
    * See {@code FST.shouldExpandNodeWithFixedLengthArcs()}
    * and {@code FST.shouldExpandNodeWithDirectAddressing()}.
    * <p>
    * For English words we measured 217K nodes, only 3.27% nodes are encoded with fixed length arcs,
    * and 99.99% of them with direct addressing. Overall FST memory reduced by 1.67%.
    * <p>
    * For worst case we measured 168K nodes, 50% of them are encoded with fixed length arcs,
    * and 14% of them with direct encoding. Overall FST memory reduced by 0.8%.
    * <p>
    * Use {@code TestFstDirectAddressing.main()}
    * and {@code TestFstDirectAddressing.testWorstCaseForDirectAddressing()}
    * to evaluate a change.
    *
    * @see #setDirectAddressingMaxOversizingFactor
    */
   static final float DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR = 1.0f;

   private final NodeHash<T> dedupHash;
   final FST<T> fst;
   private final T NO_OUTPUT;

   // private static final boolean DEBUG = true;

   // simplistic pruning: we prune node (and all following
   // nodes) if less than this number of terms go through it:
   private final int minSuffixCount1;

   // better pruning: we prune node (and all following
   // nodes) if the prior node has less than this number of
   // terms go through it:
   private final int minSuffixCount2;

   private final boolean doShareNonSingletonNodes;
   private final int shareMaxTailLength;

   private final IntsRefBuilder lastInput = new IntsRefBuilder();

   // NOTE: cutting this over to ArrayList instead loses ~6%
   // in build performance on 9.8M Wikipedia terms; so we
   // left this as an array:
   // current "frontier"
   private UnCompiledNode<T>[] frontier;

   // Used for the BIT_TARGET_NEXT optimization (whereby
   // instead of storing the address of the target node for
   // a given arc, we mark a single bit noting that the next
   // node in the byte[] is the target node):
   long lastFrozenNode;

   // Reused temporarily while building the FST:
   int[] numBytesPerArc = new int[4];
   int[] numLabelBytesPerArc = new int[numBytesPerArc.length];
   final FixedLengthArcsBuffer fixedLengthArcsBuffer = new FixedLengthArcsBuffer();

   long arcCount;
   long nodeCount;
   long binarySearchNodeCount;
   long directAddressingNodeCount;

   boolean allowFixedLengthArcs;
   float directAddressingMaxOversizingFactor = DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR;
   long directAddressingExpansionCredit;

   BytesStore bytes;

   /**
    * Instantiates an FST/FSA builder without any pruning. A shortcut to {@link
    * #Builder(FST.INPUT_TYPE, int, int, boolean, boolean, int, Outputs, boolean, int)} with
    * pruning options turned off.
    */
   public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
     this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
   }

   /**
    * Instantiates an FST/FSA builder with all the possible tuning and construction
    * tweaks. Read parameter documentation carefully.
    *
    * @param inputType
    *    The input type (transition labels). Can be anything from {@link INPUT_TYPE}
    *    enumeration. Shorter types will consume less memory. Strings (character sequences) are
    *    represented as {@link INPUT_TYPE#BYTE4} (full unicode codepoints).
    *
    * @param minSuffixCount1
    *    If pruning the input graph during construction, this threshold is used for telling
    *    if a node is kept or pruned. If transition_count(node) &gt;= minSuffixCount1, the node
    *    is kept.
    *
    * @param minSuffixCount2
    *    (Note: only Mike McCandless knows what this one is really doing...)
    *
    * @param doShareSuffix
    *    If <code>true</code>, the shared suffixes will be compacted into unique paths.
    *    This requires an additional RAM-intensive hash map for lookups in memory. Setting this parameter to
    *    <code>false</code> creates a single suffix path for all input sequences. This will result in a larger
    *    FST, but requires substantially less memory and CPU during building.
    *
    * @param doShareNonSingletonNodes
    *    Only used if doShareSuffix is true.  Set this to
    *    true to ensure FST is fully minimal, at cost of more
    *    CPU and more RAM during building.
    *
    * @param shareMaxTailLength
    *    Only used if doShareSuffix is true.  Set this to
    *    Integer.MAX_VALUE to ensure FST is fully minimal, at cost of more
    *    CPU and more RAM during building.
    *
    * @param outputs The output type for each input sequence. Applies only if building an FST. For
    *    FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
    *    singleton output object.
    *
    * @param allowFixedLengthArcs Pass false to disable the fixed length arc optimization (binary search or
    *    direct addressing) while building the FST; this will make the resulting FST smaller but slower to
    *    traverse.
    *
    * @param bytesPageBits How many bits wide to make each
    *    byte[] block in the BytesStore; if you know the FST
    *    will be large then make this larger.  For example 15
    *    bits = 32768 byte pages.
    */
   public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
                  boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
                  boolean allowFixedLengthArcs, int bytesPageBits) {
     this.minSuffixCount1 = minSuffixCount1;
     this.minSuffixCount2 = minSuffixCount2;
     this.doShareNonSingletonNodes = doShareNonSingletonNodes;
     this.shareMaxTailLength = shareMaxTailLength;
     this.allowFixedLengthArcs = allowFixedLengthArcs;
     fst = new FST<>(inputType, outputs, bytesPageBits);
     bytes = fst.bytes;
     assert bytes != null;
     if (doShareSuffix) {
       dedupHash = new NodeHash<>(fst, bytes.getReverseReader(false));
     } else {
       dedupHash = null;
     }
     NO_OUTPUT = outputs.getNoOutput();

     @SuppressWarnings({"rawtypes","unchecked"}) final UnCompiledNode<T>[] f =
         (UnCompiledNode<T>[]) new UnCompiledNode[10];
     frontier = f;
     for(int idx=0;idx<frontier.length;idx++) {
       frontier[idx] = new UnCompiledNode<>(this, idx);
     }
   }

   /**
    * Overrides the default the maximum oversizing of fixed array allowed to enable direct addressing
    * of arcs instead of binary search.
    * <p>
    * Setting this factor to a negative value (e.g. -1) effectively disables direct addressing,
    * only binary search nodes will be created.
    *
    * @see #DIRECT_ADDRESSING_MAX_OVERSIZING_FACTOR
    */
   public Builder<T> setDirectAddressingMaxOversizingFactor(float factor) {
     directAddressingMaxOversizingFactor = factor;
     return this;
   }

   /**
    * @see #setDirectAddressingMaxOversizingFactor(float)
    */
   public float getDirectAddressingMaxOversizingFactor() {
     return directAddressingMaxOversizingFactor;
   }

   public long getTermCount() {
     return frontier[0].inputCount;
   }

   public long getNodeCount() {
     // 1+ in order to count the -1 implicit final node
     return 1+nodeCount;
   }

   public long getArcCount() {
     return arcCount;
   }

   public long getMappedStateCount() {
     return dedupHash == null ? 0 : nodeCount;
   }

   private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
     final long node;
     long bytesPosStart = bytes.getPosition();
     if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
       if (nodeIn.numArcs == 0) {
         node = fst.addNode(this, nodeIn);
         lastFrozenNode = node;
       } else {
         node = dedupHash.add(this, nodeIn);
       }
     } else {
       node = fst.addNode(this, nodeIn);
     }
     assert node != -2;

     long bytesPosEnd = bytes.getPosition();
     if (bytesPosEnd != bytesPosStart) {
       // The FST added a new node:
       assert bytesPosEnd > bytesPosStart;
       lastFrozenNode = node;
     }

     nodeIn.clear();

     final CompiledNode fn = new CompiledNode();
     fn.node = node;
     return fn;
   }

   private void freezeTail(int prefixLenPlus1) throws IOException {
     //System.out.println("  compileTail " + prefixLenPlus1);
     final int downTo = Math.max(1, prefixLenPlus1);
     for(int idx=lastInput.length(); idx >= downTo; idx--) {

       boolean doPrune = false;
       boolean doCompile = false;

       final UnCompiledNode<T> node = frontier[idx];
       final UnCompiledNode<T> parent = frontier[idx-1];

       if (node.inputCount < minSuffixCount1) {
         doPrune = true;
         doCompile = true;
       } else if (idx > prefixLenPlus1) {
         // prune if parent's inputCount is less than suffixMinCount2
         if (parent.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && parent.inputCount == 1 && idx > 1)) {
           // my parent, about to be compiled, doesn't make the cut, so
           // I'm definitely pruned

           // if minSuffixCount2 is 1, we keep only up
           // until the 'distinguished edge', ie we keep only the
           // 'divergent' part of the FST. if my parent, about to be
           // compiled, has inputCount 1 then we are already past the
           // distinguished edge.  NOTE: this only works if
           // the FST outputs are not "compressible" (simple
           // ords ARE compressible).
           doPrune = true;
         } else {
           // my parent, about to be compiled, does make the cut, so
           // I'm definitely not pruned
           doPrune = false;
         }
         doCompile = true;
       } else {
         // if pruning is disabled (count is 0) we can always
         // compile current node
         doCompile = minSuffixCount2 == 0;
       }

       //System.out.println("    label=" + ((char) lastInput.ints[lastInput.offset+idx-1]) + " idx=" + idx + " inputCount=" + frontier[idx].inputCount + " doCompile=" + doCompile + " doPrune=" + doPrune);

       if (node.inputCount < minSuffixCount2 || (minSuffixCount2 == 1 && node.inputCount == 1 && idx > 1)) {
         // drop all arcs
         for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
           @SuppressWarnings({"rawtypes","unchecked"}) final UnCompiledNode<T> target =
           (UnCompiledNode<T>) node.arcs[arcIdx].target;
           target.clear();
         }
         node.numArcs = 0;
       }

       if (doPrune) {
         // this node doesn't make it -- deref it
         node.clear();
         parent.deleteLast(lastInput.intAt(idx-1), node);
       } else {

         if (minSuffixCount2 != 0) {
           compileAllTargets(node, lastInput.length()-idx);
         }
         final T nextFinalOutput = node.output;

         // We "fake" the node as being final if it has no
         // outgoing arcs; in theory we could leave it
         // as non-final (the FST can represent this), but
         // FSTEnum, Util, etc., have trouble w/ non-final
         // dead-end states:
         final boolean isFinal = node.isFinal || node.numArcs == 0;

         if (doCompile) {
           // this node makes it and we now compile it.  first,
           // compile any targets that were previously
           // undecided:
           parent.replaceLast(lastInput.intAt(idx-1),
                              compileNode(node, 1+lastInput.length()-idx),
                              nextFinalOutput,
                              isFinal);
         } else {
           // replaceLast just to install
           // nextFinalOutput/isFinal onto the arc
           parent.replaceLast(lastInput.intAt(idx-1),
                              node,
                              nextFinalOutput,
                              isFinal);
           // this node will stay in play for now, since we are
           // undecided on whether to prune it.  later, it
           // will be either compiled or pruned, so we must
           // allocate a new node:
           frontier[idx] = new UnCompiledNode<>(this, idx);
         }
       }
     }
   }

   // for debugging
   /*
   private String toString(BytesRef b) {
     try {
       return b.utf8ToString() + " " + b;
     } catch (Throwable t) {
       return b.toString();
     }
   }
   */

   /** Add the next input/output pair.  The provided input
    *  must be sorted after the previous one according to
    *  {@link IntsRef#compareTo}.  It's also OK to add the same
    *  input twice in a row with different outputs, as long
    *  as {@link Outputs} implements the {@link Outputs#merge}
    *  method. Note that input is fully consumed after this
    *  method is returned (so caller is free to reuse), but
    *  output is not.  So if your outputs are changeable (eg
    *  {@link ByteSequenceOutputs} or {@link
    *  IntSequenceOutputs}) then you cannot reuse across
    *  calls. */
   public void add(IntsRef input, T output) throws IOException {
     /*
     if (DEBUG) {
       BytesRef b = new BytesRef(input.length);
       for(int x=0;x<input.length;x++) {
         b.bytes[x] = (byte) input.ints[x];
       }
       b.length = input.length;
       if (output == NO_OUTPUT) {
         System.out.println("\nFST ADD: input=" + toString(b) + " " + b);
       } else {
         System.out.println("\nFST ADD: input=" + toString(b) + " " + b + " output=" + fst.outputs.outputToString(output));
       }
     }
     */

     // De-dup NO_OUTPUT since it must be a singleton:
     if (output.equals(NO_OUTPUT)) {
       output = NO_OUTPUT;
     }

     assert lastInput.length() == 0 || input.compareTo(lastInput.get()) >= 0: "inputs are added out of order lastInput=" + lastInput.get() + " vs input=" + input;
     assert validOutput(output);

     //System.out.println("\nadd: " + input);
     if (input.length == 0) {
       // empty input: only allowed as first input.  we have
       // to special case this because the packed FST
       // format cannot represent the empty input since
       // 'finalness' is stored on the incoming arc, not on
       // the node
       frontier[0].inputCount++;
       frontier[0].isFinal = true;
       fst.setEmptyOutput(output);
       return;
     }

     // compare shared prefix length
     int pos1 = 0;
     int pos2 = input.offset;
     final int pos1Stop = Math.min(lastInput.length(), input.length);
     while(true) {
       frontier[pos1].inputCount++;
       //System.out.println("  incr " + pos1 + " ct=" + frontier[pos1].inputCount + " n=" + frontier[pos1]);
       if (pos1 >= pos1Stop || lastInput.intAt(pos1) != input.ints[pos2]) {
         break;
       }
       pos1++;
       pos2++;
     }
     final int prefixLenPlus1 = pos1+1;

     if (frontier.length < input.length+1) {
       final UnCompiledNode<T>[] next = ArrayUtil.grow(frontier, input.length+1);
       for(int idx=frontier.length;idx<next.length;idx++) {
         next[idx] = new UnCompiledNode<>(this, idx);
       }
       frontier = next;
     }

     // minimize/compile states from previous input's
     // orphan'd suffix
     freezeTail(prefixLenPlus1);

     // init tail states for current input
     for(int idx=prefixLenPlus1;idx<=input.length;idx++) {
       frontier[idx-1].addArc(input.ints[input.offset + idx - 1],
                              frontier[idx]);
       frontier[idx].inputCount++;
     }

     final UnCompiledNode<T> lastNode = frontier[input.length];
     if (lastInput.length() != input.length || prefixLenPlus1 != input.length + 1) {
       lastNode.isFinal = true;
       lastNode.output = NO_OUTPUT;
     }

     // push conflicting outputs forward, only as far as
     // needed
     for(int idx=1;idx<prefixLenPlus1;idx++) {
       final UnCompiledNode<T> node = frontier[idx];
       final UnCompiledNode<T> parentNode = frontier[idx-1];

       final T lastOutput = parentNode.getLastOutput(input.ints[input.offset + idx - 1]);
       assert validOutput(lastOutput);

       final T commonOutputPrefix;
       final T wordSuffix;

       if (lastOutput != NO_OUTPUT) {
         commonOutputPrefix = fst.outputs.common(output, lastOutput);
         assert validOutput(commonOutputPrefix);
         wordSuffix = fst.outputs.subtract(lastOutput, commonOutputPrefix);
         assert validOutput(wordSuffix);
         parentNode.setLastOutput(input.ints[input.offset + idx - 1], commonOutputPrefix);
         node.prependOutput(wordSuffix);
       } else {
         commonOutputPrefix = wordSuffix = NO_OUTPUT;
       }

       output = fst.outputs.subtract(output, commonOutputPrefix);
       assert validOutput(output);
     }

     if (lastInput.length() == input.length && prefixLenPlus1 == 1+input.length) {
       // same input more than 1 time in a row, mapping to
       // multiple outputs
       lastNode.output = fst.outputs.merge(lastNode.output, output);
     } else {
       // this new arc is private to this new input; set its
       // arc output to the leftover output:
       frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
     }

     // save last input
     lastInput.copyInts(input);

     //System.out.println("  count[0]=" + frontier[0].inputCount);
   }

   private boolean validOutput(T output) {
     return output == NO_OUTPUT || !output.equals(NO_OUTPUT);
   }

   /** Returns final FST.  NOTE: this will return null if
    *  nothing is accepted by the FST. */
   public FST<T> finish() throws IOException {

     final UnCompiledNode<T> root = frontier[0];

     // minimize nodes in the last word's suffix
     freezeTail(0);
     if (root.inputCount < minSuffixCount1 || root.inputCount < minSuffixCount2 || root.numArcs == 0) {
       if (fst.emptyOutput == null) {
         return null;
       } else if (minSuffixCount1 > 0 || minSuffixCount2 > 0) {
         // empty string got pruned
         return null;
       }
     } else {
       if (minSuffixCount2 != 0) {
         compileAllTargets(root, lastInput.length());
       }
     }
     //if (DEBUG) System.out.println("  builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
     fst.finish(compileNode(root, lastInput.length()).node);

     return fst;
   }

   private void compileAllTargets(UnCompiledNode<T> node, int tailLength) throws IOException {
     for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
       final Arc<T> arc = node.arcs[arcIdx];
       if (!arc.target.isCompiled()) {
         // not yet compiled
         @SuppressWarnings({"rawtypes","unchecked"}) final UnCompiledNode<T> n = (UnCompiledNode<T>) arc.target;
         if (n.numArcs == 0) {
           //System.out.println("seg=" + segment + "        FORCE final arc=" + (char) arc.label);
           arc.isFinal = n.isFinal = true;
         }
         arc.target = compileNode(n, tailLength-1);
       }
     }
   }

   /** Expert: holds a pending (seen but not yet serialized) arc. */
   public static class Arc<T> {
     public int label;                             // really an "unsigned" byte
     public Node target;
     public boolean isFinal;
     public T output;
     public T nextFinalOutput;
   }

   // NOTE: not many instances of Node or CompiledNode are in
   // memory while the FST is being built; it's only the
   // current "frontier":

   static interface Node {
     boolean isCompiled();
   }

   public long fstRamBytesUsed() {
     return fst.ramBytesUsed();
   }

   static final class CompiledNode implements Node {
     long node;
     @Override
     public boolean isCompiled() {
       return true;
     }
   }

   /** Expert: holds a pending (seen but not yet serialized) Node. */
   public static final class UnCompiledNode<T> implements Node {
     final Builder<T> owner;
     public int numArcs;
     public Arc<T>[] arcs;
     // TODO: instead of recording isFinal/output on the
     // node, maybe we should use -1 arc to mean "end" (like
     // we do when reading the FST).  Would simplify much
     // code here...
     public T output;
     public boolean isFinal;
     public long inputCount;

     /** This node's depth, starting from the automaton root. */
     public final int depth;

     /**
      * @param depth
      *          The node's depth starting from the automaton root. Needed for
      *          LUCENE-2934 (node expansion based on conditions other than the
      *          fanout size).
      */
     @SuppressWarnings({"rawtypes","unchecked"})
     public UnCompiledNode(Builder<T> owner, int depth) {
       this.owner = owner;
       arcs = (Arc<T>[]) new Arc[1];
       arcs[0] = new Arc<>();
       output = owner.NO_OUTPUT;
       this.depth = depth;
     }

     @Override
     public boolean isCompiled() {
       return false;
     }

     public void clear() {
       numArcs = 0;
       isFinal = false;
       output = owner.NO_OUTPUT;
       inputCount = 0;

       // We don't clear the depth here because it never changes
       // for nodes on the frontier (even when reused).
     }

     public T getLastOutput(int labelToMatch) {
       assert numArcs > 0;
       assert arcs[numArcs-1].label == labelToMatch;
       return arcs[numArcs-1].output;
     }

     public void addArc(int label, Node target) {
       assert label >= 0;
       assert numArcs == 0 || label > arcs[numArcs-1].label: "arc[numArcs-1].label=" + arcs[numArcs-1].label + " new label=" + label + " numArcs=" + numArcs;
       if (numArcs == arcs.length) {
         final Arc<T>[] newArcs = ArrayUtil.grow(arcs, numArcs+1);
         for(int arcIdx=numArcs;arcIdx<newArcs.length;arcIdx++) {
           newArcs[arcIdx] = new Arc<>();
         }
         arcs = newArcs;
       }
       final Arc<T> arc = arcs[numArcs++];
       arc.label = label;
       arc.target = target;
       arc.output = arc.nextFinalOutput = owner.NO_OUTPUT;
       arc.isFinal = false;
     }

     public void replaceLast(int labelToMatch, Node target, T nextFinalOutput, boolean isFinal) {
       assert numArcs > 0;
       final Arc<T> arc = arcs[numArcs-1];
       assert arc.label == labelToMatch: "arc.label=" + arc.label + " vs " + labelToMatch;
       arc.target = target;
       //assert target.node != -2;
       arc.nextFinalOutput = nextFinalOutput;
       arc.isFinal = isFinal;
     }

     public void deleteLast(int label, Node target) {
       assert numArcs > 0;
       assert label == arcs[numArcs-1].label;
       assert target == arcs[numArcs-1].target;
       numArcs--;
     }

     public void setLastOutput(int labelToMatch, T newOutput) {
       assert owner.validOutput(newOutput);
       assert numArcs > 0;
       final Arc<T> arc = arcs[numArcs-1];
       assert arc.label == labelToMatch;
       arc.output = newOutput;
     }

     // pushes an output prefix forward onto all arcs
     public void prependOutput(T outputPrefix) {
       assert owner.validOutput(outputPrefix);

       for(int arcIdx=0;arcIdx<numArcs;arcIdx++) {
         arcs[arcIdx].output = owner.fst.outputs.add(outputPrefix, arcs[arcIdx].output);
         assert owner.validOutput(arcs[arcIdx].output);
       }

       if (isFinal) {
         output = owner.fst.outputs.add(outputPrefix, output);
         assert owner.validOutput(output);
       }
     }
   }

   /**
    * Reusable buffer for building nodes with fixed length arcs (binary search or direct addressing).
    */
   static class FixedLengthArcsBuffer {

     // Initial capacity is the max length required for the header of a node with fixed length arcs:
     // header(byte) + numArcs(vint) + numBytes(vint)
     private byte[] bytes = new byte[11];
     private final ByteArrayDataOutput bado = new ByteArrayDataOutput(bytes);

     /** Ensures the capacity of the internal byte array. Enlarges it if needed. */
     FixedLengthArcsBuffer ensureCapacity(int capacity) {
       if (bytes.length < capacity) {
         bytes = new byte[ArrayUtil.oversize(capacity, Byte.BYTES)];
         bado.reset(bytes);
       }
       return this;
     }

     FixedLengthArcsBuffer resetPosition() {
       bado.reset(bytes);
       return this;
     }

     FixedLengthArcsBuffer writeByte(byte b) {
       bado.writeByte(b);
       return this;
     }

     FixedLengthArcsBuffer writeVInt(int i) {
       try {
         bado.writeVInt(i);
       } catch (IOException e) { // Never thrown.
         throw new RuntimeException(e);
       }
       return this;
     }

     int getPosition() {
       return bado.getPosition();
     }

     /** Gets the internal byte array. */
     byte[] getBytes() {
       return bytes;
     }
   }
 }