/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.core;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.RollingBuffer;

/**
* Converts an incoming graph token stream, such as one from {@link SynonymGraphFilter}, into a flat
* form so that all nodes form a single linear chain with no side paths. Every path through the
* graph touches every node. This is necessary when indexing a graph token stream, because the index
* does not save {@link PositionLengthAttribute} and so it cannot preserve the graph structure.
* However, at search time, query parsers can correctly handle the graph and this token filter
* should <b>not</b> be used.
*
* <p>If the graph was not already flat to start, this is likely a lossy process, i.e. it will often
* cause the graph to accept token sequences it should not, and to reject token sequences it should
* not.
*
* <p>However, when applying synonyms during indexing, this is necessary because Lucene already does
* not index a graph and so the indexing process is already lossy (it ignores the {@link
* PositionLengthAttribute}).
*
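 * <p>A minimal index-time usage sketch: a {@code SynonymMap} named {@code synonyms} is assumed to
 * have been built elsewhere (e.g. with {@code SynonymMap.Builder}), and the tokenizer choice is
 * illustrative only:
 *
 * <pre class="prettyprint">
 * Analyzer indexAnalyzer =
 *     new Analyzer() {
 *       &#64;Override
 *       protected TokenStreamComponents createComponents(String fieldName) {
 *         Tokenizer tokenizer = new WhitespaceTokenizer();
 *         TokenStream stream = new SynonymGraphFilter(tokenizer, synonyms, true);
 *         stream = new FlattenGraphFilter(stream);
 *         return new TokenStreamComponents(tokenizer, stream);
 *       }
 *     };
 * </pre>
 *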
* @lucene.experimental
*/
public final class FlattenGraphFilter extends TokenFilter {
/** Holds all tokens leaving a given input position. */
private static final class InputNode implements RollingBuffer.Resettable {
private final List<AttributeSource.State> tokens = new ArrayList<>();
    /** Our input node, or -1 if we haven't been assigned yet. */
int node = -1;
    /**
     * Maximum destination ("to") input node over all tokens leaving here; we use this to know
     * when this position can be frozen.
     */
int maxToNode = -1;
    /**
     * Which output node we currently map to; this can only increase as we see more input tokens,
     * until we are finished with this position.
     */
int outputNode = -1;
/** Which token (index into {@link #tokens}) we will next output. */
int nextOut;
@Override
public void reset() {
tokens.clear();
node = -1;
outputNode = -1;
maxToNode = -1;
nextOut = 0;
}
  }

  /**
* Gathers up merged input positions into a single output position, only for the current
* "frontier" of nodes we've seen but can't yet output because they are not frozen.
*/
private static final class OutputNode implements RollingBuffer.Resettable {
private final List<Integer> inputNodes = new ArrayList<>();
/** Node ID for this output, or -1 if we haven't been assigned yet. */
int node = -1;
/** Which input node (index into {@link #inputNodes}) we will next output. */
int nextOut;
/** Start offset of tokens leaving this node. */
int startOffset = -1;
/** End offset of tokens arriving to this node. */
int endOffset = -1;
@Override
public void reset() {
inputNodes.clear();
node = -1;
nextOut = 0;
startOffset = -1;
endOffset = -1;
}
}
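  /** Rolling buffer of pending input positions, indexed by input node ID. */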
private final RollingBuffer<InputNode> inputNodes =
new RollingBuffer<InputNode>() {
@Override
protected InputNode newInstance() {
return new InputNode();
}
};
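  /** Rolling buffer of not-yet-frozen output positions, indexed by output node ID. */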
private final RollingBuffer<OutputNode> outputNodes =
new RollingBuffer<OutputNode>() {
@Override
protected OutputNode newInstance() {
return new OutputNode();
}
};
private final PositionIncrementAttribute posIncAtt =
addAttribute(PositionIncrementAttribute.class);
private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  /** Which input node the last seen token leaves from. */
  private int inputFrom;

  /** We are currently releasing tokens leaving from this output node. */
  private int outputFrom;

  // for debugging:
  // private int retOutputFrom;

  /** True once the input stream is exhausted; after that we only drain buffered tokens. */
  private boolean done;

  /** Output node of the last token we released; used to compute corrected position increments. */
  private int lastOutputFrom;

  // End state (offset, position increment) captured from the input once it is exhausted,
  // replayed from end():
  private int finalOffset;
  private int finalPosInc;

  /** High-water mark of the input-node lookahead buffer; see {@link #getMaxLookaheadUsed()}. */
  private int maxLookaheadUsed;

  /** Start offset of the last token we released; offsets must never go backwards. */
  private int lastStartOffset;
public FlattenGraphFilter(TokenStream in) {
super(in);
}
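  /**
   * Tries to emit the next buffered token, but only once its output node is frozen, i.e. no
   * future input token can still arrive at it; returns false if nothing can be released yet.
   */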
private boolean releaseBufferedToken() {
// We only need the while loop (retry) if we have a hole (an output node that has no tokens
// leaving):
while (outputFrom < outputNodes.getMaxPos()) {
OutputNode output = outputNodes.get(outputFrom);
if (output.inputNodes.isEmpty()) {
        // No tokens arrived at this node, which happens for the first node
// after a hole:
// System.out.println(" skip empty outputFrom=" + outputFrom);
outputFrom++;
continue;
}
int maxToNode = -1;
for (int inputNodeID : output.inputNodes) {
InputNode inputNode = inputNodes.get(inputNodeID);
assert inputNode.outputNode == outputFrom;
maxToNode = Math.max(maxToNode, inputNode.maxToNode);
}
// System.out.println(" release maxToNode=" + maxToNode + " vs inputFrom=" + inputFrom);
// TODO: we could shrink the frontier here somewhat if we
// always output posLen=1 as part of our "sausagizing":
if (maxToNode <= inputFrom || done) {
// System.out.println(" output node merged these inputs: " + output.inputNodes);
// These tokens are now frozen
assert output.nextOut < output.inputNodes.size()
: "output.nextOut="
+ output.nextOut
+ " vs output.inputNodes.size()="
+ output.inputNodes.size();
InputNode inputNode = inputNodes.get(output.inputNodes.get(output.nextOut));
if (done && inputNode.tokens.size() == 0 && outputFrom >= outputNodes.getMaxPos()) {
return false;
}
if (inputNode.tokens.size() == 0) {
assert inputNode.nextOut == 0;
assert output.nextOut == 0;
          // Hole dest nodes should never be merged since 1) we always
          // assign them to a new output position, and 2) they never have
          // arriving tokens, so they cannot be pushed:
assert output.inputNodes.size() == 1 : output.inputNodes.size();
outputFrom++;
inputNodes.freeBefore(output.inputNodes.get(0));
outputNodes.freeBefore(outputFrom);
continue;
}
assert inputNode.nextOut < inputNode.tokens.size();
restoreState(inputNode.tokens.get(inputNode.nextOut));
// Correct posInc
assert outputFrom >= lastOutputFrom;
posIncAtt.setPositionIncrement(outputFrom - lastOutputFrom);
int toInputNodeID = inputNode.node + posLenAtt.getPositionLength();
InputNode toInputNode = inputNodes.get(toInputNodeID);
// Correct posLen
assert toInputNode.outputNode > outputFrom;
posLenAtt.setPositionLength(toInputNode.outputNode - outputFrom);
lastOutputFrom = outputFrom;
inputNode.nextOut++;
// System.out.println(" ret " + this);
OutputNode outputEndNode = outputNodes.get(toInputNode.outputNode);
// Correct offsets
        // This is a bit messy; we must do this so offsets don't go backwards,
// which would otherwise happen if the replacement has more tokens
// than the input:
int startOffset = Math.max(lastStartOffset, output.startOffset);
// We must do this in case the incoming tokens have broken offsets:
int endOffset = Math.max(startOffset, outputEndNode.endOffset);
offsetAtt.setOffset(startOffset, endOffset);
lastStartOffset = startOffset;
if (inputNode.nextOut == inputNode.tokens.size()) {
output.nextOut++;
if (output.nextOut == output.inputNodes.size()) {
outputFrom++;
inputNodes.freeBefore(output.inputNodes.get(0));
outputNodes.freeBefore(outputFrom);
}
}
return true;
} else {
return false;
}
}
// System.out.println(" break false");
return false;
}
@Override
public boolean incrementToken() throws IOException {
// System.out.println("\nF.increment inputFrom=" + inputFrom + " outputFrom=" + outputFrom);
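    // Alternate between draining buffered tokens whose output position is frozen, and pulling
    // (and buffering) new tokens from the input: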
while (true) {
if (releaseBufferedToken()) {
// retOutputFrom += posIncAtt.getPositionIncrement();
// System.out.println(" return buffered: " + termAtt + " " + retOutputFrom + "-" +
// (retOutputFrom + posLenAtt.getPositionLength()));
// printStates();
return true;
} else if (done) {
// System.out.println(" done, return false");
return false;
}
if (input.incrementToken()) {
// Input node this token leaves from:
inputFrom += posIncAtt.getPositionIncrement();
int startOffset = offsetAtt.startOffset();
int endOffset = offsetAtt.endOffset();
// Input node this token goes to:
int inputTo = inputFrom + posLenAtt.getPositionLength();
// System.out.println(" input.inc " + termAtt + ": " + inputFrom + "-" + inputTo);
InputNode src = inputNodes.get(inputFrom);
if (src.node == -1) {
// This means the "from" node of this token was never seen as a "to" node,
// which should only happen if we just crossed a hole. This is a challenging
// case for us because we normally rely on the full dependencies expressed
// by the arcs to assign outgoing node IDs. It would be better if tokens
// were never dropped but instead just marked deleted with a new
// TermDeletedAttribute (boolean valued) ... but until that future, we have
// a hack here to forcefully jump the output node ID:
assert src.outputNode == -1;
src.node = inputFrom;
src.outputNode = outputNodes.getMaxPos() + 1;
// System.out.println(" hole: force to outputNode=" + src.outputNode);
OutputNode outSrc = outputNodes.get(src.outputNode);
// Not assigned yet:
assert outSrc.node == -1;
outSrc.node = src.outputNode;
outSrc.inputNodes.add(inputFrom);
outSrc.startOffset = startOffset;
} else {
OutputNode outSrc = outputNodes.get(src.outputNode);
if (outSrc.startOffset == -1 || startOffset > outSrc.startOffset) {
// "shrink wrap" the offsets so the original tokens (with most
// restrictive offsets) win:
outSrc.startOffset = Math.max(startOffset, outSrc.startOffset);
}
}
// Buffer this token:
src.tokens.add(captureState());
src.maxToNode = Math.max(src.maxToNode, inputTo);
maxLookaheadUsed = Math.max(maxLookaheadUsed, inputNodes.getBufferSize());
InputNode dest = inputNodes.get(inputTo);
if (dest.node == -1) {
// Common case: first time a token is arriving to this input position:
dest.node = inputTo;
}
// Always number output nodes sequentially:
int outputEndNode = src.outputNode + 1;
if (outputEndNode > dest.outputNode) {
if (dest.outputNode != -1) {
boolean removed =
outputNodes.get(dest.outputNode).inputNodes.remove(Integer.valueOf(inputTo));
assert removed;
}
// System.out.println(" increase output node: " + dest.outputNode + " vs " +
// outputEndNode);
outputNodes.get(outputEndNode).inputNodes.add(inputTo);
dest.outputNode = outputEndNode;
// Since all we ever do is merge incoming nodes together, and then renumber
// the merged nodes sequentially, we should only ever assign smaller node
// numbers:
assert outputEndNode <= inputTo
: "outputEndNode=" + outputEndNode + " vs inputTo=" + inputTo;
}
OutputNode outDest = outputNodes.get(dest.outputNode);
// "shrink wrap" the offsets so the original tokens (with most
// restrictive offsets) win:
if (outDest.endOffset == -1 || endOffset < outDest.endOffset) {
outDest.endOffset = endOffset;
}
} else {
// System.out.println(" got false from input");
input.end();
finalPosInc = posIncAtt.getPositionIncrement();
finalOffset = offsetAtt.endOffset();
done = true;
// Don't return false here: we need to force release any buffered tokens now
}
}
}
// Only for debugging:
/*
private void printStates() {
System.out.println("states:");
for(int i=outputFrom;i<outputNodes.getMaxPos();i++) {
OutputNode outputNode = outputNodes.get(i);
System.out.println(" output " + i + ": inputs " + outputNode.inputNodes);
for(int inputNodeID : outputNode.inputNodes) {
InputNode inputNode = inputNodes.get(inputNodeID);
assert inputNode.outputNode == i;
}
}
}
*/
  @Override
  public void end() throws IOException {
    clearAttributes();
    if (done) {
      // NOTE, shady: don't call super.end, because we already called input.end() from
      // incrementToken. On exc, done is false, and we will not have set these:
      posIncAtt.setPositionIncrement(finalPosInc);
      offsetAtt.setOffset(finalOffset, finalOffset);
    } else {
      super.end();
    }
  }
@Override
public void reset() throws IOException {
// System.out.println("F: reset");
super.reset();
inputFrom = -1;
inputNodes.reset();
InputNode in = inputNodes.get(0);
in.node = 0;
in.outputNode = 0;
outputNodes.reset();
OutputNode out = outputNodes.get(0);
out.node = 0;
out.inputNodes.add(0);
out.startOffset = 0;
outputFrom = 0;
// retOutputFrom = -1;
lastOutputFrom = -1;
done = false;
finalPosInc = -1;
finalOffset = -1;
lastStartOffset = 0;
maxLookaheadUsed = 0;
}
/** For testing */
public int getMaxLookaheadUsed() {
return maxLookaheadUsed;
}
}