/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.synonym;
import java.io.IOException;
import java.io.StringReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockGraphTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.AutomatonTestUtil;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
import org.apache.lucene.util.automaton.Transition;
import org.apache.lucene.util.fst.Util;
public class TestSynonymGraphFilter extends BaseTokenStreamTestCase {
/** Set as a side effect by {@link #getAnalyzer} and {@link #getFlattenAnalyzer}. */
private SynonymGraphFilter synFilter;
private FlattenGraphFilter flattenFilter;
public void testBasicKeepOrigOneOutput() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x", true);
Analyzer a = getAnalyzer(b, true);
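// assertAnalyzesTo column order used throughout this class: expected terms, start
// offsets, end offsets, token types, position increments, position lengths. Here
// the synonym "x" (posLength=2) stacks over "a b": "a" follows with posInc=0,
// i.e. at the same position as "x".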
assertAnalyzesTo(a,
"c a b",
new String[] {"c", "x", "a", "b"},
new int[] { 0, 2, 2, 4},
new int[] { 1, 5, 3, 5},
new String[] {"word", "SYNONYM", "word", "word"},
new int[] { 1, 1, 0, 1},
new int[] { 1, 2, 1, 1});
a.close();
}
public void testMixedKeepOrig() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x", true);
add(b, "e f", "y", false);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a b c e f g",
new String[] {"c", "x", "a", "b", "c", "y", "g"},
new int[] { 0, 2, 2, 4, 6, 8, 12},
new int[] { 1, 5, 3, 5, 7, 11, 13},
new String[] {"word", "SYNONYM", "word", "word", "word", "SYNONYM", "word"},
new int[] { 1, 1, 0, 1, 1, 1, 1},
new int[] { 1, 2, 1, 1, 1, 1, 1});
a.close();
}
public void testNoParseAfterBuffer() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "b a", "x", true);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"b b b",
new String[] {"b", "b", "b"},
new int[] { 0, 2, 4},
new int[] { 1, 3, 5},
new String[] {"word", "word", "word"},
new int[] { 1, 1, 1},
new int[] { 1, 1, 1});
a.close();
}
public void testOneInputMultipleOutputKeepOrig() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x", true);
add(b, "a b", "y", true);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a b c",
new String[] {"c", "x", "y", "a", "b", "c"},
new int[] { 0, 2, 2, 2, 4, 6},
new int[] { 1, 5, 5, 3, 5, 7},
new String[] {"word", "SYNONYM", "SYNONYM", "word", "word", "word"},
new int[] { 1, 1, 0, 0, 1, 1},
new int[] { 1, 2, 2, 1, 1, 1});
a.close();
}
/**
* Verify token type and positionLength after analysis.
*/
public void testPositionLengthAndTypeSimple() throws Exception {
String testFile =
"spider man, spiderman";
Analyzer analyzer = solrSynsToAnalyzer(testFile);
assertAnalyzesToPositions(analyzer, "spider man",
new String[]{"spiderman", "spider", "man"},
new String[]{"SYNONYM", "word", "word"},
new int[]{1, 0, 1},
new int[]{2, 1, 1});
analyzer.close();
}
/**
* parse a syn file with some escaped syntax chars
*/
public void testEscapedStuff() throws Exception {
String testFile =
"a\\=>a => b\\=>b\n" +
"a\\,a => b\\,b";
Analyzer analyzer = solrSynsToAnalyzer(testFile);
assertAnalyzesTo(analyzer, "ball",
new String[]{"ball"},
new int[]{1});
assertAnalyzesTo(analyzer, "a=>a",
new String[]{"b=>b"},
new int[]{1});
assertAnalyzesTo(analyzer, "a,a",
new String[]{"b,b"},
new int[]{1});
analyzer.close();
}
/**
* parse a syn file with bad syntax
*/
public void testInvalidAnalyzesToNothingOutput() throws Exception {
String testFile = "a => 1";
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, false);
SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
expectThrows(ParseException.class, () -> parser.parse(new StringReader(testFile)));
analyzer.close();
}
/**
* parse a syn file with bad syntax
*/
public void testInvalidDoubleMap() throws Exception {
String testFile = "a => b => c";
Analyzer analyzer = new MockAnalyzer(random());
SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
expectThrows(ParseException.class, () -> parser.parse(new StringReader(testFile)));
analyzer.close();
}
/**
* Tests some simple examples from the solr wiki
*/
public void testSimple() throws Exception {
String testFile =
"i-pod, ipod, ipoooood\n" +
"foo => foo bar\n" +
"foo => baz\n" +
"this test, that testing";
Analyzer analyzer = solrSynsToAnalyzer(testFile);
assertAnalyzesTo(analyzer, "ball",
new String[]{"ball"},
new int[]{1});
assertAnalyzesTo(analyzer, "i-pod",
new String[]{"ipod", "ipoooood", "i-pod"},
new int[]{1, 0, 0});
assertAnalyzesTo(analyzer, "foo",
new String[]{"foo", "baz", "bar"},
new int[]{1, 0, 1});
assertAnalyzesTo(analyzer, "this test",
new String[]{"that", "this", "testing", "test"},
new int[]{1, 0, 1, 0});
analyzer.close();
}
public void testBufferLength() throws Exception {
String testFile =
"c => 8 2 5 6 7\n" +
"f c e d f, 1\n" +
"c g a f d, 6 5 5\n" +
"e c => 4\n" +
"g => 5\n" +
"a g b f e => 5 0 7 7\n" +
"b => 1";
Analyzer analyzer = solrSynsToAnalyzer(testFile);
String doc = "b c g a f b d";
String[] expected = new String[]{"1", "8", "2", "5", "6", "7", "5", "a", "f", "1", "d"};
assertAnalyzesTo(analyzer, doc, expected);
analyzer.close();
}
private Analyzer solrSynsToAnalyzer(String syns) throws IOException, ParseException {
Analyzer analyzer = new MockAnalyzer(random());
SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
parser.parse(new StringReader(syns));
analyzer.close();
return getFlattenAnalyzer(parser, true);
}
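// Solr synonym syntax, as parsed by solrSynsToAnalyzer above: "a, b" declares an
// equivalence class (with expand=true, every entry expands to all terms in the
// class), while "a => b" is a one-way rule that replaces the left side; "," and
// "=>" can be backslash-escaped (see testEscapedStuff).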
public void testMoreThanOneLookAhead() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b c d", "x", true);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"a b c e",
new String[] {"a", "b", "c", "e"},
new int[] { 0, 2, 4, 6},
new int[] { 1, 3, 5, 7},
new String[] {"word", "word", "word", "word"},
new int[] { 1, 1, 1, 1},
new int[] { 1, 1, 1, 1});
a.close();
}
public void testLookaheadAfterParse() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "b b", "x", true);
add(b, "b", "y", true);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a, "b a b b",
new String[] {"y", "b", "a", "x", "b", "b"},
new int[] {0, 0, 2, 4, 4, 6},
new int[] {1, 1, 3, 7, 5, 7},
null,
new int[] {1, 0, 1, 1, 0, 1},
new int[] {1, 1, 1, 2, 1, 1},
true);
a.close();
}
public void testLookaheadSecondParse() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "b b b", "x", true);
add(b, "b", "y", true);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a, "b b",
new String[] {"y", "b", "y", "b"},
new int[] { 0, 0, 2, 2},
new int[] { 1, 1, 3, 3},
null,
new int[] { 1, 0, 1, 0},
new int[] { 1, 1, 1, 1},
true);
a.close();
}
public void testOneInputMultipleOutputNoKeepOrig() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x", false);
add(b, "a b", "y", false);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a b c",
new String[] {"c", "x", "y", "c"},
new int[] { 0, 2, 2, 6},
new int[] { 1, 5, 5, 7},
new String[] {"word", "SYNONYM", "SYNONYM", "word"},
new int[] { 1, 1, 0, 1},
new int[] { 1, 1, 1, 1});
a.close();
}
public void testOneInputMultipleOutputMixedKeepOrig() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x", true);
add(b, "a b", "y", false);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a b c",
new String[] {"c", "x", "y", "a", "b", "c"},
new int[] { 0, 2, 2, 2, 4, 6},
new int[] { 1, 5, 5, 3, 5, 7},
new String[] {"word", "SYNONYM", "SYNONYM", "word", "word", "word"},
new int[] { 1, 1, 0, 0, 1, 1},
new int[] { 1, 2, 2, 1, 1, 1});
a.close();
}
public void testSynAtEnd() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x", true);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c d e a b",
new String[] {"c", "d", "e", "x", "a", "b"},
new int[] { 0, 2, 4, 6, 6, 8},
new int[] { 1, 3, 5, 9, 7, 9},
new String[] {"word", "word", "word", "SYNONYM", "word", "word"},
new int[] { 1, 1, 1, 1, 0, 1},
new int[] { 1, 1, 1, 2, 1, 1});
a.close();
}
public void testTwoSynsInARow() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a", "x", false);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a a b",
new String[] {"c", "x", "x", "b"},
new int[] { 0, 2, 4, 6},
new int[] { 1, 3, 5, 7},
new String[] {"word", "SYNONYM", "SYNONYM", "word"},
new int[] { 1, 1, 1, 1},
new int[] { 1, 1, 1, 1});
a.close();
}
public void testBasicKeepOrigTwoOutputs() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x y", true);
add(b, "a b", "m n o", true);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a b d",
new String[] {"c", "x", "m", "a", "y", "n", "o", "b", "d"},
new int[] { 0, 2, 2, 2, 2, 2, 2, 4, 6},
new int[] { 1, 5, 5, 3, 5, 5, 5, 5, 7},
new String[] {"word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"},
new int[] { 1, 1, 0, 0, 1, 1, 1, 1, 1},
new int[] { 1, 1, 2, 4, 4, 1, 2, 1, 1});
a.close();
}
public void testNoCaptureIfNoMatch() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x y", true);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c d d",
new String[] {"c", "d", "d"},
new int[] { 0, 2, 4},
new int[] { 1, 3, 5},
new String[] {"word", "word", "word"},
new int[] { 1, 1, 1},
new int[] { 1, 1, 1});
assertEquals(0, synFilter.getCaptureCount());
a.close();
}
public void testBasicNotKeepOrigOneOutput() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x", false);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a b",
new String[] {"c", "x"},
new int[] {0, 2},
new int[] {1, 5},
new String[] {"word", "SYNONYM"},
new int[] {1, 1},
new int[] {1, 1});
a.close();
}
public void testBasicNoKeepOrigTwoOutputs() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x y", false);
add(b, "a b", "m n o", false);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a b d",
new String[] {"c", "x", "m", "y", "n", "o", "d"},
new int[] { 0, 2, 2, 2, 2, 2, 6},
new int[] { 1, 5, 5, 5, 5, 5, 7},
new String[] {"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word"},
new int[] { 1, 1, 0, 1, 1, 1, 1},
new int[] { 1, 1, 2, 3, 1, 1, 1});
a.close();
}
public void testIgnoreCase() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x y", false);
add(b, "a b", "m n o", false);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c A B D",
new String[] {"c", "x", "m", "y", "n", "o", "D"},
new int[] { 0, 2, 2, 2, 2, 2, 6},
new int[] { 1, 5, 5, 5, 5, 5, 7},
new String[] {"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word"},
new int[] { 1, 1, 0, 1, 1, 1, 1},
new int[] { 1, 1, 2, 3, 1, 1, 1});
a.close();
}
public void testDoNotIgnoreCase() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x y", false);
add(b, "a b", "m n o", false);
Analyzer a = getAnalyzer(b, false);
assertAnalyzesTo(a,
"c A B D",
new String[] {"c", "A", "B", "D"},
new int[] { 0, 2, 4, 6},
new int[] { 1, 3, 5, 7},
new String[] {"word", "word", "word", "word"},
new int[] { 1, 1, 1, 1},
new int[] { 1, 1, 1, 1});
a.close();
}
public void testBufferedFinish1() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b c", "m n o", false);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a b",
new String[] {"c", "a", "b"},
new int[] { 0, 2, 4},
new int[] { 1, 3, 5},
new String[] {"word", "word", "word"},
new int[] { 1, 1, 1},
new int[] { 1, 1, 1});
a.close();
}
public void testBufferedFinish2() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "m n o", false);
add(b, "d e", "m n o", false);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"c a d",
new String[] {"c", "a", "d"},
new int[] { 0, 2, 4},
new int[] { 1, 3, 5},
new String[] {"word", "word", "word"},
new int[] { 1, 1, 1},
new int[] { 1, 1, 1});
a.close();
}
public void testCanReuse() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b", "x", true);
Analyzer a = getAnalyzer(b, true);
for(int i=0;i<10;i++) {
assertAnalyzesTo(a,
"c a b",
new String[] {"c", "x", "a", "b"},
new int[] { 0, 2, 2, 4},
new int[] { 1, 5, 3, 5},
new String[] {"word", "SYNONYM", "word", "word"},
new int[] { 1, 1, 0, 1},
new int[] { 1, 2, 1, 1});
}
a.close();
}
/** Multiple input tokens map to a single output token */
public void testManyToOne() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b c", "z", true);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"a b c d",
new String[] {"z", "a", "b", "c", "d"},
new int[] { 0, 0, 2, 4, 6},
new int[] { 5, 1, 3, 5, 7},
new String[] {"SYNONYM", "word", "word", "word", "word"},
new int[] { 1, 0, 1, 1, 1},
new int[] { 3, 1, 1, 1, 1});
a.close();
}
public void testBufferAfterMatch() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "a b c d", "x", true);
add(b, "a b", "y", false);
// The 'c' token has to be buffered because SynGraphFilter
// needs to know whether a b c d -> x matches:
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a,
"f a b c e",
new String[] {"f", "y", "c", "e"},
new int[] { 0, 2, 6, 8},
new int[] { 1, 5, 7, 9},
new String[] {"word", "SYNONYM", "word", "word"},
new int[] { 1, 1, 1, 1},
new int[] { 1, 1, 1, 1});
a.close();
}
public void testZeroSyns() throws Exception {
Tokenizer tokenizer = new MockTokenizer();
tokenizer.setReader(new StringReader("aa bb"));
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () ->
new SynonymGraphFilter(tokenizer, new SynonymMap.Builder(true).build(), true));
assertEquals("fst must be non-null", ex.getMessage());
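// A SynonymMap built from an empty Builder has a null internal FST, which the
// SynonymGraphFilter constructor rejects up front rather than failing later.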
}
public void testOutputHangsOffEnd() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = false;
// b hangs off the end (no input token under it):
add(b, "a", "a b", keepOrig);
Analyzer a = getFlattenAnalyzer(b, true);
assertAnalyzesTo(a, "a",
new String[] {"a", "b"},
new int[] { 0, 0},
new int[] { 1, 1},
null,
new int[] { 1, 1},
new int[] { 1, 1},
true);
a.close();
}
public void testDedup() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = false;
add(b, "a b", "ab", keepOrig);
add(b, "a b", "ab", keepOrig);
add(b, "a b", "ab", keepOrig);
Analyzer a = getFlattenAnalyzer(b, true);
assertAnalyzesTo(a, "a b",
new String[]{"ab"},
new int[]{1});
a.close();
}
public void testNoDedup() throws Exception {
// dedup is false:
SynonymMap.Builder b = new SynonymMap.Builder(false);
final boolean keepOrig = false;
add(b, "a b", "ab", keepOrig);
add(b, "a b", "ab", keepOrig);
add(b, "a b", "ab", keepOrig);
Analyzer a = getFlattenAnalyzer(b, true);
assertAnalyzesTo(a, "a b",
new String[]{"ab", "ab", "ab"},
new int[]{1, 0, 0});
a.close();
}
public void testMatching() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = false;
add(b, "a b", "ab", keepOrig);
add(b, "a c", "ac", keepOrig);
add(b, "a", "aa", keepOrig);
add(b, "b", "bb", keepOrig);
add(b, "z x c v", "zxcv", keepOrig);
add(b, "x c", "xc", keepOrig);
Analyzer a = getFlattenAnalyzer(b, true);
checkOneTerm(a, "$", "$");
checkOneTerm(a, "a", "aa");
checkOneTerm(a, "b", "bb");
assertAnalyzesTo(a, "a $",
new String[]{"aa", "$"},
new int[]{1, 1});
assertAnalyzesTo(a, "$ a",
new String[]{"$", "aa"},
new int[]{1, 1});
assertAnalyzesTo(a, "a a",
new String[]{"aa", "aa"},
new int[]{1, 1});
assertAnalyzesTo(a, "z x c v",
new String[]{"zxcv"},
new int[]{1});
assertAnalyzesTo(a, "z x c $",
new String[]{"z", "xc", "$"},
new int[]{1, 1, 1});
a.close();
}
public void testBasic1() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
add(b, "a", "foo", true);
add(b, "a b", "bar fee", true);
add(b, "b c", "dog collar", true);
add(b, "c d", "dog harness holder extras", true);
add(b, "m c e", "dog barks loudly", false);
add(b, "i j k", "feep", true);
add(b, "e f", "foo bar", false);
add(b, "e f", "baz bee", false);
add(b, "z", "boo", false);
add(b, "y", "bee", true);
Analyzer a = getFlattenAnalyzer(b, true);
assertAnalyzesTo(a, "a b c",
new String[] {"bar", "a", "fee", "b", "c"},
new int[] {1, 0, 1, 0, 1});
assertAnalyzesTo(a, "x a b c d",
new String[] {"x", "bar", "a", "fee", "b", "dog", "c", "harness", "d", "holder", "extras"},
new int[] {1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1});
assertAnalyzesTo(a, "a b a",
new String[] {"bar", "a", "fee", "b", "foo", "a"},
new int[] {1, 0, 1, 0, 1, 0});
// outputs no longer add to one another:
assertAnalyzesTo(a, "c d c d",
new String[] {"dog", "c", "harness", "d", "holder", "extras", "dog", "c", "harness", "d", "holder", "extras"},
new int[] {1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1});
// two outputs for same input
assertAnalyzesTo(a, "e f",
new String[] {"foo", "baz", "bar", "bee"},
new int[] {1, 0, 1, 0});
// verify multi-word / single-output offsets:
assertAnalyzesTo(a, "g i j k g",
new String[] {"g", "feep", "i", "j", "k", "g"},
new int[] {1, 1, 0, 1, 1, 1});
// mixed keepOrig true/false:
assertAnalyzesTo(a, "a m c e x",
new String[] {"foo", "a", "dog", "barks", "loudly", "x"},
new int[] {1, 0, 1, 1, 1, 1});
assertAnalyzesTo(a, "c d m c e x",
new String[] {"dog", "c", "harness", "d", "holder", "extras", "dog", "barks", "loudly","x"},
new int[] {1, 0, 1, 0, 1, 1, 1, 1, 1, 1});
assertTrue(synFilter.getCaptureCount() > 0);
// no captureStates when no syns matched
assertAnalyzesTo(a, "p q r s t",
new String[] {"p", "q", "r", "s", "t"},
new int[] {1, 1, 1, 1, 1});
assertEquals(0, synFilter.getCaptureCount());
// captureStates are necessary for the single-token syn case:
assertAnalyzesTo(a, "p q z y t",
new String[] {"p", "q", "boo", "bee", "y", "t"},
new int[] {1, 1, 1, 1, 0, 1});
assertTrue(synFilter.getCaptureCount() > 0);
a.close();
}
public void testBasic2() throws Exception {
boolean keepOrig = false; // flipped at the top of each do/while pass, so both the true and false cases run
do {
keepOrig = !keepOrig;
SynonymMap.Builder b = new SynonymMap.Builder(true);
add(b,"aaa", "aaaa1 aaaa2 aaaa3", keepOrig);
add(b, "bbb", "bbbb1 bbbb2", keepOrig);
Analyzer a = getFlattenAnalyzer(b, true);
if (keepOrig) {
assertAnalyzesTo(a, "xyzzy bbb pot of gold",
new String[] {"xyzzy", "bbbb1", "bbb", "bbbb2", "pot", "of", "gold"},
new int[] {1, 1, 0, 1, 1, 1, 1});
assertAnalyzesTo(a, "xyzzy aaa pot of gold",
new String[] {"xyzzy", "aaaa1", "aaa", "aaaa2", "aaaa3", "pot", "of", "gold"},
new int[] {1, 1, 0, 1, 1, 1, 1, 1});
} else {
assertAnalyzesTo(a, "xyzzy bbb pot of gold",
new String[] {"xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold"},
new int[] {1, 1, 1, 1, 1, 1});
assertAnalyzesTo(a, "xyzzy aaa pot of gold",
new String[] {"xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold"},
new int[] {1, 1, 1, 1, 1, 1, 1});
}
a.close();
} while (keepOrig);
}
/** If we expand synonyms during indexing, it's a bit better than
* SynonymFilter is today, but it still necessarily produces false-positive
* and false-negative PhraseQuery matches, because we do not index posLength
* and so we lose information. */
public void testFlattenedGraph() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "wtf", "what the fudge", true);
Analyzer a = getFlattenAnalyzer(b, true);
assertAnalyzesTo(a, "wtf happened",
new String[] {"what", "wtf", "the", "fudge", "happened"},
new int[] { 0, 0, 0, 0, 4},
new int[] { 3, 3, 3, 3, 12},
null,
new int[] { 1, 0, 1, 1, 1},
new int[] { 1, 3, 1, 1, 1},
true);
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
Document doc = new Document();
doc.add(newTextField("field", "wtf happened", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
// Good (this should not match, and doesn't):
assertEquals(0, s.count(new PhraseQuery("field", "what", "happened")));
// Bad (this should match, but doesn't):
assertEquals(0, s.count(new PhraseQuery("field", "wtf", "happened")));
// Good (this should match, and does):
assertEquals(1, s.count(new PhraseQuery("field", "what", "the", "fudge", "happened")));
// Bad (this should not match, but does):
assertEquals(1, s.count(new PhraseQuery("field", "wtf", "the")));
IOUtils.close(r, dir);
}
// Needs TermAutomatonQuery, which is in sandbox still:
/*
public void testAccurateGraphQuery1() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "wtf happened", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "what the fudge", "wtf", true);
SynonymMap map = b.build();
TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery();
TokenStream in = new CannedTokenStream(0, 23, new Token[] {
token("what", 1, 1, 0, 4),
token("the", 1, 1, 5, 8),
token("fudge", 1, 1, 9, 14),
token("happened", 1, 1, 15, 23),
});
assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));
in = new CannedTokenStream(0, 12, new Token[] {
token("wtf", 1, 1, 0, 3),
token("happened", 1, 1, 4, 12),
});
assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));
// "what happened" should NOT match:
in = new CannedTokenStream(0, 13, new Token[] {
token("what", 1, 1, 0, 4),
token("happened", 1, 1, 5, 13),
});
assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));
IOUtils.close(r, dir);
}
*/
/** If we expand synonyms at search time, the results are correct. */
// Needs TermAutomatonQuery, which is in sandbox still:
/*
public void testAccurateGraphQuery2() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "say wtf happened", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "what the fudge", "wtf", true);
SynonymMap map = b.build();
TokenStream in = new CannedTokenStream(0, 26, new Token[] {
token("say", 1, 1, 0, 3),
token("what", 1, 1, 3, 7),
token("the", 1, 1, 8, 11),
token("fudge", 1, 1, 12, 17),
token("happened", 1, 1, 18, 26),
});
TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery();
assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));
// "what happened" should NOT match:
in = new CannedTokenStream(0, 13, new Token[] {
token("what", 1, 1, 0, 4),
token("happened", 1, 1, 5, 13),
});
assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));
IOUtils.close(r, dir);
}
*/
// Needs TermAutomatonQuery, which is in sandbox still:
/*
public void testAccurateGraphQuery3() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(newTextField("field", "say what the fudge happened", Field.Store.NO));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
SynonymMap.Builder b = new SynonymMap.Builder();
add(b, "wtf", "what the fudge", true);
SynonymMap map = b.build();
TokenStream in = new CannedTokenStream(0, 15, new Token[] {
token("say", 1, 1, 0, 3),
token("wtf", 1, 1, 3, 6),
token("happened", 1, 1, 7, 15),
});
TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery();
assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));
// "what happened" should NOT match:
in = new CannedTokenStream(0, 13, new Token[] {
token("what", 1, 1, 0, 4),
token("happened", 1, 1, 5, 13),
});
assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true))));
IOUtils.close(r, dir);
}
private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) {
final Token t = new Token(term, startOffset, endOffset);
t.setPositionIncrement(posInc);
t.setPositionLength(posLength);
return t;
}
*/
private String randomNonEmptyString() {
while(true) {
String s = TestUtil.randomUnicodeString(random()).trim();
//String s = TestUtil.randomSimpleString(random()).trim();
if (s.length() != 0 && s.indexOf('\u0000') == -1) {
return s;
}
}
}
// Adds MockGraphTokenFilter after SynFilter:
public void testRandomGraphAfter() throws Exception {
final int numIters = atLeast(3);
for (int i = 0; i < numIters; i++) {
SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
final int numEntries = atLeast(10);
for (int j = 0; j < numEntries; j++) {
add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
}
final SynonymMap map = b.build();
final boolean ignoreCase = random().nextBoolean();
final boolean doFlatten = random().nextBoolean();
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
TokenStream syns = new SynonymGraphFilter(tokenizer, map, ignoreCase);
TokenStream graph = new MockGraphTokenFilter(random(), syns);
if (doFlatten) {
graph = new FlattenGraphFilter(graph);
}
return new TokenStreamComponents(tokenizer, graph);
}
};
checkRandomData(random(), analyzer, 100);
analyzer.close();
}
}
public void testEmptyStringInput() throws IOException {
final int numIters = atLeast(10);
for (int i = 0; i < numIters; i++) {
SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
final int numEntries = atLeast(10);
for (int j = 0; j < numEntries; j++) {
add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
}
final boolean ignoreCase = random().nextBoolean();
Analyzer analyzer = getAnalyzer(b, ignoreCase);
checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), "");
analyzer.close();
}
}
/** Simple random test; doesn't verify correctness, but does verify that
* analysis doesn't throw exceptions and that the stream doesn't misbehave.
*/
public void testRandom2() throws Exception {
final int numIters = atLeast(3);
for (int i = 0; i < numIters; i++) {
SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
final int numEntries = atLeast(10);
for (int j = 0; j < numEntries; j++) {
add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
}
final boolean ignoreCase = random().nextBoolean();
final boolean doFlatten = random().nextBoolean();
Analyzer analyzer;
if (doFlatten) {
analyzer = getFlattenAnalyzer(b, ignoreCase);
} else {
analyzer = getAnalyzer(b, ignoreCase);
}
checkRandomData(random(), analyzer, 100);
analyzer.close();
}
}
/** Simple random test like testRandom2, but for larger docs. */
public void testRandomHuge() throws Exception {
final int numIters = atLeast(1);
for (int i = 0; i < numIters; i++) {
SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
final int numEntries = atLeast(10);
if (VERBOSE) {
System.out.println("TEST: iter=" + i + " numEntries=" + numEntries);
}
for (int j = 0; j < numEntries; j++) {
add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
}
final boolean ignoreCase = random().nextBoolean();
final boolean doFlatten = random().nextBoolean();
Analyzer analyzer;
if (doFlatten) {
analyzer = getFlattenAnalyzer(b, ignoreCase);
} else {
analyzer = getAnalyzer(b, ignoreCase);
}
checkRandomData(random(), analyzer, 100, 1024);
analyzer.close();
}
}
public void testEmptyTerm() throws IOException {
final int numIters = atLeast(10);
for (int i = 0; i < numIters; i++) {
SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
final int numEntries = atLeast(10);
for (int j = 0; j < numEntries; j++) {
add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
}
final boolean ignoreCase = random().nextBoolean();
final Analyzer analyzer = getAnalyzer(b, ignoreCase);
checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), "");
analyzer.close();
}
}
// LUCENE-3375
public void testVanishingTermsNoFlatten() throws Exception {
String testFile =
"aaa => aaaa1 aaaa2 aaaa3\n" +
"bbb => bbbb1 bbbb2\n";
Analyzer analyzer = solrSynsToAnalyzer(testFile);
assertAnalyzesTo(analyzer, "xyzzy bbb pot of gold",
new String[] { "xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold" });
// xyzzy aaa pot of gold -> xyzzy aaaa1 aaaa2 aaaa3 gold
assertAnalyzesTo(analyzer, "xyzzy aaa pot of gold",
new String[] { "xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold" });
analyzer.close();
}
// LUCENE-3375
public void testVanishingTermsWithFlatten() throws Exception {
String testFile =
"aaa => aaaa1 aaaa2 aaaa3\n" +
"bbb => bbbb1 bbbb2\n";
Analyzer analyzer = solrSynsToAnalyzer(testFile);
assertAnalyzesTo(analyzer, "xyzzy bbb pot of gold",
new String[] { "xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold" });
// xyzzy aaa pot of gold -> xyzzy aaaa1 aaaa2 aaaa3 gold
assertAnalyzesTo(analyzer, "xyzzy aaa pot of gold",
new String[] { "xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold" });
analyzer.close();
}
public void testBuilderDedup() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = false;
add(b, "a b", "ab", keepOrig);
add(b, "a b", "ab", keepOrig);
add(b, "a b", "ab", keepOrig);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a, "a b",
new String[] { "ab" },
new int[] { 1 });
a.close();
}
public void testBuilderNoDedup() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(false);
final boolean keepOrig = false;
add(b, "a b", "ab", keepOrig);
add(b, "a b", "ab", keepOrig);
add(b, "a b", "ab", keepOrig);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a, "a b",
new String[] { "ab", "ab", "ab" },
new int[] { 1, 0, 0 });
a.close();
}
public void testRecursion1() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = false;
add(b, "zoo", "zoo", keepOrig);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a, "zoo zoo $ zoo",
new String[] { "zoo", "zoo", "$", "zoo" },
new int[] { 1, 1, 1, 1 });
a.close();
}
public void testRecursion2() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = false;
add(b, "zoo", "zoo", keepOrig);
add(b, "zoo", "zoo zoo", keepOrig);
Analyzer a = getAnalyzer(b, true);
// verify("zoo zoo $ zoo", "zoo/zoo zoo/zoo/zoo $/zoo zoo/zoo zoo");
assertAnalyzesTo(a, "zoo zoo $ zoo",
new String[] { "zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo" },
new int[] { 1, 0, 1, 1, 0, 1, 1, 1, 0, 1 });
a.close();
}
public void testRecursion3() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = true;
add(b, "zoo zoo", "zoo", keepOrig);
Analyzer a = getFlattenAnalyzer(b, true);
assertAnalyzesTo(a, "zoo zoo $ zoo",
new String[]{"zoo", "zoo", "zoo", "$", "zoo"},
new int[]{1, 0, 1, 1, 1});
a.close();
}
public void testRecursion4() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = true;
add(b, "zoo zoo", "zoo", keepOrig);
add(b, "zoo", "zoo zoo", keepOrig);
Analyzer a = getFlattenAnalyzer(b, true);
assertAnalyzesTo(a, "zoo zoo $ zoo",
new String[]{"zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo"},
new int[]{1, 0, 1, 1, 1, 0, 1});
a.close();
}
public void testKeepOrig() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = true;
add(b, "a b", "ab", keepOrig);
add(b, "a c", "ac", keepOrig);
add(b, "a", "aa", keepOrig);
add(b, "b", "bb", keepOrig);
add(b, "z x c v", "zxcv", keepOrig);
add(b, "x c", "xc", keepOrig);
Analyzer a = getAnalyzer(b, true);
assertAnalyzesTo(a, "$",
new String[] { "$" },
new int[] { 1 });
assertAnalyzesTo(a, "a",
new String[] { "aa", "a" },
new int[] { 1, 0 });
assertAnalyzesTo(a, "a",
new String[] { "aa", "a" },
new int[] { 1, 0 });
assertAnalyzesTo(a, "$ a",
new String[] { "$", "aa", "a" },
new int[] { 1, 1, 0 });
assertAnalyzesTo(a, "a $",
new String[] { "aa", "a", "$" },
new int[] { 1, 0, 1 });
assertAnalyzesTo(a, "$ a !",
new String[] { "$", "aa", "a", "!" },
new int[] { 1, 1, 0, 1 });
assertAnalyzesTo(a, "a a",
new String[] { "aa", "a", "aa", "a" },
new int[] { 1, 0, 1, 0 });
assertAnalyzesTo(a, "b",
new String[] { "bb", "b" },
new int[] { 1, 0 });
assertAnalyzesTo(a, "z x c v",
new String[] { "zxcv", "z", "x", "c", "v" },
new int[] { 1, 0, 1, 1, 1 });
assertAnalyzesTo(a, "z x c $",
new String[] { "z", "xc", "x", "c", "$" },
new int[] { 1, 1, 0, 1, 1 });
a.close();
}
/**
* Verify token types and positionLengths on synonyms of different word counts, with non-preserving, explicit rules.
*/
public void testNonPreservingMultiwordSynonyms() throws Exception {
String testFile =
"aaa => two words\n" +
"bbb => one two, very many multiple words\n" +
"ee ff, gg, h i j k, h i => one\n" +
"cc dd => usa,united states,u s a,united states of america";
Analyzer analyzer = solrSynsToAnalyzer(testFile);
assertAnalyzesTo(analyzer, "aaa",
new String[]{"two", "words"},
new int[]{0, 0},
new int[]{3, 3},
new String[]{"SYNONYM", "SYNONYM"},
new int[]{1, 1},
new int[]{1, 1});
assertAnalyzesToPositions(analyzer, "amazing aaa",
new String[]{"amazing", "two", "words"},
new String[]{"word", "SYNONYM", "SYNONYM"},
new int[]{1, 1, 1},
new int[]{1, 1, 1});
assertAnalyzesTo(analyzer, "p bbb s",
new String[]{"p", "one", "very", "two", "many", "multiple", "words", "s"},
new int[]{0, 2, 2, 2, 2, 2, 2, 6},
new int[]{1, 5, 5, 5, 5, 5, 5, 7},
new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word"},
new int[]{1, 1, 0, 1, 0, 1, 1, 1},
new int[]{1, 1, 1, 3, 1, 1, 1, 1});
assertAnalyzesTo(analyzer, "p ee ff s",
new String[]{"p", "one", "s"},
new int[]{0, 2, 8},
new int[]{1, 7, 9},
new String[]{"word", "SYNONYM", "word"},
new int[]{1, 1, 1},
new int[]{1, 1, 1});
assertAnalyzesTo(analyzer, "p h i j s",
new String[]{"p", "one", "j", "s"},
new int[]{0, 2, 6, 8},
new int[]{1, 5, 7, 9},
new String[]{"word", "SYNONYM", "word", "word"},
new int[]{1, 1, 1, 1},
new int[]{1, 1, 1, 1});
analyzer.close();
}
private Analyzer getAnalyzer(SynonymMap.Builder b, final boolean ignoreCase) throws IOException {
final SynonymMap map = b.build();
return new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
// Make a local variable so testRandomHuge doesn't share it across threads!
SynonymGraphFilter synFilter = new SynonymGraphFilter(tokenizer, map, ignoreCase);
TestSynonymGraphFilter.this.flattenFilter = null;
TestSynonymGraphFilter.this.synFilter = synFilter;
return new TokenStreamComponents(tokenizer, synFilter);
}
};
}
/** Appends FlattenGraphFilter too */
private Analyzer getFlattenAnalyzer(SynonymMap.Builder b, boolean ignoreCase) throws IOException {
final SynonymMap map = b.build();
return new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, true);
// Make a local variable so testRandomHuge doesn't share it across threads!
SynonymGraphFilter synFilter = new SynonymGraphFilter(tokenizer, map, ignoreCase);
FlattenGraphFilter flattenFilter = new FlattenGraphFilter(synFilter);
TestSynonymGraphFilter.this.synFilter = synFilter;
TestSynonymGraphFilter.this.flattenFilter = flattenFilter;
return new TokenStreamComponents(tokenizer, flattenFilter);
}
};
}
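// Both factories above publish the filters they create into the test-instance
// fields synFilter / flattenFilter, so tests can inspect side state such as
// getCaptureCount() and getMaxLookaheadUsed() after analysis runs.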
private void add(SynonymMap.Builder b, String input, String output, boolean keepOrig) {
if (VERBOSE) {
System.out.println("  add input=" + input + " output=" + output + " keepOrig=" + keepOrig);
}
CharsRefBuilder inputCharsRef = new CharsRefBuilder();
SynonymMap.Builder.join(input.split(" +"), inputCharsRef);
CharsRefBuilder outputCharsRef = new CharsRefBuilder();
SynonymMap.Builder.join(output.split(" +"), outputCharsRef);
b.add(inputCharsRef.get(), outputCharsRef.get(), keepOrig);
}
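// The add helper above uses SynonymMap.Builder.join, which concatenates the words
// with SynonymMap.WORD_SEPARATOR (the \u0000 char); multi-token rules are keyed
// that way, so e.g. add(b, "a b", "x", true) registers the two-token input
// "a\u0000b" -> "x". This is also why randomNonEmptyString rejects \u0000.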
private char[] randomBinaryChars(int minLen, int maxLen, double bias, char base) {
int len = TestUtil.nextInt(random(), minLen, maxLen);
char[] chars = new char[len];
for(int i=0;i<len;i++) {
char ch;
if (random().nextDouble() < bias) {
ch = base;
} else {
ch = (char) (base+1);
}
chars[i] = ch;
}
return chars;
}
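// For example, randomBinaryChars(1, 5, 0.8, 'a') yields 1-5 chars drawn from
// {'a', 'b'} with 'a' chosen ~80% of the time; base 'x' gives {'x', 'y'}. Each
// char becomes one single-character token via toTokenString below.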
private static String toTokenString(char[] chars) {
StringBuilder b = new StringBuilder();
for(char c : chars) {
if (b.length() > 0) {
b.append(' ');
}
b.append(c);
}
return b.toString();
}
private static class OneSyn {
char[] in;
char[] out;
boolean keepOrig;
@Override
public String toString() {
return toTokenString(in) + " --> " + toTokenString(out) + " (keepOrig=" + keepOrig + ")";
}
}
public void testRandomSyns() throws Exception {
int synCount = atLeast(10);
double bias = random().nextDouble();
boolean dedup = random().nextBoolean();
boolean flatten = random().nextBoolean();
SynonymMap.Builder b = new SynonymMap.Builder(dedup);
List<OneSyn> syns = new ArrayList<>();
// Makes random syns from random a / b tokens, mapping to random x / y tokens
if (VERBOSE) {
System.out.println("TEST: make " + synCount + " syns");
System.out.println(" bias for a over b=" + bias);
System.out.println(" dedup=" + dedup);
System.out.println(" flatten=" + flatten);
}
int maxSynLength = 0;
for(int i=0;i<synCount;i++) {
OneSyn syn = new OneSyn();
syn.in = randomBinaryChars(1, 5, bias, 'a');
syn.out = randomBinaryChars(1, 5, 0.5, 'x');
syn.keepOrig = random().nextBoolean();
syns.add(syn);
maxSynLength = Math.max(maxSynLength, syn.in.length);
if (VERBOSE) {
System.out.println(" " + syn);
}
add(b, toTokenString(syn.in), toTokenString(syn.out), syn.keepOrig);
}
// Compute max allowed lookahead for flatten filter:
int maxFlattenLookahead = 0;
if (flatten) {
for(int i=0;i<synCount;i++) {
OneSyn syn1 = syns.get(i);
int count = syn1.out.length;
boolean keepOrig = syn1.keepOrig;
for(int j=0;j<synCount;j++) {
OneSyn syn2 = syns.get(j);
keepOrig |= syn2.keepOrig;
if (Arrays.equals(syn1.in, syn2.in)) { // char[] must be compared by content, not reference
count += syn2.out.length;
}
}
if (keepOrig) {
count += syn1.in.length;
}
maxFlattenLookahead = Math.max(maxFlattenLookahead, count);
}
}
// Only used w/ VERBOSE:
Analyzer aNoFlattened;
if (VERBOSE) {
aNoFlattened = getAnalyzer(b, true);
} else {
aNoFlattened = null;
}
Analyzer a;
if (flatten) {
a = getFlattenAnalyzer(b, true);
} else {
a = getAnalyzer(b, true);
}
int iters = atLeast(1);
for(int iter=0;iter<iters;iter++) {
String doc = toTokenString(randomBinaryChars(50, 100, bias, 'a'));
//String doc = toTokenString(randomBinaryChars(10, 50, bias, 'a'));
if (VERBOSE) {
System.out.println("TEST: iter="+ iter + " doc=" + doc);
}
Automaton expected = slowSynFilter(doc, syns, flatten);
if (VERBOSE) {
System.out.println(" expected:\n" + expected.toDot());
if (flatten) {
Automaton unflattened = toAutomaton(aNoFlattened.tokenStream("field", new StringReader(doc)));
System.out.println(" actual unflattened:\n" + unflattened.toDot());
}
}
Automaton actual = toAutomaton(a.tokenStream("field", new StringReader(doc)));
if (VERBOSE) {
System.out.println(" actual:\n" + actual.toDot());
}
assertTrue("maxLookaheadUsed=" + synFilter.getMaxLookaheadUsed() + " maxSynLength=" + maxSynLength,
synFilter.getMaxLookaheadUsed() <= maxSynLength);
if (flatten) {
assertTrue("flatten maxLookaheadUsed=" + flattenFilter.getMaxLookaheadUsed() + " maxFlattenLookahead=" + maxFlattenLookahead,
flattenFilter.getMaxLookaheadUsed() <= maxFlattenLookahead);
}
checkAnalysisConsistency(random(), a, random().nextBoolean(), doc);
// We can easily have a non-deterministic automaton at this point, e.g. if
// more than one syn matched at given point, or if the syn mapped to an
// output token that also happens to be in the input:
try {
actual = Operations.determinize(actual, 50000);
} catch (TooComplexToDeterminizeException tctde) {
// Unfortunately the syns can easily create difficult-to-determinize graphs:
assertTrue(approxEquals(actual, expected));
continue;
}
try {
expected = Operations.determinize(expected, 50000);
} catch (TooComplexToDeterminizeException tctde) {
// Unfortunately the syns can easily create difficult-to-determinize graphs:
assertTrue(approxEquals(actual, expected));
continue;
}
assertTrue(approxEquals(actual, expected));
assertTrue(Operations.sameLanguage(actual, expected));
}
// aNoFlattened is only non-null when VERBOSE; IOUtils.close ignores null arguments:
IOUtils.close(a, aNoFlattened);
}
/** Only used when true equality is too costly to check! */
private boolean approxEquals(Automaton actual, Automaton expected) {
// Don't collapse these into one line else the thread stack won't say which direction failed!:
boolean b1 = approxSubsetOf(actual, expected);
boolean b2 = approxSubsetOf(expected, actual);
return b1 && b2;
}
private boolean approxSubsetOf(Automaton a1, Automaton a2) {
AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(a1);
for(int i=0;i<2000;i++) {
int[] ints = ras.getRandomAcceptedString(random());
IntsRef path = new IntsRef(ints, 0, ints.length);
if (accepts(a2, path) == false) {
throw new RuntimeException("a2 does not accept " + path);
}
}
// Presumed true
return true;
}
/** Like {@link Operations#run} except the incoming automaton is allowed to be non-deterministic. */
private static boolean accepts(Automaton a, IntsRef path) {
Set<Integer> states = new HashSet<>();
states.add(0);
Transition t = new Transition();
for(int i=0;i<path.length;i++) {
int digit = path.ints[path.offset+i];
Set<Integer> nextStates = new HashSet<>();
for(int state : states) {
int count = a.initTransition(state, t);
for(int j=0;j<count;j++) {
a.getNextTransition(t);
if (digit >= t.min && digit <= t.max) {
nextStates.add(t.dest);
}
}
}
states = nextStates;
if (states.isEmpty()) {
return false;
}
}
for(int state : states) {
if (a.isAccept(state)) {
return true;
}
}
return false;
}
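// accepts above is a plain NFA simulation: keep the set of reachable states and
// advance it one input label at a time. E.g. for an automaton with the single
// transition 0 --'a'--> 1 and state 1 accepting, accepts(a, [97]) walks
// {0} -> {1} and returns true.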
/** Stupid, slow brute-force, yet hopefully bug-free, synonym filter. */
private Automaton slowSynFilter(String doc, List<OneSyn> syns, boolean flatten) {
String[] tokens = doc.split(" +");
if (VERBOSE) {
System.out.println(" doc has " + tokens.length + " tokens");
}
int i=0;
Automaton.Builder a = new Automaton.Builder();
int lastState = a.createState();
while (i<tokens.length) {
// Consider all possible syn matches starting at this point:
assert tokens[i].length() == 1;
if (VERBOSE) {
System.out.println(" i=" + i);
}
List<OneSyn> matches = new ArrayList<>();
for(OneSyn syn : syns) {
if (i + syn.in.length <= tokens.length) {
boolean match = true;
for(int j=0;j<syn.in.length;j++) {
if (tokens[i+j].charAt(0) != syn.in[j]) {
match = false;
break;
}
}
if (match) {
if (matches.isEmpty() == false) {
if (syn.in.length < matches.get(0).in.length) {
// Greedy matching: we already found longer syns matching here
continue;
} else if (syn.in.length > matches.get(0).in.length) {
// Greedy matching: all previous matches were shorter, so we drop them
matches.clear();
} else {
// Keep the current matches: we allow multiple synonyms matching the same input string
}
}
matches.add(syn);
}
}
}
int nextState = a.createState();
if (matches.isEmpty() == false) {
// We have match(es) starting at this token
if (VERBOSE) {
System.out.println(" matches @ i=" + i + ": " + matches);
}
// We keepOrig if any of the matches said to:
boolean keepOrig = false;
for(OneSyn syn : matches) {
keepOrig |= syn.keepOrig;
}
List<Integer> flatStates;
if (flatten) {
flatStates = new ArrayList<>();
} else {
flatStates = null;
}
if (keepOrig) {
// Add path for the original tokens
addSidePath(a, lastState, nextState, matches.get(0).in, flatStates);
}
for(OneSyn syn : matches) {
addSidePath(a, lastState, nextState, syn.out, flatStates);
}
i += matches.get(0).in.length;
} else {
a.addTransition(lastState, nextState, tokens[i].charAt(0));
i++;
}
lastState = nextState;
}
a.setAccept(lastState, true);
return topoSort(a.finish());
}
/** Just creates a side path from startState to endState with the provided tokens. */
private static void addSidePath(Automaton.Builder a, int startState, int endState, char[] tokens, List<Integer> flatStates) {
int lastState = startState;
for(int i=0;i<tokens.length;i++) {
int nextState;
if (i == tokens.length-1) {
nextState = endState;
} else if (flatStates == null || i >= flatStates.size()) {
nextState = a.createState();
if (flatStates != null) {
assert i == flatStates.size();
flatStates.add(nextState);
}
} else {
nextState = flatStates.get(i);
}
a.addTransition(lastState, nextState, tokens[i]);
lastState = nextState;
}
}
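// In addSidePath above, a non-null flatStates makes parallel side paths reuse the
// same intermediate states (position slots), mimicking what FlattenGraphFilter
// does: stacked outputs share positions instead of forking fresh states per path.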
private Automaton toAutomaton(TokenStream ts) throws IOException {
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
Automaton a = new Automaton();
int srcNode = -1;
int destNode = -1;
int state = a.createState();
while (ts.incrementToken()) {
assert termAtt.length() == 1;
char c = termAtt.charAt(0);
int posInc = posIncAtt.getPositionIncrement();
if (posInc != 0) {
srcNode += posInc;
while (state < srcNode) {
state = a.createState();
}
}
destNode = srcNode + posLenAtt.getPositionLength();
while (state < destNode) {
state = a.createState();
}
a.addTransition(srcNode, destNode, c);
}
ts.end();
ts.close();
a.finishState();
a.setAccept(destNode, true);
return a;
}
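// The position-to-state mapping in toAutomaton above: each token position becomes
// one automaton state, posInc advances the source state and posLength selects the
// destination, so e.g. "wtf" with posLength=3 becomes a single transition spanning
// three positions.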
/*
private String toDot(TokenStream ts) throws IOException {
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class);
ts.reset();
int srcNode = -1;
int destNode = -1;
StringBuilder b = new StringBuilder();
b.append("digraph Automaton {\n");
b.append(" rankdir = LR\n");
b.append(" node [width=0.2, height=0.2, fontsize=8]\n");
b.append(" initial [shape=plaintext,label=\"\"]\n");
b.append(" initial -> 0\n");
while (ts.incrementToken()) {
int posInc = posIncAtt.getPositionIncrement();
if (posInc != 0) {
srcNode += posInc;
b.append(" ");
b.append(srcNode);
b.append(" [shape=circle,label=\"" + srcNode + "\"]\n");
}
destNode = srcNode + posLenAtt.getPositionLength();
b.append(" ");
b.append(srcNode);
b.append(" -> ");
b.append(destNode);
b.append(" [label=\"");
b.append(termAtt);
b.append("\"");
if (typeAtt.type().equals("word") == false) {
b.append(" color=red");
}
b.append("]\n");
}
ts.end();
ts.close();
b.append('}');
return b.toString();
}
*/
/** Renumbers nodes according to their topo sort */
private Automaton topoSort(Automaton in) {
int[] newToOld = Operations.topoSortStates(in);
int[] oldToNew = new int[newToOld.length];
Automaton.Builder a = new Automaton.Builder();
//System.out.println("remap:");
for(int i=0;i<newToOld.length;i++) {
a.createState();
oldToNew[newToOld[i]] = i;
//System.out.println(" " + newToOld[i] + " -> " + i);
if (in.isAccept(newToOld[i])) {
a.setAccept(i, true);
//System.out.println(" **");
}
}
Transition t = new Transition();
for(int i=0;i<newToOld.length;i++) {
int count = in.initTransition(newToOld[i], t);
for(int j=0;j<count;j++) {
in.getNextTransition(t);
a.addTransition(i, oldToNew[t.dest], t.min, t.max);
}
}
return a.finish();
}
/**
* Verify token types and positionLengths on synonyms of different word counts.
*/
public void testPositionLengthAndType() throws Exception {
String testFile =
"spider man, spiderman\n" +
"usa,united states,u s a,united states of america";
Analyzer analyzer = new MockAnalyzer(random());
SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer);
parser.parse(new StringReader(testFile));
analyzer.close();
SynonymMap map = parser.build();
analyzer = getFlattenAnalyzer(parser, true);
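// Decode the FST output for "usa" by hand; in SynonymMap's encoding, the first
// vInt packs the synonym count in its upper bits (count = code >>> 1; the low bit
// flags keepOrig), followed by one vInt ordinal per synonym into map.words.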
BytesRef value = Util.get(map.fst, Util.toUTF32(new CharsRef("usa"), new IntsRefBuilder()));
ByteArrayDataInput bytesReader = new ByteArrayDataInput(value.bytes, value.offset, value.length);
final int code = bytesReader.readVInt();
final int count = code >>> 1;
final int[] synonymsIdxs = new int[count];
for (int i = 0; i < count; i++) {
synonymsIdxs[i] = bytesReader.readVInt();
}
BytesRef scratchBytes = new BytesRef();
map.words.get(synonymsIdxs[2], scratchBytes);
int synonymLength = 1;
for (int i = scratchBytes.offset; i < scratchBytes.offset + scratchBytes.length; i++) {
if (scratchBytes.bytes[i] == SynonymMap.WORD_SEPARATOR) {
synonymLength++;
}
}
assertEquals(3, count);
assertEquals(4, synonymLength);
assertAnalyzesTo(analyzer, "spider man",
new String[]{"spiderman", "spider", "man"},
new int[]{0, 0, 7},
new int[]{10, 6, 10},
new String[]{"SYNONYM", "word", "word"},
new int[]{1, 0, 1},
new int[]{2, 1, 1});
assertAnalyzesToPositions(analyzer, "amazing spider man",
new String[]{"amazing", "spiderman", "spider", "man"},
new String[]{"word", "SYNONYM", "word", "word"},
new int[]{1, 1, 0, 1},
new int[]{1, 2, 1, 1});
// System.out.println(toDot(getAnalyzer(parser, true).tokenStream("field", new StringReader("the usa is wealthy"))));
assertAnalyzesTo(analyzer, "the united states of america is wealthy",
new String[]{"the", "usa", "united", "u", "united", "states", "s", "states", "a", "of", "america", "is", "wealthy"},
new int[] {0, 4, 4, 4, 4, 11, 11, 11, 18, 18, 21, 29, 32},
new int[] {3, 28, 10, 10, 10, 28, 17, 17, 28, 20, 28, 31, 39},
new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "word", "word", "word", "word"},
new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1},
new int[] {1, 4, 1, 1, 1, 3, 1, 1, 2, 1, 1, 1, 1});
assertAnalyzesToPositions(analyzer, "spiderman",
new String[]{"spider", "spiderman", "man"},
new String[]{"SYNONYM", "word", "SYNONYM"},
new int[]{1, 0, 1},
new int[]{1, 2, 1});
assertAnalyzesTo(analyzer, "spiderman enemies",
new String[]{"spider", "spiderman", "man", "enemies"},
new int[]{0, 0, 0, 10},
new int[]{9, 9, 9, 17},
new String[]{"SYNONYM", "word", "SYNONYM", "word"},
new int[]{1, 0, 1, 1},
new int[]{1, 2, 1, 1});
assertAnalyzesTo(analyzer, "the usa is wealthy",
new String[]{"the", "united", "u", "united", "usa", "states", "s", "states", "a", "of", "america", "is", "wealthy"},
new int[] {0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 11},
new int[] {3, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 10, 18},
new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"},
new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1},
new int[] {1, 1, 1, 1, 4, 3, 1, 1, 2, 1, 1, 1, 1});
assertGraphStrings(analyzer, "the usa is wealthy", new String[] {
"the usa is wealthy",
"the united states is wealthy",
"the u s a is wealthy",
"the united states of america is wealthy",
// Wrong. Here only due to "sausagization" of the multi-word synonyms.
"the u states is wealthy",
"the u states a is wealthy",
"the u s of america is wealthy",
"the u states of america is wealthy",
"the united s a is wealthy",
"the united states a is wealthy",
"the united s of america is wealthy"});
assertAnalyzesTo(analyzer, "the united states is wealthy",
new String[]{"the", "usa", "u", "united", "united", "s", "states", "states", "a", "of", "america", "is", "wealthy"},
new int[] {0, 4, 4, 4, 4, 11, 11, 11, 11, 11, 11, 18, 21},
new int[] {3, 17, 10, 10, 10, 17, 17, 17, 17, 17, 17, 20, 28},
new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"},
new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1},
new int[] {1, 4, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1},
false);
assertAnalyzesTo(analyzer, "the united states of balance",
new String[]{"the", "usa", "u", "united", "united", "s", "states", "states", "a", "of", "america", "of", "balance"},
new int[] {0, 4, 4, 4, 4, 11, 11, 11, 11, 11, 11, 18, 21},
new int[] {3, 17, 10, 10, 10, 17, 17, 17, 17, 17, 17, 20, 28},
new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "word", "SYNONYM", "SYNONYM", "SYNONYM", "word", "word"},
new int[] {1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1},
new int[] {1, 4, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1});
analyzer.close();
}
public void testMultiwordOffsets() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = true;
add(b, "national hockey league", "nhl", keepOrig);
Analyzer a = getFlattenAnalyzer(b, true);
assertAnalyzesTo(a, "national hockey league",
new String[]{"nhl", "national", "hockey", "league"},
new int[]{0, 0, 9, 16},
new int[]{22, 8, 15, 22},
new int[]{1, 0, 1, 1});
a.close();
}
public void testIncludeOrig() throws Exception {
SynonymMap.Builder b = new SynonymMap.Builder(true);
final boolean keepOrig = true;
add(b, "a b", "ab", keepOrig);
add(b, "a c", "ac", keepOrig);
add(b, "a", "aa", keepOrig);
add(b, "b", "bb", keepOrig);
add(b, "z x c v", "zxcv", keepOrig);
add(b, "x c", "xc", keepOrig);
Analyzer a = getFlattenAnalyzer(b, true);
assertAnalyzesTo(a, "$",
new String[]{"$"},
new int[]{1});
assertAnalyzesTo(a, "a",
new String[]{"aa", "a"},
new int[]{1, 0});
assertAnalyzesTo(a, "a",
new String[]{"aa", "a"},
new int[]{1, 0});
assertAnalyzesTo(a, "$ a",
new String[]{"$", "aa", "a"},
new int[]{1, 1, 0});
assertAnalyzesTo(a, "a $",
new String[]{"aa", "a", "$"},
new int[]{1, 0, 1});
assertAnalyzesTo(a, "$ a !",
new String[]{"$", "aa", "a", "!"},
new int[]{1, 1, 0, 1});
assertAnalyzesTo(a, "a a",
new String[]{"aa", "a", "aa", "a"},
new int[]{1, 0, 1, 0});
assertAnalyzesTo(a, "b",
new String[]{"bb", "b"},
new int[]{1, 0});
assertAnalyzesTo(a, "z x c v",
new String[]{"zxcv", "z", "x", "c", "v"},
new int[]{1, 0, 1, 1, 1});
assertAnalyzesTo(a, "z x c $",
new String[]{"z", "xc", "x", "c", "$"},
new int[]{1, 1, 0, 1, 1});
a.close();
}
public void testUpperCase() throws IOException {
assertMapping("word", "synonym");
assertMapping("word".toUpperCase(Locale.ROOT), "synonym");
}
private void assertMapping(String inputString, String outputString) throws IOException {
SynonymMap.Builder builder = new SynonymMap.Builder(false);
// the rules must be lowercased up front, but the incoming tokens will be case insensitive:
CharsRef input = SynonymMap.Builder.join(inputString.toLowerCase(Locale.ROOT).split(" "), new CharsRefBuilder());
CharsRef output = SynonymMap.Builder.join(outputString.split(" "), new CharsRefBuilder());
builder.add(input, output, true);
Analyzer analyzer = new CustomAnalyzer(builder.build());
TokenStream tokenStream = analyzer.tokenStream("field", inputString);
assertTokenStreamContents(tokenStream, new String[]{
outputString, inputString
});
analyzer.close();
}
static class CustomAnalyzer extends Analyzer {
private final SynonymMap synonymMap;
CustomAnalyzer(SynonymMap synonymMap) {
this.synonymMap = synonymMap;
}
@Override
protected TokenStreamComponents createComponents(String s) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
TokenStream tokenStream = new SynonymGraphFilter(tokenizer, synonymMap, true); // ignoreCase=true
return new TokenStreamComponents(tokenizer, tokenStream);
}
}
}