docs/attachments/LUCENE-3695/LUCENE-3695.patch - lucene-jira-archive - Git at Google

 Index: modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
 ===================================================================
 --- modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java	(revision 1231386)
 +++ modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java	(working copy)
 @@ -5,6 +5,7 @@
  import java.util.Iterator;

  import org.apache.lucene.util.BytesRef;
 +import org.apache.lucene.util.IntsRef;
  import org.apache.lucene.util.fst.*;

  /**
 @@ -219,11 +220,12 @@
          shareMaxTailLength, outputs, null);

      BytesRef scratch = new BytesRef();
 +    final IntsRef scratchIntsRef = new IntsRef();
      int count = 0;
      for (Iterator<BytesRef> i = sorter.iterator(); i.hasNext(); count++) {
        BytesRef entry = i.next();
        if (scratch.compareTo(entry) != 0) {
 -        builder.add(entry, empty);
 +        builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
          scratch.copyBytes(entry);
        }
      }
 Index: modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
 ===================================================================
 --- modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java	(revision 1231386)
 +++ modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java	(working copy)
 @@ -33,6 +33,8 @@
  import org.apache.lucene.analysis.Tokenizer;
  import org.junit.Ignore;

 +// nocommit
 +@Ignore
  public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {

    //this is some text  here is a  link  and another  link . This is an entity: & plus a <.  Here is an &
 Index: modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
 ===================================================================
 --- modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java	(revision 1231386)
 +++ modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java	(working copy)
 @@ -33,9 +33,11 @@
  import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.BytesRefHash;
  import org.apache.lucene.util.CharsRef;
 +import org.apache.lucene.util.IntsRef;
  import org.apache.lucene.util.UnicodeUtil;
  import org.apache.lucene.util.fst.ByteSequenceOutputs;
  import org.apache.lucene.util.fst.FST;
 +import org.apache.lucene.util.fst.Util;

  /**
   * A map of synonyms, keys and values are phrases.
 @@ -262,6 +264,8 @@
        Set<CharsRef> keys = workingSet.keySet();
        CharsRef sortedKeys[] = keys.toArray(new CharsRef[keys.size()]);
        Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator());
 +
 +      final IntsRef scratchIntsRef = new IntsRef();

        //System.out.println("fmap.build");
        for (int keyIdx = 0; keyIdx < sortedKeys.length; keyIdx++) {
 @@ -307,7 +311,7 @@

          scratch.length = scratchOutput.getPosition() - scratch.offset;
          //System.out.println("  add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
 -        builder.add(input, BytesRef.deepCopyOf(scratch));
 +        builder.add(Util.toUTF32(input, scratchIntsRef), BytesRef.deepCopyOf(scratch));
        }

        FST<BytesRef> fst = builder.finish();
 Index: lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
 ===================================================================
 --- lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java	(revision 1231386)
 +++ lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java	(working copy)
 @@ -1050,6 +1050,7 @@
      }
      Terms terms = MultiFields.getTerms(r, "body");
      if (terms != null) {
 +      final IntsRef scratchIntsRef = new IntsRef();
        final TermsEnum termsEnum = terms.iterator(null);
        if (VERBOSE) {
          System.out.println("TEST: got termsEnum=" + termsEnum);
 @@ -1073,7 +1074,7 @@
          } else {
            output = termsEnum.docFreq();
          }
 -        builder.add(term, outputs.get(output));
 +        builder.add(Util.toIntsRef(term, scratchIntsRef), outputs.get(output));
          ord++;
          if (VERBOSE && ord % 100000 == 0 && LuceneTestCase.TEST_NIGHTLY) {
            System.out.println(ord + " terms...");
 @@ -1373,7 +1374,7 @@
    public void testSingleString() throws Exception {
      final Outputs<Object> outputs = NoOutputs.getSingleton();
      final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, outputs);
 -    b.add(new BytesRef("foobar"), outputs.getNoOutput());
 +    b.add(Util.toIntsRef(new BytesRef("foobar"), new IntsRef()), outputs.getNoOutput());
      final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<Object>(b.finish());
      assertNull(fstEnum.seekFloor(new BytesRef("foo")));
      assertNull(fstEnum.seekCeil(new BytesRef("foobaz")));
 @@ -1395,9 +1396,9 @@
      final BytesRef b = new BytesRef("b");
      final BytesRef c = new BytesRef("c");

 -    builder.add(a, outputs.get(17));
 -    builder.add(b, outputs.get(42));
 -    builder.add(c, outputs.get(13824324872317238L));
 +    builder.add(Util.toIntsRef(a, new IntsRef()), outputs.get(17));
 +    builder.add(Util.toIntsRef(b, new IntsRef()), outputs.get(42));
 +    builder.add(Util.toIntsRef(c, new IntsRef()), outputs.get(13824324872317238L));

      final FST<Long> fst = builder.finish();

 @@ -1628,13 +1629,14 @@

          int line = 0;
          final BytesRef term = new BytesRef();
 +        final IntsRef scratchIntsRef = new IntsRef();
          while (line < lines.length) {
            String w = lines[line++];
            if (w == null) {
              break;
            }
            term.copyChars(w);
 -          b.add(term, nothing);
 +          b.add(Util.toIntsRef(term, scratchIntsRef), nothing);
          }

          return b.finish();
 @@ -1698,8 +1700,8 @@
      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);

      final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null);
 -    builder.add("stat", outputs.get(17));
 -    builder.add("station", outputs.get(10));
 +    builder.add(Util.toUTF32("stat", new IntsRef()), outputs.get(17));
 +    builder.add(Util.toUTF32("station", new IntsRef()), outputs.get(10));
      final FST<Long> fst = builder.finish();
      //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
      StringWriter w = new StringWriter();
 @@ -1713,8 +1715,8 @@
      final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);

      final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null);
 -    builder.add(new BytesRef("stat"), outputs.getNoOutput());
 -    builder.add(new BytesRef("station"), outputs.getNoOutput());
 +    builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
 +    builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
      final FST<Long> fst = builder.finish();
      StringWriter w = new StringWriter();
      //Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
 Index: lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java	(revision 1231386)
 +++ lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java	(working copy)
 @@ -51,10 +51,12 @@
  import org.apache.lucene.util.ArrayUtil;
  import org.apache.lucene.util.Bits;
  import org.apache.lucene.util.BytesRef;
 +import org.apache.lucene.util.IntsRef;
  import org.apache.lucene.util.fst.Builder;
  import org.apache.lucene.util.fst.ByteSequenceOutputs;
  import org.apache.lucene.util.fst.BytesRefFSTEnum;
  import org.apache.lucene.util.fst.FST;
 +import org.apache.lucene.util.fst.Util;

  // TODO: would be nice to somehow allow this to act like
  // InstantiatedIndex, by never writing to disk; ie you write
 @@ -183,6 +185,8 @@
      private final BytesRef spare = new BytesRef();
      private byte[] finalBuffer = new byte[128];

 +    private final IntsRef scratchIntsRef = new IntsRef();
 +
      @Override
      public void finishTerm(BytesRef text, TermStats stats) throws IOException {

 @@ -213,7 +217,7 @@
            System.out.println("      " + Integer.toHexString(finalBuffer[i]&0xFF));
          }
        }
 -      builder.add(text, BytesRef.deepCopyOf(spare));
 +      builder.add(Util.toIntsRef(text, scratchIntsRef), BytesRef.deepCopyOf(spare));
        termCount++;
      }

 Index: lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java	(revision 1231386)
 +++ lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java	(working copy)
 @@ -33,6 +33,7 @@
  import org.apache.lucene.store.IndexInput;
  import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.CodecUtil;
 +import org.apache.lucene.util.IntsRef;
  import org.apache.lucene.util.fst.Builder;
  import org.apache.lucene.util.fst.BytesRefFSTEnum;
  import org.apache.lucene.util.fst.FST;
 @@ -187,6 +188,7 @@

          if (indexDivisor > 1) {
            // subsample
 +          final IntsRef scratchIntsRef = new IntsRef();
            final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
            final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
            final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
 @@ -194,7 +196,7 @@
            int count = indexDivisor;
            while((result = fstEnum.next()) != null) {
              if (count == indexDivisor) {
 -              builder.add(result.input, result.output);
 +              builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output);
                count = 0;
              }
              count++;
 Index: lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java	(revision 1231386)
 +++ lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java	(working copy)
 @@ -29,9 +29,11 @@
  import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.CodecUtil;
  import org.apache.lucene.util.IOUtils;
 +import org.apache.lucene.util.IntsRef;
  import org.apache.lucene.util.fst.Builder;
  import org.apache.lucene.util.fst.FST;
  import org.apache.lucene.util.fst.PositiveIntOutputs;
 +import org.apache.lucene.util.fst.Util;

  /**
   * Selects index terms according to provided pluggable
 @@ -227,7 +229,7 @@
        ////System.out.println("VGW: field=" + fieldInfo.name);

        // Always put empty string in
 -      fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
 +      fstBuilder.add(new IntsRef(), fstOutputs.get(termsFilePointer));
        startTermsFilePointer = termsFilePointer;
      }

 @@ -246,6 +248,8 @@
        }
      }

 +    private final IntsRef scratchIntsRef = new IntsRef();
 +
      @Override
      public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
        if (text.length == 0) {
 @@ -256,7 +260,7 @@
        final int lengthSave = text.length;
        text.length = indexedTermPrefixLength(lastTerm, text);
        try {
 -        fstBuilder.add(text, fstOutputs.get(termsFilePointer));
 +        fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), fstOutputs.get(termsFilePointer));
        } finally {
          text.length = lengthSave;
        }
 Index: lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java	(revision 1231386)
 +++ lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java	(working copy)
 @@ -36,6 +36,7 @@
  import org.apache.lucene.util.Bits;
  import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.CharsRef;
 +import org.apache.lucene.util.IntsRef;
  import org.apache.lucene.util.OpenBitSet;
  import org.apache.lucene.util.StringHelper;
  import org.apache.lucene.util.UnicodeUtil;
 @@ -44,6 +45,7 @@
  import org.apache.lucene.util.fst.FST;
  import org.apache.lucene.util.fst.PairOutputs;
  import org.apache.lucene.util.fst.PositiveIntOutputs;
 +import org.apache.lucene.util.fst.Util;

  class SimpleTextFieldsReader extends FieldsProducer {

 @@ -477,11 +479,12 @@
        int docFreq = 0;
        long totalTermFreq = 0;
        OpenBitSet visitedDocs = new OpenBitSet();
 +      final IntsRef scratchIntsRef = new IntsRef();
        while(true) {
          SimpleTextUtil.readLine(in, scratch);
          if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
            if (lastDocsStart != -1) {
 -            b.add(lastTerm, new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
 +            b.add(Util.toIntsRef(lastTerm, scratchIntsRef), new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
                                                                                     new PairOutputs.Pair<Long,Long>((long) docFreq,
                                                                                                                     posIntOutputs.get(totalTermFreq))));
              sumTotalTermFreq += totalTermFreq;
 @@ -497,7 +500,7 @@
            totalTermFreq++;
          } else if (StringHelper.startsWith(scratch, TERM)) {
            if (lastDocsStart != -1) {
 -            b.add(lastTerm, new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
 +            b.add(Util.toIntsRef(lastTerm, scratchIntsRef), new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
                                                                                     new PairOutputs.Pair<Long,Long>((long) docFreq,
                                                                                                                     posIntOutputs.get(totalTermFreq))));
            }
 Index: lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java	(revision 1231386)
 +++ lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java	(working copy)
 @@ -22,8 +22,8 @@
  import java.util.Comparator;
  import java.util.List;

 +import org.apache.lucene.index.FieldInfo.IndexOptions;
  import org.apache.lucene.index.FieldInfo;
 -import org.apache.lucene.index.FieldInfo.IndexOptions;
  import org.apache.lucene.index.FieldInfos;
  import org.apache.lucene.index.IndexFileNames;
  import org.apache.lucene.index.SegmentWriteState;
 @@ -39,6 +39,7 @@
  import org.apache.lucene.util.fst.BytesRefFSTEnum;
  import org.apache.lucene.util.fst.FST;
  import org.apache.lucene.util.fst.NoOutputs;
 +import org.apache.lucene.util.fst.Util;

  /*
    TODO:
 @@ -244,6 +245,7 @@
      public final boolean hasTerms;
      public final boolean isFloor;
      public final int floorLeadByte;
 +    private final IntsRef scratchIntsRef = new IntsRef();

      public PendingBlock(BytesRef prefix, long fp, boolean hasTerms, boolean isFloor, int floorLeadByte, List<FST<BytesRef>> subIndices) {
        super(false);
 @@ -294,7 +296,7 @@
        final byte[] bytes = new byte[(int) scratchBytes.getFilePointer()];
        assert bytes.length > 0;
        scratchBytes.writeTo(bytes, 0);
 -      indexBuilder.add(prefix, new BytesRef(bytes, 0, bytes.length));
 +      indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length));
        scratchBytes.reset();

        // Copy over index for all sub-blocks
 @@ -337,7 +339,7 @@
          //if (DEBUG) {
          //  System.out.println("      add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
          //}
 -        builder.add(indexEnt.input, indexEnt.output);
 +        builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
        }
      }
    }
 @@ -853,13 +855,15 @@
        return postingsWriter;
      }

 +    private final IntsRef scratchIntsRef = new IntsRef();
 +
      @Override
      public void finishTerm(BytesRef text, TermStats stats) throws IOException {

        assert stats.docFreq > 0;
        //if (DEBUG) System.out.println("BTTW.finishTerm term=" + fieldInfo.name + ":" + toString(text) + " seg=" + segment + " df=" + stats.docFreq);

 -      blockBuilder.add(text, noOutputs.getNoOutput());
 +      blockBuilder.add(Util.toIntsRef(text, scratchIntsRef), noOutputs.getNoOutput());
        pending.add(new PendingTerm(BytesRef.deepCopyOf(text), stats));
        postingsWriter.finishTerm(stats);
        numTerms++;
 Index: lucene/src/java/org/apache/lucene/util/fst/Util.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/util/fst/Util.java	(revision 1231386)
 +++ lucene/src/java/org/apache/lucene/util/fst/Util.java	(working copy)
 @@ -31,10 +31,8 @@
    }

    /** Looks up the output for this input, or null if the
 -   *  input is not accepted. FST must be
 -   *  INPUT_TYPE.BYTE4. */
 +   *  input is not accepted. */
    public static<T> T get(FST<T> fst, IntsRef input) throws IOException {
 -    assert fst.inputType == FST.INPUT_TYPE.BYTE4;

      // TODO: would be nice not to alloc this on every lookup
      final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
 @@ -59,78 +57,6 @@
      }
    }

 -  /** Logically casts input to UTF32 ints then looks up the output
 -   *  or null if the input is not accepted.  FST must be
 -   *  INPUT_TYPE.BYTE4.  */
 -  public static<T> T get(FST<T> fst, char[] input, int offset, int length) throws IOException {
 -    assert fst.inputType == FST.INPUT_TYPE.BYTE4;
 -
 -    // TODO: would be nice not to alloc this on every lookup
 -    final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
 -
 -    int charIdx = offset;
 -    final int charLimit = offset + length;
 -
 -    // Accumulate output as we go
 -    final T NO_OUTPUT = fst.outputs.getNoOutput();
 -    T output = NO_OUTPUT;
 -    while(charIdx < charLimit) {
 -      final int utf32 = Character.codePointAt(input, charIdx);
 -      charIdx += Character.charCount(utf32);
 -
 -      if (fst.findTargetArc(utf32, arc, arc) == null) {
 -        return null;
 -      } else if (arc.output != NO_OUTPUT) {
 -        output = fst.outputs.add(output, arc.output);
 -      }
 -    }
 -
 -    if (fst.findTargetArc(FST.END_LABEL, arc, arc) == null) {
 -      return null;
 -    } else if (arc.output != NO_OUTPUT) {
 -      return fst.outputs.add(output, arc.output);
 -    } else {
 -      return output;
 -    }
 -  }
 -
 -
 -  /** Logically casts input to UTF32 ints then looks up the output
 -   *  or null if the input is not accepted.  FST must be
 -   *  INPUT_TYPE.BYTE4.  */
 -  public static<T> T get(FST<T> fst, CharSequence input) throws IOException {
 -    assert fst.inputType == FST.INPUT_TYPE.BYTE4;
 -
 -    // TODO: would be nice not to alloc this on every lookup
 -    final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
 -
 -    int charIdx = 0;
 -    final int charLimit = input.length();
 -
 -    // Accumulate output as we go
 -    final T NO_OUTPUT = fst.outputs.getNoOutput();
 -    T output = NO_OUTPUT;
 -
 -    while(charIdx < charLimit) {
 -      final int utf32 = Character.codePointAt(input, charIdx);
 -      charIdx += Character.charCount(utf32);
 -
 -      if (fst.findTargetArc(utf32, arc, arc) == null) {
 -        return null;
 -      } else if (arc.output != NO_OUTPUT) {
 -        output = fst.outputs.add(output, arc.output);
 -      }
 -    }
 -
 -    if (fst.findTargetArc(FST.END_LABEL, arc, arc) == null) {
 -      return null;
 -    } else if (arc.output != NO_OUTPUT) {
 -      return fst.outputs.add(output, arc.output);
 -    } else {
 -      return output;
 -    }
 -  }
 -
    /** Looks up the output for this input, or null if the
     *  input is not accepted */
    public static<T> T get(FST<T> fst, BytesRef input) throws IOException {
 @@ -381,4 +307,51 @@
        return "0x" + Integer.toHexString(label);
      }
    }
 +
 +  /** Decodes the Unicode codepoints from the provided
 +   *  CharSequence and places them in the provided scratch
 +   *  IntsRef, which must not be null, returning it. */
 +  public static IntsRef toUTF32(CharSequence s, IntsRef scratch) {
 +    int charIdx = 0;
 +    int intIdx = 0;
 +    final int charLimit = s.length();
 +    while(charIdx < charLimit) {
 +      scratch.grow(intIdx+1);
 +      final int utf32 = Character.codePointAt(s, charIdx);
 +      scratch.ints[intIdx] = utf32;
 +      charIdx += Character.charCount(utf32);
 +      intIdx++;
 +    }
 +    scratch.length = intIdx;
 +    return scratch;
 +  }
 +
 +  /** Decodes the Unicode codepoints from the provided
 +   *  CharSequence and places them in the provided scratch
 +   *  IntsRef, which must not be null, returning it. */
 +  public static IntsRef toUTF32(char[] s, int offset, int length, IntsRef scratch) {
 +    int charIdx = offset;
 +    int intIdx = 0;
 +    final int charLimit = offset + length;
 +    while(charIdx < charLimit) {
 +      scratch.grow(intIdx+1);
 +      final int utf32 = Character.codePointAt(s, charIdx);
 +      scratch.ints[intIdx] = utf32;
 +      charIdx += Character.charCount(utf32);
 +      intIdx++;
 +    }
 +    scratch.length = intIdx;
 +    return scratch;
 +  }
 +
 +  /** Just takes unsigned byte values from the BytesRef and
 +   *  converts into an IntsRef. */
 +  public static IntsRef toIntsRef(BytesRef input, IntsRef scratch) {
 +    scratch.grow(input.length);
 +    for(int i=0;i<input.length;i++) {
 +      scratch.ints[i] = input.bytes[i+input.offset] & 0xFF;
 +    }
 +    scratch.length = input.length;
 +    return scratch;
 +  }
  }
 Index: lucene/src/java/org/apache/lucene/util/fst/Builder.java
 ===================================================================
 --- lucene/src/java/org/apache/lucene/util/fst/Builder.java	(revision 1231386)
 +++ lucene/src/java/org/apache/lucene/util/fst/Builder.java	(working copy)
 @@ -19,7 +19,6 @@

  import org.apache.lucene.util.ArrayUtil;
  import org.apache.lucene.util.RamUsageEstimator;
 -import org.apache.lucene.util.BytesRef;
  import org.apache.lucene.util.IntsRef;
  import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc

 @@ -290,54 +289,6 @@
      }
    }

 -  private final IntsRef scratchIntsRef = new IntsRef(10);
 -
 -  public void add(BytesRef input, T output) throws IOException {
 -    assert fst.getInputType() == FST.INPUT_TYPE.BYTE1;
 -    scratchIntsRef.grow(input.length);
 -    for(int i=0;i<input.length;i++) {
 -      scratchIntsRef.ints[i] = input.bytes[i+input.offset] & 0xFF;
 -    }
 -    scratchIntsRef.length = input.length;
 -    add(scratchIntsRef, output);
 -  }
 -
 -  /** Sugar: adds the UTF32 codepoints from char[] slice.  FST
 -   *  must be FST.INPUT_TYPE.BYTE4! */
 -  public void add(char[] s, int offset, int length, T output) throws IOException {
 -    assert fst.getInputType() == FST.INPUT_TYPE.BYTE4;
 -    int charIdx = offset;
 -    int intIdx = 0;
 -    final int charLimit = offset + length;
 -    while(charIdx < charLimit) {
 -      scratchIntsRef.grow(intIdx+1);
 -      final int utf32 = Character.codePointAt(s, charIdx);
 -      scratchIntsRef.ints[intIdx] = utf32;
 -      charIdx += Character.charCount(utf32);
 -      intIdx++;
 -    }
 -    scratchIntsRef.length = intIdx;
 -    add(scratchIntsRef, output);
 -  }
 -
 -  /** Sugar: adds the UTF32 codepoints from CharSequence.  FST
 -   *  must be FST.INPUT_TYPE.BYTE4! */
 -  public void add(CharSequence s, T output) throws IOException {
 -    assert fst.getInputType() == FST.INPUT_TYPE.BYTE4;
 -    int charIdx = 0;
 -    int intIdx = 0;
 -    final int charLimit = s.length();
 -    while(charIdx < charLimit) {
 -      scratchIntsRef.grow(intIdx+1);
 -      final int utf32 = Character.codePointAt(s, charIdx);
 -      scratchIntsRef.ints[intIdx] = utf32;
 -      charIdx += Character.charCount(utf32);
 -      intIdx++;
 -    }
 -    scratchIntsRef.length = intIdx;
 -    add(scratchIntsRef, output);
 -  }
 -
    // for debugging
    /*
    private String toString(BytesRef b) {