blob: 44724ced84e2259fb579a5e675dfc46f00ba9723 [file] [log] [blame]
Index: modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
===================================================================
--- modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java (revision 1231386)
+++ modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java (working copy)
@@ -5,6 +5,7 @@
import java.util.Iterator;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.*;
/**
@@ -219,11 +220,12 @@
shareMaxTailLength, outputs, null);
BytesRef scratch = new BytesRef();
+ final IntsRef scratchIntsRef = new IntsRef();
int count = 0;
for (Iterator<BytesRef> i = sorter.iterator(); i.hasNext(); count++) {
BytesRef entry = i.next();
if (scratch.compareTo(entry) != 0) {
- builder.add(entry, empty);
+ builder.add(Util.toIntsRef(entry, scratchIntsRef), empty);
scratch.copyBytes(entry);
}
}
Index: modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
===================================================================
--- modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (revision 1231386)
+++ modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (working copy)
@@ -33,6 +33,8 @@
import org.apache.lucene.analysis.Tokenizer;
import org.junit.Ignore;
+// nocommit
+@Ignore
public class HTMLStripCharFilterTest extends BaseTokenStreamTestCase {
//this is some text here is a link and another link . This is an entity: & plus a <. Here is an &
Index: modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (revision 1231386)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (working copy)
@@ -33,9 +33,11 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.Util;
/**
* A map of synonyms, keys and values are phrases.
@@ -262,6 +264,8 @@
Set<CharsRef> keys = workingSet.keySet();
CharsRef sortedKeys[] = keys.toArray(new CharsRef[keys.size()]);
Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator());
+
+ final IntsRef scratchIntsRef = new IntsRef();
//System.out.println("fmap.build");
for (int keyIdx = 0; keyIdx < sortedKeys.length; keyIdx++) {
@@ -307,7 +311,7 @@
scratch.length = scratchOutput.getPosition() - scratch.offset;
//System.out.println(" add input=" + input + " output=" + scratch + " offset=" + scratch.offset + " length=" + scratch.length + " count=" + count);
- builder.add(input, BytesRef.deepCopyOf(scratch));
+ builder.add(Util.toUTF32(input, scratchIntsRef), BytesRef.deepCopyOf(scratch));
}
FST<BytesRef> fst = builder.finish();
Index: lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
===================================================================
--- lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java (revision 1231386)
+++ lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java (working copy)
@@ -1050,6 +1050,7 @@
}
Terms terms = MultiFields.getTerms(r, "body");
if (terms != null) {
+ final IntsRef scratchIntsRef = new IntsRef();
final TermsEnum termsEnum = terms.iterator(null);
if (VERBOSE) {
System.out.println("TEST: got termsEnum=" + termsEnum);
@@ -1073,7 +1074,7 @@
} else {
output = termsEnum.docFreq();
}
- builder.add(term, outputs.get(output));
+ builder.add(Util.toIntsRef(term, scratchIntsRef), outputs.get(output));
ord++;
if (VERBOSE && ord % 100000 == 0 && LuceneTestCase.TEST_NIGHTLY) {
System.out.println(ord + " terms...");
@@ -1373,7 +1374,7 @@
public void testSingleString() throws Exception {
final Outputs<Object> outputs = NoOutputs.getSingleton();
final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, outputs);
- b.add(new BytesRef("foobar"), outputs.getNoOutput());
+ b.add(Util.toIntsRef(new BytesRef("foobar"), new IntsRef()), outputs.getNoOutput());
final BytesRefFSTEnum<Object> fstEnum = new BytesRefFSTEnum<Object>(b.finish());
assertNull(fstEnum.seekFloor(new BytesRef("foo")));
assertNull(fstEnum.seekCeil(new BytesRef("foobaz")));
@@ -1395,9 +1396,9 @@
final BytesRef b = new BytesRef("b");
final BytesRef c = new BytesRef("c");
- builder.add(a, outputs.get(17));
- builder.add(b, outputs.get(42));
- builder.add(c, outputs.get(13824324872317238L));
+ builder.add(Util.toIntsRef(a, new IntsRef()), outputs.get(17));
+ builder.add(Util.toIntsRef(b, new IntsRef()), outputs.get(42));
+ builder.add(Util.toIntsRef(c, new IntsRef()), outputs.get(13824324872317238L));
final FST<Long> fst = builder.finish();
@@ -1628,13 +1629,14 @@
int line = 0;
final BytesRef term = new BytesRef();
+ final IntsRef scratchIntsRef = new IntsRef();
while (line < lines.length) {
String w = lines[line++];
if (w == null) {
break;
}
term.copyChars(w);
- b.add(term, nothing);
+ b.add(Util.toIntsRef(term, scratchIntsRef), nothing);
}
return b.finish();
@@ -1698,8 +1700,8 @@
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null);
- builder.add("stat", outputs.get(17));
- builder.add("station", outputs.get(10));
+ builder.add(Util.toUTF32("stat", new IntsRef()), outputs.get(17));
+ builder.add(Util.toUTF32("station", new IntsRef()), outputs.get(10));
final FST<Long> fst = builder.finish();
//Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
StringWriter w = new StringWriter();
@@ -1713,8 +1715,8 @@
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null);
- builder.add(new BytesRef("stat"), outputs.getNoOutput());
- builder.add(new BytesRef("station"), outputs.getNoOutput());
+ builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
+ builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
final FST<Long> fst = builder.finish();
StringWriter w = new StringWriter();
//Writer w = new OutputStreamWriter(new FileOutputStream("/x/tmp/out.dot"));
Index: lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
===================================================================
--- lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (revision 1231386)
+++ lucene/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java (working copy)
@@ -51,10 +51,12 @@
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.Util;
// TODO: would be nice to somehow allow this to act like
// InstantiatedIndex, by never writing to disk; ie you write
@@ -183,6 +185,8 @@
private final BytesRef spare = new BytesRef();
private byte[] finalBuffer = new byte[128];
+ private final IntsRef scratchIntsRef = new IntsRef();
+
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
@@ -213,7 +217,7 @@
System.out.println(" " + Integer.toHexString(finalBuffer[i]&0xFF));
}
}
- builder.add(text, BytesRef.deepCopyOf(spare));
+ builder.add(Util.toIntsRef(text, scratchIntsRef), BytesRef.deepCopyOf(spare));
termCount++;
}
Index: lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java (revision 1231386)
+++ lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexReader.java (working copy)
@@ -33,6 +33,7 @@
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
@@ -187,6 +188,7 @@
if (indexDivisor > 1) {
// subsample
+ final IntsRef scratchIntsRef = new IntsRef();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<Long>(fst);
@@ -194,7 +196,7 @@
int count = indexDivisor;
while((result = fstEnum.next()) != null) {
if (count == indexDivisor) {
- builder.add(result.input, result.output);
+ builder.add(Util.toIntsRef(result.input, scratchIntsRef), result.output);
count = 0;
}
count++;
Index: lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java (revision 1231386)
+++ lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java (working copy)
@@ -29,9 +29,11 @@
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
/**
* Selects index terms according to provided pluggable
@@ -227,7 +229,7 @@
////System.out.println("VGW: field=" + fieldInfo.name);
// Always put empty string in
- fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
+ fstBuilder.add(new IntsRef(), fstOutputs.get(termsFilePointer));
startTermsFilePointer = termsFilePointer;
}
@@ -246,6 +248,8 @@
}
}
+ private final IntsRef scratchIntsRef = new IntsRef();
+
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
if (text.length == 0) {
@@ -256,7 +260,7 @@
final int lengthSave = text.length;
text.length = indexedTermPrefixLength(lastTerm, text);
try {
- fstBuilder.add(text, fstOutputs.get(termsFilePointer));
+ fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), fstOutputs.get(termsFilePointer));
} finally {
text.length = lengthSave;
}
Index: lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
===================================================================
--- lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (revision 1231386)
+++ lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (working copy)
@@ -36,6 +36,7 @@
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
@@ -44,6 +45,7 @@
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
+import org.apache.lucene.util.fst.Util;
class SimpleTextFieldsReader extends FieldsProducer {
@@ -477,11 +479,12 @@
int docFreq = 0;
long totalTermFreq = 0;
OpenBitSet visitedDocs = new OpenBitSet();
+ final IntsRef scratchIntsRef = new IntsRef();
while(true) {
SimpleTextUtil.readLine(in, scratch);
if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
if (lastDocsStart != -1) {
- b.add(lastTerm, new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
+ b.add(Util.toIntsRef(lastTerm, scratchIntsRef), new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
new PairOutputs.Pair<Long,Long>((long) docFreq,
posIntOutputs.get(totalTermFreq))));
sumTotalTermFreq += totalTermFreq;
@@ -497,7 +500,7 @@
totalTermFreq++;
} else if (StringHelper.startsWith(scratch, TERM)) {
if (lastDocsStart != -1) {
- b.add(lastTerm, new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
+ b.add(Util.toIntsRef(lastTerm, scratchIntsRef), new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
new PairOutputs.Pair<Long,Long>((long) docFreq,
posIntOutputs.get(totalTermFreq))));
}
Index: lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
===================================================================
--- lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (revision 1231386)
+++ lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (working copy)
@@ -22,8 +22,8 @@
import java.util.Comparator;
import java.util.List;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentWriteState;
@@ -39,6 +39,7 @@
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
+import org.apache.lucene.util.fst.Util;
/*
TODO:
@@ -244,6 +245,7 @@
public final boolean hasTerms;
public final boolean isFloor;
public final int floorLeadByte;
+ private final IntsRef scratchIntsRef = new IntsRef();
public PendingBlock(BytesRef prefix, long fp, boolean hasTerms, boolean isFloor, int floorLeadByte, List<FST<BytesRef>> subIndices) {
super(false);
@@ -294,7 +296,7 @@
final byte[] bytes = new byte[(int) scratchBytes.getFilePointer()];
assert bytes.length > 0;
scratchBytes.writeTo(bytes, 0);
- indexBuilder.add(prefix, new BytesRef(bytes, 0, bytes.length));
+ indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length));
scratchBytes.reset();
// Copy over index for all sub-blocks
@@ -337,7 +339,7 @@
//if (DEBUG) {
// System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output);
//}
- builder.add(indexEnt.input, indexEnt.output);
+ builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output);
}
}
}
@@ -853,13 +855,15 @@
return postingsWriter;
}
+ private final IntsRef scratchIntsRef = new IntsRef();
+
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
assert stats.docFreq > 0;
//if (DEBUG) System.out.println("BTTW.finishTerm term=" + fieldInfo.name + ":" + toString(text) + " seg=" + segment + " df=" + stats.docFreq);
- blockBuilder.add(text, noOutputs.getNoOutput());
+ blockBuilder.add(Util.toIntsRef(text, scratchIntsRef), noOutputs.getNoOutput());
pending.add(new PendingTerm(BytesRef.deepCopyOf(text), stats));
postingsWriter.finishTerm(stats);
numTerms++;
Index: lucene/src/java/org/apache/lucene/util/fst/Util.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/fst/Util.java (revision 1231386)
+++ lucene/src/java/org/apache/lucene/util/fst/Util.java (working copy)
@@ -31,10 +31,8 @@
}
/** Looks up the output for this input, or null if the
- * input is not accepted. FST must be
- * INPUT_TYPE.BYTE4. */
+ * input is not accepted. */
public static<T> T get(FST<T> fst, IntsRef input) throws IOException {
- assert fst.inputType == FST.INPUT_TYPE.BYTE4;
// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
@@ -59,78 +57,6 @@
}
}
- /** Logically casts input to UTF32 ints then looks up the output
- * or null if the input is not accepted. FST must be
- * INPUT_TYPE.BYTE4. */
- public static<T> T get(FST<T> fst, char[] input, int offset, int length) throws IOException {
- assert fst.inputType == FST.INPUT_TYPE.BYTE4;
-
- // TODO: would be nice not to alloc this on every lookup
- final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
-
- int charIdx = offset;
- final int charLimit = offset + length;
-
- // Accumulate output as we go
- final T NO_OUTPUT = fst.outputs.getNoOutput();
- T output = NO_OUTPUT;
- while(charIdx < charLimit) {
- final int utf32 = Character.codePointAt(input, charIdx);
- charIdx += Character.charCount(utf32);
-
- if (fst.findTargetArc(utf32, arc, arc) == null) {
- return null;
- } else if (arc.output != NO_OUTPUT) {
- output = fst.outputs.add(output, arc.output);
- }
- }
-
- if (fst.findTargetArc(FST.END_LABEL, arc, arc) == null) {
- return null;
- } else if (arc.output != NO_OUTPUT) {
- return fst.outputs.add(output, arc.output);
- } else {
- return output;
- }
- }
-
-
- /** Logically casts input to UTF32 ints then looks up the output
- * or null if the input is not accepted. FST must be
- * INPUT_TYPE.BYTE4. */
- public static<T> T get(FST<T> fst, CharSequence input) throws IOException {
- assert fst.inputType == FST.INPUT_TYPE.BYTE4;
-
- // TODO: would be nice not to alloc this on every lookup
- final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
-
- int charIdx = 0;
- final int charLimit = input.length();
-
- // Accumulate output as we go
- final T NO_OUTPUT = fst.outputs.getNoOutput();
- T output = NO_OUTPUT;
-
- while(charIdx < charLimit) {
- final int utf32 = Character.codePointAt(input, charIdx);
- charIdx += Character.charCount(utf32);
-
- if (fst.findTargetArc(utf32, arc, arc) == null) {
- return null;
- } else if (arc.output != NO_OUTPUT) {
- output = fst.outputs.add(output, arc.output);
- }
- }
-
- if (fst.findTargetArc(FST.END_LABEL, arc, arc) == null) {
- return null;
- } else if (arc.output != NO_OUTPUT) {
- return fst.outputs.add(output, arc.output);
- } else {
- return output;
- }
- }
-
/** Looks up the output for this input, or null if the
* input is not accepted */
public static<T> T get(FST<T> fst, BytesRef input) throws IOException {
@@ -381,4 +307,51 @@
return "0x" + Integer.toHexString(label);
}
}
+
+ /** Decodes the Unicode codepoints from the provided
+ * CharSequence and places them in the provided scratch
+ * IntsRef, which must not be null, returning it. */
+ public static IntsRef toUTF32(CharSequence s, IntsRef scratch) {
+ int charIdx = 0;
+ int intIdx = 0;
+ final int charLimit = s.length();
+ while(charIdx < charLimit) {
+ scratch.grow(intIdx+1);
+ final int utf32 = Character.codePointAt(s, charIdx);
+ scratch.ints[intIdx] = utf32;
+ charIdx += Character.charCount(utf32);
+ intIdx++;
+ }
+ scratch.length = intIdx;
+ return scratch;
+ }
+
+ /** Decodes the Unicode codepoints from the provided
+ * char[] slice (offset, length) and places them in the provided
+ * scratch IntsRef, which must not be null, returning it. */
+ public static IntsRef toUTF32(char[] s, int offset, int length, IntsRef scratch) {
+ int charIdx = offset;
+ int intIdx = 0;
+ final int charLimit = offset + length;
+ while(charIdx < charLimit) {
+ scratch.grow(intIdx+1);
+ final int utf32 = Character.codePointAt(s, charIdx);
+ scratch.ints[intIdx] = utf32;
+ charIdx += Character.charCount(utf32);
+ intIdx++;
+ }
+ scratch.length = intIdx;
+ return scratch;
+ }
+
+ /** Copies each byte of the BytesRef, as an unsigned value, into the
+ * provided scratch IntsRef (which must not be null), returning it. */
+ public static IntsRef toIntsRef(BytesRef input, IntsRef scratch) {
+ scratch.grow(input.length);
+ for(int i=0;i<input.length;i++) {
+ scratch.ints[i] = input.bytes[i+input.offset] & 0xFF;
+ }
+ scratch.length = input.length;
+ return scratch;
+ }
}
Index: lucene/src/java/org/apache/lucene/util/fst/Builder.java
===================================================================
--- lucene/src/java/org/apache/lucene/util/fst/Builder.java (revision 1231386)
+++ lucene/src/java/org/apache/lucene/util/fst/Builder.java (working copy)
@@ -19,7 +19,6 @@
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
@@ -290,54 +289,6 @@
}
}
- private final IntsRef scratchIntsRef = new IntsRef(10);
-
- public void add(BytesRef input, T output) throws IOException {
- assert fst.getInputType() == FST.INPUT_TYPE.BYTE1;
- scratchIntsRef.grow(input.length);
- for(int i=0;i<input.length;i++) {
- scratchIntsRef.ints[i] = input.bytes[i+input.offset] & 0xFF;
- }
- scratchIntsRef.length = input.length;
- add(scratchIntsRef, output);
- }
-
- /** Sugar: adds the UTF32 codepoints from char[] slice. FST
- * must be FST.INPUT_TYPE.BYTE4! */
- public void add(char[] s, int offset, int length, T output) throws IOException {
- assert fst.getInputType() == FST.INPUT_TYPE.BYTE4;
- int charIdx = offset;
- int intIdx = 0;
- final int charLimit = offset + length;
- while(charIdx < charLimit) {
- scratchIntsRef.grow(intIdx+1);
- final int utf32 = Character.codePointAt(s, charIdx);
- scratchIntsRef.ints[intIdx] = utf32;
- charIdx += Character.charCount(utf32);
- intIdx++;
- }
- scratchIntsRef.length = intIdx;
- add(scratchIntsRef, output);
- }
-
- /** Sugar: adds the UTF32 codepoints from CharSequence. FST
- * must be FST.INPUT_TYPE.BYTE4! */
- public void add(CharSequence s, T output) throws IOException {
- assert fst.getInputType() == FST.INPUT_TYPE.BYTE4;
- int charIdx = 0;
- int intIdx = 0;
- final int charLimit = s.length();
- while(charIdx < charLimit) {
- scratchIntsRef.grow(intIdx+1);
- final int utf32 = Character.codePointAt(s, charIdx);
- scratchIntsRef.ints[intIdx] = utf32;
- charIdx += Character.charCount(utf32);
- intIdx++;
- }
- scratchIntsRef.length = intIdx;
- add(scratchIntsRef, output);
- }
-
// for debugging
/*
private String toString(BytesRef b) {