Remove ThreadLocal variables, replacing them with org.apache.blur.utils.ThreadValue.
diff --git a/blur-core/src/main/java/org/apache/blur/command/BaseCommandManager.java b/blur-core/src/main/java/org/apache/blur/command/BaseCommandManager.java
index be92e34..b91e378 100644
--- a/blur-core/src/main/java/org/apache/blur/command/BaseCommandManager.java
+++ b/blur-core/src/main/java/org/apache/blur/command/BaseCommandManager.java
@@ -390,7 +390,9 @@
throw new IOException("Execution instance id [" + instanceExecutionId + "] did not find any executing commands.");
}
try {
- return future.get(_connectionTimeout, TimeUnit.MILLISECONDS);
+ Response response = future.get(_connectionTimeout, TimeUnit.MILLISECONDS);
+ _driverRunningMap.remove(instanceExecutionId);
+ return response;
} catch (CancellationException e) {
throw new IOException(e);
} catch (InterruptedException e) {
@@ -411,7 +413,9 @@
_driverRunningMap.put(instanceExecutionId, new ResponseFuture<Response>(_runningCacheTombstoneTime, future,
commandExecuting, originalCommandStatusObject, running));
try {
- return future.get(_connectionTimeout, TimeUnit.MILLISECONDS);
+ Response response = future.get(_connectionTimeout, TimeUnit.MILLISECONDS);
+ _driverRunningMap.remove(instanceExecutionId);
+ return response;
} catch (CancellationException e) {
throw new IOException(e);
} catch (InterruptedException e) {
@@ -447,9 +451,10 @@
CommandStatus originalCommandStatusObject, AtomicBoolean running) {
Future<T> future = _executorServiceWorker.submit(callable);
Long instanceExecutionId = getInstanceExecutionId();
- _workerRunningMap.put(instanceExecutionId, new ResponseFuture<T>(_runningCacheTombstoneTime, future,
- commandExecuting, originalCommandStatusObject, running));
- return future;
+ ResponseFuture<T> responseFuture = new ResponseFuture<T>(_runningCacheTombstoneTime, future,
+ commandExecuting, originalCommandStatusObject, running);
+ _workerRunningMap.put(instanceExecutionId, responseFuture);
+ return responseFuture;
}
@Override
diff --git a/blur-core/src/main/java/org/apache/blur/manager/indexserver/MasterBasedDistributedLayoutFactory.java b/blur-core/src/main/java/org/apache/blur/manager/indexserver/MasterBasedDistributedLayoutFactory.java
index 6fff994..1a2e916 100644
--- a/blur-core/src/main/java/org/apache/blur/manager/indexserver/MasterBasedDistributedLayoutFactory.java
+++ b/blur-core/src/main/java/org/apache/blur/manager/indexserver/MasterBasedDistributedLayoutFactory.java
@@ -38,6 +38,7 @@
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
+import org.apache.blur.utils.ThreadValue;
import org.apache.blur.zookeeper.ZkUtils;
import org.apache.blur.zookeeper.ZooKeeperLockManager;
import org.apache.blur.zookeeper.ZookeeperPathConstants;
@@ -57,7 +58,7 @@
private final String _storagePath;
private final ZooKeeperLockManager _zooKeeperLockManager;
private final String _locksStoragePath;
- private final ThreadLocal<Random> _random = new ThreadLocal<Random>() {
+ private final ThreadValue<Random> _random = new ThreadValue<Random>() {
@Override
protected Random initialValue() {
return new Random();
diff --git a/blur-core/src/main/java/org/apache/blur/utils/BlurUtil.java b/blur-core/src/main/java/org/apache/blur/utils/BlurUtil.java
index 9406e66..5c0022c 100644
--- a/blur-core/src/main/java/org/apache/blur/utils/BlurUtil.java
+++ b/blur-core/src/main/java/org/apache/blur/utils/BlurUtil.java
@@ -221,7 +221,7 @@
final String prefix = new SimpleDateFormat("yyyyMMddHHmmss").format(new Date());
InvocationHandler handler = new InvocationHandler() {
private final AtomicLong _requestCounter = new AtomicLong();
- private ThreadLocal<LoggerArgsState> _loggerArgsState = new ThreadLocal<LoggerArgsState>() {
+ private ThreadValue<LoggerArgsState> _loggerArgsState = new ThreadValue<LoggerArgsState>() {
@Override
protected LoggerArgsState initialValue() {
return new LoggerArgsState(1024);
diff --git a/blur-core/src/test/java/org/apache/blur/command/ShardCommandManagerTest.java b/blur-core/src/test/java/org/apache/blur/command/ShardCommandManagerTest.java
index f5fe4ea..0fb966a 100644
--- a/blur-core/src/test/java/org/apache/blur/command/ShardCommandManagerTest.java
+++ b/blur-core/src/test/java/org/apache/blur/command/ShardCommandManagerTest.java
@@ -96,7 +96,7 @@
@Test
public void testGetCommands() {
Map<String, BigInteger> commands = _manager.getCommands();
- assertEquals(4, commands.size());
+ assertEquals(5, commands.size());
assertTrue(commands.containsKey("wait"));
assertTrue(commands.containsKey("error"));
assertTrue(commands.containsKey("RunSlowForTesting"));
diff --git a/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java b/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
index 460c7f9..bd5ae74 100644
--- a/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
+++ b/blur-hive/src/main/java/org/apache/blur/hive/BlurSerializer.java
@@ -26,6 +26,7 @@
import org.apache.blur.mapreduce.lib.BlurRecord;
import org.apache.blur.thrift.generated.ColumnDefinition;
+import org.apache.blur.utils.ThreadValue;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -39,7 +40,7 @@
private static final String DATE_FORMAT = "dateFormat";
private static final String DATE = "date";
- private Map<String, ThreadLocal<SimpleDateFormat>> _dateFormat = new HashMap<String, ThreadLocal<SimpleDateFormat>>();
+ private Map<String, ThreadValue<SimpleDateFormat>> _dateFormat = new HashMap<String, ThreadValue<SimpleDateFormat>>();
private BlurColumnNameResolver _columnNameResolver;
public BlurSerializer(Map<String, ColumnDefinition> colDefs, BlurColumnNameResolver columnNameResolver) {
@@ -52,7 +53,7 @@
if (fieldType.equals(DATE)) {
Map<String, String> properties = columnDefinition.getProperties();
final String dateFormat = properties.get(DATE_FORMAT);
- ThreadLocal<SimpleDateFormat> threadLocal = new ThreadLocal<SimpleDateFormat>() {
+ ThreadValue<SimpleDateFormat> threadLocal = new ThreadValue<SimpleDateFormat>() {
@Override
protected SimpleDateFormat initialValue() {
return new SimpleDateFormat(dateFormat);
@@ -184,7 +185,7 @@
}
private SimpleDateFormat getSimpleDateFormat(String columnName) throws SerDeException {
- ThreadLocal<SimpleDateFormat> threadLocal = _dateFormat.get(columnName);
+ ThreadValue<SimpleDateFormat> threadLocal = _dateFormat.get(columnName);
if (threadLocal == null) {
throw new SerDeException("Date format missing for column [" + columnName + "]");
}
diff --git a/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/BlurOutputFormat.java b/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/BlurOutputFormat.java
index 52900cf..3e72772 100644
--- a/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/BlurOutputFormat.java
+++ b/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/BlurOutputFormat.java
@@ -27,6 +27,7 @@
import org.apache.blur.thrift.BlurClient;
import org.apache.blur.thrift.generated.Blur.Iface;
import org.apache.blur.thrift.generated.TableDescriptor;
+import org.apache.blur.utils.ThreadValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
@@ -77,8 +78,8 @@
public static final String BLUR_OUTPUT_PATH = "blur.output.path";
private static final String MAPRED_OUTPUT_COMMITTER_CLASS = "mapred.output.committer.class";
- private static ThreadLocal<Progressable> _progressable = new ThreadLocal<Progressable>();
- private static ThreadLocal<GetCounter> _getCounter = new ThreadLocal<GetCounter>();
+ private static ThreadValue<Progressable> _progressable = new ThreadValue<Progressable>();
+ private static ThreadValue<GetCounter> _getCounter = new ThreadValue<GetCounter>();
public static void setProgressable(Progressable progressable) {
_progressable.set(progressable);
diff --git a/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/GenericBlurRecordWriter.java b/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/GenericBlurRecordWriter.java
index 8828f85..fba819e 100644
--- a/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/GenericBlurRecordWriter.java
+++ b/blur-mapred/src/main/java/org/apache/blur/mapreduce/lib/GenericBlurRecordWriter.java
@@ -39,6 +39,7 @@
import org.apache.blur.utils.BlurConstants;
import org.apache.blur.utils.RowDocumentUtil;
import org.apache.blur.utils.ShardUtil;
+import org.apache.blur.utils.ThreadValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
@@ -278,7 +279,7 @@
return record;
}
- private static ThreadLocal<AtomicBoolean> _existingRow = new ThreadLocal<AtomicBoolean>() {
+ private static ThreadValue<AtomicBoolean> _existingRow = new ThreadValue<AtomicBoolean>() {
@Override
protected AtomicBoolean initialValue() {
return new AtomicBoolean();
diff --git a/blur-query/src/main/java/org/apache/blur/analysis/type/DateFieldTypeDefinition.java b/blur-query/src/main/java/org/apache/blur/analysis/type/DateFieldTypeDefinition.java
index 695cdc3..0e35125 100644
--- a/blur-query/src/main/java/org/apache/blur/analysis/type/DateFieldTypeDefinition.java
+++ b/blur-query/src/main/java/org/apache/blur/analysis/type/DateFieldTypeDefinition.java
@@ -25,6 +25,7 @@
import java.util.concurrent.TimeUnit;
import org.apache.blur.thrift.generated.Column;
+import org.apache.blur.utils.ThreadValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -44,7 +45,7 @@
public static final String DATE_FORMAT = "dateFormat";
public static final String NAME = "date";
private FieldType _typeNotStored;
- private ThreadLocal<SimpleDateFormat> _simpleDateFormat;
+ private ThreadValue<SimpleDateFormat> _simpleDateFormat;
private TimeUnit _timeUnit = TimeUnit.SECONDS;
@Override
@@ -62,7 +63,7 @@
if (timeUnitStr != null) {
_timeUnit = TimeUnit.valueOf(timeUnitStr.trim().toUpperCase());
}
- _simpleDateFormat = new ThreadLocal<SimpleDateFormat>() {
+ _simpleDateFormat = new ThreadValue<SimpleDateFormat>() {
@Override
protected SimpleDateFormat initialValue() {
return new SimpleDateFormat(dateFormat);
diff --git a/blur-store/src/main/java/org/apache/blur/index/ExitObject.java b/blur-store/src/main/java/org/apache/blur/index/ExitObject.java
index 92cd219..d2ad352 100644
--- a/blur-store/src/main/java/org/apache/blur/index/ExitObject.java
+++ b/blur-store/src/main/java/org/apache/blur/index/ExitObject.java
@@ -18,9 +18,11 @@
import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.blur.utils.ThreadValue;
+
public class ExitObject {
- private final ThreadLocal<AtomicBoolean> _running = new ThreadLocal<AtomicBoolean>() {
+ private final ThreadValue<AtomicBoolean> _running = new ThreadValue<AtomicBoolean>() {
@Override
protected AtomicBoolean initialValue() {
return new AtomicBoolean(true);
diff --git a/blur-store/src/main/java/org/apache/blur/lucene/codec/CachedDecompressor.java b/blur-store/src/main/java/org/apache/blur/lucene/codec/CachedDecompressor.java
index 18c4cc1..1a438da 100644
--- a/blur-store/src/main/java/org/apache/blur/lucene/codec/CachedDecompressor.java
+++ b/blur-store/src/main/java/org/apache/blur/lucene/codec/CachedDecompressor.java
@@ -18,6 +18,7 @@
import java.io.IOException;
+import org.apache.blur.utils.ThreadValue;
import org.apache.lucene.codecs.compressing.Decompressor;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
@@ -27,7 +28,7 @@
public class CachedDecompressor extends Decompressor {
private final Decompressor _decompressor;
- private final ThreadLocal<Entry> _entry = new ThreadLocal<Entry>() {
+ private final ThreadValue<Entry> _entry = new ThreadValue<Entry>() {
@Override
protected Entry initialValue() {
return new Entry();
diff --git a/blur-store/src/main/java/org/apache/blur/lucene/codec/DiskDocValuesProducer.java b/blur-store/src/main/java/org/apache/blur/lucene/codec/DiskDocValuesProducer.java
index fd617b6..ff51ddd 100644
--- a/blur-store/src/main/java/org/apache/blur/lucene/codec/DiskDocValuesProducer.java
+++ b/blur-store/src/main/java/org/apache/blur/lucene/codec/DiskDocValuesProducer.java
@@ -23,6 +23,7 @@
import org.apache.blur.trace.Trace;
import org.apache.blur.trace.Tracer;
+import org.apache.blur.utils.ThreadValue;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
@@ -236,7 +237,7 @@
return new LongBinaryDocValues() {
- private final ThreadLocal<IndexInput> in = new ThreadLocal<IndexInput>() {
+ private final ThreadValue<IndexInput> in = new ThreadValue<IndexInput>() {
@Override
protected IndexInput initialValue() {
return data.clone();
@@ -277,7 +278,7 @@
}
return new LongBinaryDocValues() {
- private final ThreadLocal<IndexInput> _input = new ThreadLocal<IndexInput>() {
+ private final ThreadValue<IndexInput> _input = new ThreadValue<IndexInput>() {
@Override
protected IndexInput initialValue() {
return data.clone();
diff --git a/blur-store/src/main/java/org/apache/blur/store/blockcache/BlockCache.java b/blur-store/src/main/java/org/apache/blur/store/blockcache/BlockCache.java
index 4597ffa..1c6d9a1 100644
--- a/blur-store/src/main/java/org/apache/blur/store/blockcache/BlockCache.java
+++ b/blur-store/src/main/java/org/apache/blur/store/blockcache/BlockCache.java
@@ -33,6 +33,8 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.blur.utils.ThreadValue;
+
import com.googlecode.concurrentlinkedhashmap.ConcurrentLinkedHashMap;
import com.googlecode.concurrentlinkedhashmap.EvictionListener;
import com.yammer.metrics.Metrics;
@@ -71,7 +73,7 @@
private final Meter evictions;
private final int _numberOfSlabs;
private final boolean _directAllocation;
- private final ThreadLocal<ByteBuffer[]> _threadLocalSlabs = new ThreadLocal<ByteBuffer[]>() {
+ private final ThreadValue<ByteBuffer[]> _threadLocalSlabs = new ThreadValue<ByteBuffer[]>() {
@Override
protected ByteBuffer[] initialValue() {
return new ByteBuffer[_numberOfSlabs];
diff --git a/blur-store/src/main/java/org/apache/blur/store/blockcache_v2/MeterWrapper.java b/blur-store/src/main/java/org/apache/blur/store/blockcache_v2/MeterWrapper.java
index 269245a..5a7102d 100644
--- a/blur-store/src/main/java/org/apache/blur/store/blockcache_v2/MeterWrapper.java
+++ b/blur-store/src/main/java/org/apache/blur/store/blockcache_v2/MeterWrapper.java
@@ -31,6 +31,7 @@
import org.apache.blur.log.Log;
import org.apache.blur.log.LogFactory;
+import org.apache.blur.utils.ThreadValue;
import com.yammer.metrics.core.Meter;
@@ -75,7 +76,7 @@
public static MeterWrapper wrap(final SimpleMeter meter) {
final String id = UUID.randomUUID().toString();
- final ThreadLocal<AtomicLong> countThreadLocal = new ThreadLocal<AtomicLong>() {
+ final ThreadValue<AtomicLong> countThreadLocal = new ThreadValue<AtomicLong>() {
@Override
protected AtomicLong initialValue() {
AtomicLong counter = new AtomicLong();
diff --git a/blur-store/src/main/java/org/apache/blur/store/hdfs_v2/StoreDirection.java b/blur-store/src/main/java/org/apache/blur/store/hdfs_v2/StoreDirection.java
index 2dacc8a..2ee6e3c 100644
--- a/blur-store/src/main/java/org/apache/blur/store/hdfs_v2/StoreDirection.java
+++ b/blur-store/src/main/java/org/apache/blur/store/hdfs_v2/StoreDirection.java
@@ -16,8 +16,10 @@
*/
package org.apache.blur.store.hdfs_v2;
+import org.apache.blur.utils.ThreadValue;
+
public class StoreDirection {
- public static ThreadLocal<Boolean> LONG_TERM = new ThreadLocal<Boolean>() {
+ public static ThreadValue<Boolean> LONG_TERM = new ThreadValue<Boolean>() {
@Override
protected Boolean initialValue() {
return false;
diff --git a/blur-store/src/main/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/blur-store/src/main/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
new file mode 100644
index 0000000..4ef65ff
--- /dev/null
+++ b/blur-store/src/main/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@@ -0,0 +1,2939 @@
+package org.apache.lucene.codecs;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.TreeMap;
+
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.RunAutomaton;
+import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.fst.ByteSequenceOutputs;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.Outputs;
+import org.apache.lucene.util.fst.Util;
+
+/** A block-based terms index and dictionary that assigns
+ * terms to variable length blocks according to how they
+ * share prefixes. The terms index is a prefix trie
+ * whose leaves are term blocks. The advantage of this
+ * approach is that seekExact is often able to
+ * determine a term cannot exist without doing any IO, and
+ * intersection with Automata is very fast. Note that this
+ * terms dictionary has it's own fixed terms index (ie, it
+ * does not support a pluggable terms index
+ * implementation).
+ *
+ * <p><b>NOTE</b>: this terms dictionary does not support
+ * index divisor when opening an IndexReader. Instead, you
+ * can change the min/maxItemsPerBlock during indexing.</p>
+ *
+ * <p>The data structure used by this implementation is very
+ * similar to a burst trie
+ * (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499),
+ * but with added logic to break up too-large blocks of all
+ * terms sharing a given prefix into smaller ones.</p>
+ *
+ * <p>Use {@link org.apache.lucene.index.CheckIndex} with the <code>-verbose</code>
+ * option to see summary statistics on the blocks in the
+ * dictionary.
+ *
+ * See {@link BlockTreeTermsWriter}.
+ *
+ * @lucene.experimental
+ */
+
+public class BlockTreeTermsReader extends FieldsProducer {
+
+ // Open input to the main terms dict file (_X.tib)
+ private final IndexInput in;
+
+ //private static final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+
+ // Reads the terms dict entries, to gather state to
+ // produce DocsEnum on demand
+ private final PostingsReaderBase postingsReader;
+
+ private final TreeMap<String,FieldReader> fields = new TreeMap<String,FieldReader>();
+
+ /** File offset where the directory starts in the terms file. */
+ private long dirOffset;
+
+ /** File offset where the directory starts in the index file. */
+ private long indexDirOffset;
+
+ private String segment;
+
+ private final int version;
+
+ private final IndexInput indexIn;
+
+ /** Sole constructor. */
+ public BlockTreeTermsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo info,
+ PostingsReaderBase postingsReader, IOContext ioContext,
+ String segmentSuffix, int indexDivisor)
+ throws IOException {
+
+ this.postingsReader = postingsReader;
+
+ this.segment = info.name;
+ in = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_EXTENSION),
+ ioContext);
+
+ boolean success = false;
+
+ try {
+ version = readHeader(in);
+ if (indexDivisor != -1) {
+ indexIn = dir.openInput(IndexFileNames.segmentFileName(segment, segmentSuffix, BlockTreeTermsWriter.TERMS_INDEX_EXTENSION),
+ ioContext);
+ int indexVersion = readIndexHeader(indexIn);
+ if (indexVersion != version) {
+ throw new CorruptIndexException("mixmatched version files: " + in + "=" + version + "," + indexIn + "=" + indexVersion);
+ }
+ } else {
+ indexIn = null;
+ }
+
+ // Have PostingsReader init itself
+ postingsReader.init(in);
+
+ // Read per-field details
+ seekDir(in, dirOffset);
+ if (indexDivisor != -1) {
+ seekDir(indexIn, indexDirOffset);
+ }
+
+ final int numFields = in.readVInt();
+ if (numFields < 0) {
+ throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + in + ")");
+ }
+
+ for(int i=0;i<numFields;i++) {
+ final int field = in.readVInt();
+ final long numTerms = in.readVLong();
+ assert numTerms >= 0;
+ final int numBytes = in.readVInt();
+ final BytesRef rootCode = new BytesRef(new byte[numBytes]);
+ in.readBytes(rootCode.bytes, 0, numBytes);
+ rootCode.length = numBytes;
+ final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
+ assert fieldInfo != null: "field=" + field;
+ final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : in.readVLong();
+ final long sumDocFreq = in.readVLong();
+ final int docCount = in.readVInt();
+ if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
+ throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
+ }
+ if (sumDocFreq < docCount) { // #postings must be >= #docs with field
+ throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + in + ")");
+ }
+ if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
+ throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
+ }
+ final long indexStartFP = indexDivisor != -1 ? indexIn.readVLong() : 0;
+ FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, indexIn));
+ if (previous != null) {
+ throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
+ }
+ }
+
+ success = true;
+ } finally {
+ if (!success) {
+ // this.close() will close in:
+ IOUtils.closeWhileHandlingException(this);
+ }
+ }
+ }
+
+ /** Reads terms file header. */
+ protected int readHeader(IndexInput input) throws IOException {
+ int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME,
+ BlockTreeTermsWriter.TERMS_VERSION_START,
+ BlockTreeTermsWriter.TERMS_VERSION_CURRENT);
+ if (version < BlockTreeTermsWriter.TERMS_VERSION_APPEND_ONLY) {
+ dirOffset = input.readLong();
+ }
+ return version;
+ }
+
+ /** Reads index file header. */
+ protected int readIndexHeader(IndexInput input) throws IOException {
+ int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_INDEX_CODEC_NAME,
+ BlockTreeTermsWriter.TERMS_INDEX_VERSION_START,
+ BlockTreeTermsWriter.TERMS_INDEX_VERSION_CURRENT);
+ if (version < BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+ indexDirOffset = input.readLong();
+ }
+ return version;
+ }
+
+ /** Seek {@code input} to the directory offset. */
+ protected void seekDir(IndexInput input, long dirOffset)
+ throws IOException {
+ if (version >= BlockTreeTermsWriter.TERMS_INDEX_VERSION_APPEND_ONLY) {
+ input.seek(input.length() - 8);
+ dirOffset = input.readLong();
+ }
+ input.seek(dirOffset);
+ }
+
+ // for debugging
+ // private static String toHex(int v) {
+ // return "0x" + Integer.toHexString(v);
+ // }
+
+ @Override
+ public void close() throws IOException {
+ try {
+ IOUtils.close(indexIn, in, postingsReader);
+ } finally {
+ // Clear so refs to terms index is GCable even if
+ // app hangs onto us:
+ fields.clear();
+ }
+ }
+
+ @Override
+ public Iterator<String> iterator() {
+ return Collections.unmodifiableSet(fields.keySet()).iterator();
+ }
+
+ @Override
+ public Terms terms(String field) throws IOException {
+ assert field != null;
+ return fields.get(field);
+ }
+
+ @Override
+ public int size() {
+ return fields.size();
+ }
+
+ // for debugging
+ String brToString(BytesRef b) {
+ if (b == null) {
+ return "null";
+ } else {
+ try {
+ return b.utf8ToString() + " " + b;
+ } catch (Throwable t) {
+ // If BytesRef isn't actually UTF8, or it's eg a
+ // prefix of UTF8 that ends mid-unicode-char, we
+ // fallback to hex:
+ return b.toString();
+ }
+ }
+ }
+
+ /**
+ * BlockTree statistics for a single field
+ * returned by {@link FieldReader#computeStats()}.
+ */
+ public static class Stats {
+ /** How many nodes in the index FST. */
+ public long indexNodeCount;
+
+ /** How many arcs in the index FST. */
+ public long indexArcCount;
+
+ /** Byte size of the index. */
+ public long indexNumBytes;
+
+ /** Total number of terms in the field. */
+ public long totalTermCount;
+
+ /** Total number of bytes (sum of term lengths) across all terms in the field. */
+ public long totalTermBytes;
+
+ /** The number of normal (non-floor) blocks in the terms file. */
+ public int nonFloorBlockCount;
+
+ /** The number of floor blocks (meta-blocks larger than the
+ * allowed {@code maxItemsPerBlock}) in the terms file. */
+ public int floorBlockCount;
+
+ /** The number of sub-blocks within the floor blocks. */
+ public int floorSubBlockCount;
+
+ /** The number of "internal" blocks (that have both
+ * terms and sub-blocks). */
+ public int mixedBlockCount;
+
+ /** The number of "leaf" blocks (blocks that have only
+ * terms). */
+ public int termsOnlyBlockCount;
+
+ /** The number of "internal" blocks that do not contain
+ * terms (have only sub-blocks). */
+ public int subBlocksOnlyBlockCount;
+
+ /** Total number of blocks. */
+ public int totalBlockCount;
+
+ /** Number of blocks at each prefix depth. */
+ public int[] blockCountByPrefixLen = new int[10];
+ private int startBlockCount;
+ private int endBlockCount;
+
+ /** Total number of bytes used to store term suffixes. */
+ public long totalBlockSuffixBytes;
+
+ /** Total number of bytes used to store term stats (not
+ * including what the {@link PostingsBaseFormat}
+ * stores. */
+ public long totalBlockStatsBytes;
+
+ /** Total bytes stored by the {@link PostingsBaseFormat},
+ * plus the other few vInts stored in the frame. */
+ public long totalBlockOtherBytes;
+
+ /** Segment name. */
+ public final String segment;
+
+ /** Field name. */
+ public final String field;
+
+ Stats(String segment, String field) {
+ this.segment = segment;
+ this.field = field;
+ }
+
+ void startBlock(FieldReader.SegmentTermsEnum.Frame frame, boolean isFloor) {
+ totalBlockCount++;
+ if (isFloor) {
+ if (frame.fp == frame.fpOrig) {
+ floorBlockCount++;
+ }
+ floorSubBlockCount++;
+ } else {
+ nonFloorBlockCount++;
+ }
+
+ if (blockCountByPrefixLen.length <= frame.prefix) {
+ blockCountByPrefixLen = ArrayUtil.grow(blockCountByPrefixLen, 1+frame.prefix);
+ }
+ blockCountByPrefixLen[frame.prefix]++;
+ startBlockCount++;
+ totalBlockSuffixBytes += frame.suffixesReader.length();
+ totalBlockStatsBytes += frame.statsReader.length();
+ }
+
+ void endBlock(FieldReader.SegmentTermsEnum.Frame frame) {
+ final int termCount = frame.isLeafBlock ? frame.entCount : frame.state.termBlockOrd;
+ final int subBlockCount = frame.entCount - termCount;
+ totalTermCount += termCount;
+ if (termCount != 0 && subBlockCount != 0) {
+ mixedBlockCount++;
+ } else if (termCount != 0) {
+ termsOnlyBlockCount++;
+ } else if (subBlockCount != 0) {
+ subBlocksOnlyBlockCount++;
+ } else {
+ throw new IllegalStateException();
+ }
+ endBlockCount++;
+ final long otherBytes = frame.fpEnd - frame.fp - frame.suffixesReader.length() - frame.statsReader.length();
+ assert otherBytes > 0 : "otherBytes=" + otherBytes + " frame.fp=" + frame.fp + " frame.fpEnd=" + frame.fpEnd;
+ totalBlockOtherBytes += otherBytes;
+ }
+
+ void term(BytesRef term) {
+ totalTermBytes += term.length;
+ }
+
+ void finish() {
+ assert startBlockCount == endBlockCount: "startBlockCount=" + startBlockCount + " endBlockCount=" + endBlockCount;
+ assert totalBlockCount == floorSubBlockCount + nonFloorBlockCount: "floorSubBlockCount=" + floorSubBlockCount + " nonFloorBlockCount=" + nonFloorBlockCount + " totalBlockCount=" + totalBlockCount;
+ assert totalBlockCount == mixedBlockCount + termsOnlyBlockCount + subBlocksOnlyBlockCount: "totalBlockCount=" + totalBlockCount + " mixedBlockCount=" + mixedBlockCount + " subBlocksOnlyBlockCount=" + subBlocksOnlyBlockCount + " termsOnlyBlockCount=" + termsOnlyBlockCount;
+ }
+
+ @Override
+ public String toString() {
+ final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+ PrintStream out;
+ try {
+ out = new PrintStream(bos, false, "UTF-8");
+ } catch (UnsupportedEncodingException bogus) {
+ throw new RuntimeException(bogus);
+ }
+
+ out.println(" index FST:");
+ out.println(" " + indexNodeCount + " nodes");
+ out.println(" " + indexArcCount + " arcs");
+ out.println(" " + indexNumBytes + " bytes");
+ out.println(" terms:");
+ out.println(" " + totalTermCount + " terms");
+ out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
+ out.println(" blocks:");
+ out.println(" " + totalBlockCount + " blocks");
+ out.println(" " + termsOnlyBlockCount + " terms-only blocks");
+ out.println(" " + subBlocksOnlyBlockCount + " sub-block-only blocks");
+ out.println(" " + mixedBlockCount + " mixed blocks");
+ out.println(" " + floorBlockCount + " floor blocks");
+ out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
+ out.println(" " + floorSubBlockCount + " floor sub-blocks");
+ out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
+ out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
+ out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
+ if (totalBlockCount != 0) {
+ out.println(" by prefix length:");
+ int total = 0;
+ for(int prefix=0;prefix<blockCountByPrefixLen.length;prefix++) {
+ final int blockCount = blockCountByPrefixLen[prefix];
+ total += blockCount;
+ if (blockCount != 0) {
+ out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
+ }
+ }
+ assert totalBlockCount == total;
+ }
+
+ try {
+ return bos.toString("UTF-8");
+ } catch (UnsupportedEncodingException bogus) {
+ throw new RuntimeException(bogus);
+ }
+ }
+ }
+
+ final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
+ final BytesRef NO_OUTPUT = fstOutputs.getNoOutput();
+
+ /** BlockTree's implementation of {@link Terms}. */
+ public final class FieldReader extends Terms {
+ final long numTerms;
+ final FieldInfo fieldInfo;
+ final long sumTotalTermFreq;
+ final long sumDocFreq;
+ final int docCount;
+ final long indexStartFP;
+ final long rootBlockFP;
+ final BytesRef rootCode;
+ private final FST<BytesRef> index;
+
+ //private boolean DEBUG;
+
+ /**
+  * Creates the per-field reader: records the field's aggregate term/doc
+  * statistics and, when {@code indexIn} is non-null, loads this field's
+  * FST terms index starting at {@code indexStartFP}.
+  */
+ FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, IndexInput indexIn) throws IOException {
+ assert numTerms > 0;
+ this.fieldInfo = fieldInfo;
+ //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
+ this.numTerms = numTerms;
+ this.sumTotalTermFreq = sumTotalTermFreq;
+ this.sumDocFreq = sumDocFreq;
+ this.docCount = docCount;
+ this.indexStartFP = indexStartFP;
+ this.rootCode = rootCode;
+ // if (DEBUG) {
+ // System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
+ // }
+
+ // rootCode packs the root block's file pointer together with flag bits;
+ // shift the flags off to recover the raw file pointer.
+ rootBlockFP = (new ByteArrayDataInput(rootCode.bytes, rootCode.offset, rootCode.length)).readVLong() >>> BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
+
+ // indexIn == null means the terms index was not opened for this reader
+ // (presumably an index-less open mode — confirm against callers).
+ if (indexIn != null) {
+ final IndexInput clone = indexIn.clone();
+ //System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
+ clone.seek(indexStartFP);
+ index = new FST<BytesRef>(clone, ByteSequenceOutputs.getSingleton());
+
+ /*
+ if (false) {
+ final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
+ Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName));
+ Util.toDot(index, w, false, false);
+ System.out.println("FST INDEX: SAVED to " + dotFileName);
+ w.close();
+ }
+ */
+ } else {
+ index = null;
+ }
+ }
+
+ /** For debugging -- used by CheckIndex too*/
+ // TODO: maybe push this into Terms?
+ // Walks the entire terms dictionary via a fresh enum to gather block stats.
+ public Stats computeStats() throws IOException {
+ return new SegmentTermsEnum().computeBlockStats();
+ }
+
+ // Terms are ordered in UTF-8 byte order (Unicode code point order).
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
+ }
+
+ // True when this field was indexed with docs+freqs+positions+offsets.
+ @Override
+ public boolean hasOffsets() {
+ return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+ }
+
+ // True when this field was indexed with at least docs+freqs+positions.
+ @Override
+ public boolean hasPositions() {
+ return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
+ }
+
+ // Delegates to the field metadata: payloads recorded at index time?
+ @Override
+ public boolean hasPayloads() {
+ return fieldInfo.hasPayloads();
+ }
+
+ // NOTE: the reuse argument is ignored; a fresh enum is always returned.
+ @Override
+ public TermsEnum iterator(TermsEnum reuse) throws IOException {
+ return new SegmentTermsEnum();
+ }
+
+ // Total number of terms in this field (asserted > 0 in the ctor).
+ @Override
+ public long size() {
+ return numTerms;
+ }
+
+ // Sum of totalTermFreq over all terms, as recorded at index time.
+ @Override
+ public long getSumTotalTermFreq() {
+ return sumTotalTermFreq;
+ }
+
+ // Sum of docFreq over all terms, as recorded at index time.
+ @Override
+ public long getSumDocFreq() {
+ return sumDocFreq;
+ }
+
+ // Number of documents that have at least one term for this field.
+ @Override
+ public int getDocCount() {
+ return docCount;
+ }
+
+ // Only NORMAL compiled automata are handled here; the other automaton
+ // types are served by CompiledAutomaton.getTermsEnum (per the message).
+ @Override
+ public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
+ if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+ throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+ }
+ return new IntersectEnum(compiled, startTerm);
+ }
+
+ // NOTE: cannot seek!
+ // Forward-only enum over the subset of this field's terms accepted by a
+ // compiled automaton; walks on-disk term blocks and prunes entries (and
+ // whole blocks) whose bytes cannot match the automaton.
+ private final class IntersectEnum extends TermsEnum {
+ private final IndexInput in;
+
+ private Frame[] stack;
+
+ @SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
+
+ private final RunAutomaton runAutomaton;
+ private final CompiledAutomaton compiledAutomaton;
+
+ private Frame currentFrame;
+
+ private final BytesRef term = new BytesRef();
+
+ private final FST.BytesReader fstReader;
+
+ // TODO: can we share this with the frame in STE?
+ // One frame per block currently on the traversal path; holds the decoded
+ // block contents plus the automaton state reached at this block's prefix.
+ private final class Frame {
+ final int ord;
+ long fp;
+ long fpOrig;
+ long fpEnd;
+ long lastSubFP;
+
+ // State in automaton
+ int state;
+
+ int metaDataUpto;
+
+ byte[] suffixBytes = new byte[128];
+ final ByteArrayDataInput suffixesReader = new ByteArrayDataInput();
+
+ byte[] statBytes = new byte[64];
+ final ByteArrayDataInput statsReader = new ByteArrayDataInput();
+
+ byte[] floorData = new byte[32];
+ final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
+
+ // Length of prefix shared by all terms in this block
+ int prefix;
+
+ // Number of entries (term or sub-block) in this block
+ int entCount;
+
+ // Which term we will next read
+ int nextEnt;
+
+ // True if this block is either not a floor block,
+ // or, it's the last sub-block of a floor block
+ boolean isLastInFloor;
+
+ // True if all entries are terms
+ boolean isLeafBlock;
+
+ int numFollowFloorBlocks;
+ int nextFloorLabel;
+
+ Transition[] transitions;
+ int curTransitionMax;
+ int transitionIndex;
+
+ FST.Arc<BytesRef> arc;
+
+ final BlockTermState termState;
+
+ // Cumulative output so far
+ BytesRef outputPrefix;
+
+ private int startBytePos;
+ private int suffix;
+
+ public Frame(int ord) throws IOException {
+ this.ord = ord;
+ termState = postingsReader.newTermState();
+ termState.totalTermFreq = -1;
+ }
+
+ // Skips forward through the floor chain until the next sub-block's
+ // first label could still reach the current transition's min, then
+ // loads that sub-block.
+ void loadNextFloorBlock() throws IOException {
+ assert numFollowFloorBlocks > 0;
+ //if (DEBUG) System.out.println(" loadNextFoorBlock trans=" + transitions[transitionIndex]);
+
+ do {
+ fp = fpOrig + (floorDataReader.readVLong() >>> 1);
+ numFollowFloorBlocks--;
+ // if (DEBUG) System.out.println(" skip floor block2! nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[transitionIndex].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
+ if (numFollowFloorBlocks != 0) {
+ nextFloorLabel = floorDataReader.readByte() & 0xff;
+ } else {
+ // 256 is past any byte label, so no further floor block applies.
+ nextFloorLabel = 256;
+ }
+ // if (DEBUG) System.out.println(" nextFloorLabel=" + (char) nextFloorLabel);
+ } while (numFollowFloorBlocks != 0 && nextFloorLabel <= transitions[transitionIndex].getMin());
+
+ load(null);
+ }
+
+ // Records the automaton state for this frame and resets to its first
+ // sorted transition (curTransitionMax == -1 means no transitions).
+ public void setState(int state) {
+ this.state = state;
+ transitionIndex = 0;
+ transitions = compiledAutomaton.sortedTransitions[state];
+ if (transitions.length != 0) {
+ curTransitionMax = transitions[0].getMax();
+ } else {
+ curTransitionMax = -1;
+ }
+ }
+
+ // Decodes the block at fp: floor metadata (if frameIndexData given),
+ // then the block header, suffix bytes, stats bytes and postings block.
+ void load(BytesRef frameIndexData) throws IOException {
+
+ // if (DEBUG) System.out.println(" load fp=" + fp + " fpOrig=" + fpOrig + " frameIndexData=" + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state=" + state));
+
+ if (frameIndexData != null && transitions.length != 0) {
+ // Floor frame
+ if (floorData.length < frameIndexData.length) {
+ this.floorData = new byte[ArrayUtil.oversize(frameIndexData.length, 1)];
+ }
+ System.arraycopy(frameIndexData.bytes, frameIndexData.offset, floorData, 0, frameIndexData.length);
+ floorDataReader.reset(floorData, 0, frameIndexData.length);
+ // Skip first long -- has redundant fp, hasTerms
+ // flag, isFloor flag
+ final long code = floorDataReader.readVLong();
+ if ((code & BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0) {
+ numFollowFloorBlocks = floorDataReader.readVInt();
+ nextFloorLabel = floorDataReader.readByte() & 0xff;
+ // if (DEBUG) System.out.println(" numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + nextFloorLabel);
+
+ // If current state is accept, we must process
+ // first block in case it has empty suffix:
+ if (!runAutomaton.isAccept(state)) {
+ // Maybe skip floor blocks:
+ while (numFollowFloorBlocks != 0 && nextFloorLabel <= transitions[0].getMin()) {
+ fp = fpOrig + (floorDataReader.readVLong() >>> 1);
+ numFollowFloorBlocks--;
+ // if (DEBUG) System.out.println(" skip floor block! nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[0].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
+ if (numFollowFloorBlocks != 0) {
+ nextFloorLabel = floorDataReader.readByte() & 0xff;
+ } else {
+ nextFloorLabel = 256;
+ }
+ }
+ }
+ }
+ }
+
+ // Block header: entry count in the high bits, isLastInFloor in bit 0.
+ in.seek(fp);
+ int code = in.readVInt();
+ entCount = code >>> 1;
+ assert entCount > 0;
+ isLastInFloor = (code & 1) != 0;
+
+ // term suffixes:
+ code = in.readVInt();
+ isLeafBlock = (code & 1) != 0;
+ int numBytes = code >>> 1;
+ // if (DEBUG) System.out.println(" entCount=" + entCount + " lastInFloor?=" + isLastInFloor + " leafBlock?=" + isLeafBlock + " numSuffixBytes=" + numBytes);
+ if (suffixBytes.length < numBytes) {
+ suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+ }
+ in.readBytes(suffixBytes, 0, numBytes);
+ suffixesReader.reset(suffixBytes, 0, numBytes);
+
+ // stats
+ numBytes = in.readVInt();
+ if (statBytes.length < numBytes) {
+ statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+ }
+ in.readBytes(statBytes, 0, numBytes);
+ statsReader.reset(statBytes, 0, numBytes);
+ metaDataUpto = 0;
+
+ termState.termBlockOrd = 0;
+ nextEnt = 0;
+
+ postingsReader.readTermsBlock(in, fieldInfo, termState);
+
+ if (!isLastInFloor) {
+ // Sub-blocks of a single floor block are always
+ // written one after another -- tail recurse:
+ fpEnd = in.getFilePointer();
+ }
+ }
+
+ // TODO: maybe add scanToLabel; should give perf boost
+
+ public boolean next() {
+ return isLeafBlock ? nextLeaf() : nextNonLeaf();
+ }
+
+ // Decodes next entry; returns true if it's a sub-block
+ public boolean nextLeaf() {
+ //if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
+ assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+ nextEnt++;
+ suffix = suffixesReader.readVInt();
+ startBytePos = suffixesReader.getPosition();
+ suffixesReader.skipBytes(suffix);
+ return false;
+ }
+
+ // Mixed block: low bit of the code distinguishes term (0) vs sub-block (1).
+ public boolean nextNonLeaf() {
+ //if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
+ assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+ nextEnt++;
+ final int code = suffixesReader.readVInt();
+ suffix = code >>> 1;
+ startBytePos = suffixesReader.getPosition();
+ suffixesReader.skipBytes(suffix);
+ if ((code & 1) == 0) {
+ // A normal term
+ termState.termBlockOrd++;
+ return false;
+ } else {
+ // A sub-block; make sub-FP absolute:
+ lastSubFP = fp - suffixesReader.readVLong();
+ return true;
+ }
+ }
+
+ // Ord of the current term within this block (leaf blocks count every
+ // entry; mixed blocks count only term entries).
+ public int getTermBlockOrd() {
+ return isLeafBlock ? nextEnt : termState.termBlockOrd;
+ }
+
+ // Decodes docFreq/totalTermFreq/postings metadata up to the current
+ // term; decoding is lazy so skipped terms pay no metadata cost.
+ public void decodeMetaData() throws IOException {
+
+ // lazily catch up on metadata decode:
+ final int limit = getTermBlockOrd();
+ assert limit > 0;
+
+ // We must set/incr state.termCount because
+ // postings impl can look at this
+ termState.termBlockOrd = metaDataUpto;
+
+ // TODO: better API would be "jump straight to term=N"???
+ while (metaDataUpto < limit) {
+
+ // TODO: we could make "tiers" of metadata, ie,
+ // decode docFreq/totalTF but don't decode postings
+ // metadata; this way caller could get
+ // docFreq/totalTF w/o paying decode cost for
+ // postings
+
+ // TODO: if docFreq were bulk decoded we could
+ // just skipN here:
+ termState.docFreq = statsReader.readVInt();
+ //if (DEBUG) System.out.println(" dF=" + state.docFreq);
+ if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
+ //if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
+ }
+
+ postingsReader.nextTerm(fieldInfo, termState);
+ metaDataUpto++;
+ termState.termBlockOrd++;
+ }
+ }
+ }
+
+ private BytesRef savedStartTerm;
+
+ // TODO: in some cases we can filter by length? eg
+ // regexp foo*bar must be at least length 6 bytes
+ public IntersectEnum(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
+ // if (DEBUG) {
+ // System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" + brToString(compiled.commonSuffixRef));
+ // }
+ runAutomaton = compiled.runAutomaton;
+ compiledAutomaton = compiled;
+ in = BlockTreeTermsReader.this.in.clone();
+ stack = new Frame[5];
+ for(int idx=0;idx<stack.length;idx++) {
+ stack[idx] = new Frame(idx);
+ }
+ for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
+ arcs[arcIdx] = new FST.Arc<BytesRef>();
+ }
+
+ if (index == null) {
+ fstReader = null;
+ } else {
+ fstReader = index.getBytesReader();
+ }
+
+ // TODO: if the automaton is "smallish" we really
+ // should use the terms index to seek at least to
+ // the initial term and likely to subsequent terms
+ // (or, maybe just fallback to ATE for such cases).
+ // Else the seek cost of loading the frames will be
+ // too costly.
+
+ // NOTE(review): index is dereferenced unconditionally here although the
+ // null case is handled just above for fstReader -- confirm callers
+ // never construct an IntersectEnum without a terms index.
+ final FST.Arc<BytesRef> arc = index.getFirstArc(arcs[0]);
+ // Empty string prefix must have an output in the index!
+ assert arc.isFinal();
+
+ // Special pushFrame since it's the first one:
+ final Frame f = stack[0];
+ f.fp = f.fpOrig = rootBlockFP;
+ f.prefix = 0;
+ f.setState(runAutomaton.getInitialState());
+ f.arc = arc;
+ f.outputPrefix = arc.output;
+ f.load(rootCode);
+
+ // for assert:
+ assert setSavedStartTerm(startTerm);
+
+ currentFrame = f;
+ if (startTerm != null) {
+ seekToStartTerm(startTerm);
+ }
+ }
+
+ // only for assert:
+ private boolean setSavedStartTerm(BytesRef startTerm) {
+ savedStartTerm = startTerm == null ? null : BytesRef.deepCopyOf(startTerm);
+ return true;
+ }
+
+ @Override
+ public TermState termState() throws IOException {
+ currentFrame.decodeMetaData();
+ return currentFrame.termState.clone();
+ }
+
+ // Returns the frame for the given depth, growing the stack lazily.
+ private Frame getFrame(int ord) throws IOException {
+ if (ord >= stack.length) {
+ final Frame[] next = new Frame[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(stack, 0, next, 0, stack.length);
+ for(int stackOrd=stack.length;stackOrd<next.length;stackOrd++) {
+ next[stackOrd] = new Frame(stackOrd);
+ }
+ stack = next;
+ }
+ assert stack[ord].ord == ord;
+ return stack[ord];
+ }
+
+ // Returns the FST arc scratch object for the given depth, growing lazily.
+ private FST.Arc<BytesRef> getArc(int ord) {
+ if (ord >= arcs.length) {
+ @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<BytesRef>[] next =
+ new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(arcs, 0, next, 0, arcs.length);
+ for(int arcOrd=arcs.length;arcOrd<next.length;arcOrd++) {
+ next[arcOrd] = new FST.Arc<BytesRef>();
+ }
+ arcs = next;
+ }
+ return arcs[ord];
+ }
+
+ // Descends into the sub-block last seen by currentFrame (lastSubFP),
+ // carrying the given automaton state and the accumulated FST output.
+ private Frame pushFrame(int state) throws IOException {
+ final Frame f = getFrame(currentFrame == null ? 0 : 1+currentFrame.ord);
+
+ f.fp = f.fpOrig = currentFrame.lastSubFP;
+ f.prefix = currentFrame.prefix + currentFrame.suffix;
+ // if (DEBUG) System.out.println(" pushFrame state=" + state + " prefix=" + f.prefix);
+ f.setState(state);
+
+ // Walk the arc through the index -- we only
+ // "bother" with this so we can get the floor data
+ // from the index and skip floor blocks when
+ // possible:
+ FST.Arc<BytesRef> arc = currentFrame.arc;
+ int idx = currentFrame.prefix;
+ assert currentFrame.suffix > 0;
+ BytesRef output = currentFrame.outputPrefix;
+ while (idx < f.prefix) {
+ final int target = term.bytes[idx] & 0xff;
+ // TODO: we could be more efficient for the next()
+ // case by using current arc as starting point,
+ // passed to findTargetArc
+ arc = index.findTargetArc(target, arc, getArc(1+idx), fstReader);
+ assert arc != null;
+ output = fstOutputs.add(output, arc.output);
+ idx++;
+ }
+
+ f.arc = arc;
+ f.outputPrefix = output;
+ assert arc.isFinal();
+ f.load(fstOutputs.add(output, arc.nextFinalOutput));
+ return f;
+ }
+
+ @Override
+ public BytesRef term() {
+ return term;
+ }
+
+ @Override
+ public int docFreq() throws IOException {
+ //if (DEBUG) System.out.println("BTIR.docFreq");
+ currentFrame.decodeMetaData();
+ //if (DEBUG) System.out.println(" return " + currentFrame.termState.docFreq);
+ return currentFrame.termState.docFreq;
+ }
+
+ @Override
+ public long totalTermFreq() throws IOException {
+ currentFrame.decodeMetaData();
+ return currentFrame.termState.totalTermFreq;
+ }
+
+ @Override
+ public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
+ currentFrame.decodeMetaData();
+ return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
+ }
+
+ @Override
+ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed:
+ return null;
+ }
+
+ currentFrame.decodeMetaData();
+ return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
+ }
+
+ // Runs the automaton over the current entry's suffix bytes; the caller
+ // guarantees a match exists (asserted by state != -1).
+ private int getState() {
+ int state = currentFrame.state;
+ for(int idx=0;idx<currentFrame.suffix;idx++) {
+ state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
+ assert state != -1;
+ }
+ return state;
+ }
+
+ // NOTE: specialized to only doing the first-time
+ // seek, but we could generalize it to allow
+ // arbitrary seekExact/Ceil. Note that this is a
+ // seekFloor!
+ private void seekToStartTerm(BytesRef target) throws IOException {
+ //if (DEBUG) System.out.println("seek to startTerm=" + target.utf8ToString());
+ assert currentFrame.ord == 0;
+ if (term.length < target.length) {
+ term.bytes = ArrayUtil.grow(term.bytes, target.length);
+ }
+ FST.Arc<BytesRef> arc = arcs[0];
+ assert arc == currentFrame.arc;
+
+ for(int idx=0;idx<=target.length;idx++) {
+
+ while (true) {
+ // Save entry decode state so we can roll back one entry below:
+ final int savePos = currentFrame.suffixesReader.getPosition();
+ final int saveStartBytePos = currentFrame.startBytePos;
+ final int saveSuffix = currentFrame.suffix;
+ final long saveLastSubFP = currentFrame.lastSubFP;
+ final int saveTermBlockOrd = currentFrame.termState.termBlockOrd;
+
+ final boolean isSubBlock = currentFrame.next();
+
+ //if (DEBUG) System.out.println(" cycle ent=" + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") prefix=" + currentFrame.prefix + " suffix=" + currentFrame.suffix + " isBlock=" + isSubBlock + " firstLabel=" + (currentFrame.suffix == 0 ? "" : (currentFrame.suffixBytes[currentFrame.startBytePos])&0xff));
+ term.length = currentFrame.prefix + currentFrame.suffix;
+ if (term.bytes.length < term.length) {
+ term.bytes = ArrayUtil.grow(term.bytes, term.length);
+ }
+ System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
+
+ if (isSubBlock && StringHelper.startsWith(target, term)) {
+ // Recurse
+ //if (DEBUG) System.out.println(" recurse!");
+ currentFrame = pushFrame(getState());
+ break;
+ } else {
+ final int cmp = term.compareTo(target);
+ if (cmp < 0) {
+ if (currentFrame.nextEnt == currentFrame.entCount) {
+ if (!currentFrame.isLastInFloor) {
+ //if (DEBUG) System.out.println(" load floorBlock");
+ currentFrame.loadNextFloorBlock();
+ continue;
+ } else {
+ //if (DEBUG) System.out.println(" return term=" + brToString(term));
+ return;
+ }
+ }
+ continue;
+ } else if (cmp == 0) {
+ //if (DEBUG) System.out.println(" return term=" + brToString(term));
+ return;
+ } else {
+ // Fallback to prior entry: the semantics of
+ // this method is that the first call to
+ // next() will return the term after the
+ // requested term
+ currentFrame.nextEnt--;
+ currentFrame.lastSubFP = saveLastSubFP;
+ currentFrame.startBytePos = saveStartBytePos;
+ currentFrame.suffix = saveSuffix;
+ currentFrame.suffixesReader.setPosition(savePos);
+ currentFrame.termState.termBlockOrd = saveTermBlockOrd;
+ System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
+ term.length = currentFrame.prefix + currentFrame.suffix;
+ // If the last entry was a block we don't
+ // need to bother recursing and pushing to
+ // the last term under it because the first
+ // next() will simply skip the frame anyway
+ return;
+ }
+ }
+ }
+ }
+
+ assert false;
+ }
+
+ @Override
+ public BytesRef next() throws IOException {
+
+ // if (DEBUG) {
+ // System.out.println("\nintEnum.next seg=" + segment);
+ // System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+ // }
+
+ nextTerm:
+ while(true) {
+ // Pop finished frames
+ while (currentFrame.nextEnt == currentFrame.entCount) {
+ if (!currentFrame.isLastInFloor) {
+ //if (DEBUG) System.out.println(" next-floor-block");
+ currentFrame.loadNextFloorBlock();
+ //if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+ } else {
+ //if (DEBUG) System.out.println(" pop frame");
+ if (currentFrame.ord == 0) {
+ return null;
+ }
+ final long lastFP = currentFrame.fpOrig;
+ currentFrame = stack[currentFrame.ord-1];
+ assert currentFrame.lastSubFP == lastFP;
+ //if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+ }
+ }
+
+ final boolean isSubBlock = currentFrame.next();
+ // if (DEBUG) {
+ // final BytesRef suffixRef = new BytesRef();
+ // suffixRef.bytes = currentFrame.suffixBytes;
+ // suffixRef.offset = currentFrame.startBytePos;
+ // suffixRef.length = currentFrame.suffix;
+ // System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + brToString(suffixRef));
+ // }
+
+ if (currentFrame.suffix != 0) {
+ final int label = currentFrame.suffixBytes[currentFrame.startBytePos] & 0xff;
+ while (label > currentFrame.curTransitionMax) {
+ if (currentFrame.transitionIndex >= currentFrame.transitions.length-1) {
+ // Stop processing this frame -- no further
+ // matches are possible because we've moved
+ // beyond what the max transition will allow
+ //if (DEBUG) System.out.println(" break: trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]));
+
+ // sneaky! forces a pop above
+ currentFrame.isLastInFloor = true;
+ currentFrame.nextEnt = currentFrame.entCount;
+ continue nextTerm;
+ }
+ currentFrame.transitionIndex++;
+ currentFrame.curTransitionMax = currentFrame.transitions[currentFrame.transitionIndex].getMax();
+ //if (DEBUG) System.out.println(" next trans=" + currentFrame.transitions[currentFrame.transitionIndex]);
+ }
+ }
+
+ // First test the common suffix, if set:
+ if (compiledAutomaton.commonSuffixRef != null && !isSubBlock) {
+ final int termLen = currentFrame.prefix + currentFrame.suffix;
+ if (termLen < compiledAutomaton.commonSuffixRef.length) {
+ // No match
+ // if (DEBUG) {
+ // System.out.println(" skip: common suffix length");
+ // }
+ continue nextTerm;
+ }
+
+ final byte[] suffixBytes = currentFrame.suffixBytes;
+ final byte[] commonSuffixBytes = compiledAutomaton.commonSuffixRef.bytes;
+
+ final int lenInPrefix = compiledAutomaton.commonSuffixRef.length - currentFrame.suffix;
+ assert compiledAutomaton.commonSuffixRef.offset == 0;
+ int suffixBytesPos;
+ int commonSuffixBytesPos = 0;
+
+ if (lenInPrefix > 0) {
+ // A prefix of the common suffix overlaps with
+ // the suffix of the block prefix so we first
+ // test whether the prefix part matches:
+ final byte[] termBytes = term.bytes;
+ int termBytesPos = currentFrame.prefix - lenInPrefix;
+ assert termBytesPos >= 0;
+ final int termBytesPosEnd = currentFrame.prefix;
+ while (termBytesPos < termBytesPosEnd) {
+ if (termBytes[termBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
+ // if (DEBUG) {
+ // System.out.println(" skip: common suffix mismatch (in prefix)");
+ // }
+ continue nextTerm;
+ }
+ }
+ suffixBytesPos = currentFrame.startBytePos;
+ } else {
+ suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - compiledAutomaton.commonSuffixRef.length;
+ }
+
+ // Test overlapping suffix part:
+ final int commonSuffixBytesPosEnd = compiledAutomaton.commonSuffixRef.length;
+ while (commonSuffixBytesPos < commonSuffixBytesPosEnd) {
+ if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
+ // if (DEBUG) {
+ // System.out.println(" skip: common suffix mismatch");
+ // }
+ continue nextTerm;
+ }
+ }
+ }
+
+ // TODO: maybe we should do the same linear test
+ // that AutomatonTermsEnum does, so that if we
+ // reach a part of the automaton where .* is
+ // "temporarily" accepted, we just blindly .next()
+ // until the limit
+
+ // See if the term prefix matches the automaton:
+ int state = currentFrame.state;
+ for (int idx=0;idx<currentFrame.suffix;idx++) {
+ state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
+ if (state == -1) {
+ // No match
+ //System.out.println(" no s=" + state);
+ continue nextTerm;
+ } else {
+ //System.out.println(" c s=" + state);
+ }
+ }
+
+ if (isSubBlock) {
+ // Match! Recurse:
+ //if (DEBUG) System.out.println(" sub-block match to state=" + state + "; recurse fp=" + currentFrame.lastSubFP);
+ copyTerm();
+ currentFrame = pushFrame(state);
+ //if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+ } else if (runAutomaton.isAccept(state)) {
+ copyTerm();
+ //if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" + brToString(term));
+ assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
+ return term;
+ } else {
+ //System.out.println(" no s=" + state);
+ }
+ }
+ }
+
+ // Copies the current frame's prefix + suffix bytes into term.
+ private void copyTerm() {
+ //System.out.println(" copyTerm cur.prefix=" + currentFrame.prefix + " cur.suffix=" + currentFrame.suffix + " first=" + (char) currentFrame.suffixBytes[currentFrame.startBytePos]);
+ final int len = currentFrame.prefix + currentFrame.suffix;
+ if (term.bytes.length < len) {
+ term.bytes = ArrayUtil.grow(term.bytes, len);
+ }
+ System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
+ term.length = len;
+ }
+
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
+ }
+
+ // This enum is forward-only (see class NOTE); seek/ord are unsupported.
+ @Override
+ public boolean seekExact(BytesRef text, boolean useCache) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void seekExact(long ord) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public long ord() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public SeekStatus seekCeil(BytesRef text, boolean useCache) {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ // Iterates through terms in this field
+ private final class SegmentTermsEnum extends TermsEnum {
+ private IndexInput in;
+
+ private Frame[] stack;
+ private final Frame staticFrame;
+ private Frame currentFrame;
+ private boolean termExists;
+
+ private int targetBeforeCurrentLength;
+
+ private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
+
+ // What prefix of the current term was present in the index:
+ private int validIndexPrefix;
+
+ // assert only:
+ private boolean eof;
+
+ final BytesRef term = new BytesRef();
+ private final FST.BytesReader fstReader;
+
+ @SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<BytesRef>[] arcs =
+ new FST.Arc[1];
+
+ // Sets up the frame stack and FST reader; the root block is NOT loaded
+ // here (the index may not be present), only the first index arc is primed.
+ public SegmentTermsEnum() throws IOException {
+ //if (DEBUG) System.out.println("BTTR.init seg=" + segment);
+ stack = new Frame[0];
+
+ // Used to hold seek by TermState, or cached seek
+ staticFrame = new Frame(-1);
+
+ if (index == null) {
+ fstReader = null;
+ } else {
+ fstReader = index.getBytesReader();
+ }
+
+ // Init w/ root block; don't use index since it may
+ // not (and need not) have been loaded
+ for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
+ arcs[arcIdx] = new FST.Arc<BytesRef>();
+ }
+
+ currentFrame = staticFrame;
+ final FST.Arc<BytesRef> arc;
+ if (index != null) {
+ arc = index.getFirstArc(arcs[0]);
+ // Empty string prefix must have an output in the index!
+ assert arc.isFinal();
+ } else {
+ arc = null;
+ }
+ // NOTE(review): currentFrame was already set to staticFrame above; this
+ // second assignment is redundant but harmless.
+ currentFrame = staticFrame;
+ //currentFrame = pushFrame(arc, rootCode, 0);
+ //currentFrame.loadBlock();
+ validIndexPrefix = 0;
+ // if (DEBUG) {
+ // System.out.println("init frame state " + currentFrame.ord);
+ // printSeekState();
+ // }
+
+ //System.out.println();
+ // computeBlockStats().print(System.out);
+ }
+
+ // Not private to avoid synthetic access$NNN methods
+ // Lazily clones the reader's main IndexInput on first use.
+ void initIndexInput() {
+ if (this.in == null) {
+ this.in = BlockTreeTermsReader.this.in.clone();
+ }
+ }
+
+ /** Runs next() through the entire terms dict,
+ * computing aggregate statistics. */
+ public Stats computeBlockStats() throws IOException {
+
+ Stats stats = new Stats(segment, fieldInfo.name);
+ if (index != null) {
+ stats.indexNodeCount = index.getNodeCount();
+ stats.indexArcCount = index.getArcCount();
+ stats.indexNumBytes = index.sizeInBytes();
+ }
+
+ currentFrame = staticFrame;
+ FST.Arc<BytesRef> arc;
+ if (index != null) {
+ arc = index.getFirstArc(arcs[0]);
+ // Empty string prefix must have an output in the index!
+ assert arc.isFinal();
+ } else {
+ arc = null;
+ }
+
+ // Empty string prefix must have an output in the
+ // index!
+ currentFrame = pushFrame(arc, rootCode, 0);
+ currentFrame.fpOrig = currentFrame.fp;
+ currentFrame.loadBlock();
+ validIndexPrefix = 0;
+
+ stats.startBlock(currentFrame, !currentFrame.isLastInFloor);
+
+ allTerms:
+ while (true) {
+
+ // Pop finished blocks
+ while (currentFrame.nextEnt == currentFrame.entCount) {
+ stats.endBlock(currentFrame);
+ if (!currentFrame.isLastInFloor) {
+ currentFrame.loadNextFloorBlock();
+ stats.startBlock(currentFrame, true);
+ } else {
+ if (currentFrame.ord == 0) {
+ break allTerms;
+ }
+ final long lastFP = currentFrame.fpOrig;
+ currentFrame = stack[currentFrame.ord-1];
+ assert lastFP == currentFrame.lastSubFP;
+ // if (DEBUG) {
+ // System.out.println(" reset validIndexPrefix=" + validIndexPrefix);
+ // }
+ }
+ }
+
+ while(true) {
+ if (currentFrame.next()) {
+ // Push to new block:
+ currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
+ currentFrame.fpOrig = currentFrame.fp;
+ // This is a "next" frame -- even if it's
+ // floor'd we must pretend it isn't so we don't
+ // try to scan to the right floor frame:
+ currentFrame.isFloor = false;
+ //currentFrame.hasTerms = true;
+ currentFrame.loadBlock();
+ stats.startBlock(currentFrame, !currentFrame.isLastInFloor);
+ } else {
+ stats.term(term);
+ break;
+ }
+ }
+ }
+
+ stats.finish();
+
+ // Put root frame back:
+ // The enum is reusable after this method: rewind to the root block so a
+ // subsequent next() starts from the beginning.
+ currentFrame = staticFrame;
+ if (index != null) {
+ arc = index.getFirstArc(arcs[0]);
+ // Empty string prefix must have an output in the index!
+ assert arc.isFinal();
+ } else {
+ arc = null;
+ }
+ currentFrame = pushFrame(arc, rootCode, 0);
+ currentFrame.rewind();
+ currentFrame.loadBlock();
+ validIndexPrefix = 0;
+ term.length = 0;
+
+ return stats;
+ }
+
+ // Returns the Frame at stack position ord, growing the frame
+ // stack (and pre-filling the new tail slots with fresh frames)
+ // when ord is beyond the current capacity.
+ private Frame getFrame(int ord) throws IOException {
+ if (ord >= stack.length) {
+ // Over-allocate so repeated deepening doesn't re-grow each time:
+ final Frame[] grown = new Frame[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(stack, 0, grown, 0, stack.length);
+ int idx = stack.length;
+ while (idx < grown.length) {
+ grown[idx] = new Frame(idx);
+ idx++;
+ }
+ stack = grown;
+ }
+ assert stack[ord].ord == ord;
+ return stack[ord];
+ }
+
+ // Returns the scratch FST arc for position ord, growing the
+ // arcs array (and pre-filling new slots) when needed.
+ private FST.Arc<BytesRef> getArc(int ord) {
+ if (ord >= arcs.length) {
+ @SuppressWarnings({"rawtypes","unchecked"}) final FST.Arc<BytesRef>[] grown =
+ new FST.Arc[ArrayUtil.oversize(1+ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
+ System.arraycopy(arcs, 0, grown, 0, arcs.length);
+ int idx = arcs.length;
+ while (idx < grown.length) {
+ grown[idx] = new FST.Arc<BytesRef>();
+ idx++;
+ }
+ arcs = grown;
+ }
+ return arcs[ord];
+ }
+
+ /** Terms are ordered by UTF-8 byte order (unsigned bytes), which
+ matches Unicode code-point order. */
+ @Override
+ public Comparator<BytesRef> getComparator() {
+ return BytesRef.getUTF8SortedAsUnicodeComparator();
+ }
+
+ // Pushes a frame we seek'd to
+ // Decodes the frame's on-disk file pointer plus the
+ // hasTerms/isFloor flags from the FST output bytes (frameData),
+ // then delegates to pushFrame(arc, fp, length).
+ Frame pushFrame(FST.Arc<BytesRef> arc, BytesRef frameData, int length) throws IOException {
+ scratchReader.reset(frameData.bytes, frameData.offset, frameData.length);
+ final long code = scratchReader.readVLong();
+ // Low OUTPUT_FLAGS_NUM_BITS bits carry the flags; the rest is the block fp:
+ final long fpSeek = code >>> BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS;
+ final Frame f = getFrame(1+currentFrame.ord);
+ f.hasTerms = (code & BlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS) != 0;
+ f.hasTermsOrig = f.hasTerms;
+ f.isFloor = (code & BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR) != 0;
+ if (f.isFloor) {
+ // Any bytes remaining after the vLong are the floor block data:
+ f.setFloorData(scratchReader, frameData);
+ }
+ pushFrame(arc, fpSeek, length);
+
+ return f;
+ }
+
+ // Pushes next'd frame or seek'd frame; we later
+ // lazy-load the frame only when needed
+ // If the target frame already holds the same block (same fpOrig
+ // and already loaded), it is re-used; it is rewound only when the
+ // prior seek had advanced past the prefix depth we now need.
+ Frame pushFrame(FST.Arc<BytesRef> arc, long fp, int length) throws IOException {
+ final Frame f = getFrame(1+currentFrame.ord);
+ f.arc = arc;
+ if (f.fpOrig == fp && f.nextEnt != -1) {
+ //if (DEBUG) System.out.println("      push reused frame ord=" + f.ord + " fp=" + f.fp + " isFloor?=" + f.isFloor + " hasTerms=" + f.hasTerms + " pref=" + term + " nextEnt=" + f.nextEnt + " targetBeforeCurrentLength=" + targetBeforeCurrentLength + " term.length=" + term.length + " vs prefix=" + f.prefix);
+ if (f.prefix > targetBeforeCurrentLength) {
+ // Prior seek went past this depth; must re-scan from the start:
+ f.rewind();
+ } else {
+ // if (DEBUG) {
+ //   System.out.println("        skip rewind!");
+ // }
+ }
+ assert length == f.prefix;
+ } else {
+ // Fresh block: record its fp and mark it not-yet-loaded (nextEnt == -1):
+ f.nextEnt = -1;
+ f.prefix = length;
+ f.state.termBlockOrd = 0;
+ f.fpOrig = f.fp = fp;
+ f.lastSubFP = -1;
+ // if (DEBUG) {
+ //   final int sav = term.length;
+ //   term.length = length;
+ //   System.out.println("      push new frame ord=" + f.ord + " fp=" + f.fp + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " pref=" + brToString(term));
+ //   term.length = sav;
+ // }
+ }
+
+ return f;
+ }
+
+ // asserts only
+ // Always returns true so it can be invoked inside an assert
+ // (side effect runs only when assertions are enabled).
+ private boolean clearEOF() {
+ eof = false;
+ return true;
+ }
+
+ // asserts only
+ // Always returns true so it can be invoked inside an assert
+ // (side effect runs only when assertions are enabled).
+ private boolean setEOF() {
+ eof = true;
+ return true;
+ }
+
+ /** Seeks this enum to exactly {@code target}, returning true if
+ the term exists. First re-uses as much of the prior seek state
+ (saved frame stack and FST arcs) as shares a prefix with the
+ target, then walks the index FST for the remaining bytes, and
+ finally scans within the resulting on-disk block.
+ NOTE(review): {@code useCache} appears unused in this body. */
+ @Override
+ public boolean seekExact(final BytesRef target, final boolean useCache) throws IOException {
+
+ if (index == null) {
+ throw new IllegalStateException("terms index was not loaded");
+ }
+
+ // Make sure term has room for the full target:
+ if (term.bytes.length <= target.length) {
+ term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
+ }
+
+ assert clearEOF();
+
+ // if (DEBUG) {
+ //   System.out.println("\nBTTR.seekExact seg=" + segment + " target=" + fieldInfo.name + ":" + brToString(target) + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=" + validIndexPrefix);
+ //   printSeekState();
+ // }
+
+ FST.Arc<BytesRef> arc;
+ int targetUpto;
+ BytesRef output;
+
+ targetBeforeCurrentLength = currentFrame.ord;
+
+ if (currentFrame != staticFrame) {
+
+ // We are already seek'd; find the common
+ // prefix of new seek term vs current term and
+ // re-use the corresponding seek state.  For
+ // example, if app first seeks to foobar, then
+ // seeks to foobaz, we can re-use the seek state
+ // for the first 5 bytes.
+
+ // if (DEBUG) {
+ //   System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
+ // }
+
+ arc = arcs[0];
+ assert arc.isFinal();
+ output = arc.output;
+ targetUpto = 0;
+
+ Frame lastFrame = stack[0];
+ assert validIndexPrefix <= term.length;
+
+ final int targetLimit = Math.min(target.length, validIndexPrefix);
+
+ int cmp = 0;
+
+ // TODO: reverse vLong byte order for better FST
+ // prefix output sharing
+
+ // First compare up to valid seek frames:
+ while (targetUpto < targetLimit) {
+ cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+ // if (DEBUG) {
+ //   System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output + " output=" + output);
+ // }
+ if (cmp != 0) {
+ break;
+ }
+ arc = arcs[1+targetUpto];
+ //if (arc.label != (target.bytes[target.offset + targetUpto] & 0xFF)) {
+ //System.out.println("FAIL: arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF));
+ //}
+ assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
+ if (arc.output != NO_OUTPUT) {
+ output = fstOutputs.add(output, arc.output);
+ }
+ if (arc.isFinal()) {
+ lastFrame = stack[1+lastFrame.ord];
+ }
+ targetUpto++;
+ }
+
+ if (cmp == 0) {
+ final int targetUptoMid = targetUpto;
+
+ // Second compare the rest of the term, but
+ // don't save arc/output/frame; we only do this
+ // to find out if the target term is before,
+ // equal or after the current term
+ final int targetLimit2 = Math.min(target.length, term.length);
+ while (targetUpto < targetLimit2) {
+ cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+ // if (DEBUG) {
+ //   System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
+ // }
+ if (cmp != 0) {
+ break;
+ }
+ targetUpto++;
+ }
+
+ if (cmp == 0) {
+ // Shared prefix exhausted; shorter term sorts first:
+ cmp = term.length - target.length;
+ }
+ targetUpto = targetUptoMid;
+ }
+
+ if (cmp < 0) {
+ // Common case: target term is after current
+ // term, ie, app is seeking multiple terms
+ // in sorted order
+ // if (DEBUG) {
+ //   System.out.println("  target is after current (shares prefixLen=" + targetUpto + "); frame.ord=" + lastFrame.ord);
+ // }
+ currentFrame = lastFrame;
+
+ } else if (cmp > 0) {
+ // Uncommon case: target term
+ // is before current term; this means we can
+ // keep the currentFrame but we must rewind it
+ // (so we scan from the start)
+ targetBeforeCurrentLength = 0;
+ // if (DEBUG) {
+ //   System.out.println("  target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
+ // }
+ currentFrame = lastFrame;
+ currentFrame.rewind();
+ } else {
+ // Target is exactly the same as current term
+ assert term.length == target.length;
+ if (termExists) {
+ // if (DEBUG) {
+ //   System.out.println("  target is same as current; return true");
+ // }
+ return true;
+ } else {
+ // if (DEBUG) {
+ //   System.out.println("  target is same as current but term doesn't exist");
+ // }
+ }
+ //validIndexPrefix = currentFrame.depth;
+ //term.length = target.length;
+ //return termExists;
+ }
+
+ } else {
+
+ // No prior seek state: start from the root of the index FST.
+ targetBeforeCurrentLength = -1;
+ arc = index.getFirstArc(arcs[0]);
+
+ // Empty string prefix must have an output (block) in the index!
+ assert arc.isFinal();
+ assert arc.output != null;
+
+ // if (DEBUG) {
+ //   System.out.println("    no seek state; push root frame");
+ // }
+
+ output = arc.output;
+
+ currentFrame = staticFrame;
+
+ //term.length = 0;
+ targetUpto = 0;
+ currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
+ }
+
+ // if (DEBUG) {
+ //   System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
+ // }
+
+ // Walk the index FST one target byte at a time, pushing a new
+ // frame whenever we cross a final arc (block boundary):
+ while (targetUpto < target.length) {
+
+ final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
+
+ final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
+
+ if (nextArc == null) {
+
+ // Index is exhausted
+ // if (DEBUG) {
+ //   System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
+ // }
+
+ validIndexPrefix = currentFrame.prefix;
+ //validIndexPrefix = targetUpto;
+
+ currentFrame.scanToFloorFrame(target);
+
+ if (!currentFrame.hasTerms) {
+ termExists = false;
+ term.bytes[targetUpto] = (byte) targetLabel;
+ term.length = 1+targetUpto;
+ // if (DEBUG) {
+ //   System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+ // }
+ return false;
+ }
+
+ currentFrame.loadBlock();
+
+ final SeekStatus result = currentFrame.scanToTerm(target, true);
+ if (result == SeekStatus.FOUND) {
+ // if (DEBUG) {
+ //   System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
+ // }
+ return true;
+ } else {
+ // if (DEBUG) {
+ //   System.out.println("  got " + result + "; return NOT_FOUND term=" + brToString(term));
+ // }
+ return false;
+ }
+ } else {
+ // Follow this arc
+ arc = nextArc;
+ term.bytes[targetUpto] = (byte) targetLabel;
+ // Aggregate output as we go:
+ assert arc.output != null;
+ if (arc.output != NO_OUTPUT) {
+ output = fstOutputs.add(output, arc.output);
+ }
+
+ // if (DEBUG) {
+ //   System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+ // }
+ targetUpto++;
+
+ if (arc.isFinal()) {
+ //if (DEBUG) System.out.println("    arc is final!");
+ currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
+ //if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
+ }
+ }
+ }
+
+ //validIndexPrefix = targetUpto;
+ validIndexPrefix = currentFrame.prefix;
+
+ currentFrame.scanToFloorFrame(target);
+
+ // Target term is entirely contained in the index:
+ if (!currentFrame.hasTerms) {
+ termExists = false;
+ term.length = targetUpto;
+ // if (DEBUG) {
+ //   System.out.println("  FAST NOT_FOUND term=" + brToString(term));
+ // }
+ return false;
+ }
+
+ currentFrame.loadBlock();
+
+ final SeekStatus result = currentFrame.scanToTerm(target, true);
+ if (result == SeekStatus.FOUND) {
+ // if (DEBUG) {
+ //   System.out.println("  return FOUND term=" + term.utf8ToString() + " " + term);
+ // }
+ return true;
+ } else {
+ // if (DEBUG) {
+ //   System.out.println("  got result " + result + "; return NOT_FOUND term=" + term.utf8ToString());
+ // }
+
+ return false;
+ }
+ }
+
+ /** Seeks to the smallest term >= {@code target}. Returns FOUND if
+ the exact term exists, NOT_FOUND if positioned on the next term
+ after target, or END if no term >= target exists. Re-uses prior
+ seek state for any shared prefix, like seekExact.
+ NOTE(review): {@code useCache} appears unused in this body. */
+ @Override
+ public SeekStatus seekCeil(final BytesRef target, final boolean useCache) throws IOException {
+ if (index == null) {
+ throw new IllegalStateException("terms index was not loaded");
+ }
+
+ // Make sure term has room for the full target:
+ if (term.bytes.length <= target.length) {
+ term.bytes = ArrayUtil.grow(term.bytes, 1+target.length);
+ }
+
+ assert clearEOF();
+
+ //if (DEBUG) {
+ //System.out.println("\nBTTR.seekCeil seg=" + segment + " target=" + fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix=  " + validIndexPrefix);
+ //printSeekState();
+ //}
+
+ FST.Arc<BytesRef> arc;
+ int targetUpto;
+ BytesRef output;
+
+ targetBeforeCurrentLength = currentFrame.ord;
+
+ if (currentFrame != staticFrame) {
+
+ // We are already seek'd; find the common
+ // prefix of new seek term vs current term and
+ // re-use the corresponding seek state.  For
+ // example, if app first seeks to foobar, then
+ // seeks to foobaz, we can re-use the seek state
+ // for the first 5 bytes.
+
+ //if (DEBUG) {
+ //System.out.println("  re-use current seek state validIndexPrefix=" + validIndexPrefix);
+ //}
+
+ arc = arcs[0];
+ assert arc.isFinal();
+ output = arc.output;
+ targetUpto = 0;
+
+ Frame lastFrame = stack[0];
+ assert validIndexPrefix <= term.length;
+
+ final int targetLimit = Math.min(target.length, validIndexPrefix);
+
+ int cmp = 0;
+
+ // TOOD: we should write our vLong backwards (MSB
+ // first) to get better sharing from the FST
+
+ // First compare up to valid seek frames:
+ while (targetUpto < targetLimit) {
+ cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+ //if (DEBUG) {
+ //System.out.println("    cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")"   + " arc.output=" + arc.output + " output=" + output);
+ //}
+ if (cmp != 0) {
+ break;
+ }
+ arc = arcs[1+targetUpto];
+ assert arc.label == (target.bytes[target.offset + targetUpto] & 0xFF): "arc.label=" + (char) arc.label + " targetLabel=" + (char) (target.bytes[target.offset + targetUpto] & 0xFF);
+ // TOOD: we could save the outputs in local
+ // byte[][] instead of making new objs ever
+ // seek; but, often the FST doesn't have any
+ // shared bytes (but this could change if we
+ // reverse vLong byte order)
+ if (arc.output != NO_OUTPUT) {
+ output = fstOutputs.add(output, arc.output);
+ }
+ if (arc.isFinal()) {
+ lastFrame = stack[1+lastFrame.ord];
+ }
+ targetUpto++;
+ }
+
+
+ if (cmp == 0) {
+ final int targetUptoMid = targetUpto;
+ // Second compare the rest of the term, but
+ // don't save arc/output/frame:
+ final int targetLimit2 = Math.min(target.length, term.length);
+ while (targetUpto < targetLimit2) {
+ cmp = (term.bytes[targetUpto]&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
+ //if (DEBUG) {
+ //System.out.println("    cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
+ //}
+ if (cmp != 0) {
+ break;
+ }
+ targetUpto++;
+ }
+
+ if (cmp == 0) {
+ // Shared prefix exhausted; shorter term sorts first:
+ cmp = term.length - target.length;
+ }
+ targetUpto = targetUptoMid;
+ }
+
+ if (cmp < 0) {
+ // Common case: target term is after current
+ // term, ie, app is seeking multiple terms
+ // in sorted order
+ //if (DEBUG) {
+ //System.out.println("  target is after current (shares prefixLen=" + targetUpto + "); clear frame.scanned ord=" + lastFrame.ord);
+ //}
+ currentFrame = lastFrame;
+
+ } else if (cmp > 0) {
+ // Uncommon case: target term
+ // is before current term; this means we can
+ // keep the currentFrame but we must rewind it
+ // (so we scan from the start)
+ targetBeforeCurrentLength = 0;
+ //if (DEBUG) {
+ //System.out.println("  target is before current (shares prefixLen=" + targetUpto + "); rewind frame ord=" + lastFrame.ord);
+ //}
+ currentFrame = lastFrame;
+ currentFrame.rewind();
+ } else {
+ // Target is exactly the same as current term
+ assert term.length == target.length;
+ if (termExists) {
+ //if (DEBUG) {
+ //System.out.println("  target is same as current; return FOUND");
+ //}
+ return SeekStatus.FOUND;
+ } else {
+ //if (DEBUG) {
+ //System.out.println("  target is same as current but term doesn't exist");
+ //}
+ }
+ }
+
+ } else {
+
+ // No prior seek state: start from the root of the index FST.
+ targetBeforeCurrentLength = -1;
+ arc = index.getFirstArc(arcs[0]);
+
+ // Empty string prefix must have an output (block) in the index!
+ assert arc.isFinal();
+ assert arc.output != null;
+
+ //if (DEBUG) {
+ //System.out.println("    no seek state; push root frame");
+ //}
+
+ output = arc.output;
+
+ currentFrame = staticFrame;
+
+ //term.length = 0;
+ targetUpto = 0;
+ currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), 0);
+ }
+
+ //if (DEBUG) {
+ //System.out.println("  start index loop targetUpto=" + targetUpto + " output=" + output + " currentFrame.ord+1=" + currentFrame.ord + " targetBeforeCurrentLength=" + targetBeforeCurrentLength);
+ //}
+
+ // Walk the index FST one target byte at a time, pushing a new
+ // frame whenever we cross a final arc (block boundary):
+ while (targetUpto < target.length) {
+
+ final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
+
+ final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
+
+ if (nextArc == null) {
+
+ // Index is exhausted
+ // if (DEBUG) {
+ //   System.out.println("    index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
+ // }
+
+ validIndexPrefix = currentFrame.prefix;
+ //validIndexPrefix = targetUpto;
+
+ currentFrame.scanToFloorFrame(target);
+
+ currentFrame.loadBlock();
+
+ final SeekStatus result = currentFrame.scanToTerm(target, false);
+ if (result == SeekStatus.END) {
+ // Block exhausted: position on the next term (if any):
+ term.copyBytes(target);
+ termExists = false;
+
+ if (next() != null) {
+ //if (DEBUG) {
+ //System.out.println("  return NOT_FOUND term=" + brToString(term) + " " + term);
+ //}
+ return SeekStatus.NOT_FOUND;
+ } else {
+ //if (DEBUG) {
+ //System.out.println("  return END");
+ //}
+ return SeekStatus.END;
+ }
+ } else {
+ //if (DEBUG) {
+ //System.out.println("  return " + result + " term=" + brToString(term) + " " + term);
+ //}
+ return result;
+ }
+ } else {
+ // Follow this arc
+ term.bytes[targetUpto] = (byte) targetLabel;
+ arc = nextArc;
+ // Aggregate output as we go:
+ assert arc.output != null;
+ if (arc.output != NO_OUTPUT) {
+ output = fstOutputs.add(output, arc.output);
+ }
+
+ //if (DEBUG) {
+ //System.out.println("    index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
+ //}
+ targetUpto++;
+
+ if (arc.isFinal()) {
+ //if (DEBUG) System.out.println("    arc is final!");
+ currentFrame = pushFrame(arc, fstOutputs.add(output, arc.nextFinalOutput), targetUpto);
+ //if (DEBUG) System.out.println("    curFrame.ord=" + currentFrame.ord + " hasTerms=" + currentFrame.hasTerms);
+ }
+ }
+ }
+
+ //validIndexPrefix = targetUpto;
+ validIndexPrefix = currentFrame.prefix;
+
+ currentFrame.scanToFloorFrame(target);
+
+ currentFrame.loadBlock();
+
+ final SeekStatus result = currentFrame.scanToTerm(target, false);
+
+ if (result == SeekStatus.END) {
+ // Block exhausted: position on the next term (if any):
+ term.copyBytes(target);
+ termExists = false;
+ if (next() != null) {
+ //if (DEBUG) {
+ //System.out.println("  return NOT_FOUND term=" + term.utf8ToString() + " " + term);
+ //}
+ return SeekStatus.NOT_FOUND;
+ } else {
+ //if (DEBUG) {
+ //System.out.println("  return END");
+ //}
+ return SeekStatus.END;
+ }
+ } else {
+ return result;
+ }
+ }
+
+ /** Debugging aid: prints the saved seek state (frame stack) and
+ cross-checks each seek frame against the index FST, throwing
+ RuntimeException if the saved state is inconsistent. */
+ @SuppressWarnings("unused")
+ private void printSeekState(PrintStream out) throws IOException {
+ if (currentFrame == staticFrame) {
+ out.println("  no prior seek");
+ } else {
+ out.println("  prior seek state:");
+ int ord = 0;
+ boolean isSeekFrame = true;
+ while(true) {
+ Frame f = getFrame(ord);
+ assert f != null;
+ final BytesRef prefix = new BytesRef(term.bytes, 0, f.prefix);
+ if (f.nextEnt == -1) {
+ out.println("    frame " + (isSeekFrame ? "(seek)" : "(next)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? BlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
+ } else {
+ out.println("    frame " + (isSeekFrame ? "(seek, loaded)" : "(next, loaded)") + " ord=" + ord + " fp=" + f.fp + (f.isFloor ? (" (fpOrig=" + f.fpOrig + ")") : "") + " prefixLen=" + f.prefix + " prefix=" + prefix + " nextEnt=" + f.nextEnt + (f.nextEnt == -1 ? "" : (" (of " + f.entCount + ")")) + " hasTerms=" + f.hasTerms + " isFloor=" + f.isFloor + " code=" + ((f.fp<<BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) + (f.hasTerms ? BlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) + (f.isFloor ? BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0)) + " lastSubFP=" + f.lastSubFP + " isLastInFloor=" + f.isLastInFloor + " mdUpto=" + f.metaDataUpto + " tbOrd=" + f.getTermBlockOrd());
+ }
+ if (index != null) {
+ assert !isSeekFrame || f.arc != null: "isSeekFrame=" + isSeekFrame + " f.arc=" + f.arc;
+ if (f.prefix > 0 && isSeekFrame && f.arc.label != (term.bytes[f.prefix-1]&0xFF)) {
+ out.println("      broken seek state: arc.label=" + (char) f.arc.label + " vs term byte=" + (char) (term.bytes[f.prefix-1]&0xFF));
+ throw new RuntimeException("seek state is broken");
+ }
+ BytesRef output = Util.get(index, prefix);
+ if (output == null) {
+ out.println("      broken seek state: prefix is not final in index");
+ throw new RuntimeException("seek state is broken");
+ } else if (isSeekFrame && !f.isFloor) {
+ // Re-derive the frame's code from its fp/flags and compare
+ // against what the index stores for this prefix:
+ final ByteArrayDataInput reader = new ByteArrayDataInput(output.bytes, output.offset, output.length);
+ final long codeOrig = reader.readVLong();
+ final long code = (f.fp << BlockTreeTermsWriter.OUTPUT_FLAGS_NUM_BITS) | (f.hasTerms ? BlockTreeTermsWriter.OUTPUT_FLAG_HAS_TERMS:0) | (f.isFloor ? BlockTreeTermsWriter.OUTPUT_FLAG_IS_FLOOR:0);
+ if (codeOrig != code) {
+ out.println("      broken seek state: output code=" + codeOrig + " doesn't match frame code=" + code);
+ throw new RuntimeException("seek state is broken");
+ }
+ }
+ }
+ if (f == currentFrame) {
+ break;
+ }
+ if (f.prefix == validIndexPrefix) {
+ isSeekFrame = false;
+ }
+ ord++;
+ }
+ }
+ }
+
+ /* Decodes only the term bytes of the next term.  If caller then asks for
+    metadata, ie docFreq, totalTermFreq or pulls a D/&PEnum, we then (lazily)
+    decode all metadata up to the current term. */
+ @Override
+ public BytesRef next() throws IOException {
+
+ if (in == null) {
+ // Fresh TermsEnum; seek to first term:
+ final FST.Arc<BytesRef> arc;
+ if (index != null) {
+ arc = index.getFirstArc(arcs[0]);
+ // Empty string prefix must have an output in the index!
+ assert arc.isFinal();
+ } else {
+ arc = null;
+ }
+ currentFrame = pushFrame(arc, rootCode, 0);
+ currentFrame.loadBlock();
+ }
+
+ targetBeforeCurrentLength = currentFrame.ord;
+
+ assert !eof;
+ //if (DEBUG) {
+ //System.out.println("\nBTTR.next seg=" + segment + " term=" + brToString(term) + " termExists?=" + termExists + " field=" + fieldInfo.name + " termBlockOrd=" + currentFrame.state.termBlockOrd + " validIndexPrefix=" + validIndexPrefix);
+ //printSeekState();
+ //}
+
+ if (currentFrame == staticFrame) {
+ // If seek was previously called and the term was
+ // cached, or seek(TermState) was called, usually
+ // caller is just going to pull a D/&PEnum or get
+ // docFreq, etc.  But, if they then call next(),
+ // this method catches up all internal state so next()
+ // works properly:
+ //if (DEBUG) System.out.println("  re-seek to pending term=" + term.utf8ToString() + " " + term);
+ final boolean result = seekExact(term, false);
+ assert result;
+ }
+
+ // Pop finished blocks
+ while (currentFrame.nextEnt == currentFrame.entCount) {
+ if (!currentFrame.isLastInFloor) {
+ currentFrame.loadNextFloorBlock();
+ } else {
+ //if (DEBUG) System.out.println("  pop frame");
+ if (currentFrame.ord == 0) {
+ // Root block exhausted: enumeration is done.
+ //if (DEBUG) System.out.println("  return null");
+ assert setEOF();
+ term.length = 0;
+ validIndexPrefix = 0;
+ currentFrame.rewind();
+ termExists = false;
+ return null;
+ }
+ final long lastFP = currentFrame.fpOrig;
+ currentFrame = stack[currentFrame.ord-1];
+
+ if (currentFrame.nextEnt == -1 || currentFrame.lastSubFP != lastFP) {
+ // We popped into a frame that's not loaded
+ // yet or not scan'd to the right entry
+ currentFrame.scanToFloorFrame(term);
+ currentFrame.loadBlock();
+ currentFrame.scanToSubBlock(lastFP);
+ }
+
+ // Note that the seek state (last seek) has been
+ // invalidated beyond this depth
+ validIndexPrefix = Math.min(validIndexPrefix, currentFrame.prefix);
+ //if (DEBUG) {
+ //System.out.println("  reset validIndexPrefix=" + validIndexPrefix);
+ //}
+ }
+ }
+
+ // Descend into sub-blocks until the next entry is a term:
+ while(true) {
+ if (currentFrame.next()) {
+ // Push to new block:
+ //if (DEBUG) System.out.println("  push frame");
+ currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
+ // This is a "next" frame -- even if it's
+ // floor'd we must pretend it isn't so we don't
+ // try to scan to the right floor frame:
+ currentFrame.isFloor = false;
+ //currentFrame.hasTerms = true;
+ currentFrame.loadBlock();
+ } else {
+ //if (DEBUG) System.out.println("  return term=" + term.utf8ToString() + " " + term + " currentFrame.ord=" + currentFrame.ord);
+ return term;
+ }
+ }
+ }
+
+ /** Returns the current term; only valid when the enum is
+ positioned (not at EOF). */
+ @Override
+ public BytesRef term() {
+ assert !eof;
+ return term;
+ }
+
+ /** Returns the current term's document frequency, lazily
+ decoding the block metadata first. */
+ @Override
+ public int docFreq() throws IOException {
+ assert !eof;
+ //if (DEBUG) System.out.println("BTR.docFreq");
+ currentFrame.decodeMetaData();
+ //if (DEBUG) System.out.println("  return " + currentFrame.state.docFreq);
+ return currentFrame.state.docFreq;
+ }
+
+ /** Returns the current term's total term frequency, lazily
+ decoding the block metadata first. */
+ @Override
+ public long totalTermFreq() throws IOException {
+ assert !eof;
+ currentFrame.decodeMetaData();
+ return currentFrame.state.totalTermFreq;
+ }
+
+ /** Returns a DocsEnum for the current term, after lazily
+ decoding its metadata; delegates to the postings reader. */
+ @Override
+ public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
+ assert !eof;
+ //if (DEBUG) {
+ //System.out.println("BTTR.docs seg=" + segment);
+ //}
+ currentFrame.decodeMetaData();
+ //if (DEBUG) {
+ //System.out.println("  state=" + currentFrame.state);
+ //}
+ return postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse, flags);
+ }
+
+ /** Returns a DocsAndPositionsEnum for the current term, or null
+ if positions were not indexed for this field; delegates to
+ the postings reader after lazily decoding metadata. */
+ @Override
+ public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
+ if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+ // Positions were not indexed:
+ return null;
+ }
+
+ assert !eof;
+ currentFrame.decodeMetaData();
+ return postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse, flags);
+ }
+
+ /** Positions this enum on {@code target} using an externally
+ supplied TermState (e.g. obtained from termState()), avoiding
+ a full seek; a no-op if already exactly on the target. */
+ @Override
+ public void seekExact(BytesRef target, TermState otherState) {
+ // if (DEBUG) {
+ //   System.out.println("BTTR.seekExact termState seg=" + segment + " target=" + target.utf8ToString() + " " + target + " state=" + otherState);
+ // }
+ assert clearEOF();
+ if (target.compareTo(term) != 0 || !termExists) {
+ assert otherState != null && otherState instanceof BlockTermState;
+ // Park on the static frame, copying in the external state;
+ // next() will re-seek to catch up the internal state.
+ currentFrame = staticFrame;
+ currentFrame.state.copyFrom(otherState);
+ term.copyBytes(target);
+ currentFrame.metaDataUpto = currentFrame.getTermBlockOrd();
+ assert currentFrame.metaDataUpto > 0;
+ validIndexPrefix = 0;
+ } else {
+ // if (DEBUG) {
+ //   System.out.println("  skip seek: already on target state=" + currentFrame.state);
+ // }
+ }
+ }
+
+ /** Returns a clone of the current term's fully-decoded state so
+ the caller can later return here via seekExact(term, state). */
+ @Override
+ public TermState termState() throws IOException {
+ assert !eof;
+ // Metadata is decoded lazily; force it before cloning:
+ currentFrame.decodeMetaData();
+ return currentFrame.state.clone();
+ }
+
+ /** Ord-based seeking is not supported by this terms dictionary. */
+ @Override
+ public void seekExact(long ord) {
+ throw new UnsupportedOperationException();
+ }
+
+ /** Term ords are not supported by this terms dictionary. */
+ @Override
+ public long ord() {
+ throw new UnsupportedOperationException();
+ }
+
+ // Not static -- references term, postingsReader,
+ // fieldInfo, in
+ private final class Frame {
+ // Our index in stack[]:
+ final int ord;
+
+ boolean hasTerms;
+ boolean hasTermsOrig;
+ boolean isFloor;
+
+ FST.Arc<BytesRef> arc;
+
+ // File pointer where this block was loaded from
+ long fp;
+ long fpOrig;
+ long fpEnd;
+
+ byte[] suffixBytes = new byte[128];
+ final ByteArrayDataInput suffixesReader = new ByteArrayDataInput();
+
+ byte[] statBytes = new byte[64];
+ final ByteArrayDataInput statsReader = new ByteArrayDataInput();
+
+ byte[] floorData = new byte[32];
+ final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
+
+ // Length of prefix shared by all terms in this block
+ int prefix;
+
+ // Number of entries (term or sub-block) in this block
+ int entCount;
+
+ // Which term we will next read, or -1 if the block
+ // isn't loaded yet
+ int nextEnt;
+
+ // True if this block is either not a floor block,
+ // or, it's the last sub-block of a floor block
+ boolean isLastInFloor;
+
+ // True if all entries are terms
+ boolean isLeafBlock;
+
+ long lastSubFP;
+
+ int nextFloorLabel;
+ int numFollowFloorBlocks;
+
+ // Next term to decode metaData; we decode metaData
+ // lazily so that scanning to find the matching term is
+ // fast and only if you find a match and app wants the
+ // stats or docs/positions enums, will we decode the
+ // metaData
+ int metaDataUpto;
+
+ final BlockTermState state;
+
+ /** Sole constructor; ord is this frame's fixed index in stack[]. */
+ public Frame(int ord) throws IOException {
+ this.ord = ord;
+ state = postingsReader.newTermState();
+ // NOTE(review): -1 appears to be the "not set" sentinel for
+ // totalTermFreq -- confirm against the postings format.
+ state.totalTermFreq = -1;
+ }
+
+ /** Copies the floor data -- the bytes of {@code source} remaining
+ after {@code in}'s current position -- into this frame and
+ decodes the follow-on floor block count and first floor label. */
+ public void setFloorData(ByteArrayDataInput in, BytesRef source) {
+ final int numBytes = source.length - (in.getPosition() - source.offset);
+ if (numBytes > floorData.length) {
+ floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
+ }
+ System.arraycopy(source.bytes, source.offset+in.getPosition(), floorData, 0, numBytes);
+ floorDataReader.reset(floorData, 0, numBytes);
+ numFollowFloorBlocks = floorDataReader.readVInt();
+ nextFloorLabel = floorDataReader.readByte() & 0xff;
+ //if (DEBUG) {
+ //System.out.println("    setFloorData fpOrig=" + fpOrig + " bytes=" + new BytesRef(source.bytes, source.offset + in.getPosition(), numBytes) + " numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + toHex(nextFloorLabel));
+ //}
+ }
+
+ // In a leaf block every entry is a term, so nextEnt doubles as
+ // the term ord within the block; otherwise use the decoded count.
+ public int getTermBlockOrd() {
+ if (isLeafBlock) {
+ return nextEnt;
+ }
+ return state.termBlockOrd;
+ }
+
+ /** Advances fp to the next sub-block of the current floor block
+ (sub-blocks are written contiguously, so it starts at fpEnd)
+ and reloads. */
+ void loadNextFloorBlock() throws IOException {
+ //if (DEBUG) {
+ //System.out.println("    loadNextFloorBlock fp=" + fp + " fpEnd=" + fpEnd);
+ //}
+ assert arc == null || isFloor: "arc=" + arc + " isFloor=" + isFloor;
+ fp = fpEnd;
+ // -1 marks the block as not loaded so loadBlock re-reads it:
+ nextEnt = -1;
+ loadBlock();
+ }
+
+ /* Does initial decode of next block of terms; this
+    doesn't actually decode the docFreq, totalTermFreq,
+    postings details (frq/prx offset, etc.) metadata;
+    it just loads them as byte[] blobs which are then
+    decoded on-demand if the metadata is ever requested
+    for any term in this block.  This enables terms-only
+    intensive consumes (eg certain MTQs, respelling) to
+    not pay the price of decoding metadata they won't
+    use. */
+ void loadBlock() throws IOException {
+
+ // Clone the IndexInput lazily, so that consumers
+ // that just pull a TermsEnum to
+ // seekExact(TermState) don't pay this cost:
+ initIndexInput();
+
+ if (nextEnt != -1) {
+ // Already loaded
+ return;
+ }
+ //System.out.println("blc=" + blockLoadCount);
+
+ in.seek(fp);
+ // Header vInt: low bit = last-in-floor flag, rest = entry count:
+ int code = in.readVInt();
+ entCount = code >>> 1;
+ assert entCount > 0;
+ isLastInFloor = (code & 1) != 0;
+ assert arc == null || (isLastInFloor || isFloor);
+
+ // TODO: if suffixes were stored in random-access
+ // array structure, then we could do binary search
+ // instead of linear scan to find target term; eg
+ // we could have simple array of offsets
+
+ // term suffixes:
+ // Next vInt: low bit = leaf-block flag, rest = suffix byte count:
+ code = in.readVInt();
+ isLeafBlock = (code & 1) != 0;
+ int numBytes = code >>> 1;
+ if (suffixBytes.length < numBytes) {
+ suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+ }
+ in.readBytes(suffixBytes, 0, numBytes);
+ suffixesReader.reset(suffixBytes, 0, numBytes);
+
+ /*if (DEBUG) {
+ if (arc == null) {
+ System.out.println("    loadBlock (next) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
+ } else {
+ System.out.println("    loadBlock (seek) fp=" + fp + " entCount=" + entCount + " prefixLen=" + prefix + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " isLastInFloor=" + isLastInFloor + " leaf?=" + isLeafBlock);
+ }
+ }*/
+
+ // stats
+ // Stats are read as a raw blob and decoded lazily via statsReader:
+ numBytes = in.readVInt();
+ if (statBytes.length < numBytes) {
+ statBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
+ }
+ in.readBytes(statBytes, 0, numBytes);
+ statsReader.reset(statBytes, 0, numBytes);
+ metaDataUpto = 0;
+
+ state.termBlockOrd = 0;
+ nextEnt = 0;
+ lastSubFP = -1;
+
+ // TODO: we could skip this if !hasTerms; but
+ // that's rare so won't help much
+ postingsReader.readTermsBlock(in, fieldInfo, state);
+
+ // Sub-blocks of a single floor block are always
+ // written one after another -- tail recurse:
+ fpEnd = in.getFilePointer();
+ // if (DEBUG) {
+ //   System.out.println("      fpEnd=" + fpEnd);
+ // }
+ }
+
+ // Resets this frame to its original (unloaded) state so the
+ // block will be re-read from fpOrig on the next loadBlock().
+ void rewind() {
+
+ // Force reload:
+ // (nextEnt == -1 marks the block as not loaded; loadBlock()
+ // checks this and re-reads from fp.)
+ fp = fpOrig;
+ nextEnt = -1;
+ hasTerms = hasTermsOrig;
+ if (isFloor) {
+ floorDataReader.rewind();
+ numFollowFloorBlocks = floorDataReader.readVInt();
+ nextFloorLabel = floorDataReader.readByte() & 0xff;
+ }
+
+ /*
+ //System.out.println("rewind");
+ // Keeps the block loaded, but rewinds its state:
+ if (nextEnt > 0 || fp != fpOrig) {
+ if (DEBUG) {
+ System.out.println(" rewind frame ord=" + ord + " fpOrig=" + fpOrig + " fp=" + fp + " hasTerms?=" + hasTerms + " isFloor?=" + isFloor + " nextEnt=" + nextEnt + " prefixLen=" + prefix);
+ }
+ if (fp != fpOrig) {
+ fp = fpOrig;
+ nextEnt = -1;
+ } else {
+ nextEnt = 0;
+ }
+ hasTerms = hasTermsOrig;
+ if (isFloor) {
+ floorDataReader.rewind();
+ numFollowFloorBlocks = floorDataReader.readVInt();
+ nextFloorLabel = floorDataReader.readByte() & 0xff;
+ }
+ assert suffixBytes != null;
+ suffixesReader.rewind();
+ assert statBytes != null;
+ statsReader.rewind();
+ metaDataUpto = 0;
+ state.termBlockOrd = 0;
+ // TODO: skip this if !hasTerms? Then postings
+ // impl wouldn't have to write useless 0 byte
+ postingsReader.resetTermsBlock(fieldInfo, state);
+ lastSubFP = -1;
+ } else if (DEBUG) {
+ System.out.println(" skip rewind fp=" + fp + " fpOrig=" + fpOrig + " nextEnt=" + nextEnt + " ord=" + ord);
+ }
+ */
+ }
+
+ // Advances to the next entry, dispatching on the block type;
+ // returns true if the new entry is a sub-block (only possible
+ // for non-leaf blocks).
+ public boolean next() {
+ return isLeafBlock ? nextLeaf() : nextNonLeaf();
+ }
+
+ // Decodes next entry; returns true if it's a sub-block.
+ // (A leaf block contains only terms, so this always returns
+ // false and sets termExists.)
+ public boolean nextLeaf() {
+ //if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
+ assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+ nextEnt++;
+ // Leaf entry is just (suffix length, suffix bytes); append the
+ // suffix to the shared prefix to form the full term:
+ suffix = suffixesReader.readVInt();
+ startBytePos = suffixesReader.getPosition();
+ term.length = prefix + suffix;
+ if (term.bytes.length < term.length) {
+ term.grow(term.length);
+ }
+ suffixesReader.readBytes(term.bytes, prefix, suffix);
+ // A normal term
+ termExists = true;
+ return false;
+ }
+
+ // Decodes next entry of a non-leaf block; returns true if it's
+ // a sub-block. Entry header vInt packs (suffix length << 1 | isSubBlock).
+ public boolean nextNonLeaf() {
+ //if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
+ assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
+ nextEnt++;
+ final int code = suffixesReader.readVInt();
+ suffix = code >>> 1;
+ startBytePos = suffixesReader.getPosition();
+ term.length = prefix + suffix;
+ if (term.bytes.length < term.length) {
+ term.grow(term.length);
+ }
+ suffixesReader.readBytes(term.bytes, prefix, suffix);
+ if ((code & 1) == 0) {
+ // A normal term
+ termExists = true;
+ subCode = 0;
+ state.termBlockOrd++;
+ return false;
+ } else {
+ // A sub-block; make sub-FP absolute:
+ // (subCode is stored as a backwards delta from this block's fp)
+ termExists = false;
+ subCode = suffixesReader.readVLong();
+ lastSubFP = fp - subCode;
+ //if (DEBUG) {
+ //System.out.println(" lastSubFP=" + lastSubFP);
+ //}
+ return true;
+ }
+ }
+
+ // TODO: make this array'd so we can do bin search?
+ // likely not worth it? need to measure how many
+ // floor blocks we "typically" get
+ // Positions this frame onto the correct floor block for the
+ // target's next label byte, forcing a block reload if the
+ // file pointer changed.
+ public void scanToFloorFrame(BytesRef target) {
+
+ if (!isFloor || target.length <= prefix) {
+ // if (DEBUG) {
+ // System.out.println(" scanToFloorFrame skip: isFloor=" + isFloor + " target.length=" + target.length + " vs prefix=" + prefix);
+ // }
+ return;
+ }
+
+ final int targetLabel = target.bytes[target.offset + prefix] & 0xFF;
+
+ // if (DEBUG) {
+ // System.out.println(" scanToFloorFrame fpOrig=" + fpOrig + " targetLabel=" + toHex(targetLabel) + " vs nextFloorLabel=" + toHex(nextFloorLabel) + " numFollowFloorBlocks=" + numFollowFloorBlocks);
+ // }
+
+ if (targetLabel < nextFloorLabel) {
+ // if (DEBUG) {
+ // System.out.println(" already on correct block");
+ // }
+ return;
+ }
+
+ assert numFollowFloorBlocks != 0;
+
+ // Walk the floor-block list: each entry is a vLong packing
+ // (fp delta from fpOrig << 1 | hasTerms), followed by the next
+ // block's leading label byte unless it is the last block.
+ long newFP = fpOrig;
+ while (true) {
+ final long code = floorDataReader.readVLong();
+ newFP = fpOrig + (code >>> 1);
+ hasTerms = (code & 1) != 0;
+ // if (DEBUG) {
+ // System.out.println(" label=" + toHex(nextFloorLabel) + " fp=" + newFP + " hasTerms?=" + hasTerms + " numFollowFloor=" + numFollowFloorBlocks);
+ // }
+
+ isLastInFloor = numFollowFloorBlocks == 1;
+ numFollowFloorBlocks--;
+
+ if (isLastInFloor) {
+ // 256 is past any byte label, so no later target can scan further:
+ nextFloorLabel = 256;
+ // if (DEBUG) {
+ // System.out.println(" stop! last block nextFloorLabel=" + toHex(nextFloorLabel));
+ // }
+ break;
+ } else {
+ nextFloorLabel = floorDataReader.readByte() & 0xff;
+ if (targetLabel < nextFloorLabel) {
+ // if (DEBUG) {
+ // System.out.println(" stop! nextFloorLabel=" + toHex(nextFloorLabel));
+ // }
+ break;
+ }
+ }
+ }
+
+ if (newFP != fp) {
+ // Force re-load of the block:
+ // if (DEBUG) {
+ // System.out.println(" force switch to fp=" + newFP + " oldFP=" + fp);
+ // }
+ nextEnt = -1;
+ fp = newFP;
+ } else {
+ // if (DEBUG) {
+ // System.out.println(" stay on same fp=" + newFP);
+ // }
+ }
+ }
+
+ // Lazily decodes the stats blob (docFreq/totalTermFreq) and
+ // per-term postings metadata up to the current term's ordinal.
+ public void decodeMetaData() throws IOException {
+
+ //if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);
+
+ // lazily catch up on metadata decode:
+ final int limit = getTermBlockOrd();
+ assert limit > 0;
+
+ // We must set/incr state.termCount because
+ // postings impl can look at this
+ state.termBlockOrd = metaDataUpto;
+
+ // TODO: better API would be "jump straight to term=N"???
+ while (metaDataUpto < limit) {
+
+ // TODO: we could make "tiers" of metadata, ie,
+ // decode docFreq/totalTF but don't decode postings
+ // metadata; this way caller could get
+ // docFreq/totalTF w/o paying decode cost for
+ // postings
+
+ // TODO: if docFreq were bulk decoded we could
+ // just skipN here:
+ state.docFreq = statsReader.readVInt();
+ //if (DEBUG) System.out.println(" dF=" + state.docFreq);
+ if (fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY) {
+ // totalTermFreq is stored as a delta on top of docFreq:
+ state.totalTermFreq = state.docFreq + statsReader.readVLong();
+ //if (DEBUG) System.out.println(" totTF=" + state.totalTermFreq);
+ }
+
+ postingsReader.nextTerm(fieldInfo, state);
+ metaDataUpto++;
+ state.termBlockOrd++;
+ }
+ }
+
+ // Used only by assert: verifies the target shares this block's
+ // prefix (already accumulated in term.bytes[0..prefix)).
+ private boolean prefixMatches(BytesRef target) {
+ for(int bytePos=0;bytePos<prefix;bytePos++) {
+ if (target.bytes[target.offset + bytePos] != term.bytes[bytePos]) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ // Scans to sub-block that has this target fp; only
+ // called by next(); NOTE: does not set
+ // startBytePos/suffix as a side effect
+ public void scanToSubBlock(long subFP) {
+ assert !isLeafBlock;
+ //if (DEBUG) System.out.println(" scanToSubBlock fp=" + fp + " subFP=" + subFP + " entCount=" + entCount + " lastSubFP=" + lastSubFP);
+ //assert nextEnt == 0;
+ if (lastSubFP == subFP) {
+ //if (DEBUG) System.out.println(" already positioned");
+ return;
+ }
+ assert subFP < fp : "fp=" + fp + " subFP=" + subFP;
+ // Sub-block fps are stored as backwards deltas from this block's fp:
+ final long targetSubCode = fp - subFP;
+ //if (DEBUG) System.out.println(" targetSubCode=" + targetSubCode);
+ while(true) {
+ assert nextEnt < entCount;
+ nextEnt++;
+ final int code = suffixesReader.readVInt();
+ // (isLeafBlock is always false here per the assert above, so
+ // the suffix length is code >>> 1)
+ suffixesReader.skipBytes(isLeafBlock ? code : code >>> 1);
+ //if (DEBUG) System.out.println(" " + nextEnt + " (of " + entCount + ") ent isSubBlock=" + ((code&1)==1));
+ if ((code & 1) != 0) {
+ final long subCode = suffixesReader.readVLong();
+ //if (DEBUG) System.out.println(" subCode=" + subCode);
+ if (targetSubCode == subCode) {
+ //if (DEBUG) System.out.println(" match!");
+ lastSubFP = subFP;
+ return;
+ }
+ } else {
+ state.termBlockOrd++;
+ }
+ }
+ }
+
+ // NOTE: sets startBytePos/suffix as a side effect
+ public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOException {
+ return isLeafBlock ? scanToTermLeaf(target, exactOnly) : scanToTermNonLeaf(target, exactOnly);
+ }
+
+ // Scan state shared by scanToTermLeaf/scanToTermNonLeaf and fillTerm():
+ private int startBytePos;
+ private int suffix;
+ private long subCode;
+
+ // Target's prefix matches this block's prefix; we
+ // scan the entries, checking if the suffix matches.
+ public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
+
+ // if (DEBUG) System.out.println(" scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
+
+ assert nextEnt != -1;
+
+ // Leaf blocks hold only terms, so every entry "exists":
+ termExists = true;
+ subCode = 0;
+
+ if (nextEnt == entCount) {
+ if (exactOnly) {
+ fillTerm();
+ }
+ return SeekStatus.END;
+ }
+
+ assert prefixMatches(target);
+
+ // Loop over each entry (term or sub-block) in this block:
+ //nextTerm: while(nextEnt < entCount) {
+ nextTerm: while (true) {
+ nextEnt++;
+
+ suffix = suffixesReader.readVInt();
+
+ // if (DEBUG) {
+ // BytesRef suffixBytesRef = new BytesRef();
+ // suffixBytesRef.bytes = suffixBytes;
+ // suffixBytesRef.offset = suffixesReader.getPosition();
+ // suffixBytesRef.length = suffix;
+ // System.out.println(" cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
+ // }
+
+ final int termLen = prefix + suffix;
+ startBytePos = suffixesReader.getPosition();
+ suffixesReader.skipBytes(suffix);
+
+ final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
+ int targetPos = target.offset + prefix;
+
+ // Loop over bytes in the suffix, comparing to
+ // the target
+ int bytePos = startBytePos;
+ while(true) {
+ final int cmp;
+ final boolean stop;
+ if (targetPos < targetLimit) {
+ // Unsigned byte comparison:
+ cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
+ stop = false;
+ } else {
+ assert targetPos == targetLimit;
+ // All shared bytes equal; shorter sorts first:
+ cmp = termLen - target.length;
+ stop = true;
+ }
+
+ if (cmp < 0) {
+ // Current entry is still before the target;
+ // keep scanning
+
+ if (nextEnt == entCount) {
+ if (exactOnly) {
+ fillTerm();
+ }
+ // We are done scanning this block
+ break nextTerm;
+ } else {
+ continue nextTerm;
+ }
+ } else if (cmp > 0) {
+
+ // Done! Current entry is after target --
+ // return NOT_FOUND:
+ fillTerm();
+
+ if (!exactOnly && !termExists) {
+ // We are on a sub-block, and caller wants
+ // us to position to the next term after
+ // the target, so we must recurse into the
+ // sub-frame(s):
+ currentFrame = pushFrame(null, currentFrame.lastSubFP, termLen);
+ currentFrame.loadBlock();
+ while (currentFrame.next()) {
+ currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
+ currentFrame.loadBlock();
+ }
+ }
+
+ //if (DEBUG) System.out.println(" not found");
+ return SeekStatus.NOT_FOUND;
+ } else if (stop) {
+ // Exact match!
+
+ // This cannot be a sub-block because we
+ // would have followed the index to this
+ // sub-block from the start:
+
+ assert termExists;
+ fillTerm();
+ //if (DEBUG) System.out.println(" found!");
+ return SeekStatus.FOUND;
+ }
+ }
+ }
+
+ // It is possible (and OK) that terms index pointed us
+ // at this block, but, we scanned the entire block and
+ // did not find the term to position to. This happens
+ // when the target is after the last term in the block
+ // (but, before the next term in the index). EG
+ // target could be foozzz, and terms index pointed us
+ // to the foo* block, but the last term in this block
+ // was fooz (and, eg, first term in the next block will
+ // be fop).
+ //if (DEBUG) System.out.println(" block end");
+ if (exactOnly) {
+ fillTerm();
+ }
+
+ // TODO: not consistent that in the
+ // not-exact case we don't next() into the next
+ // frame here
+ return SeekStatus.END;
+ }
+
+ // Target's prefix matches this block's prefix; we
+ // scan the entries, checking if the suffix matches.
+ // Unlike the leaf variant, entries here may be sub-blocks.
+ public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
+
+ //if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
+
+ assert nextEnt != -1;
+
+ if (nextEnt == entCount) {
+ if (exactOnly) {
+ fillTerm();
+ termExists = subCode == 0;
+ }
+ return SeekStatus.END;
+ }
+
+ assert prefixMatches(target);
+
+ // Loop over each entry (term or sub-block) in this block:
+ //nextTerm: while(nextEnt < entCount) {
+ nextTerm: while (true) {
+ nextEnt++;
+
+ // Entry header packs (suffix length << 1 | isSubBlock):
+ final int code = suffixesReader.readVInt();
+ suffix = code >>> 1;
+ // if (DEBUG) {
+ // BytesRef suffixBytesRef = new BytesRef();
+ // suffixBytesRef.bytes = suffixBytes;
+ // suffixBytesRef.offset = suffixesReader.getPosition();
+ // suffixBytesRef.length = suffix;
+ // System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
+ // }
+
+ termExists = (code & 1) == 0;
+ final int termLen = prefix + suffix;
+ startBytePos = suffixesReader.getPosition();
+ suffixesReader.skipBytes(suffix);
+ if (termExists) {
+ state.termBlockOrd++;
+ subCode = 0;
+ } else {
+ // Sub-block fp is stored as a backwards delta from this fp:
+ subCode = suffixesReader.readVLong();
+ lastSubFP = fp - subCode;
+ }
+
+ final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
+ int targetPos = target.offset + prefix;
+
+ // Loop over bytes in the suffix, comparing to
+ // the target
+ int bytePos = startBytePos;
+ while(true) {
+ final int cmp;
+ final boolean stop;
+ if (targetPos < targetLimit) {
+ // Unsigned byte comparison:
+ cmp = (suffixBytes[bytePos++]&0xFF) - (target.bytes[targetPos++]&0xFF);
+ stop = false;
+ } else {
+ assert targetPos == targetLimit;
+ // All shared bytes equal; shorter sorts first:
+ cmp = termLen - target.length;
+ stop = true;
+ }
+
+ if (cmp < 0) {
+ // Current entry is still before the target;
+ // keep scanning
+
+ if (nextEnt == entCount) {
+ if (exactOnly) {
+ fillTerm();
+ //termExists = true;
+ }
+ // We are done scanning this block
+ break nextTerm;
+ } else {
+ continue nextTerm;
+ }
+ } else if (cmp > 0) {
+
+ // Done! Current entry is after target --
+ // return NOT_FOUND:
+ fillTerm();
+
+ if (!exactOnly && !termExists) {
+ // We are on a sub-block, and caller wants
+ // us to position to the next term after
+ // the target, so we must recurse into the
+ // sub-frame(s):
+ currentFrame = pushFrame(null, currentFrame.lastSubFP, termLen);
+ currentFrame.loadBlock();
+ while (currentFrame.next()) {
+ currentFrame = pushFrame(null, currentFrame.lastSubFP, term.length);
+ currentFrame.loadBlock();
+ }
+ }
+
+ //if (DEBUG) System.out.println(" not found");
+ return SeekStatus.NOT_FOUND;
+ } else if (stop) {
+ // Exact match!
+
+ // This cannot be a sub-block because we
+ // would have followed the index to this
+ // sub-block from the start:
+
+ assert termExists;
+ fillTerm();
+ //if (DEBUG) System.out.println(" found!");
+ return SeekStatus.FOUND;
+ }
+ }
+ }
+
+ // It is possible (and OK) that terms index pointed us
+ // at this block, but, we scanned the entire block and
+ // did not find the term to position to. This happens
+ // when the target is after the last term in the block
+ // (but, before the next term in the index). EG
+ // target could be foozzz, and terms index pointed us
+ // to the foo* block, but the last term in this block
+ // was fooz (and, eg, first term in the next block will
+ // be fop).
+ //if (DEBUG) System.out.println(" block end");
+ if (exactOnly) {
+ fillTerm();
+ }
+
+ // TODO: not consistent that in the
+ // not-exact case we don't next() into the next
+ // frame here
+ return SeekStatus.END;
+ }
+
+ // Materializes the current entry into term: keeps the shared
+ // prefix already in term.bytes[0..prefix) and copies the current
+ // suffix bytes after it. (Reuses termLength instead of
+ // recomputing prefix + suffix twice.)
+ private void fillTerm() {
+ final int termLength = prefix + suffix;
+ term.length = termLength;
+ if (term.bytes.length < termLength) {
+ term.grow(termLength);
+ }
+ System.arraycopy(suffixBytes, startBytePos, term.bytes, prefix, suffix);
+ }
+ }
+ }
+ }
+}
diff --git a/blur-thrift/src/main/java/org/apache/blur/thrift/BlurClientManager.java b/blur-thrift/src/main/java/org/apache/blur/thrift/BlurClientManager.java
index 8c0e9ba..766cc59 100644
--- a/blur-thrift/src/main/java/org/apache/blur/thrift/BlurClientManager.java
+++ b/blur-thrift/src/main/java/org/apache/blur/thrift/BlurClientManager.java
@@ -50,6 +50,7 @@
import org.apache.blur.trace.Trace.TraceId;
import org.apache.blur.trace.Tracer;
import org.apache.blur.user.UserContext;
+import org.apache.blur.utils.ThreadValue;
public class BlurClientManager {
@@ -141,7 +142,7 @@
List<Connection> shuffledConnections = new ArrayList<Connection>();
}
- private static ThreadLocal<Random> _random = new ThreadLocal<Random>() {
+ private static ThreadValue<Random> _random = new ThreadValue<Random>() {
@Override
protected Random initialValue() {
return new Random();
diff --git a/blur-thrift/src/main/java/org/apache/blur/thrift/sasl/TSaslTransport.java b/blur-thrift/src/main/java/org/apache/blur/thrift/sasl/TSaslTransport.java
index 3cf80a4..15361f0 100644
--- a/blur-thrift/src/main/java/org/apache/blur/thrift/sasl/TSaslTransport.java
+++ b/blur-thrift/src/main/java/org/apache/blur/thrift/sasl/TSaslTransport.java
@@ -38,6 +38,7 @@
import org.apache.blur.thirdparty.thrift_0_9_0.transport.TSocket;
import org.apache.blur.thirdparty.thrift_0_9_0.transport.TTransport;
import org.apache.blur.thirdparty.thrift_0_9_0.transport.TTransportException;
+import org.apache.blur.utils.ThreadValue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -311,7 +312,7 @@
_currentConnection.set(null);
}
- static ThreadLocal<InetSocketAddress> _currentConnection = new ThreadLocal<InetSocketAddress>();
+ static ThreadValue<InetSocketAddress> _currentConnection = new ThreadValue<InetSocketAddress>();
private void setupConnectionInfo() {
if (underlyingTransport instanceof TSocket) {
diff --git a/blur-util/src/main/java/org/apache/blur/trace/Trace.java b/blur-util/src/main/java/org/apache/blur/trace/Trace.java
index 65c2bfa..8f4bbce 100644
--- a/blur-util/src/main/java/org/apache/blur/trace/Trace.java
+++ b/blur-util/src/main/java/org/apache/blur/trace/Trace.java
@@ -27,6 +27,7 @@
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
+import org.apache.blur.utils.ThreadValue;
import org.json.JSONException;
import org.json.JSONObject;
@@ -77,10 +78,10 @@
}
};
- private static ThreadLocal<TraceCollector> _tracer = new ThreadLocal<TraceCollector>();
+ private static ThreadValue<TraceCollector> _tracer = new ThreadValue<TraceCollector>();
private static TraceStorage _storage;
private static String _nodeName;
- private static ThreadLocal<Random> _random = new ThreadLocal<Random>() {
+ private static ThreadValue<Random> _random = new ThreadValue<Random>() {
@Override
protected Random initialValue() {
return new Random();
diff --git a/blur-util/src/main/java/org/apache/blur/user/UserContext.java b/blur-util/src/main/java/org/apache/blur/user/UserContext.java
index 6ae373b..1b4fb1a 100644
--- a/blur-util/src/main/java/org/apache/blur/user/UserContext.java
+++ b/blur-util/src/main/java/org/apache/blur/user/UserContext.java
@@ -19,6 +19,8 @@
import java.util.HashMap;
import java.util.Map;
+import org.apache.blur.utils.ThreadValue;
+
public class UserContext {
private static User _defaultUser;
@@ -45,7 +47,7 @@
return new User(user.getUsername(), null);
}
- private static ThreadLocal<User> _user = new ThreadLocal<User>() {
+ private static ThreadValue<User> _user = new ThreadValue<User>() {
@Override
protected User initialValue() {
return getDefaultUser();
diff --git a/blur-util/src/main/java/org/apache/blur/utils/ThreadValue.java b/blur-util/src/main/java/org/apache/blur/utils/ThreadValue.java
new file mode 100644
index 0000000..80c59b6
--- /dev/null
+++ b/blur-util/src/main/java/org/apache/blur/utils/ThreadValue.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.blur.utils;
+
+import java.util.concurrent.ConcurrentMap;
+
+import com.google.common.collect.MapMaker;
+
+/**
+ * Per-thread value holder used as a replacement for {@link java.lang.ThreadLocal}.
+ * Values are keyed by the current {@link Thread} in a single concurrent map
+ * whose keys are held weakly, so entries for threads that have terminated
+ * become eligible for garbage collection.
+ *
+ * <p>Thread-safety: each thread only ever reads/writes the entry for
+ * {@code Thread.currentThread()}, so the get-then-put sequences below
+ * cannot race with another thread on the same key.
+ *
+ * <p>NOTE(review): unlike ThreadLocal there is no {@code remove()};
+ * a value lives until its thread dies -- confirm callers that clear
+ * state via {@code set(null)} are fine with the entry remaining.
+ */
+public class ThreadValue<T> {
+
+ // Mutable holder so set() can update in place without a second map write.
+ static class Value<T> {
+ T value;
+
+ Value(T value) {
+ this.value = value;
+ }
+ }
+
+ // Weak keys let dead threads' entries be collected.
+ private final ConcurrentMap<Thread, Value<T>> refs = new MapMaker().weakKeys().makeMap();
+
+ /** Computes the initial value for a thread that has none; default is null. */
+ protected T initialValue() {
+ return null;
+ }
+
+ /** Returns this thread's value, creating it via initialValue() on first access. */
+ public T get() {
+ Value<T> value = refs.get(Thread.currentThread());
+ if (value == null) {
+ refs.put(Thread.currentThread(), value = new Value<>(initialValue()));
+ }
+ return value.value;
+ }
+
+ /** Sets this thread's value, creating the holder on first access. */
+ public void set(T v) {
+ Value<T> value = refs.get(Thread.currentThread());
+ if (value == null) {
+ refs.put(Thread.currentThread(), value = new Value<>(v));
+ } else {
+ value.value = v;
+ }
+ }
+
+}
diff --git a/blur-util/src/test/java/org/apache/blur/utils/BlurConstantsTest.java b/blur-util/src/test/java/org/apache/blur/utils/BlurConstantsTest.java
index 25b5967..139afb2 100644
--- a/blur-util/src/test/java/org/apache/blur/utils/BlurConstantsTest.java
+++ b/blur-util/src/test/java/org/apache/blur/utils/BlurConstantsTest.java
@@ -43,7 +43,7 @@
"DELETE_MARKER_VALUE", "DELETE_MARKER", "BLUR_ZOOKEEPER_TIMEOUT_DEFAULT", "BLUR_THRIFT_DEFAULT_MAX_FRAME_SIZE",
"ZK_WAIT_TIME", "ACL_DISCOVER", "ACL_READ", "FAST_DECOMPRESSION", "FAST", "HIGH_COMPRESSION", "DEFAULT_VALUE",
"OFF_HEAP", "DEFAULT", "BLUR_CLUSTER", "BLUR_NODENAME", "BLUR_HTTP_STATUS_RUNNING_PORT",
- "BLUR_STREAM_SERVER_RUNNING_PORT", "SHARED_MERGE_SCHEDULER_PREFIX"));
+ "BLUR_STREAM_SERVER_RUNNING_PORT", "SHARED_MERGE_SCHEDULER_PREFIX", "UPDATE_ROW", "NEW_ROW", "INTERNAL"));
_emptyDefaultProperties = new HashSet<String>();
_emptyDefaultProperties.addAll(Arrays.asList("BLUR_HDFS_TRACE_PATH", "BLUR_SHARD_HOSTNAME",
"BLUR_SHARD_BLOCK_CACHE_TOTAL_SIZE", "BLUR_CONTROLLER_HOSTNAME", "BLUR_SHARD_READ_INTERCEPTOR",