blob: 24640298d484a74f2add808a624a95e25d499a0a [file] [log] [blame]
Index: src/java/org/apache/lucene/index/SegmentTermEnum.java
===================================================================
--- src/java/org/apache/lucene/index/SegmentTermEnum.java (revision 658582)
+++ src/java/org/apache/lucene/index/SegmentTermEnum.java (working copy)
@@ -150,10 +150,15 @@
return true;
}
- /** Optimized scan, without allocating new terms. */
- final void scanTo(Term term) throws IOException {
+ /** Optimized scan, without allocating new terms.
+ * Return number of invocations to next(). */
+ final int scanTo(Term term) throws IOException {
scanBuffer.set(term);
- while (scanBuffer.compareTo(termBuffer) > 0 && next()) {}
+ int count = 0;
+ while (scanBuffer.compareTo(termBuffer) > 0 && next()) {
+ count++;
+ }
+ return count;
}
/** Returns the current Term in the enumeration.
Index: src/java/org/apache/lucene/index/TermInfosReader.java
===================================================================
--- src/java/org/apache/lucene/index/TermInfosReader.java (revision 658582)
+++ src/java/org/apache/lucene/index/TermInfosReader.java (working copy)
@@ -21,6 +21,8 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.util.cache.Cache;
+import org.apache.lucene.util.cache.SimpleLRUCache;
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
* Directory. Pairs are accessed either by Term or by ordinal position the
@@ -31,7 +33,7 @@
private String segment;
private FieldInfos fieldInfos;
- private ThreadLocal enumerators = new ThreadLocal();
+ private ThreadLocal threadResources = new ThreadLocal();
private SegmentTermEnum origEnum;
private long size;
@@ -44,6 +46,18 @@
private int indexDivisor = 1;
private int totalIndexInterval;
+ private final static int DEFAULT_CACHE_SIZE = 1024;
+
+ /**
+ * Per-thread resources managed by ThreadLocal
+ */
+ private static final class ThreadResources {
+ SegmentTermEnum termEnum;
+
+ // Used for caching the least recently looked-up Terms
+ Cache termInfoCache;
+ }
+
TermInfosReader(Directory dir, String seg, FieldInfos fis)
throws CorruptIndexException, IOException {
this(dir, seg, fis, BufferedIndexInput.BUFFER_SIZE);
@@ -129,7 +143,7 @@
origEnum.close();
if (indexEnum != null)
indexEnum.close();
- enumerators.set(null);
+ threadResources.set(null);
}
/** Returns the number of term/value pairs in the set. */
@@ -137,13 +151,16 @@
return size;
}
- private SegmentTermEnum getEnum() {
- SegmentTermEnum termEnum = (SegmentTermEnum)enumerators.get();
- if (termEnum == null) {
- termEnum = terms();
- enumerators.set(termEnum);
+ private ThreadResources getThreadResources() {
+ ThreadResources resources = (ThreadResources)threadResources.get();
+ if (resources == null) {
+ resources = new ThreadResources();
+ resources.termEnum = terms();
+ // Cache does not have to be thread-safe, it is only used by one thread at the same time
+ resources.termInfoCache = new SimpleLRUCache(DEFAULT_CACHE_SIZE);
+ threadResources.set(resources);
}
- return termEnum;
+ return resources;
}
private synchronized void ensureIndexIsRead() throws IOException {
@@ -189,60 +206,94 @@
return hi;
}
- private final void seekEnum(int indexOffset) throws IOException {
- getEnum().seek(indexPointers[indexOffset],
+ private final void seekEnum(SegmentTermEnum enumerator, int indexOffset) throws IOException {
+ enumerator.seek(indexPointers[indexOffset],
(indexOffset * totalIndexInterval) - 1,
indexTerms[indexOffset], indexInfos[indexOffset]);
}
/** Returns the TermInfo for a Term in the set, or null. */
TermInfo get(Term term) throws IOException {
+ return get(term, true);
+ }
+
+ /** Returns the TermInfo for a Term in the set, or null. */
+ private TermInfo get(Term term, boolean useCache) throws IOException {
if (size == 0) return null;
ensureIndexIsRead();
-
+
+ TermInfo ti;
+ ThreadResources resources = getThreadResources();
+ Cache cache = null;
+
+ if (useCache) {
+ cache = resources.termInfoCache;
+ // check the cache first if the term was recently looked up
+ ti = (TermInfo) cache.get(term);
+ if (ti != null) {
+ return ti;
+ }
+ }
+
// optimize sequential access: first try scanning cached enum w/o seeking
- SegmentTermEnum enumerator = getEnum();
+ SegmentTermEnum enumerator = resources.termEnum;
if (enumerator.term() != null // term is at or past current
&& ((enumerator.prev() != null && term.compareTo(enumerator.prev())> 0)
|| term.compareTo(enumerator.term()) >= 0)) {
int enumOffset = (int)(enumerator.position/totalIndexInterval)+1;
if (indexTerms.length == enumOffset // but before end of block
- || term.compareTo(indexTerms[enumOffset]) < 0)
- return scanEnum(term); // no need to seek
+ || term.compareTo(indexTerms[enumOffset]) < 0) {
+ // no need to seek
+
+ int numScans = enumerator.scanTo(term);
+ if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
+ ti = enumerator.termInfo();
+ if (cache != null && numScans > 1) {
+ // we only want to put this TermInfo into the cache if
+ // scanEnum skipped more than one dictionary entry.
+ // This prevents RangeQueries or WildcardQueries to
+ // wipe out the cache when they iterate over a large numbers
+ // of terms in order
+ cache.put(term, ti);
+ }
+ } else {
+ ti = null;
+ }
+
+ return ti;
+ }
}
// random-access: must seek
- seekEnum(getIndexOffset(term));
- return scanEnum(term);
- }
-
- /** Scans within block for matching term. */
- private final TermInfo scanEnum(Term term) throws IOException {
- SegmentTermEnum enumerator = getEnum();
+ seekEnum(enumerator, getIndexOffset(term));
enumerator.scanTo(term);
- if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0)
- return enumerator.termInfo();
- else
- return null;
+ if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
+ ti = enumerator.termInfo();
+ if (cache != null) {
+ cache.put(term, ti);
+ }
+ } else {
+ ti = null;
+ }
+ return ti;
}
/** Returns the nth term in the set. */
final Term get(int position) throws IOException {
if (size == 0) return null;
- SegmentTermEnum enumerator = getEnum();
+ SegmentTermEnum enumerator = getThreadResources().termEnum;
if (enumerator != null && enumerator.term() != null &&
position >= enumerator.position &&
position < (enumerator.position + totalIndexInterval))
- return scanEnum(position); // can avoid seek
+ return scanEnum(enumerator, position); // can avoid seek
- seekEnum(position/totalIndexInterval); // must seek
- return scanEnum(position);
+ seekEnum(enumerator, position/totalIndexInterval); // must seek
+ return scanEnum(enumerator, position);
}
- private final Term scanEnum(int position) throws IOException {
- SegmentTermEnum enumerator = getEnum();
+ private final Term scanEnum(SegmentTermEnum enumerator, int position) throws IOException {
while(enumerator.position < position)
if (!enumerator.next())
return null;
@@ -256,9 +307,10 @@
ensureIndexIsRead();
int indexOffset = getIndexOffset(term);
- seekEnum(indexOffset);
+
+ SegmentTermEnum enumerator = getThreadResources().termEnum;
+ seekEnum(enumerator, indexOffset);
- SegmentTermEnum enumerator = getEnum();
while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
if (term.compareTo(enumerator.term()) == 0)
@@ -274,7 +326,9 @@
/** Returns an enumeration of terms starting at or after the named term. */
public SegmentTermEnum terms(Term term) throws IOException {
- get(term);
- return (SegmentTermEnum)getEnum().clone();
+ // don't use the cache in this call because we want to reposition the
+ // enumeration
+ get(term, false);
+ return (SegmentTermEnum)getThreadResources().termEnum.clone();
}
}
Index: src/java/org/apache/lucene/util/cache/Cache.java
===================================================================
--- src/java/org/apache/lucene/util/cache/Cache.java (revision 0)
+++ src/java/org/apache/lucene/util/cache/Cache.java (revision 0)
@@ -0,0 +1,100 @@
+package org.apache.lucene.util.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Base class for cache implementations.
+ */
+public abstract class Cache {
+
+ /**
+ * Simple Cache wrapper that synchronizes all
+ * calls that access the cache.
+ */
+ static class SynchronizedCache extends Cache {
+ Object mutex;
+ Cache cache;
+
+ SynchronizedCache(Cache cache) {
+ this.cache = cache;
+ this.mutex = this;
+ }
+
+ SynchronizedCache(Cache cache, Object mutex) {
+ this.cache = cache;
+ this.mutex = mutex;
+ }
+
+ public void put(Object key, Object value) {
+ synchronized(mutex) {cache.put(key, value);}
+ }
+
+ public Object get(Object key) {
+ synchronized(mutex) {return cache.get(key);}
+ }
+
+ public boolean containsKey(Object key) {
+ synchronized(mutex) {return cache.containsKey(key);}
+ }
+
+ public void close() {
+ synchronized(mutex) {cache.close();}
+ }
+
+ Cache getSynchronizedCache() {
+ return this;
+ }
+ }
+
+ public static Cache synchronizedCache(Cache cache) {
+ return cache.getSynchronizedCache();
+ }
+
+ /**
+ * Called by {@link #synchronizedCache(Cache)}. This method
+ * returns a {@link SynchronizedCache} instance that wraps
+ * this instance by default and can be overridden to return
+ * e. g. subclasses of {@link SynchronizedCache} or this
+ * in case this cache is already synchronized.
+ */
+ Cache getSynchronizedCache() {
+ return new SynchronizedCache(this);
+ }
+
+ /**
+ * Puts a (key, value)-pair into the cache.
+ */
+ public abstract void put(Object key, Object value);
+
+ /**
+ * Returns the value for the given key.
+ */
+ public abstract Object get(Object key);
+
+ /**
+ * Returns whether the given key is in this cache.
+ */
+ public abstract boolean containsKey(Object key);
+
+ /**
+ * Closes the cache.
+ */
+ public abstract void close();
+
+}
Index: src/java/org/apache/lucene/util/cache/SimpleLRUCache.java
===================================================================
--- src/java/org/apache/lucene/util/cache/SimpleLRUCache.java (revision 0)
+++ src/java/org/apache/lucene/util/cache/SimpleLRUCache.java (revision 0)
@@ -0,0 +1,49 @@
+package org.apache.lucene.util.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * Simple LRU cache implementation that uses a LinkedHashMap.
+ * This cache is not synchronized, use {@link Cache#synchronizedCache(Cache)}
+ * if needed.
+ *
+ */
+public class SimpleLRUCache extends SimpleMapCache {
+ private final static float LOADFACTOR = 0.75f;
+
+ private int cacheSize;
+
+ /**
+ * Creates a last-recently-used cache with the specified size.
+ */
+ public SimpleLRUCache(int cacheSize) {
+ super(null);
+ this.cacheSize = cacheSize;
+ int capacity = (int) Math.ceil(cacheSize / LOADFACTOR) + 1;
+
+ super.map = new LinkedHashMap(capacity, LOADFACTOR, true) {
+ protected boolean removeEldestEntry(Map.Entry eldest) {
+ return size() > SimpleLRUCache.this.cacheSize;
+ }
+ };
+ }
+
+}
Index: src/java/org/apache/lucene/util/cache/SimpleMapCache.java
===================================================================
--- src/java/org/apache/lucene/util/cache/SimpleMapCache.java (revision 0)
+++ src/java/org/apache/lucene/util/cache/SimpleMapCache.java (revision 0)
@@ -0,0 +1,100 @@
+package org.apache.lucene.util.cache;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Simple cache implementation that uses a HashMap to store (key, value) pairs.
+ * This cache is not synchronized, use {@link Cache#synchronizedCache(Cache)}
+ * if needed.
+ */
+public class SimpleMapCache extends Cache {
+ Map map;
+
+ public SimpleMapCache() {
+ this(new HashMap());
+ }
+
+ public SimpleMapCache(Map map) {
+ this.map = map;
+ }
+
+ public Object get(Object key) {
+ return map.get(key);
+ }
+
+ public void put(Object key, Object value) {
+ map.put(key, value);
+ }
+
+ public void close() {
+ // NOOP
+ }
+
+ public boolean containsKey(Object key) {
+ return map.containsKey(key);
+ }
+
+ /**
+ * Returns a Set containing all keys in this cache.
+ */
+ public Set keySet() {
+ return map.keySet();
+ }
+
+ Cache getSynchronizedCache() {
+ return new SynchronizedSimpleMapCache(this);
+ }
+
+ private static class SynchronizedSimpleMapCache extends SimpleMapCache {
+ Object mutex;
+ SimpleMapCache cache;
+
+ SynchronizedSimpleMapCache(SimpleMapCache cache) {
+ this.cache = cache;
+ this.mutex = this;
+ }
+
+ public void put(Object key, Object value) {
+ synchronized(mutex) {cache.put(key, value);}
+ }
+
+ public Object get(Object key) {
+ synchronized(mutex) {return cache.get(key);}
+ }
+
+ public boolean containsKey(Object key) {
+ synchronized(mutex) {return cache.containsKey(key);}
+ }
+
+ public void close() {
+ synchronized(mutex) {cache.close();}
+ }
+
+ public Set keySet() {
+ synchronized(mutex) {return cache.keySet();}
+ }
+
+ Cache getSynchronizedCache() {
+ return this;
+ }
+ }
+}
Index: src/test/org/apache/lucene/util/cache/TestSimpleLRUCache.java
===================================================================
--- src/test/org/apache/lucene/util/cache/TestSimpleLRUCache.java (revision 0)
+++ src/test/org/apache/lucene/util/cache/TestSimpleLRUCache.java (revision 0)
@@ -0,0 +1,63 @@
+package org.apache.lucene.util.cache;
+
+/**
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+import junit.framework.TestCase;
+
+public class TestSimpleLRUCache extends TestCase {
+
+ public void testLRUCache() throws Exception {
+ final int n = 100;
+ Object dummy = new Object();
+
+ Cache cache = new SimpleLRUCache(n);
+
+ for (int i = 0; i < n; i++) {
+ cache.put(new Integer(i), dummy);
+ }
+
+ // access every 2nd item in cache
+ for (int i = 0; i < n; i+=2) {
+ assertNotNull(cache.get(new Integer(i)));
+ }
+
+ // add n/2 elements to cache, the ones that weren't
+ // touched in the previous loop should now be thrown away
+ for (int i = n; i < n + (n / 2); i++) {
+ cache.put(new Integer(i), dummy);
+ }
+
+ // access every 4th item in cache
+ for (int i = 0; i < n; i+=4) {
+ assertNotNull(cache.get(new Integer(i)));
+ }
+
+ // add 3/4n elements to cache, the ones that weren't
+ // touched in the previous loops should now be thrown away
+ for (int i = n; i < n + (n * 3 / 4); i++) {
+ cache.put(new Integer(i), dummy);
+ }
+
+ // access every 4th item in cache
+ for (int i = 0; i < n; i+=4) {
+ assertNotNull(cache.get(new Integer(i)));
+ }
+
+ }
+
+}