merge trunk (1097442:1097764)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/realtime_search@1097767 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/dev-tools/idea/lucene/contrib/benchmark/benchmark.iml b/dev-tools/idea/lucene/contrib/benchmark/benchmark.iml
new file mode 100644
index 0000000..17863fa
--- /dev/null
+++ b/dev-tools/idea/lucene/contrib/benchmark/benchmark.iml
@@ -0,0 +1,37 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+ <component name="NewModuleRootManager" inherit-compiler-output="false">
+ <output url="file://$MODULE_DIR$/../../build/contrib/benchmark/classes/java" />
+ <output-test url="file://$MODULE_DIR$/../../build/contrib/benchmark/classes/test" />
+ <exclude-output />
+ <content url="file://$MODULE_DIR$">
+ <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
+ <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
+ <sourceFolder url="file://$MODULE_DIR$/scripts" isTestSource="false" />
+ <excludeFolder url="file://$MODULE_DIR$/temp" />
+ <excludeFolder url="file://$MODULE_DIR$/work" />
+ </content>
+ <orderEntry type="module" module-name="highlighter" />
+ <orderEntry type="module" module-name="common" />
+ <orderEntry type="module" module-name="remote" />
+ <orderEntry type="module" module-name="lucene" />
+ <orderEntry type="module" module-name="icu" />
+ <orderEntry type="module" module-name="queries" />
+ <orderEntry type="module" module-name="misc" />
+ <orderEntry type="module" module-name="memory" />
+ <orderEntry type="module" module-name="demo" />
+ <orderEntry type="inheritedJdk" />
+ <orderEntry type="sourceFolder" forTests="false" />
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="file://$MODULE_DIR$/lib" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ <jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
+ </library>
+ </orderEntry>
+ <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
+ </component>
+</module>
diff --git a/dev-tools/idea/lucene/contrib/remote/remote.iml b/dev-tools/idea/lucene/contrib/remote/remote.iml
new file mode 100644
index 0000000..9c4b274
--- /dev/null
+++ b/dev-tools/idea/lucene/contrib/remote/remote.iml
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+ <component name="NewModuleRootManager" inherit-compiler-output="false">
+ <output url="file://$MODULE_DIR$/../../build/contrib/remote/classes/java" />
+ <output-test url="file://$MODULE_DIR$/../../build/contrib/remote/classes/test" />
+ <exclude-output />
+ <content url="file://$MODULE_DIR$">
+ <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
+ <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
+ </content>
+ <orderEntry type="module" module-name="lucene" />
+ <orderEntry type="module" module-name="misc" />
+ <orderEntry type="inheritedJdk" />
+ <orderEntry type="sourceFolder" forTests="false" />
+ <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
+ </component>
+</module>
diff --git a/dev-tools/maven/lucene/contrib/ant/pom.xml.template b/dev-tools/maven/lucene/contrib/ant/pom.xml.template
index e300d0f..7071e96 100644
--- a/dev-tools/maven/lucene/contrib/ant/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/ant/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template b/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template
index a5d7647..3284122 100644
--- a/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/db/bdb-je/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template b/dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template
index d5d5123..2a249e5 100644
--- a/dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/db/bdb/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/demo/pom.xml.template b/dev-tools/maven/lucene/contrib/demo/pom.xml.template
index 7672424..e51ba82 100644
--- a/dev-tools/maven/lucene/contrib/demo/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/demo/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/highlighter/pom.xml.template b/dev-tools/maven/lucene/contrib/highlighter/pom.xml.template
index b54c05f..d435d4b 100644
--- a/dev-tools/maven/lucene/contrib/highlighter/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/highlighter/pom.xml.template
@@ -45,7 +45,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/instantiated/pom.xml.template b/dev-tools/maven/lucene/contrib/instantiated/pom.xml.template
index bc8b0e3..d06359e 100644
--- a/dev-tools/maven/lucene/contrib/instantiated/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/instantiated/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/lucli/pom.xml.template b/dev-tools/maven/lucene/contrib/lucli/pom.xml.template
index efa4c53..7e5615c 100644
--- a/dev-tools/maven/lucene/contrib/lucli/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/lucli/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/memory/pom.xml.template b/dev-tools/maven/lucene/contrib/memory/pom.xml.template
index 8017f82..8bef7fa 100644
--- a/dev-tools/maven/lucene/contrib/memory/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/memory/pom.xml.template
@@ -45,7 +45,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/spatial/pom.xml.template b/dev-tools/maven/lucene/contrib/spatial/pom.xml.template
index 1b822d7..e206309 100644
--- a/dev-tools/maven/lucene/contrib/spatial/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/spatial/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/spellchecker/pom.xml.template b/dev-tools/maven/lucene/contrib/spellchecker/pom.xml.template
index 93e0a58..e21b6a8 100644
--- a/dev-tools/maven/lucene/contrib/spellchecker/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/spellchecker/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/swing/pom.xml.template b/dev-tools/maven/lucene/contrib/swing/pom.xml.template
index bb5178d..6d4fdf3 100644
--- a/dev-tools/maven/lucene/contrib/swing/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/swing/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/wordnet/pom.xml.template b/dev-tools/maven/lucene/contrib/wordnet/pom.xml.template
index 458a6e0..2f18e37 100644
--- a/dev-tools/maven/lucene/contrib/wordnet/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/wordnet/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/contrib/xml-query-parser/pom.xml.template b/dev-tools/maven/lucene/contrib/xml-query-parser/pom.xml.template
index b22d237..f5fbd62 100644
--- a/dev-tools/maven/lucene/contrib/xml-query-parser/pom.xml.template
+++ b/dev-tools/maven/lucene/contrib/xml-query-parser/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/lucene/pom.xml.template b/dev-tools/maven/lucene/pom.xml.template
index 19fda15..b6c2fdf 100644
--- a/dev-tools/maven/lucene/pom.xml.template
+++ b/dev-tools/maven/lucene/pom.xml.template
@@ -33,7 +33,6 @@
<description>Lucene parent POM</description>
<modules>
<module>src</module>
- <module>src/test-framework</module>
<module>contrib</module>
</modules>
<build>
diff --git a/dev-tools/maven/lucene/src/pom.xml.template b/dev-tools/maven/lucene/src/pom.xml.template
index 6488755..4f65081 100644
--- a/dev-tools/maven/lucene/src/pom.xml.template
+++ b/dev-tools/maven/lucene/src/pom.xml.template
@@ -121,24 +121,6 @@
</programs>
</configuration>
</plugin>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>build-helper-maven-plugin</artifactId>
- <executions>
- <execution>
- <id>add-test-source</id>
- <phase>generate-test-sources</phase>
- <goals>
- <goal>add-test-source</goal>
- </goals>
- <configuration>
- <sources>
- <source>test-framework</source>
- </sources>
- </configuration>
- </execution>
- </executions>
- </plugin>
</plugins>
</build>
</project>
diff --git a/dev-tools/maven/modules/analysis/phonetic/pom.xml.template b/dev-tools/maven/modules/analysis/phonetic/pom.xml.template
index b641f8a..2cdc3dd 100644
--- a/dev-tools/maven/modules/analysis/phonetic/pom.xml.template
+++ b/dev-tools/maven/modules/analysis/phonetic/pom.xml.template
@@ -45,7 +45,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/modules/analysis/smartcn/pom.xml.template b/dev-tools/maven/modules/analysis/smartcn/pom.xml.template
index 8a1239a..fcf5550 100644
--- a/dev-tools/maven/modules/analysis/smartcn/pom.xml.template
+++ b/dev-tools/maven/modules/analysis/smartcn/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/modules/analysis/stempel/pom.xml.template b/dev-tools/maven/modules/analysis/stempel/pom.xml.template
index 4e1cc7d..4f10e0d 100644
--- a/dev-tools/maven/modules/analysis/stempel/pom.xml.template
+++ b/dev-tools/maven/modules/analysis/stempel/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/modules/benchmark/pom.xml.template b/dev-tools/maven/modules/benchmark/pom.xml.template
index 75fb304..21b0c1a 100755
--- a/dev-tools/maven/modules/benchmark/pom.xml.template
+++ b/dev-tools/maven/modules/benchmark/pom.xml.template
@@ -43,7 +43,7 @@
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template b/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template
index 59b0d0a..08e70bf 100644
--- a/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template
+++ b/dev-tools/maven/solr/contrib/analysis-extras/pom.xml.template
@@ -75,7 +75,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/solr/contrib/clustering/pom.xml.template b/dev-tools/maven/solr/contrib/clustering/pom.xml.template
index fd205c1..7ac4497 100644
--- a/dev-tools/maven/solr/contrib/clustering/pom.xml.template
+++ b/dev-tools/maven/solr/contrib/clustering/pom.xml.template
@@ -60,7 +60,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template
index a9ee1f7..4a2aba3 100644
--- a/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template
+++ b/dev-tools/maven/solr/contrib/dataimporthandler/src/extras/pom.xml.template
@@ -67,7 +67,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template b/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template
index 302ac33..fc32779 100644
--- a/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template
+++ b/dev-tools/maven/solr/contrib/dataimporthandler/src/pom.xml.template
@@ -60,7 +60,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/solr/contrib/extraction/pom.xml.template b/dev-tools/maven/solr/contrib/extraction/pom.xml.template
index da012e5..9d82c74 100644
--- a/dev-tools/maven/solr/contrib/extraction/pom.xml.template
+++ b/dev-tools/maven/solr/contrib/extraction/pom.xml.template
@@ -63,7 +63,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/dev-tools/maven/solr/contrib/pom.xml.template b/dev-tools/maven/solr/contrib/pom.xml.template
index e4a731a..bd05a43 100644
--- a/dev-tools/maven/solr/contrib/pom.xml.template
+++ b/dev-tools/maven/solr/contrib/pom.xml.template
@@ -35,7 +35,6 @@
<module>clustering</module>
<module>dataimporthandler</module>
<module>extraction</module>
- <module>uima</module>
</modules>
<build>
<directory>../build/solr-contrib-aggregator</directory>
diff --git a/dev-tools/maven/solr/src/pom.xml.template b/dev-tools/maven/solr/src/pom.xml.template
index 85ddb31..da036fb 100644
--- a/dev-tools/maven/solr/src/pom.xml.template
+++ b/dev-tools/maven/solr/src/pom.xml.template
@@ -48,7 +48,7 @@
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
- <artifactId>lucene-test-framework</artifactId>
+ <artifactId>lucene-core</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 03781b9..2a9553b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -173,6 +173,70 @@
globally across IndexWriter sessions and persisted into a X.fnx file on
successful commit. The corresponding file format changes are backwards-
compatible. (Michael Busch, Simon Willnauer)
+
+* LUCENE-2956, LUCENE-2573, LUCENE-2324, LUCENE-2555: Changes from
+ DocumentsWriterPerThread:
+
+ - IndexWriter now uses a DocumentsWriter per thread when indexing documents.
+ Each DocumentsWriterPerThread indexes documents in its own private segment,
+ and the in-memory segments are no longer merged on flush. Instead, each
+ segment is separately flushed to disk and subsequently merged with normal
+ segment merging.
+
+ - DocumentsWriterPerThread (DWPT) is now flushed concurrently based on a
+ FlushPolicy. When a DWPT is flushed, a fresh DWPT is swapped in so that
+ indexing may continue concurrently with flushing. The selected DWPT flushes
+ all its RAM resident documents to disk. Note: a segment flush doesn't write
+ all RAM resident documents, only the documents private to the DWPT selected
+ for flushing.
+
+ - Flushing is now controlled by a FlushPolicy that is called for every add,
+ update or delete on IndexWriter. By default a DWPT is flushed either when it
+ reaches maxBufferedDocs or when the global active memory exceeds
+ ramBufferSizeMB. Once the active memory exceeds ramBufferSizeMB only the
+ largest DWPT is selected for flushing; the memory used by this DWPT is
+ subtracted from the active memory and added to a flushing memory pool, which
+ can lead to temporarily higher memory usage due to ongoing indexing.
+
+ - IndexWriter can now utilize a ramBufferSize > 2048 MB. Each DWPT can
+ address up to 2048 MB of memory, so the ramBufferSize is now bounded by the
+ maximum number of DWPTs available in the used DocumentsWriterPerThreadPool.
+ IndexWriter's net memory consumption can grow far beyond the 2048 MB limit
+ if the application can use all available DWPTs. To prevent a DWPT from
+ exhausting its address space, IndexWriter forcefully flushes a DWPT once its
+ hard memory limit is exceeded. This RAMPerThreadHardLimitMB can be
+ controlled via IndexWriterConfig and defaults to 1945 MB (see the sketch
+ below). Since IndexWriter flushes DWPTs concurrently, not all memory is
+ released immediately. Applications should still use a ramBufferSize
+ significantly lower than the JVM's available heap memory, since under high
+ load multiple flushing DWPTs can consume substantial transient memory when
+ IO performance is slow relative to the indexing rate.
+
+ - IndexWriter#commit no longer blocks concurrent indexing while flushing all
+ 'currently' RAM resident documents to disk. Yet, flushes that occur while a
+ full flush is running are queued and will happen after all DWPTs involved
+ in the full flush are done flushing. Applications that index with multiple
+ threads and trigger a full flush (eg. call commit() or open a new
+ NRT reader) can use significantly more transient memory.
+
+ - IndexWriter#addDocument and IndexWriter#updateDocument can block indexing
+ threads if the number of active plus flushing DWPTs exceeds a safety limit;
+ by default this is twice the maximum number of available thread states
+ (DWPTPool). This safety limit prevents applications from exhausting their
+ available memory if flushing can't keep up with concurrently indexing
+ threads.
+
+ - IndexWriter only applies and flushes deletes if the maxBufferedDelTerms
+ limit is reached during indexing. No segment flushes will be triggered
+ due to this setting.
+
+ - IndexWriter#flush(boolean, boolean) no longer synchronizes on IndexWriter.
+ A dedicated flushLock has been introduced to prevent multiple full flushes
+ from happening concurrently.
+
+ - DocumentsWriter doesn't write shared doc stores anymore.
+
+ (Mike McCandless, Michael Busch, Simon Willnauer)
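+
+ A minimal configuration sketch for the new settings (illustrative values
+ only; assumes an analyzer and a Directory are already in scope, and uses
+ the IndexWriterConfig setters referenced above):
+
+   IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
+   conf.setRAMBufferSizeMB(256.0);          // global active-memory flush trigger
+   conf.setRAMPerThreadHardLimitMB(1945);   // per-DWPT hard limit (the default)
+   conf.setMaxBufferedDeleteTerms(10000);   // applies deletes; never triggers a segment flush
+   IndexWriter writer = new IndexWriter(directory, conf);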
API Changes
diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
index c72a1f6..ae544cb 100644
--- a/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
+++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
@@ -72,13 +72,18 @@
public static final Integer MAX_INT = Integer.valueOf(Integer.MAX_VALUE);
- final AtomicLong bytesUsed = new AtomicLong();
+ final AtomicLong bytesUsed;
private final static boolean VERBOSE_DELETES = false;
long gen;
-
public BufferedDeletes(boolean sortTerms) {
+ this(sortTerms, new AtomicLong());
+ }
+
+ BufferedDeletes(boolean sortTerms, AtomicLong bytesUsed) {
+ assert bytesUsed != null;
+ this.bytesUsed = bytesUsed;
if (sortTerms) {
terms = new TreeMap<Term,Integer>();
} else {
diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
index 692496b..11e5573 100644
--- a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
+++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
@@ -33,8 +33,8 @@
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
-/* Tracks the stream of {@link BuffereDeletes}.
- * When DocumensWriter flushes, its buffered
+/* Tracks the stream of {@link BufferedDeletes}.
+ * When DocumentsWriterPerThread flushes, its buffered
* deletes are appended to this stream. We later
* apply these deletes (resolve them to the actual
* docIDs, per segment) when a merge is started
@@ -60,7 +60,7 @@
// used only by assert
private Term lastDeleteTerm;
-
+
private PrintStream infoStream;
private final AtomicLong bytesUsed = new AtomicLong();
private final AtomicInteger numTerms = new AtomicInteger();
@@ -75,26 +75,36 @@
infoStream.println("BD " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
}
}
-
+
public synchronized void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
}
// Appends a new packet of buffered deletes to the stream,
// setting its generation:
- public synchronized void push(FrozenBufferedDeletes packet) {
+ public synchronized long push(FrozenBufferedDeletes packet) {
+ /*
+ * The insert operation must be atomic. If we let threads increment the gen
+ * and push the packet afterwards we risk that packets are out of order.
+ * With DWPT this is possible if two or more flushes are racing to push
+ * updates. If the pushed packets got out of order we would lose documents,
+ * since deletes would be applied to the wrong segments.
+ */
+ packet.setDelGen(nextGen++);
assert packet.any();
- assert checkDeleteStats();
- assert packet.gen < nextGen;
+ assert checkDeleteStats();
+ assert packet.delGen() < nextGen;
+ assert deletes.isEmpty() || deletes.get(deletes.size()-1).delGen() < packet.delGen() : "Delete packets must be in order";
deletes.add(packet);
numTerms.addAndGet(packet.numTermDeletes);
bytesUsed.addAndGet(packet.bytesUsed);
if (infoStream != null) {
- message("push deletes " + packet + " delGen=" + packet.gen + " packetCount=" + deletes.size());
+ message("push deletes " + packet + " delGen=" + packet.delGen() + " packetCount=" + deletes.size());
}
- assert checkDeleteStats();
+ assert checkDeleteStats();
+ return packet.delGen();
}
-
+
public synchronized void clear() {
deletes.clear();
nextGen = 1;
@@ -132,7 +142,7 @@
}
// Sorts SegmentInfos from smallest to biggest bufferedDelGen:
- private static final Comparator<SegmentInfo> sortByDelGen = new Comparator<SegmentInfo>() {
+ private static final Comparator<SegmentInfo> sortSegInfoByDelGen = new Comparator<SegmentInfo>() {
// @Override -- not until Java 1.6
public int compare(SegmentInfo si1, SegmentInfo si2) {
final long cmp = si1.getBufferedDeletesGen() - si2.getBufferedDeletesGen();
@@ -147,10 +157,10 @@
@Override
public boolean equals(Object other) {
- return sortByDelGen == other;
+ return sortSegInfoByDelGen == other;
}
};
-
+
/** Resolves the buffered deleted Term/Query/docIDs, into
* actual deleted docIDs in the deletedDocs BitVector for
* each SegmentReader. */
@@ -174,7 +184,7 @@
SegmentInfos infos2 = new SegmentInfos();
infos2.addAll(infos);
- Collections.sort(infos2, sortByDelGen);
+ Collections.sort(infos2, sortSegInfoByDelGen);
BufferedDeletes coalescedDeletes = null;
boolean anyNewDeletes = false;
@@ -191,19 +201,30 @@
final SegmentInfo info = infos2.get(infosIDX);
final long segGen = info.getBufferedDeletesGen();
- if (packet != null && segGen < packet.gen) {
+ if (packet != null && segGen < packet.delGen()) {
//System.out.println(" coalesce");
if (coalescedDeletes == null) {
coalescedDeletes = new BufferedDeletes(true);
}
- coalescedDeletes.update(packet);
+ if (!packet.isSegmentPrivate) {
+ /*
+ * Only coalesce if we are NOT on a segment private del packet: the segment private del packet
+ * must only be applied to segments with the same delGen. Yet, if a segment was already dropped
+ * from the SI because it had no documents remaining after del packets younger than its
+ * segPrivate packet (higher delGen) were applied, the segPrivate packet has not been
+ * removed.
+ */
+ coalescedDeletes.update(packet);
+ }
+
delIDX--;
- } else if (packet != null && segGen == packet.gen) {
+ } else if (packet != null && segGen == packet.delGen()) {
+ assert packet.isSegmentPrivate : "Packet and segment delGen can only match on a segment private del packet";
//System.out.println(" eq");
// Lock order: IW -> BD -> RP
assert readerPool.infoIsLive(info);
- SegmentReader reader = readerPool.get(info, false);
+ final SegmentReader reader = readerPool.get(info, false);
int delCount = 0;
final boolean segAllDeletes;
try {
@@ -213,7 +234,7 @@
delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader);
}
//System.out.println(" del exact");
- // Don't delete by Term here; DocumentsWriter
+ // Don't delete by Term here; DocumentsWriterPerThread
// already did that on flush:
delCount += applyQueryDeletes(packet.queriesIterable(), reader);
segAllDeletes = reader.numDocs() == 0;
@@ -236,7 +257,12 @@
if (coalescedDeletes == null) {
coalescedDeletes = new BufferedDeletes(true);
}
- coalescedDeletes.update(packet);
+
+ /*
+ * Since we are on a segment private del packet we must not
+ * update the coalescedDeletes here! We can simply advance to the
+ * next packet and seginfo.
+ */
delIDX--;
infosIDX--;
info.setBufferedDeletesGen(nextGen);
@@ -281,11 +307,11 @@
message("applyDeletes took " + (System.currentTimeMillis()-t0) + " msec");
}
// assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any;
-
+
return new ApplyDeletesResult(anyNewDeletes, nextGen++, allDeleted);
}
- public synchronized long getNextGen() {
+ synchronized long getNextGen() {
return nextGen++;
}
@@ -303,10 +329,9 @@
if (infoStream != null) {
message("prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + deletes.size());
}
-
final int limit = deletes.size();
for(int delIDX=0;delIDX<limit;delIDX++) {
- if (deletes.get(delIDX).gen >= minGen) {
+ if (deletes.get(delIDX).delGen() >= minGen) {
prune(delIDX);
assert checkDeleteStats();
return;
@@ -345,10 +370,10 @@
}
TermsEnum termsEnum = null;
-
+
String currentField = null;
DocsEnum docs = null;
-
+
assert checkDeleteTerm(null);
for (Term term : termsIter) {
@@ -372,10 +397,10 @@
assert checkDeleteTerm(term);
// System.out.println(" term=" + term);
-
+
if (termsEnum.seek(term.bytes(), false) == TermsEnum.SeekStatus.FOUND) {
DocsEnum docsEnum = termsEnum.docs(reader.getDeletedDocs(), docs);
-
+
if (docsEnum != null) {
while (true) {
final int docID = docsEnum.nextDoc();
@@ -401,7 +426,7 @@
public final Query query;
public final int limit;
public QueryAndLimit(Query query, int limit) {
- this.query = query;
+ this.query = query;
this.limit = limit;
}
}
@@ -449,7 +474,7 @@
lastDeleteTerm = term;
return true;
}
-
+
// only for assert
private boolean checkDeleteStats() {
int numTerms2 = 0;
diff --git a/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java b/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java
index 5355dee..5c8b921 100644
--- a/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java
@@ -81,6 +81,6 @@
}
public int getAddress() {
- return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK);
+ return upto + (offset0 & DocumentsWriterPerThread.BYTE_BLOCK_NOT_MASK);
}
}
\ No newline at end of file
diff --git a/lucene/src/java/org/apache/lucene/index/DocConsumer.java b/lucene/src/java/org/apache/lucene/index/DocConsumer.java
index 25d21a3..c227964 100644
--- a/lucene/src/java/org/apache/lucene/index/DocConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/DocConsumer.java
@@ -18,11 +18,12 @@
*/
import java.io.IOException;
-import java.util.Collection;
abstract class DocConsumer {
- abstract DocConsumerPerThread addThread(DocumentsWriterThreadState perThread) throws IOException;
- abstract void flush(final Collection<DocConsumerPerThread> threads, final SegmentWriteState state) throws IOException;
+ abstract void processDocument(FieldInfos fieldInfos) throws IOException;
+ abstract void finishDocument() throws IOException;
+ abstract void flush(final SegmentWriteState state) throws IOException;
abstract void abort();
abstract boolean freeRAM();
+ abstract void doAfterFlush();
}
diff --git a/lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java b/lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java
deleted file mode 100644
index 098e688..0000000
--- a/lucene/src/java/org/apache/lucene/index/DocConsumerPerThread.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-abstract class DocConsumerPerThread {
-
- /** Process the document. If there is
- * something for this document to be done in docID order,
- * you should encapsulate that as a
- * DocumentsWriter.DocWriter and return it.
- * DocumentsWriter then calls finish() on this object
- * when it's its turn. */
- abstract DocumentsWriter.DocWriter processDocument(FieldInfos fieldInfos) throws IOException;
-
- abstract void doAfterFlush();
- abstract void abort();
-}
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java
index 2abc0bb..1855530 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java
@@ -18,22 +18,25 @@
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.Map;
abstract class DocFieldConsumer {
- /** Called when DocumentsWriter decides to create a new
+ /** Called when DocumentsWriterPerThread decides to create a new
* segment */
- abstract void flush(Map<DocFieldConsumerPerThread,Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
+ abstract void flush(Map<FieldInfo, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
/** Called when an aborting exception is hit */
abstract void abort();
- /** Add a new thread */
- abstract DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException;
-
- /** Called when DocumentsWriter is using too much RAM.
+ /** Called when DocumentsWriterPerThread is using too much RAM.
* The consumer should free RAM, if possible, returning
* true if any RAM was in fact freed. */
abstract boolean freeRAM();
- }
+
+ abstract void startDocument() throws IOException;
+
+ abstract DocFieldConsumerPerField addField(FieldInfo fi);
+
+ abstract void finishDocument() throws IOException;
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java
index f70e815..960ea59 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java
@@ -24,4 +24,5 @@
/** Processes all occurrences of a single field */
abstract void processFields(Fieldable[] fields, int count) throws IOException;
abstract void abort();
+ abstract FieldInfo getFieldInfo();
}
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java
deleted file mode 100644
index c8bc164..0000000
--- a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerThread.java
+++ /dev/null
@@ -1,27 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-abstract class DocFieldConsumerPerThread {
- abstract void startDocument() throws IOException;
- abstract DocumentsWriter.DocWriter finishDocument() throws IOException;
- abstract DocFieldConsumerPerField addField(FieldInfo fi);
- abstract void abort();
-}
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumers.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumers.java
new file mode 100644
index 0000000..3d20248
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumers.java
@@ -0,0 +1,90 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/** This is just a "splitter" class: it lets you wrap two
+ * DocFieldConsumer instances as a single consumer. */
+
+final class DocFieldConsumers extends DocFieldConsumer {
+ final DocFieldConsumer one;
+ final DocFieldConsumer two;
+ final DocumentsWriterPerThread.DocState docState;
+
+ public DocFieldConsumers(DocFieldProcessor processor, DocFieldConsumer one, DocFieldConsumer two) {
+ this.one = one;
+ this.two = two;
+ this.docState = processor.docState;
+ }
+
+ @Override
+ public void flush(Map<FieldInfo, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
+
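+ // Split the combined per-field map back into one view per child consumer.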
+ Map<FieldInfo, DocFieldConsumerPerField> oneFieldsToFlush = new HashMap<FieldInfo, DocFieldConsumerPerField>();
+ Map<FieldInfo, DocFieldConsumerPerField> twoFieldsToFlush = new HashMap<FieldInfo, DocFieldConsumerPerField>();
+
+ for (Map.Entry<FieldInfo, DocFieldConsumerPerField> fieldToFlush : fieldsToFlush.entrySet()) {
+ DocFieldConsumersPerField perField = (DocFieldConsumersPerField) fieldToFlush.getValue();
+ oneFieldsToFlush.put(fieldToFlush.getKey(), perField.one);
+ twoFieldsToFlush.put(fieldToFlush.getKey(), perField.two);
+ }
+
+ one.flush(oneFieldsToFlush, state);
+ two.flush(twoFieldsToFlush, state);
+ }
+
+ @Override
+ public void abort() {
+ try {
+ one.abort();
+ } finally {
+ two.abort();
+ }
+ }
+
+ @Override
+ public boolean freeRAM() {
+ boolean any = one.freeRAM();
+ any |= two.freeRAM();
+ return any;
+ }
+
+ @Override
+ public void finishDocument() throws IOException {
+ try {
+ one.finishDocument();
+ } finally {
+ two.finishDocument();
+ }
+ }
+
+ @Override
+ public void startDocument() throws IOException {
+ one.startDocument();
+ two.startDocument();
+ }
+
+ @Override
+ public DocFieldConsumerPerField addField(FieldInfo fi) {
+ return new DocFieldConsumersPerField(this, fi, one.addField(fi), two.addField(fi));
+ }
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java
new file mode 100644
index 0000000..5abf003
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java
@@ -0,0 +1,56 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.document.Fieldable;
+
+final class DocFieldConsumersPerField extends DocFieldConsumerPerField {
+
+ final DocFieldConsumerPerField one;
+ final DocFieldConsumerPerField two;
+ final DocFieldConsumers parent;
+ final FieldInfo fieldInfo;
+
+ public DocFieldConsumersPerField(DocFieldConsumers parent, FieldInfo fi, DocFieldConsumerPerField one, DocFieldConsumerPerField two) {
+ this.parent = parent;
+ this.one = one;
+ this.two = two;
+ this.fieldInfo = fi;
+ }
+
+ @Override
+ public void processFields(Fieldable[] fields, int count) throws IOException {
+ one.processFields(fields, count);
+ two.processFields(fields, count);
+ }
+
+ @Override
+ public void abort() {
+ try {
+ one.abort();
+ } finally {
+ two.abort();
+ }
+ }
+
+ @Override
+ FieldInfo getFieldInfo() {
+ return fieldInfo;
+ }
+}
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
index 6416f51..7dbeb09 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
@@ -19,8 +19,13 @@
import java.io.IOException;
import java.util.Collection;
-import java.util.Map;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
/**
@@ -33,26 +38,39 @@
final class DocFieldProcessor extends DocConsumer {
- final DocumentsWriter docWriter;
final DocFieldConsumer consumer;
final StoredFieldsWriter fieldsWriter;
- public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) {
- this.docWriter = docWriter;
+ // Holds all fields seen in current doc
+ DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
+ int fieldCount;
+
+ // Hash table for all fields ever seen
+ DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
+ int hashMask = 1;
+ int totalFieldCount;
+
+ float docBoost;
+ int fieldGen;
+ final DocumentsWriterPerThread.DocState docState;
+
+ public DocFieldProcessor(DocumentsWriterPerThread docWriter, DocFieldConsumer consumer) {
+ this.docState = docWriter.docState;
this.consumer = consumer;
fieldsWriter = new StoredFieldsWriter(docWriter);
}
@Override
- public void flush(Collection<DocConsumerPerThread> threads, SegmentWriteState state) throws IOException {
+ public void flush(SegmentWriteState state) throws IOException {
- Map<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>> childThreadsAndFields = new HashMap<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>>();
- for ( DocConsumerPerThread thread : threads) {
- DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread;
- childThreadsAndFields.put(perThread.consumer, perThread.fields());
+ Map<FieldInfo, DocFieldConsumerPerField> childFields = new HashMap<FieldInfo, DocFieldConsumerPerField>();
+ Collection<DocFieldConsumerPerField> fields = fields();
+ for (DocFieldConsumerPerField f : fields) {
+ childFields.put(f.getFieldInfo(), f);
}
+
fieldsWriter.flush(state);
- consumer.flush(childThreadsAndFields, state);
+ consumer.flush(childFields, state);
// Important to save after asking consumer to flush so
// consumer can alter the FieldInfo* if necessary. EG,
@@ -64,8 +82,20 @@
@Override
public void abort() {
- fieldsWriter.abort();
- consumer.abort();
+ for(int i=0;i<fieldHash.length;i++) {
+ DocFieldProcessorPerField field = fieldHash[i];
+ while(field != null) {
+ final DocFieldProcessorPerField next = field.next;
+ field.abort();
+ field = next;
+ }
+ }
+
+ try {
+ fieldsWriter.abort();
+ } finally {
+ consumer.abort();
+ }
}
@Override
@@ -73,8 +103,213 @@
return consumer.freeRAM();
}
+ public Collection<DocFieldConsumerPerField> fields() {
+ Collection<DocFieldConsumerPerField> fields = new HashSet<DocFieldConsumerPerField>();
+ for(int i=0;i<fieldHash.length;i++) {
+ DocFieldProcessorPerField field = fieldHash[i];
+ while(field != null) {
+ fields.add(field.consumer);
+ field = field.next;
+ }
+ }
+ assert fields.size() == totalFieldCount;
+ return fields;
+ }
+
+ /** In flush we reset the fieldHash to not maintain per-field state
+ * across segments */
@Override
- public DocConsumerPerThread addThread(DocumentsWriterThreadState threadState) throws IOException {
- return new DocFieldProcessorPerThread(threadState, this);
+ void doAfterFlush() {
+ fieldHash = new DocFieldProcessorPerField[2];
+ hashMask = 1;
+ totalFieldCount = 0;
+ }
+
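+ // Doubles the fieldHash table and relinks each per-field entry under the new mask.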
+ private void rehash() {
+ final int newHashSize = (fieldHash.length*2);
+ assert newHashSize > fieldHash.length;
+
+ final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize];
+
+ // Rehash
+ int newHashMask = newHashSize-1;
+ for(int j=0;j<fieldHash.length;j++) {
+ DocFieldProcessorPerField fp0 = fieldHash[j];
+ while(fp0 != null) {
+ final int hashPos2 = fp0.fieldInfo.name.hashCode() & newHashMask;
+ DocFieldProcessorPerField nextFP0 = fp0.next;
+ fp0.next = newHashArray[hashPos2];
+ newHashArray[hashPos2] = fp0;
+ fp0 = nextFP0;
+ }
+ }
+
+ fieldHash = newHashArray;
+ hashMask = newHashMask;
+ }
+
+ @Override
+ public void processDocument(FieldInfos fieldInfos) throws IOException {
+
+ consumer.startDocument();
+ fieldsWriter.startDocument();
+
+ final Document doc = docState.doc;
+
+ fieldCount = 0;
+
+ final int thisFieldGen = fieldGen++;
+
+ final List<Fieldable> docFields = doc.getFields();
+ final int numDocFields = docFields.size();
+
+ // Absorb any new fields first seen in this document.
+ // Also absorb any changes to fields we had already
+ // seen before (eg suddenly turning on norms or
+ // vectors, etc.):
+
+ for(int i=0;i<numDocFields;i++) {
+ Fieldable field = docFields.get(i);
+ final String fieldName = field.name();
+
+ // Make sure we have a PerField allocated
+ final int hashPos = fieldName.hashCode() & hashMask;
+ DocFieldProcessorPerField fp = fieldHash[hashPos];
+ while(fp != null && !fp.fieldInfo.name.equals(fieldName)) {
+ fp = fp.next;
+ }
+
+ if (fp == null) {
+
+ // TODO FI: we need to genericize the "flags" that a
+ // field holds, and, how these flags are merged; it
+ // needs to be more "pluggable" such that if I want
+ // to have a new "thing" my Fields can do, I can
+ // easily add it
+ FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
+ field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
+ field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
+
+ fp = new DocFieldProcessorPerField(this, fi);
+ fp.next = fieldHash[hashPos];
+ fieldHash[hashPos] = fp;
+ totalFieldCount++;
+
+ if (totalFieldCount >= fieldHash.length/2)
+ rehash();
+ } else {
+ fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
+ field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
+ field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
+ }
+
+ if (thisFieldGen != fp.lastGen) {
+
+ // First time we're seeing this field for this doc
+ fp.fieldCount = 0;
+
+ if (fieldCount == fields.length) {
+ final int newSize = fields.length*2;
+ DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
+ System.arraycopy(fields, 0, newArray, 0, fieldCount);
+ fields = newArray;
+ }
+
+ fields[fieldCount++] = fp;
+ fp.lastGen = thisFieldGen;
+ }
+
+ fp.addField(field);
+
+ if (field.isStored()) {
+ fieldsWriter.addField(field, fp.fieldInfo);
+ }
+ }
+
+ // If we are writing vectors then we must visit
+ // fields in sorted order so they are written in
+ // sorted order. TODO: we actually only need to
+ // sort the subset of fields that have vectors
+ // enabled; we could save [small amount of] CPU
+ // here.
+ quickSort(fields, 0, fieldCount-1);
+
+ for(int i=0;i<fieldCount;i++)
+ fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
+
+ if (docState.maxTermPrefix != null && docState.infoStream != null) {
+ docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
+ docState.maxTermPrefix = null;
+ }
+ }
+
+ @Override
+ void finishDocument() throws IOException {
+ try {
+ fieldsWriter.finishDocument();
+ } finally {
+ consumer.finishDocument();
+ }
+ }
+
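+ // Median-of-three quicksort over field names (ascending); used above so
+ // fields are visited in sorted order when term vectors are enabled.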
+ void quickSort(DocFieldProcessorPerField[] array, int lo, int hi) {
+ if (lo >= hi)
+ return;
+ else if (hi == 1+lo) {
+ if (array[lo].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) {
+ final DocFieldProcessorPerField tmp = array[lo];
+ array[lo] = array[hi];
+ array[hi] = tmp;
+ }
+ return;
+ }
+
+ int mid = (lo + hi) >>> 1;
+
+ if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) {
+ DocFieldProcessorPerField tmp = array[lo];
+ array[lo] = array[mid];
+ array[mid] = tmp;
+ }
+
+ if (array[mid].fieldInfo.name.compareTo(array[hi].fieldInfo.name) > 0) {
+ DocFieldProcessorPerField tmp = array[mid];
+ array[mid] = array[hi];
+ array[hi] = tmp;
+
+ if (array[lo].fieldInfo.name.compareTo(array[mid].fieldInfo.name) > 0) {
+ DocFieldProcessorPerField tmp2 = array[lo];
+ array[lo] = array[mid];
+ array[mid] = tmp2;
+ }
+ }
+
+ int left = lo + 1;
+ int right = hi - 1;
+
+ if (left >= right)
+ return;
+
+ DocFieldProcessorPerField partition = array[mid];
+
+ for (; ;) {
+ while (array[right].fieldInfo.name.compareTo(partition.fieldInfo.name) > 0)
+ --right;
+
+ while (left < right && array[left].fieldInfo.name.compareTo(partition.fieldInfo.name) <= 0)
+ ++left;
+
+ if (left < right) {
+ DocFieldProcessorPerField tmp = array[left];
+ array[left] = array[right];
+ array[right] = tmp;
+ --right;
+ } else {
+ break;
+ }
+ }
+
+ quickSort(array, lo, left);
+ quickSort(array, left + 1, hi);
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java
index 8fb1da4..36b1908 100644
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java
+++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java
@@ -18,6 +18,8 @@
*/
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
/**
* Holds all per thread, per field state.
@@ -34,11 +36,22 @@
int fieldCount;
Fieldable[] fields = new Fieldable[1];
- public DocFieldProcessorPerField(final DocFieldProcessorPerThread perThread, final FieldInfo fieldInfo) {
- this.consumer = perThread.consumer.addField(fieldInfo);
+ public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) {
+ this.consumer = docFieldProcessor.consumer.addField(fieldInfo);
this.fieldInfo = fieldInfo;
}
+ public void addField(Fieldable field) {
+ if (fieldCount == fields.length) {
+ int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ Fieldable[] newArray = new Fieldable[newSize];
+ System.arraycopy(fields, 0, newArray, 0, fieldCount);
+ fields = newArray;
+ }
+
+ fields[fieldCount++] = field;
+ }
+
public void abort() {
consumer.abort();
}
diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
deleted file mode 100644
index f17530f..0000000
--- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerThread.java
+++ /dev/null
@@ -1,307 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Comparator;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.io.IOException;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
-
-/**
- * Gathers all Fieldables for a document under the same
- * name, updates FieldInfos, and calls per-field consumers
- * to process field by field.
- *
- * Currently, only a single thread visits the fields,
- * sequentially, for processing.
- */
-
-final class DocFieldProcessorPerThread extends DocConsumerPerThread {
-
- float docBoost;
- int fieldGen;
- final DocFieldProcessor docFieldProcessor;
- final DocFieldConsumerPerThread consumer;
-
- // Holds all fields seen in current doc
- DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
- int fieldCount;
-
- // Hash table for all fields seen in current segment
- DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
- int hashMask = 1;
- int totalFieldCount;
-
- final StoredFieldsWriterPerThread fieldsWriter;
-
- final DocumentsWriter.DocState docState;
-
- public DocFieldProcessorPerThread(DocumentsWriterThreadState threadState, DocFieldProcessor docFieldProcessor) throws IOException {
- this.docState = threadState.docState;
- this.docFieldProcessor = docFieldProcessor;
- this.consumer = docFieldProcessor.consumer.addThread(this);
- fieldsWriter = docFieldProcessor.fieldsWriter.addThread(docState);
- }
-
- @Override
- public void abort() {
- for(int i=0;i<fieldHash.length;i++) {
- DocFieldProcessorPerField field = fieldHash[i];
- while(field != null) {
- final DocFieldProcessorPerField next = field.next;
- field.abort();
- field = next;
- }
- }
- doAfterFlush();
- fieldsWriter.abort();
- consumer.abort();
- }
-
- public Collection<DocFieldConsumerPerField> fields() {
- Collection<DocFieldConsumerPerField> fields = new HashSet<DocFieldConsumerPerField>();
- for(int i=0;i<fieldHash.length;i++) {
- DocFieldProcessorPerField field = fieldHash[i];
- while(field != null) {
- fields.add(field.consumer);
- field = field.next;
- }
- }
- assert fields.size() == totalFieldCount;
- return fields;
- }
-
- /** In flush we reset the fieldHash to not maintain per-field state
- * across segments */
- @Override
- void doAfterFlush() {
- fieldHash = new DocFieldProcessorPerField[2];
- hashMask = 1;
- totalFieldCount = 0;
- }
-
- private void rehash() {
- final int newHashSize = (fieldHash.length*2);
- assert newHashSize > fieldHash.length;
-
- final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize];
-
- // Rehash
- int newHashMask = newHashSize-1;
- for(int j=0;j<fieldHash.length;j++) {
- DocFieldProcessorPerField fp0 = fieldHash[j];
- while(fp0 != null) {
- final int hashPos2 = fp0.fieldInfo.name.hashCode() & newHashMask;
- DocFieldProcessorPerField nextFP0 = fp0.next;
- fp0.next = newHashArray[hashPos2];
- newHashArray[hashPos2] = fp0;
- fp0 = nextFP0;
- }
- }
-
- fieldHash = newHashArray;
- hashMask = newHashMask;
- }
-
- @Override
- public DocumentsWriter.DocWriter processDocument(FieldInfos fieldInfos) throws IOException {
-
- consumer.startDocument();
- fieldsWriter.startDocument();
-
- final Document doc = docState.doc;
-
- assert docFieldProcessor.docWriter.writer.testPoint("DocumentsWriter.ThreadState.init start");
-
- fieldCount = 0;
-
- final int thisFieldGen = fieldGen++;
-
- final List<Fieldable> docFields = doc.getFields();
- final int numDocFields = docFields.size();
-
- // Absorb any new fields first seen in this document.
- // Also absorb any changes to fields we had already
- // seen before (eg suddenly turning on norms or
- // vectors, etc.):
-
- for(int i=0;i<numDocFields;i++) {
- Fieldable field = docFields.get(i);
- final String fieldName = field.name();
-
- // Make sure we have a PerField allocated
- final int hashPos = fieldName.hashCode() & hashMask;
- DocFieldProcessorPerField fp = fieldHash[hashPos];
- while(fp != null && !fp.fieldInfo.name.equals(fieldName))
- fp = fp.next;
-
- if (fp == null) {
-
- // TODO FI: we need to genericize the "flags" that a
- // field holds, and, how these flags are merged; it
- // needs to be more "pluggable" such that if I want
- // to have a new "thing" my Fields can do, I can
- // easily add it
- FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
- field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
- field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
- fp = new DocFieldProcessorPerField(this, fi);
- fp.next = fieldHash[hashPos];
- fieldHash[hashPos] = fp;
- totalFieldCount++;
-
- if (totalFieldCount >= fieldHash.length/2)
- rehash();
- } else {
- fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
- field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
- field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
- }
- if (thisFieldGen != fp.lastGen) {
-
- // First time we're seeing this field for this doc
- fp.fieldCount = 0;
-
- if (fieldCount == fields.length) {
- final int newSize = fields.length*2;
- DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
- System.arraycopy(fields, 0, newArray, 0, fieldCount);
- fields = newArray;
- }
-
- fields[fieldCount++] = fp;
- fp.lastGen = thisFieldGen;
- }
-
- if (fp.fieldCount == fp.fields.length) {
- Fieldable[] newArray = new Fieldable[fp.fields.length*2];
- System.arraycopy(fp.fields, 0, newArray, 0, fp.fieldCount);
- fp.fields = newArray;
- }
-
- fp.fields[fp.fieldCount++] = field;
- if (field.isStored()) {
- fieldsWriter.addField(field, fp.fieldInfo);
- }
- }
-
- // If we are writing vectors then we must visit
- // fields in sorted order so they are written in
- // sorted order. TODO: we actually only need to
- // sort the subset of fields that have vectors
- // enabled; we could save [small amount of] CPU
- // here.
- ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
-
- for(int i=0;i<fieldCount;i++)
- fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
-
- if (docState.maxTermPrefix != null && docState.infoStream != null) {
- docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriter.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
- docState.maxTermPrefix = null;
- }
-
- final DocumentsWriter.DocWriter one = fieldsWriter.finishDocument();
- final DocumentsWriter.DocWriter two = consumer.finishDocument();
- if (one == null) {
- return two;
- } else if (two == null) {
- return one;
- } else {
- PerDoc both = getPerDoc();
- both.docID = docState.docID;
- assert one.docID == docState.docID;
- assert two.docID == docState.docID;
- both.one = one;
- both.two = two;
- return both;
- }
- }
-
- private static final Comparator<DocFieldProcessorPerField> fieldsComp = new Comparator<DocFieldProcessorPerField>() {
- public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) {
- return o1.fieldInfo.name.compareTo(o2.fieldInfo.name);
- }
- };
-
- PerDoc[] docFreeList = new PerDoc[1];
- int freeCount;
- int allocCount;
-
- synchronized PerDoc getPerDoc() {
- if (freeCount == 0) {
- allocCount++;
- if (allocCount > docFreeList.length) {
- // Grow our free list up front to make sure we have
- // enough space to recycle all outstanding PerDoc
- // instances
- assert allocCount == 1+docFreeList.length;
- docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
- }
- return new PerDoc();
- } else
- return docFreeList[--freeCount];
- }
-
- synchronized void freePerDoc(PerDoc perDoc) {
- assert freeCount < docFreeList.length;
- docFreeList[freeCount++] = perDoc;
- }
-
- class PerDoc extends DocumentsWriter.DocWriter {
-
- DocumentsWriter.DocWriter one;
- DocumentsWriter.DocWriter two;
-
- @Override
- public long sizeInBytes() {
- return one.sizeInBytes() + two.sizeInBytes();
- }
-
- @Override
- public void finish() throws IOException {
- try {
- try {
- one.finish();
- } finally {
- two.finish();
- }
- } finally {
- freePerDoc(this);
- }
- }
-
- @Override
- public void abort() {
- try {
- try {
- one.abort();
- } finally {
- two.abort();
- }
- } finally {
- freePerDoc(this);
- }
- }
- }
-}
\ No newline at end of file
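For context on the removal above: DocFieldProcessorPerThread resolved field names through a chained hash with power-of-two sizing, bucket selection by hashCode() & mask, and a rehash once the table reaches 50% load; the per-thread refactoring keeps the same scheme. A minimal standalone sketch of that pattern (names are hypothetical, not the actual Lucene classes):

// Sketch (hypothetical names) of the chained, power-of-two hash used to
// map field names to per-field state; not the actual Lucene class.
class FieldEntry {
  final String name;
  FieldEntry next;          // collision chain
  FieldEntry(String name) { this.name = name; }
}

class FieldHash {
  private FieldEntry[] buckets = new FieldEntry[2];
  private int mask = 1;     // buckets.length - 1, valid while length is a power of two
  private int count;

  FieldEntry getOrAdd(String name) {
    final int pos = name.hashCode() & mask;
    FieldEntry e = buckets[pos];
    while (e != null && !e.name.equals(name)) {
      e = e.next;
    }
    if (e == null) {
      e = new FieldEntry(name);
      e.next = buckets[pos];              // push onto the chain head
      buckets[pos] = e;
      if (++count >= buckets.length / 2) { // keep load factor <= 0.5
        rehash();
      }
    }
    return e;
  }

  private void rehash() {
    final FieldEntry[] newBuckets = new FieldEntry[buckets.length * 2];
    final int newMask = newBuckets.length - 1;
    for (FieldEntry e : buckets) {
      while (e != null) {                 // re-link every chained entry
        final FieldEntry next = e.next;
        final int pos = e.name.hashCode() & newMask;
        e.next = newBuckets[pos];
        newBuckets[pos] = e;
        e = next;
      }
    }
    buckets = newBuckets;
    mask = newMask;
  }
}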
diff --git a/lucene/src/java/org/apache/lucene/index/DocInverter.java b/lucene/src/java/org/apache/lucene/index/DocInverter.java
index 48e8edf..95c0976 100644
--- a/lucene/src/java/org/apache/lucene/index/DocInverter.java
+++ b/lucene/src/java/org/apache/lucene/index/DocInverter.java
@@ -18,12 +18,13 @@
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.HashMap;
-import java.util.HashSet;
-
import java.util.Map;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeSource;
+
/** This is a DocFieldConsumer that inverts each field,
* separately, from a Document, and accepts a
@@ -34,42 +35,72 @@
final InvertedDocConsumer consumer;
final InvertedDocEndConsumer endConsumer;
- public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) {
+ final DocumentsWriterPerThread.DocState docState;
+
+ final FieldInvertState fieldState = new FieldInvertState();
+
+ final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource();
+
+ static class SingleTokenAttributeSource extends AttributeSource {
+ final CharTermAttribute termAttribute;
+ final OffsetAttribute offsetAttribute;
+
+ private SingleTokenAttributeSource() {
+ termAttribute = addAttribute(CharTermAttribute.class);
+ offsetAttribute = addAttribute(OffsetAttribute.class);
+ }
+
+ public void reinit(String stringValue, int startOffset, int endOffset) {
+ termAttribute.setEmpty().append(stringValue);
+ offsetAttribute.setOffset(startOffset, endOffset);
+ }
+ }
+
+ // Used to read a string value for a field
+ final ReusableStringReader stringReader = new ReusableStringReader();
+
+ public DocInverter(DocumentsWriterPerThread.DocState docState, InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) {
+ this.docState = docState;
this.consumer = consumer;
this.endConsumer = endConsumer;
}
@Override
- void flush(Map<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
+ void flush(Map<FieldInfo, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
- Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>>();
- Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> endChildThreadsAndFields = new HashMap<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>>();
+ Map<FieldInfo, InvertedDocConsumerPerField> childFieldsToFlush = new HashMap<FieldInfo, InvertedDocConsumerPerField>();
+ Map<FieldInfo, InvertedDocEndConsumerPerField> endChildFieldsToFlush = new HashMap<FieldInfo, InvertedDocEndConsumerPerField>();
- for (Map.Entry<DocFieldConsumerPerThread,Collection<DocFieldConsumerPerField>> entry : threadsAndFields.entrySet() ) {
-
-
- DocInverterPerThread perThread = (DocInverterPerThread) entry.getKey();
-
- Collection<InvertedDocConsumerPerField> childFields = new HashSet<InvertedDocConsumerPerField>();
- Collection<InvertedDocEndConsumerPerField> endChildFields = new HashSet<InvertedDocEndConsumerPerField>();
- for (final DocFieldConsumerPerField field: entry.getValue() ) {
- DocInverterPerField perField = (DocInverterPerField) field;
- childFields.add(perField.consumer);
- endChildFields.add(perField.endConsumer);
- }
-
- childThreadsAndFields.put(perThread.consumer, childFields);
- endChildThreadsAndFields.put(perThread.endConsumer, endChildFields);
+ for (Map.Entry<FieldInfo, DocFieldConsumerPerField> fieldToFlush : fieldsToFlush.entrySet()) {
+ DocInverterPerField perField = (DocInverterPerField) fieldToFlush.getValue();
+ childFieldsToFlush.put(fieldToFlush.getKey(), perField.consumer);
+ endChildFieldsToFlush.put(fieldToFlush.getKey(), perField.endConsumer);
}
-
- consumer.flush(childThreadsAndFields, state);
- endConsumer.flush(endChildThreadsAndFields, state);
+
+ consumer.flush(childFieldsToFlush, state);
+ endConsumer.flush(endChildFieldsToFlush, state);
+ }
+
+ @Override
+ public void startDocument() throws IOException {
+ consumer.startDocument();
+ endConsumer.startDocument();
+ }
+
+ public void finishDocument() throws IOException {
+ // TODO: allow endConsumer.finishDocument to also return
+ // a DocWriter
+ endConsumer.finishDocument();
+ consumer.finishDocument();
}
@Override
void abort() {
- consumer.abort();
- endConsumer.abort();
+ try {
+ consumer.abort();
+ } finally {
+ endConsumer.abort();
+ }
}
@Override
@@ -78,7 +109,8 @@
}
@Override
- public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) {
- return new DocInverterPerThread(docFieldProcessorPerThread, this);
+ public DocFieldConsumerPerField addField(FieldInfo fi) {
+ return new DocInverterPerField(this, fi);
}
+
}
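The SingleTokenAttributeSource moved into DocInverter above lets un-tokenized field values be indexed without allocating a TokenStream per document: one reusable token is re-initialized for each value via reinit(). A small self-contained sketch of that reuse pattern against the public AttributeSource API (the class name and field values here are made up):

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeSource;

public class SingleTokenDemo {
  public static void main(String[] args) {
    AttributeSource source = new AttributeSource();
    CharTermAttribute term = source.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = source.addAttribute(OffsetAttribute.class);

    for (String value : new String[] {"NY", "CA"}) { // hypothetical field values
      term.setEmpty().append(value);                 // reuse: overwrite the term text
      offset.setOffset(0, value.length());           // offsets span the whole value
      System.out.println(term.toString() + " [" + offset.startOffset() + "," + offset.endOffset() + ")");
    }
  }
}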
diff --git a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
index d360fbf..2463326 100644
--- a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
+++ b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
@@ -35,20 +35,20 @@
final class DocInverterPerField extends DocFieldConsumerPerField {
- final private DocInverterPerThread perThread;
- final private FieldInfo fieldInfo;
+ final private DocInverter parent;
+ final FieldInfo fieldInfo;
final InvertedDocConsumerPerField consumer;
final InvertedDocEndConsumerPerField endConsumer;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
- public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) {
- this.perThread = perThread;
+ public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) {
+ this.parent = parent;
this.fieldInfo = fieldInfo;
- docState = perThread.docState;
- fieldState = perThread.fieldState;
- this.consumer = perThread.consumer.addField(this, fieldInfo);
- this.endConsumer = perThread.endConsumer.addField(this, fieldInfo);
+ docState = parent.docState;
+ fieldState = parent.fieldState;
+ this.consumer = parent.consumer.addField(this, fieldInfo);
+ this.endConsumer = parent.endConsumer.addField(this, fieldInfo);
}
@Override
@@ -80,8 +80,8 @@
if (!field.isTokenized()) { // un-tokenized field
String stringValue = field.stringValue();
final int valueLength = stringValue.length();
- perThread.singleToken.reinit(stringValue, 0, valueLength);
- fieldState.attributeSource = perThread.singleToken;
+ parent.singleToken.reinit(stringValue, 0, valueLength);
+ fieldState.attributeSource = parent.singleToken;
consumer.start(field);
boolean success = false;
@@ -89,8 +89,9 @@
consumer.add();
success = true;
} finally {
- if (!success)
+ if (!success) {
docState.docWriter.setAborting();
+ }
}
fieldState.offset += valueLength;
fieldState.length++;
@@ -114,8 +115,8 @@
if (stringValue == null) {
throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
}
- perThread.stringReader.init(stringValue);
- reader = perThread.stringReader;
+ parent.stringReader.init(stringValue);
+ reader = parent.stringReader;
}
// Tokenize field and add to postingTable
@@ -166,8 +167,9 @@
consumer.add();
success = true;
} finally {
- if (!success)
+ if (!success) {
docState.docWriter.setAborting();
+ }
}
fieldState.length++;
fieldState.position++;
@@ -195,4 +197,9 @@
consumer.finish();
endConsumer.finish();
}
+
+ @Override
+ FieldInfo getFieldInfo() {
+ return fieldInfo;
+ }
}
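Both the new DocInverter.abort() and the PerDoc.finish()/abort() pairs deleted earlier rely on nested try/finally so that the second consumer is cleaned up even when the first throws. A minimal sketch of the idiom, assuming both steps may throw IOException (the Step interface is hypothetical):

import java.io.IOException;

class AbortChain {
  interface Step { void run() throws IOException; }

  // The second step always runs even if the first throws; the first
  // exception propagates unless the second step throws one of its own.
  static void runBoth(Step first, Step second) throws IOException {
    try {
      first.run();
    } finally {
      second.run();
    }
  }
}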
diff --git a/lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java
deleted file mode 100644
index 2816519..0000000
--- a/lucene/src/java/org/apache/lucene/index/DocInverterPerThread.java
+++ /dev/null
@@ -1,92 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/** This is a DocFieldConsumer that inverts each field,
- * separately, from a Document, and accepts a
- * InvertedTermsConsumer to process those terms. */
-
-final class DocInverterPerThread extends DocFieldConsumerPerThread {
- final DocInverter docInverter;
- final InvertedDocConsumerPerThread consumer;
- final InvertedDocEndConsumerPerThread endConsumer;
- final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource();
-
- static class SingleTokenAttributeSource extends AttributeSource {
- final CharTermAttribute termAttribute;
- final OffsetAttribute offsetAttribute;
-
- private SingleTokenAttributeSource() {
- termAttribute = addAttribute(CharTermAttribute.class);
- offsetAttribute = addAttribute(OffsetAttribute.class);
- }
-
- public void reinit(String stringValue, int startOffset, int endOffset) {
- termAttribute.setEmpty().append(stringValue);
- offsetAttribute.setOffset(startOffset, endOffset);
- }
- }
-
- final DocumentsWriter.DocState docState;
-
- final FieldInvertState fieldState = new FieldInvertState();
-
- // Used to read a string value for a field
- final ReusableStringReader stringReader = new ReusableStringReader();
-
- public DocInverterPerThread(DocFieldProcessorPerThread docFieldProcessorPerThread, DocInverter docInverter) {
- this.docInverter = docInverter;
- docState = docFieldProcessorPerThread.docState;
- consumer = docInverter.consumer.addThread(this);
- endConsumer = docInverter.endConsumer.addThread(this);
- }
-
- @Override
- public void startDocument() throws IOException {
- consumer.startDocument();
- endConsumer.startDocument();
- }
-
- @Override
- public DocumentsWriter.DocWriter finishDocument() throws IOException {
- // TODO: allow endConsumer.finishDocument to also return
- // a DocWriter
- endConsumer.finishDocument();
- return consumer.finishDocument();
- }
-
- @Override
- void abort() {
- try {
- consumer.abort();
- } finally {
- endConsumer.abort();
- }
- }
-
- @Override
- public DocFieldConsumerPerField addField(FieldInfo fi) {
- return new DocInverterPerField(this, fi);
- }
-}
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
index 196a1d8..5e316c2 100644
--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
@@ -19,36 +19,27 @@
import java.io.IOException;
import java.io.PrintStream;
-import java.text.NumberFormat;
-import java.util.ArrayList;
import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
import java.util.List;
-import java.util.concurrent.atomic.AtomicLong;
+import java.util.Queue;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
+import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
+import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
+import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.RAMFile;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitVector;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.RecyclingByteBlockAllocator;
-import org.apache.lucene.util.ThreadInterruptedException;
-
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
/**
* This class accepts multiple added documents and directly
- * writes a single segment file. It does this more
- * efficiently than creating a single segment per document
- * (with DocumentWriter) and doing standard merges on those
- * segments.
+ * writes segment files.
*
* Each added document is passed to the {@link DocConsumer},
* which in turn processes the document and interacts with
@@ -111,266 +102,117 @@
*/
final class DocumentsWriter {
- final AtomicLong bytesUsed = new AtomicLong(0);
- IndexWriter writer;
Directory directory;
- String segment; // Current segment we are working on
-
- private int nextDocID; // Next docID to be added
- private int numDocs; // # of docs added, but not yet flushed
-
- // Max # ThreadState instances; if there are more threads
- // than this they share ThreadStates
- private DocumentsWriterThreadState[] threadStates = new DocumentsWriterThreadState[0];
- private final HashMap<Thread,DocumentsWriterThreadState> threadBindings = new HashMap<Thread,DocumentsWriterThreadState>();
-
- boolean bufferIsFull; // True when it's time to write segment
- private boolean aborting; // True if an abort is pending
+ private volatile boolean closed;
PrintStream infoStream;
SimilarityProvider similarityProvider;
- // max # simultaneous threads; if there are more than
- // this, they wait for others to finish first
- private final int maxThreadStates;
+ List<String> newFiles;
- // TODO: cutover to BytesRefHash
- // Deletes for our still-in-RAM (to be flushed next) segment
- private BufferedDeletes pendingDeletes = new BufferedDeletes(false);
-
- static class DocState {
- DocumentsWriter docWriter;
- Analyzer analyzer;
- PrintStream infoStream;
- SimilarityProvider similarityProvider;
- int docID;
- Document doc;
- String maxTermPrefix;
+ final IndexWriter indexWriter;
- // Only called by asserts
- public boolean testPoint(String name) {
- return docWriter.writer.testPoint(name);
- }
+ private AtomicInteger numDocsInRAM = new AtomicInteger(0);
- public void clear() {
- // don't hold onto doc nor analyzer, in case it is
- // largish:
- doc = null;
- analyzer = null;
- }
- }
+ // TODO: cut over to BytesRefHash in BufferedDeletes
+ volatile DocumentsWriterDeleteQueue deleteQueue = new DocumentsWriterDeleteQueue();
+ private final Queue<FlushTicket> ticketQueue = new LinkedList<DocumentsWriter.FlushTicket>();
- /** Consumer returns this on each doc. This holds any
- * state that must be flushed synchronized "in docID
- * order". We gather these and flush them in order. */
- abstract static class DocWriter {
- DocWriter next;
- int docID;
- abstract void finish() throws IOException;
- abstract void abort();
- abstract long sizeInBytes();
+ private Collection<String> abortedFiles; // List of files that were written before last abort()
- void setNext(DocWriter next) {
- this.next = next;
- }
- }
+ final IndexingChain chain;
- /**
- * Create and return a new DocWriterBuffer.
- */
- PerDocBuffer newPerDocBuffer() {
- return new PerDocBuffer();
- }
-
- /**
- * RAMFile buffer for DocWriters.
- */
- class PerDocBuffer extends RAMFile {
-
- /**
- * Allocate bytes used from shared pool.
- */
- @Override
- protected byte[] newBuffer(int size) {
- assert size == PER_DOC_BLOCK_SIZE;
- return perDocAllocator.getByteBlock();
- }
-
- /**
- * Recycle the bytes used.
- */
- synchronized void recycle() {
- if (buffers.size() > 0) {
- setLength(0);
-
- // Recycle the blocks
- perDocAllocator.recycleByteBlocks(buffers);
- buffers.clear();
- sizeInBytes = 0;
-
- assert numBuffers() == 0;
- }
- }
- }
-
- /**
- * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method
- * which returns the DocConsumer that the DocumentsWriter calls to process the
- * documents.
- */
- abstract static class IndexingChain {
- abstract DocConsumer getChain(DocumentsWriter documentsWriter);
- }
-
- static final IndexingChain defaultIndexingChain = new IndexingChain() {
-
- @Override
- DocConsumer getChain(DocumentsWriter documentsWriter) {
- /*
- This is the current indexing chain:
-
- DocConsumer / DocConsumerPerThread
- --> code: DocFieldProcessor / DocFieldProcessorPerThread
- --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
- --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
- --> code: DocInverter / DocInverterPerThread / DocInverterPerField
- --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
- --> code: TermsHash / TermsHashPerThread / TermsHashPerField
- --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
- --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
- --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
- --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
- --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
- --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
- */
-
- // Build up indexing chain:
-
- final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriter);
- final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
- /*
- * nesting TermsHash instances here to allow the secondary (TermVectors) share the interned postings
- * via a shared ByteBlockPool. See TermsHashPerField for details.
- */
- final TermsHash termVectorsTermHash = new TermsHash(documentsWriter, false, termVectorsWriter, null);
- final InvertedDocConsumer termsHash = new TermsHash(documentsWriter, true, freqProxWriter, termVectorsTermHash);
- final NormsWriter normsWriter = new NormsWriter();
- final DocInverter docInverter = new DocInverter(termsHash, normsWriter);
- return new DocFieldProcessor(documentsWriter, docInverter);
- }
- };
-
- final DocConsumer consumer;
-
- // How much RAM we can use before flushing. This is 0 if
- // we are flushing by doc count instead.
-
- private final IndexWriterConfig config;
-
- private boolean closed;
- private FieldInfos fieldInfos;
-
- private final BufferedDeletesStream bufferedDeletesStream;
- private final IndexWriter.FlushControl flushControl;
-
- DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, IndexingChain indexingChain, FieldInfos fieldInfos,
+ final DocumentsWriterPerThreadPool perThreadPool;
+ final FlushPolicy flushPolicy;
+ final DocumentsWriterFlushControl flushControl;
+ final Healthiness healthiness;
+ DocumentsWriter(IndexWriterConfig config, Directory directory, IndexWriter writer, FieldNumberBiMap globalFieldNumbers,
BufferedDeletesStream bufferedDeletesStream) throws IOException {
this.directory = directory;
- this.writer = writer;
+ this.indexWriter = writer;
this.similarityProvider = config.getSimilarityProvider();
- this.maxThreadStates = config.getMaxThreadStates();
- this.fieldInfos = fieldInfos;
- this.bufferedDeletesStream = bufferedDeletesStream;
- flushControl = writer.flushControl;
- consumer = config.getIndexingChain().getChain(this);
- this.config = config;
+ this.perThreadPool = config.getIndexerThreadPool();
+ this.chain = config.getIndexingChain();
+ this.perThreadPool.initialize(this, globalFieldNumbers, config);
+ final FlushPolicy configuredPolicy = config.getFlushPolicy();
+ if (configuredPolicy == null) {
+ flushPolicy = new FlushByRamOrCountsPolicy();
+ } else {
+ flushPolicy = configuredPolicy;
+ }
+ flushPolicy.init(this);
+
+ healthiness = new Healthiness();
+ final long maxRamPerDWPT = config.getRAMPerThreadHardLimitMB() * 1024 * 1024;
+ flushControl = new DocumentsWriterFlushControl(this, healthiness, maxRamPerDWPT);
}
- // Buffer a specific docID for deletion. Currently only
- // used when we hit a exception when adding a document
- synchronized void deleteDocID(int docIDUpto) {
- pendingDeletes.addDocID(docIDUpto);
- // NOTE: we do not trigger flush here. This is
- // potentially a RAM leak, if you have an app that tries
- // to add docs but every single doc always hits a
- // non-aborting exception. Allowing a flush here gets
- // very messy because we are only invoked when handling
- // exceptions so to do this properly, while handling an
- // exception we'd have to go off and flush new deletes
- // which is risky (likely would hit some other
- // confounding exception).
- }
-
- boolean deleteQueries(Query... queries) {
- final boolean doFlush = flushControl.waitUpdate(0, queries.length);
- synchronized(this) {
- for (Query query : queries) {
- pendingDeletes.addQuery(query, numDocs);
- }
+ synchronized void deleteQueries(final Query... queries) throws IOException {
+ deleteQueue.addDelete(queries);
+ flushControl.doOnDelete();
+ if (flushControl.doApplyAllDeletes()) {
+ applyAllDeletes(deleteQueue);
}
- return doFlush;
- }
-
- boolean deleteQuery(Query query) {
- final boolean doFlush = flushControl.waitUpdate(0, 1);
- synchronized(this) {
- pendingDeletes.addQuery(query, numDocs);
- }
- return doFlush;
- }
-
- boolean deleteTerms(Term... terms) {
- final boolean doFlush = flushControl.waitUpdate(0, terms.length);
- synchronized(this) {
- for (Term term : terms) {
- pendingDeletes.addTerm(term, numDocs);
- }
- }
- return doFlush;
}
// TODO: we could check w/ FreqProxTermsWriter: if the
// term doesn't exist, don't bother buffering into the
// per-DWPT map (but still must go into the global map)
- boolean deleteTerm(Term term, boolean skipWait) {
- final boolean doFlush = flushControl.waitUpdate(0, 1, skipWait);
- synchronized(this) {
- pendingDeletes.addTerm(term, numDocs);
+ synchronized void deleteTerms(final Term... terms) throws IOException {
+ final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue;
+ deleteQueue.addDelete(terms);
+ flushControl.doOnDelete();
+ if (flushControl.doApplyAllDeletes()) {
+ applyAllDeletes(deleteQueue);
}
- return doFlush;
}
- /** If non-null, various details of indexing are printed
- * here. */
+ DocumentsWriterDeleteQueue currentDeleteSession() {
+ return deleteQueue;
+ }
+
+ private void applyAllDeletes(DocumentsWriterDeleteQueue deleteQueue) throws IOException {
+ if (deleteQueue != null) {
+ synchronized (ticketQueue) {
+ // Freeze and insert the delete flush ticket in the queue
+ ticketQueue.add(new FlushTicket(deleteQueue.freezeGlobalBuffer(null), false));
+ applyFlushTickets();
+ }
+ }
+ indexWriter.applyAllDeletes();
+ indexWriter.flushCount.incrementAndGet();
+ }
+
synchronized void setInfoStream(PrintStream infoStream) {
this.infoStream = infoStream;
- for(int i=0;i<threadStates.length;i++) {
- threadStates[i].docState.infoStream = infoStream;
+ final Iterator<ThreadState> it = perThreadPool.getAllPerThreadsIterator();
+ while (it.hasNext()) {
+ it.next().perThread.docState.infoStream = infoStream;
}
}
- /** Get current segment name we are writing. */
- synchronized String getSegment() {
- return segment;
- }
-
/** Returns how many docs are currently buffered in RAM. */
- synchronized int getNumDocs() {
- return numDocs;
+ int getNumDocs() {
+ return numDocsInRAM.get();
}
- void message(String message) {
- if (infoStream != null) {
- writer.message("DW: " + message);
- }
+ Collection<String> abortedFiles() {
+ return abortedFiles;
}
- synchronized void setAborting() {
+ // returns boolean for asserts
+ boolean message(String message) {
if (infoStream != null) {
- message("setAborting");
+ indexWriter.message("DW: " + message);
}
- aborting = true;
+ return true;
+ }
+
+ private void ensureOpen() throws AlreadyClosedException {
+ if (closed) {
+ throw new AlreadyClosedException("this IndexWriter is closed");
+ }
}
/** Called if we hit an exception at a bad time (when
@@ -378,816 +220,335 @@
* currently buffered docs. This resets our state,
* discarding any docs added since last flush. */
synchronized void abort() throws IOException {
- if (infoStream != null) {
- message("docWriter: abort");
- }
-
boolean success = false;
+ synchronized (this) {
+ deleteQueue.clear();
+ }
+
try {
-
- // Forcefully remove waiting ThreadStates from line
- waitQueue.abort();
-
- // Wait for all other threads to finish with
- // DocumentsWriter:
- waitIdle();
-
if (infoStream != null) {
- message("docWriter: abort waitIdle done");
+ message("docWriter: abort");
}
- assert 0 == waitQueue.numWaiting: "waitQueue.numWaiting=" + waitQueue.numWaiting;
+ final Iterator<ThreadState> threadsIterator = perThreadPool.getActivePerThreadsIterator();
- waitQueue.waitingBytes = 0;
-
- pendingDeletes.clear();
-
- for (DocumentsWriterThreadState threadState : threadStates)
+ while (threadsIterator.hasNext()) {
+ ThreadState perThread = threadsIterator.next();
+ perThread.lock();
try {
- threadState.consumer.abort();
- } catch (Throwable t) {
+ if (perThread.isActive()) { // we might be closed
+ perThread.perThread.abort();
+ perThread.perThread.checkAndResetHasAborted();
+ } else {
+ assert closed;
+ }
+ } finally {
+ perThread.unlock();
}
-
- try {
- consumer.abort();
- } catch (Throwable t) {
}
- // Reset all postings data
- doAfterFlush();
success = true;
} finally {
- aborting = false;
- notifyAll();
if (infoStream != null) {
- message("docWriter: done abort; success=" + success);
+ message("docWriter: done abort; abortedFiles=" + abortedFiles + " success=" + success);
}
}
}
- /** Reset after a flush */
- private void doAfterFlush() throws IOException {
- // All ThreadStates should be idle when we are called
- assert allThreadsIdle();
- for (DocumentsWriterThreadState threadState : threadStates) {
- threadState.consumer.doAfterFlush();
- }
-
- threadBindings.clear();
- waitQueue.reset();
- segment = null;
- fieldInfos = new FieldInfos(fieldInfos);
- numDocs = 0;
- nextDocID = 0;
- bufferIsFull = false;
- for(int i=0;i<threadStates.length;i++) {
- threadStates[i].doAfterFlush();
- }
+ boolean anyChanges() {
+ return numDocsInRAM.get() != 0 || anyDeletions();
}
- private synchronized boolean allThreadsIdle() {
- for(int i=0;i<threadStates.length;i++) {
- if (!threadStates[i].isIdle) {
- return false;
- }
- }
- return true;
+ public int getBufferedDeleteTermsSize() {
+ return deleteQueue.getBufferedDeleteTermsSize();
}
- synchronized boolean anyChanges() {
- return numDocs != 0 || pendingDeletes.any();
- }
-
- // for testing
- public BufferedDeletes getPendingDeletes() {
- return pendingDeletes;
- }
-
- private void pushDeletes(SegmentInfo newSegment, SegmentInfos segmentInfos) {
- // Lock order: DW -> BD
- final long delGen = bufferedDeletesStream.getNextGen();
- if (pendingDeletes.any()) {
- if (segmentInfos.size() > 0 || newSegment != null) {
- final FrozenBufferedDeletes packet = new FrozenBufferedDeletes(pendingDeletes, delGen);
- if (infoStream != null) {
- message("flush: push buffered deletes startSize=" + pendingDeletes.bytesUsed.get() + " frozenSize=" + packet.bytesUsed);
- }
- bufferedDeletesStream.push(packet);
- if (infoStream != null) {
- message("flush: delGen=" + packet.gen);
- }
- if (newSegment != null) {
- newSegment.setBufferedDeletesGen(packet.gen);
- }
- } else {
- if (infoStream != null) {
- message("flush: drop buffered deletes: no segments");
- }
- // We can safely discard these deletes: since
- // there are no segments, the deletions cannot
- // affect anything.
- }
- pendingDeletes.clear();
- } else if (newSegment != null) {
- newSegment.setBufferedDeletesGen(delGen);
- }
+ //for testing
+ public int getNumBufferedDeleteTerms() {
+ return deleteQueue.numGlobalTermDeletes();
}
public boolean anyDeletions() {
- return pendingDeletes.any();
+ return deleteQueue.anyChanges();
}
- /** Flush all pending docs to a new segment */
- // Lock order: IW -> DW
- synchronized SegmentInfo flush(IndexWriter writer, IndexFileDeleter deleter, MergePolicy mergePolicy, SegmentInfos segmentInfos) throws IOException {
-
- final long startTime = System.currentTimeMillis();
-
- // We change writer's segmentInfos:
- assert Thread.holdsLock(writer);
-
- waitIdle();
-
- if (numDocs == 0) {
- // nothing to do!
- if (infoStream != null) {
- message("flush: no docs; skipping");
- }
- // Lock order: IW -> DW -> BD
- pushDeletes(null, segmentInfos);
- return null;
- }
-
- if (aborting) {
- if (infoStream != null) {
- message("flush: skip because aborting is set");
- }
- return null;
- }
-
- boolean success = false;
-
- SegmentInfo newSegment;
-
- try {
- assert nextDocID == numDocs;
- assert waitQueue.numWaiting == 0;
- assert waitQueue.waitingBytes == 0;
-
- if (infoStream != null) {
- message("flush postings as segment " + segment + " numDocs=" + numDocs);
- }
-
- final SegmentWriteState flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos,
- numDocs, writer.getConfig().getTermIndexInterval(),
- fieldInfos.buildSegmentCodecs(true),
- pendingDeletes);
- // Apply delete-by-docID now (delete-byDocID only
- // happens when an exception is hit processing that
- // doc, eg if analyzer has some problem w/ the text):
- if (pendingDeletes.docIDs.size() > 0) {
- flushState.deletedDocs = new BitVector(numDocs);
- for(int delDocID : pendingDeletes.docIDs) {
- flushState.deletedDocs.set(delDocID);
- }
- pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID);
- pendingDeletes.docIDs.clear();
- }
-
- newSegment = new SegmentInfo(segment, numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos);
-
- Collection<DocConsumerPerThread> threads = new HashSet<DocConsumerPerThread>();
- for (DocumentsWriterThreadState threadState : threadStates) {
- threads.add(threadState.consumer);
- }
-
- double startMBUsed = bytesUsed()/1024./1024.;
-
- consumer.flush(threads, flushState);
-
- newSegment.setHasVectors(flushState.hasVectors);
-
- if (infoStream != null) {
- message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors"));
- if (flushState.deletedDocs != null) {
- message("new segment has " + flushState.deletedDocs.count() + " deleted docs");
- }
- message("flushedFiles=" + newSegment.files());
- message("flushed codecs=" + newSegment.getSegmentCodecs());
- }
-
- if (mergePolicy.useCompoundFile(segmentInfos, newSegment)) {
- final String cfsFileName = IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
-
- if (infoStream != null) {
- message("flush: create compound file \"" + cfsFileName + "\"");
- }
-
- CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, cfsFileName);
- for(String fileName : newSegment.files()) {
- cfsWriter.addFile(fileName);
- }
- cfsWriter.close();
- deleter.deleteNewFiles(newSegment.files());
- newSegment.setUseCompoundFile(true);
- }
-
- // Must write deleted docs after the CFS so we don't
- // slurp the del file into CFS:
- if (flushState.deletedDocs != null) {
- final int delCount = flushState.deletedDocs.count();
- assert delCount > 0;
- newSegment.setDelCount(delCount);
- newSegment.advanceDelGen();
- final String delFileName = newSegment.getDelFileName();
- if (infoStream != null) {
- message("flush: write " + delCount + " deletes to " + delFileName);
- }
- boolean success2 = false;
- try {
- // TODO: in the NRT case it'd be better to hand
- // this del vector over to the
- // shortly-to-be-opened SegmentReader and let it
- // carry the changes; there's no reason to use
- // filesystem as intermediary here.
- flushState.deletedDocs.write(directory, delFileName);
- success2 = true;
- } finally {
- if (!success2) {
- try {
- directory.deleteFile(delFileName);
- } catch (Throwable t) {
- // suppress this so we keep throwing the
- // original exception
- }
- }
- }
- }
-
- if (infoStream != null) {
- message("flush: segment=" + newSegment);
- final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.;
- final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.;
- message(" ramUsed=" + nf.format(startMBUsed) + " MB" +
- " newFlushedSize=" + nf.format(newSegmentSize) + " MB" +
- " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" +
- " docs/MB=" + nf.format(numDocs / newSegmentSize) +
- " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%");
- }
-
- success = true;
- } finally {
- notifyAll();
- if (!success) {
- if (segment != null) {
- deleter.refresh(segment);
- }
- abort();
- }
- }
-
- doAfterFlush();
-
- // Lock order: IW -> DW -> BD
- pushDeletes(newSegment, segmentInfos);
- if (infoStream != null) {
- message("flush time " + (System.currentTimeMillis()-startTime) + " msec");
- }
-
- return newSegment;
- }
-
- synchronized void close() {
+ void close() {
closed = true;
- notifyAll();
+ flushControl.setClosed();
}
- /** Returns a free (idle) ThreadState that may be used for
- * indexing this one document. This call also pauses if a
- * flush is pending. If delTerm is non-null then we
- * buffer this deleted term after the thread state has
- * been acquired. */
- synchronized DocumentsWriterThreadState getThreadState(Document doc, Term delTerm) throws IOException {
+ boolean updateDocument(final Document doc, final Analyzer analyzer,
+ final Term delTerm) throws CorruptIndexException, IOException {
+ ensureOpen();
+ boolean maybeMerge = false;
+ final boolean isUpdate = delTerm != null;
+ if (healthiness.anyStalledThreads()) {
- final Thread currentThread = Thread.currentThread();
- assert !Thread.holdsLock(writer);
+ // Help out flushing any pending DWPTs so we can un-stall:
+ if (infoStream != null) {
+ message("WARNING DocumentsWriter has stalled threads; will hijack this thread to flush pending segment(s)");
+ }
- // First, find a thread state. If this thread already
- // has affinity to a specific ThreadState, use that one
- // again.
- DocumentsWriterThreadState state = threadBindings.get(currentThread);
- if (state == null) {
-
- // First time this thread has called us since last
- // flush. Find the least loaded thread state:
- DocumentsWriterThreadState minThreadState = null;
- for(int i=0;i<threadStates.length;i++) {
- DocumentsWriterThreadState ts = threadStates[i];
- if (minThreadState == null || ts.numThreads < minThreadState.numThreads) {
- minThreadState = ts;
+ // Try to pick up pending flushes here if possible
+ DocumentsWriterPerThread flushingDWPT;
+ while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
+ // Don't push the delete here since the update could fail!
+ maybeMerge = doFlush(flushingDWPT);
+ if (!healthiness.anyStalledThreads()) {
+ break;
}
}
- if (minThreadState != null && (minThreadState.numThreads == 0 || threadStates.length >= maxThreadStates)) {
- state = minThreadState;
- state.numThreads++;
- } else {
- // Just create a new "private" thread state
- DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1+threadStates.length];
- if (threadStates.length > 0) {
- System.arraycopy(threadStates, 0, newArray, 0, threadStates.length);
- }
- state = newArray[threadStates.length] = new DocumentsWriterThreadState(this);
- threadStates = newArray;
- }
- threadBindings.put(currentThread, state);
- }
- // Next, wait until my thread state is idle (in case
- // it's shared with other threads), and no flush/abort
- // pending
- waitReady(state);
-
- // Allocate segment name if this is the first doc since
- // last flush:
- if (segment == null) {
- segment = writer.newSegmentName();
- assert numDocs == 0;
- }
-
- state.docState.docID = nextDocID++;
-
- if (delTerm != null) {
- pendingDeletes.addTerm(delTerm, state.docState.docID);
- }
-
- numDocs++;
- state.isIdle = false;
- return state;
- }
-
- boolean addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
- return updateDocument(doc, analyzer, null);
- }
-
- boolean updateDocument(Document doc, Analyzer analyzer, Term delTerm)
- throws CorruptIndexException, IOException {
-
- // Possibly trigger a flush, or wait until any running flush completes:
- boolean doFlush = flushControl.waitUpdate(1, delTerm != null ? 1 : 0);
-
- // This call is synchronized but fast
- final DocumentsWriterThreadState state = getThreadState(doc, delTerm);
-
- final DocState docState = state.docState;
- docState.doc = doc;
- docState.analyzer = analyzer;
-
- boolean success = false;
- try {
- // This call is not synchronized and does all the
- // work
- final DocWriter perDoc;
- try {
- perDoc = state.consumer.processDocument(fieldInfos);
- } finally {
- docState.clear();
+ if (infoStream != null && healthiness.anyStalledThreads()) {
+ message("WARNING DocumentsWriter still has stalled threads; waiting");
}
- // This call is synchronized but fast
- finishDocument(state, perDoc);
+ healthiness.waitIfStalled(); // block if stalled
- success = true;
- } finally {
- if (!success) {
-
- // If this thread state had decided to flush, we
- // must clear it so another thread can flush
- if (doFlush) {
- flushControl.clearFlushPending();
- }
-
- if (infoStream != null) {
- message("exception in updateDocument aborting=" + aborting);
- }
-
- synchronized(this) {
-
- state.isIdle = true;
- notifyAll();
-
- if (aborting) {
- abort();
- } else {
- skipDocWriter.docID = docState.docID;
- boolean success2 = false;
- try {
- waitQueue.add(skipDocWriter);
- success2 = true;
- } finally {
- if (!success2) {
- abort();
- return false;
- }
- }
-
- // Immediately mark this document as deleted
- // since likely it was partially added. This
- // keeps indexing as "all or none" (atomic) when
- // adding a document:
- deleteDocID(state.docState.docID);
- }
- }
+ if (infoStream != null && healthiness.anyStalledThreads()) {
+ message("WARNING DocumentsWriter done waiting");
}
}
- doFlush |= flushControl.flushByRAMUsage("new document");
-
- return doFlush;
- }
-
- public synchronized void waitIdle() {
- while (!allThreadsIdle()) {
- try {
- wait();
- } catch (InterruptedException ie) {
- throw new ThreadInterruptedException(ie);
- }
- }
- }
-
- synchronized void waitReady(DocumentsWriterThreadState state) {
- while (!closed && (!state.isIdle || aborting)) {
- try {
- wait();
- } catch (InterruptedException ie) {
- throw new ThreadInterruptedException(ie);
- }
- }
-
- if (closed) {
- throw new AlreadyClosedException("this IndexWriter is closed");
- }
- }
-
- /** Does the synchronized work to finish/flush the
- * inverted document. */
- private void finishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter) throws IOException {
-
- // Must call this w/o holding synchronized(this) else
- // we'll hit deadlock:
- balanceRAM();
-
- synchronized(this) {
-
- assert docWriter == null || docWriter.docID == perThread.docState.docID;
-
- if (aborting) {
-
- // We are currently aborting, and another thread is
- // waiting for me to become idle. We just forcefully
- // idle this threadState; it will be fully reset by
- // abort()
- if (docWriter != null) {
- try {
- docWriter.abort();
- } catch (Throwable t) {
- }
- }
-
- perThread.isIdle = true;
-
- // wakes up any threads waiting on the wait queue
- notifyAll();
-
- return;
- }
-
- final boolean doPause;
-
- if (docWriter != null) {
- doPause = waitQueue.add(docWriter);
- } else {
- skipDocWriter.docID = perThread.docState.docID;
- doPause = waitQueue.add(skipDocWriter);
- }
-
- if (doPause) {
- waitForWaitQueue();
- }
-
- perThread.isIdle = true;
-
- // wakes up any threads waiting on the wait queue
- notifyAll();
- }
- }
-
- synchronized void waitForWaitQueue() {
- do {
- try {
- wait();
- } catch (InterruptedException ie) {
- throw new ThreadInterruptedException(ie);
- }
- } while (!waitQueue.doResume());
- }
-
- private static class SkipDocWriter extends DocWriter {
- @Override
- void finish() {
- }
- @Override
- void abort() {
- }
- @Override
- long sizeInBytes() {
- return 0;
- }
- }
- final SkipDocWriter skipDocWriter = new SkipDocWriter();
-
- NumberFormat nf = NumberFormat.getInstance();
-
- /* Initial chunks size of the shared byte[] blocks used to
- store postings data */
- final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK;
-
- /* if you increase this, you must fix field cache impl for
- * getTerms/getTermsIndex requires <= 32768. */
- final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2;
-
- /* Initial chunks size of the shared int[] blocks used to
- store postings data */
- final static int INT_BLOCK_SHIFT = 13;
- final static int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT;
- final static int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1;
-
- private List<int[]> freeIntBlocks = new ArrayList<int[]>();
-
- /* Allocate another int[] from the shared pool */
- synchronized int[] getIntBlock() {
- final int size = freeIntBlocks.size();
- final int[] b;
- if (0 == size) {
- b = new int[INT_BLOCK_SIZE];
- bytesUsed.addAndGet(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT);
- } else {
- b = freeIntBlocks.remove(size-1);
- }
- return b;
- }
-
- long bytesUsed() {
- return bytesUsed.get() + pendingDeletes.bytesUsed.get();
- }
-
- /* Return int[]s to the pool */
- synchronized void recycleIntBlocks(int[][] blocks, int start, int end) {
- for(int i=start;i<end;i++) {
- freeIntBlocks.add(blocks[i]);
- blocks[i] = null;
- }
- }
-
- final RecyclingByteBlockAllocator byteBlockAllocator = new RecyclingByteBlockAllocator(BYTE_BLOCK_SIZE, Integer.MAX_VALUE, bytesUsed);
-
- final static int PER_DOC_BLOCK_SIZE = 1024;
-
- final RecyclingByteBlockAllocator perDocAllocator = new RecyclingByteBlockAllocator(PER_DOC_BLOCK_SIZE, Integer.MAX_VALUE, bytesUsed);
-
- String toMB(long v) {
- return nf.format(v/1024./1024.);
- }
-
- /* We have three pools of RAM: Postings, byte blocks
- * (holds freq/prox posting data) and per-doc buffers
- * (stored fields/term vectors). Different docs require
- * varying amount of storage from these classes. For
- * example, docs with many unique single-occurrence short
- * terms will use up the Postings RAM and hardly any of
- * the other two. Whereas docs with very large terms will
- * use alot of byte blocks RAM. This method just frees
- * allocations from the pools once we are over-budget,
- * which balances the pools to match the current docs. */
- void balanceRAM() {
-
- final boolean doBalance;
- final long deletesRAMUsed;
-
- deletesRAMUsed = bufferedDeletesStream.bytesUsed();
-
- final long ramBufferSize;
- final double mb = config.getRAMBufferSizeMB();
- if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
- ramBufferSize = IndexWriterConfig.DISABLE_AUTO_FLUSH;
- } else {
- ramBufferSize = (long) (mb*1024*1024);
- }
-
- synchronized(this) {
- if (ramBufferSize == IndexWriterConfig.DISABLE_AUTO_FLUSH || bufferIsFull) {
- return;
- }
+ final ThreadState perThread = perThreadPool.getAndLock(Thread.currentThread(),
+ this, doc);
+ final DocumentsWriterPerThread flushingDWPT;
- doBalance = bytesUsed() + deletesRAMUsed >= ramBufferSize;
+ try {
+
+ if (!perThread.isActive()) {
+ ensureOpen();
+ assert false: "perThread is not active but we are still open";
+ }
+
+ final DocumentsWriterPerThread dwpt = perThread.perThread;
+ try {
+ dwpt.updateDocument(doc, analyzer, delTerm);
+ numDocsInRAM.incrementAndGet();
+ } finally {
+ if (dwpt.checkAndResetHasAborted()) {
+ flushControl.doOnAbort(perThread);
+ }
+ }
+ flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate);
+ } finally {
+ perThread.unlock();
}
-
- if (doBalance) {
-
- if (infoStream != null) {
- message(" RAM: balance allocations: usedMB=" + toMB(bytesUsed()) +
- " vs trigger=" + toMB(ramBufferSize) +
- " deletesMB=" + toMB(deletesRAMUsed) +
- " byteBlockFree=" + toMB(byteBlockAllocator.bytesUsed()) +
- " perDocFree=" + toMB(perDocAllocator.bytesUsed()));
+
+ if (flushingDWPT != null) {
+ maybeMerge |= doFlush(flushingDWPT);
+ } else {
+ final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush();
+ if (nextPendingFlush != null) {
+ maybeMerge |= doFlush(nextPendingFlush);
}
+ }
+ return maybeMerge;
+ }
- final long startBytesUsed = bytesUsed() + deletesRAMUsed;
-
- int iter = 0;
-
- // We free equally from each pool in 32 KB
- // chunks until we are below our threshold
- // (freeLevel)
-
- boolean any = true;
-
- final long freeLevel = (long) (0.95 * ramBufferSize);
-
- while(bytesUsed()+deletesRAMUsed > freeLevel) {
+ private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException {
+ boolean maybeMerge = false;
+ while (flushingDWPT != null) {
+ maybeMerge = true;
+ boolean success = false;
+ FlushTicket ticket = null;
- synchronized(this) {
- if (0 == perDocAllocator.numBufferedBlocks() &&
- 0 == byteBlockAllocator.numBufferedBlocks() &&
- 0 == freeIntBlocks.size() && !any) {
- // Nothing else to free -- must flush now.
- bufferIsFull = bytesUsed()+deletesRAMUsed > ramBufferSize;
- if (infoStream != null) {
- if (bytesUsed()+deletesRAMUsed > ramBufferSize) {
- message(" nothing to free; set bufferIsFull");
- } else {
- message(" nothing to free");
- }
+ try {
+ assert currentFullFlushDelQueue == null
+ || flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: "
+ + currentFullFlushDelQueue + " but was: " + flushingDWPT.deleteQueue
+ + " " + flushControl.isFullFlush();
+ /*
+ * Since with DWPT the flush process is concurrent and several DWPTs
+ * could flush at the same time, we must maintain the order of the
+ * flushes before we can apply the flushed segment and the frozen global
+ * deletes it is buffering. The reason for this is that the global
+ * deletes mark a certain point in time at which we took a DWPT out of
+ * rotation and froze the global deletes.
+ *
+ * Example: Flush 'A' starts and freezes the global deletes; then
+ * flush 'B' starts and freezes all deletes that have occurred since 'A'
+ * started. If 'B' finishes before 'A', we must wait until 'A' is done;
+ * otherwise the deletes frozen by 'B' are not applied to 'A' and we
+ * might fail to delete documents in 'A'.
+ */
+ try {
+ synchronized (ticketQueue) {
+ // Each flush is assigned a ticket in the order in which it acquires the ticketQueue lock
+ ticket = new FlushTicket(flushingDWPT.prepareFlush(), true);
+ ticketQueue.add(ticket);
+ }
+
+ // flush concurrently without locking
+ final FlushedSegment newSegment = flushingDWPT.flush();
+ synchronized (ticketQueue) {
+ ticket.segment = newSegment;
+ }
+ // Once we reach this point the flush was successful: the new segment has been assigned to the ticket.
+ success = true;
+ } finally {
+ if (!success && ticket != null) {
+ synchronized (ticketQueue) {
+ // In the case of a failure, make sure we still make progress: apply
+ // all the deletes even though the segment flush failed, since the
+ // flush ticket could hold global deletes (see FlushTicket#canPublish()).
+ ticket.isSegmentFlush = false;
}
- break;
- }
-
- if ((0 == iter % 4) && byteBlockAllocator.numBufferedBlocks() > 0) {
- byteBlockAllocator.freeBlocks(1);
- }
- if ((1 == iter % 4) && freeIntBlocks.size() > 0) {
- freeIntBlocks.remove(freeIntBlocks.size()-1);
- bytesUsed.addAndGet(-INT_BLOCK_SIZE * RamUsageEstimator.NUM_BYTES_INT);
- }
- if ((2 == iter % 4) && perDocAllocator.numBufferedBlocks() > 0) {
- perDocAllocator.freeBlocks(32); // Remove upwards of 32 blocks (each block is 1K)
}
}
-
- if ((3 == iter % 4) && any) {
- // Ask consumer to free any recycled state
- any = consumer.freeRAM();
- }
-
- iter++;
+ /*
+ * Now that we are done, try to publish from the ticket queue if the head
+ * of the queue has already finished its flush.
+ */
+ applyFlushTickets();
+ } finally {
+ flushControl.doAfterFlush(flushingDWPT);
+ flushingDWPT.checkAndResetHasAborted();
+ indexWriter.flushCount.incrementAndGet();
}
+
+ flushingDWPT = flushControl.nextPendingFlush();
+ }
+ return maybeMerge;
+ }
- if (infoStream != null) {
- message(" after free: freedMB=" + nf.format((startBytesUsed-bytesUsed()-deletesRAMUsed)/1024./1024.) + " usedMB=" + nf.format((bytesUsed()+deletesRAMUsed)/1024./1024.));
+ private void applyFlushTickets() throws IOException {
+ synchronized (ticketQueue) {
+ while (true) {
+ // Keep publishing eligible flushed segments:
+ final FlushTicket head = ticketQueue.peek();
+ if (head != null && head.canPublish()) {
+ ticketQueue.poll();
+ finishFlush(head.segment, head.frozenDeletes);
+ } else {
+ break;
+ }
}
}
}
- final WaitQueue waitQueue = new WaitQueue();
-
- private class WaitQueue {
- DocWriter[] waiting;
- int nextWriteDocID;
- int nextWriteLoc;
- int numWaiting;
- long waitingBytes;
-
- public WaitQueue() {
- waiting = new DocWriter[10];
- }
-
- synchronized void reset() {
- // NOTE: nextWriteLoc doesn't need to be reset
- assert numWaiting == 0;
- assert waitingBytes == 0;
- nextWriteDocID = 0;
- }
-
- synchronized boolean doResume() {
- final double mb = config.getRAMBufferSizeMB();
- final long waitQueueResumeBytes;
- if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
- waitQueueResumeBytes = 2*1024*1024;
- } else {
- waitQueueResumeBytes = (long) (mb*1024*1024*0.05);
- }
- return waitingBytes <= waitQueueResumeBytes;
- }
-
- synchronized boolean doPause() {
- final double mb = config.getRAMBufferSizeMB();
- final long waitQueuePauseBytes;
- if (mb == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
- waitQueuePauseBytes = 4*1024*1024;
- } else {
- waitQueuePauseBytes = (long) (mb*1024*1024*0.1);
- }
- return waitingBytes > waitQueuePauseBytes;
- }
-
- synchronized void abort() {
- int count = 0;
- for(int i=0;i<waiting.length;i++) {
- final DocWriter doc = waiting[i];
- if (doc != null) {
- doc.abort();
- waiting[i] = null;
- count++;
+ private void finishFlush(FlushedSegment newSegment, FrozenBufferedDeletes bufferedDeletes)
+ throws IOException {
+ // Finish the flushed segment and publish it to IndexWriter
+ if (newSegment == null) {
+ assert bufferedDeletes != null;
+ if (bufferedDeletes != null && bufferedDeletes.any()) {
+ indexWriter.bufferedDeletesStream.push(bufferedDeletes);
+ if (infoStream != null) {
+ message("flush: push buffered deletes: " + bufferedDeletes);
}
}
- waitingBytes = 0;
- assert count == numWaiting;
- numWaiting = 0;
+ } else {
+ publishFlushedSegment(newSegment, bufferedDeletes);
}
+ }
- private void writeDocument(DocWriter doc) throws IOException {
- assert doc == skipDocWriter || nextWriteDocID == doc.docID;
- boolean success = false;
- try {
- doc.finish();
- nextWriteDocID++;
- nextWriteLoc++;
- assert nextWriteLoc <= waiting.length;
- if (nextWriteLoc == waiting.length) {
- nextWriteLoc = 0;
- }
- success = true;
- } finally {
- if (!success) {
- setAborting();
- }
+ final void subtractFlushedNumDocs(int numFlushed) {
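+ // CAS retry loop: atomically subtract numFlushed from the RAM doc count without locking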
+ int oldValue = numDocsInRAM.get();
+ while (!numDocsInRAM.compareAndSet(oldValue, oldValue - numFlushed)) {
+ oldValue = numDocsInRAM.get();
+ }
+ }
+
+ /**
+ * Publishes the flushed segment, its segment-private deletes (if any), and its
+ * associated global deletes (if present) to the IndexWriter. The actual
+ * publishing operation is synced on IW -> BDS so that the {@link SegmentInfo}'s
+ * delete generation is always GlobalPacket_deleteGeneration + 1.
+ */
+ private void publishFlushedSegment(FlushedSegment newSegment, FrozenBufferedDeletes globalPacket)
+ throws IOException {
+ assert newSegment != null;
+ final SegmentInfo segInfo = indexWriter.prepareFlushedSegment(newSegment);
+ final BufferedDeletes deletes = newSegment.segmentDeletes;
+ FrozenBufferedDeletes packet = null;
+ if (deletes != null && deletes.any()) {
+ // Segment private delete
+ packet = new FrozenBufferedDeletes(deletes, true);
+ if (infoStream != null) {
+ message("flush: push buffered seg private deletes: " + packet);
}
}
- synchronized public boolean add(DocWriter doc) throws IOException {
+ // now publish!
+ indexWriter.publishFlushedSegment(segInfo, packet, globalPacket);
+ }
+
+ // for asserts
+ private volatile DocumentsWriterDeleteQueue currentFullFlushDelQueue = null;
+ // for asserts
+ private synchronized boolean setFlushingDeleteQueue(DocumentsWriterDeleteQueue session) {
+ currentFullFlushDelQueue = session;
+ return true;
+ }
+
+ /*
+ * FlushAllThreads is synced by IW fullFlushLock. Flushing all threads is a
+ * two-stage operation; the caller must ensure (in try/finally) that finishFullFlush
+ * is called after this method, to release the flush lock in DWFlushControl.
+ */
+ final boolean flushAllThreads()
+ throws IOException {
+ final DocumentsWriterDeleteQueue flushingDeleteQueue;
- assert doc.docID >= nextWriteDocID;
-
- if (doc.docID == nextWriteDocID) {
- writeDocument(doc);
- while(true) {
- doc = waiting[nextWriteLoc];
- if (doc != null) {
- numWaiting--;
- waiting[nextWriteLoc] = null;
- waitingBytes -= doc.sizeInBytes();
- writeDocument(doc);
- } else {
- break;
- }
- }
- } else {
-
- // I finished before documents that were added
- // before me. This can easily happen when I am a
- // small doc and the docs before me were large, or,
- // just due to luck in the thread scheduling. Just
- // add myself to the queue and when that large doc
- // finishes, it will flush me:
- int gap = doc.docID - nextWriteDocID;
- if (gap >= waiting.length) {
- // Grow queue
- DocWriter[] newArray = new DocWriter[ArrayUtil.oversize(gap, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
- assert nextWriteLoc >= 0;
- System.arraycopy(waiting, nextWriteLoc, newArray, 0, waiting.length-nextWriteLoc);
- System.arraycopy(waiting, 0, newArray, waiting.length-nextWriteLoc, nextWriteLoc);
- nextWriteLoc = 0;
- waiting = newArray;
- gap = doc.docID - nextWriteDocID;
- }
-
- int loc = nextWriteLoc + gap;
- if (loc >= waiting.length) {
- loc -= waiting.length;
- }
-
- // We should only wrap one time
- assert loc < waiting.length;
-
- // Nobody should be in my spot!
- assert waiting[loc] == null;
- waiting[loc] = doc;
- numWaiting++;
- waitingBytes += doc.sizeInBytes();
+ synchronized (this) {
+ flushingDeleteQueue = deleteQueue;
+ /* Cutover to a new delete queue. This must be synced on the flush control
+ * otherwise a new DWPT could sneak into the loop with an already flushing
+ * delete queue */
+ flushControl.markForFullFlush(); // swaps the delQueue synced on FlushControl
+ assert setFlushingDeleteQueue(flushingDeleteQueue);
+ }
+ assert currentFullFlushDelQueue != null;
+ assert currentFullFlushDelQueue != deleteQueue;
+
+ boolean anythingFlushed = false;
+ try {
+ DocumentsWriterPerThread flushingDWPT;
+ // Help out with flushing:
+ while ((flushingDWPT = flushControl.nextPendingFlush()) != null) {
+ anythingFlushed |= doFlush(flushingDWPT);
}
-
- return doPause();
+ // If a concurrent flush is still in flight wait for it
+ while (flushControl.anyFlushing()) {
+ flushControl.waitForFlush();
+ }
+ if (!anythingFlushed) { // apply deletes if we did not flush any document
+ synchronized (ticketQueue) {
+ ticketQueue.add(new FlushTicket(flushingDeleteQueue.freezeGlobalBuffer(null), false));
+ }
+ applyFlushTickets();
+ }
+ } finally {
+ assert flushingDeleteQueue == currentFullFlushDelQueue;
+ }
+ return anythingFlushed;
+ }
+
+ final void finishFullFlush(boolean success) {
+ assert setFlushingDeleteQueue(null);
+ if (success) {
+ // Release the flush lock
+ flushControl.finishFullFlush();
+ } else {
+ flushControl.abortFullFlushes();
+ }
+ }
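As the comment above flushAllThreads notes, a full flush is a two-stage operation and finishFullFlush must always run as the second stage. A hedged sketch of the caller-side contract (the real call site lives in IndexWriter under its fullFlushLock and is not part of this hunk):

    boolean anythingFlushed = false;
    boolean success = false;
    try {
      anythingFlushed = docWriter.flushAllThreads(); // stage one: flush and wait
      success = true;
    } finally {
      // stage two: always release the full-flush state in DWFlushControl,
      // aborting any checked-out flushes if stage one failed
      docWriter.finishFullFlush(success);
    }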
+
+ static final class FlushTicket {
+ final FrozenBufferedDeletes frozenDeletes;
+ /* access to non-final members must be synchronized on DW#ticketQueue */
+ FlushedSegment segment;
+ boolean isSegmentFlush;
+
+ FlushTicket(FrozenBufferedDeletes frozenDeletes, boolean isSegmentFlush) {
+ this.frozenDeletes = frozenDeletes;
+ this.isSegmentFlush = isSegmentFlush;
+ }
+
+ boolean canPublish() {
+ return (!isSegmentFlush || segment != null);
}
}
}
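FlushTicket exists to decouple flush completion order from publish order: a ticket is appended to ticketQueue when a flush starts, and segments are published strictly first-come-first-served. The applyFlushTickets body is outside this hunk, so the following is only an assumption-based sketch of that idea:

    // Sketch: drain the ticket queue in FIFO order, stopping at the first
    // ticket whose segment flush has not completed yet (canPublish() == false).
    void applyFlushTicketsSketch() throws IOException {
      synchronized (ticketQueue) {
        FlushTicket head;
        while ((head = ticketQueue.peek()) != null && head.canPublish()) {
          ticketQueue.poll();
          finishFlush(head.segment, head.frozenDeletes); // publish or push deletes
        }
      }
    }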
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java
new file mode 100644
index 0000000..486c126
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java
@@ -0,0 +1,396 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.lucene.search.Query;
+
+/**
+ * {@link DocumentsWriterDeleteQueue} is a non-blocking linked pending deletes
+ * queue. In contrast to other queue implementations we only maintain the
+ * tail of the queue. A delete queue is always used in the context of a set of
+ * DWPTs and a global delete pool. Each of the DWPTs and the global pool need to
+ * maintain their 'own' head of the queue (as a DeleteSlice instance per DWPT).
+ * The difference between the DWPT and the global pool is that the DWPT starts
+ * maintaining a head once it has added its first document, since for its segment's
+ * private deletes only the deletes after that document are relevant. The global
+ * pool instead starts maintaining the head once this instance is created, by
+ * taking the sentinel instance as its initial head.
+ * <p>
+ * Since each {@link DeleteSlice} maintains its own head and the list is only
+ * single linked the garbage collector takes care of pruning the list for us.
+ * All nodes in the list that are still relevant should be either directly or
+ * indirectly referenced by one of the DWPT's private {@link DeleteSlice} or by
+ * the global {@link BufferedDeletes} slice.
+ * <p>
+ * Each DWPT as well as the global delete pool maintain their private
+ * DeleteSlice instance. In the DWPT case updating a slice is equivalent to
+ * atomically finishing the document. The slice update guarantees a "happens
+ * before" relationship to all other updates in the same indexing session. When a
+ * DWPT updates a document it:
+ *
+ * <ol>
+ * <li>consumes a document and finishes its processing</li>
+ * <li>updates its private {@link DeleteSlice} either by calling
+ * {@link #updateSlice(DeleteSlice)} or {@link #add(Term, DeleteSlice)} (if the
+ * document has a delTerm)</li>
+ * <li>applies all deletes in the slice to its private {@link BufferedDeletes}
+ * and resets it</li>
+ * <li>increments its internal document id</li>
+ * </ol>
+ *
+ * The DWPT also doesn't apply its current document's delete term until it has
+ * updated its delete slice, which ensures the consistency of the update. If the
+ * update fails before the DeleteSlice could be updated, the deleteTerm
+ * will not be added to its private deletes, nor to the global deletes.
+ *
+ */
+final class DocumentsWriterDeleteQueue {
+
+ private volatile Node tail;
+
+ private static final AtomicReferenceFieldUpdater<DocumentsWriterDeleteQueue, Node> tailUpdater = AtomicReferenceFieldUpdater
+ .newUpdater(DocumentsWriterDeleteQueue.class, Node.class, "tail");
+
+ private final DeleteSlice globalSlice;
+ private final BufferedDeletes globalBufferedDeletes;
+ /* only acquired to update the global deletes */
+ private final ReentrantLock globalBufferLock = new ReentrantLock();
+
+ final long generation;
+
+ DocumentsWriterDeleteQueue() {
+ this(0);
+ }
+
+ DocumentsWriterDeleteQueue(long generation) {
+ this(new BufferedDeletes(false), generation);
+ }
+
+ DocumentsWriterDeleteQueue(BufferedDeletes globalBufferedDeletes, long generation) {
+ this.globalBufferedDeletes = globalBufferedDeletes;
+ this.generation = generation;
+ /*
+ * we use a sentinel instance as our initial tail. No slice will ever try to
+ * apply this tail since the head is always omitted.
+ */
+ tail = new Node(null); // sentinel
+ globalSlice = new DeleteSlice(tail);
+ }
+
+ void addDelete(Query... queries) {
+ add(new QueryArrayNode(queries));
+ tryApplyGlobalSlice();
+ }
+
+ void addDelete(Term... terms) {
+ add(new TermArrayNode(terms));
+ tryApplyGlobalSlice();
+ }
+
+ /**
+ * invariant for document update
+ */
+ void add(Term term, DeleteSlice slice) {
+ final TermNode termNode = new TermNode(term);
+ add(termNode);
+ /*
+     * this is an update request where the term is the updated document's
+     * delTerm. In that case we need to guarantee that this insert is atomic
+     * with regards to the given delete slice. This means that if two threads
+     * try to update the same document with the same delTerm, one of them must
+     * win. By taking the node we created for our delTerm as the new tail it
+     * is guaranteed that, if another thread adds the same term right after us,
+     * we will apply this delete the next time we update our slice and one of
+     * the two competing updates wins!
+ slice.sliceTail = termNode;
+ assert slice.sliceHead != slice.sliceTail : "slice head and tail must differ after add";
+    tryApplyGlobalSlice(); // TODO doing this each time is not necessary; maybe
+    // we can do it just every n times or so?
+ }
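Combined with the class javadoc, the per-document sequence a DWPT runs looks roughly like this (a condensed sketch; the real code is in DocumentsWriterPerThread#finishDocument later in this patch):

    // steps 1-2: enqueue the delTerm and atomically advance our slice tail to it
    deleteQueue.add(delTerm, deleteSlice);
    // step 3: fold everything in the slice into the DWPT-private deletes and reset it
    deleteSlice.apply(pendingDeletes, numDocsInRAM);
    // step 4: increment the DWPT's internal document id
    ++numDocsInRAM;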
+
+ void add(Node item) {
+ /*
+ * this non-blocking / 'wait-free' linked list add was inspired by Apache
+     * Harmony's ConcurrentLinkedQueue implementation.
+ */
+ while (true) {
+ final Node currentTail = this.tail;
+ final Node tailNext = currentTail.next;
+ if (tail == currentTail) {
+ if (tailNext != null) {
+ /*
+           * we are in an intermediate state here: the tail's next pointer has been
+           * advanced but the tail itself might not be updated yet. Help to
+           * advance the tail and try updating it again.
+ */
+ tailUpdater.compareAndSet(this, currentTail, tailNext); // can fail
+ } else {
+ /*
+           * we are in a quiescent state and can try to append the item to the
+           * current tail. If we fail to insert we just retry the operation, since
+           * somebody else has already added their item.
+ */
+ if (currentTail.casNext(null, item)) {
+ /*
+             * now that we are done we need to advance the tail. Another
+             * thread could have advanced it already, so we can ignore the return
+             * value of this CAS call.
+ */
+ tailUpdater.compareAndSet(this, currentTail, item);
+ return;
+ }
+ }
+ }
+ }
+ }
+
+ boolean anyChanges() {
+ globalBufferLock.lock();
+ try {
+ return !globalSlice.isEmpty() || globalBufferedDeletes.any();
+ } finally {
+ globalBufferLock.unlock();
+ }
+ }
+
+ void tryApplyGlobalSlice() {
+ if (globalBufferLock.tryLock()) {
+ /*
+       * The global buffer must be locked, but we don't need to update it if
+       * there is an update going on right now. It is sufficient to apply the
+       * deletes that have been added after the current in-flight global slice's
+       * tail the next time we can get the lock!
+ */
+ try {
+ if (updateSlice(globalSlice)) {
+ globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT);
+ }
+ } finally {
+ globalBufferLock.unlock();
+ }
+ }
+ }
+
+ FrozenBufferedDeletes freezeGlobalBuffer(DeleteSlice callerSlice) {
+ globalBufferLock.lock();
+ /*
+ * Here we freeze the global buffer so we need to lock it, apply all
+ * deletes in the queue and reset the global slice to let the GC prune the
+ * queue.
+ */
+    final Node currentTail = tail; // take the current tail and make it local;
+                                   // any changes after this call are applied later
+                                   // and are not relevant here
+    if (callerSlice != null) {
+      // Update the caller's slice so we are on the same page
+ callerSlice.sliceTail = currentTail;
+ }
+ try {
+ if (globalSlice.sliceTail != currentTail) {
+ globalSlice.sliceTail = currentTail;
+ globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT);
+ }
+
+ final FrozenBufferedDeletes packet = new FrozenBufferedDeletes(
+ globalBufferedDeletes, false);
+ globalBufferedDeletes.clear();
+ return packet;
+ } finally {
+ globalBufferLock.unlock();
+ }
+ }
+
+ DeleteSlice newSlice() {
+ return new DeleteSlice(tail);
+ }
+
+ boolean updateSlice(DeleteSlice slice) {
+    if (slice.sliceTail != tail) { // only advance the slice if the queue's tail has moved
+ slice.sliceTail = tail;
+ return true;
+ }
+ return false;
+ }
+
+ static class DeleteSlice {
+ // No need to be volatile, slices are thread captive (only accessed by one thread)!
+ Node sliceHead; // we don't apply this one
+ Node sliceTail;
+
+ DeleteSlice(Node currentTail) {
+ assert currentTail != null;
+ /*
+ * Initially this is a 0 length slice pointing to the 'current' tail of
+ * the queue. Once we update the slice we only need to assign the tail and
+ * have a new slice
+ */
+ sliceHead = sliceTail = currentTail;
+ }
+
+ void apply(BufferedDeletes del, int docIDUpto) {
+ if (sliceHead == sliceTail) {
+ // 0 length slice
+ return;
+ }
+ /*
+ * When we apply a slice we take the head and get its next as our first
+ * item to apply and continue until we applied the tail. If the head and
+ * tail in this slice are not equal then there will be at least one more
+ * non-null node in the slice!
+ */
+ Node current = sliceHead;
+ do {
+ current = current.next;
+        assert current != null : "slice property violated: nodes between head and tail must not be null";
+ current.apply(del, docIDUpto);
+ } while (current != sliceTail);
+ reset();
+ }
+
+ void reset() {
+ // Reset to a 0 length slice
+ sliceHead = sliceTail;
+ }
+
+ /**
+ * Returns <code>true</code> iff the given item is identical to the item
+     * held by the slice's tail, otherwise <code>false</code>.
+ */
+ boolean isTailItem(Object item) {
+ return sliceTail.item == item;
+ }
+
+ boolean isEmpty() {
+ return sliceHead == sliceTail;
+ }
+ }
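A brief illustration of the slice lifecycle described above (the BufferedDeletes buffer and the docIDUpto value of 42 are placeholders):

    DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
    BufferedDeletes privateDeletes = new BufferedDeletes(false);
    DocumentsWriterDeleteQueue.DeleteSlice slice = queue.newSlice();
    // zero-length at first: sliceHead == sliceTail
    queue.add(new Term("id", "1"), slice);  // slice tail now points at the new node
    slice.apply(privateDeletes, 42);        // applies exactly the nodes after sliceHead
    assert slice.isEmpty();                 // apply() resets the slice to zero length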
+
+ public int numGlobalTermDeletes() {
+ return globalBufferedDeletes.numTermDeletes.get();
+ }
+
+ void clear() {
+ globalBufferLock.lock();
+ try {
+ final Node currentTail = tail;
+ globalSlice.sliceHead = globalSlice.sliceTail = currentTail;
+ globalBufferedDeletes.clear();
+ } finally {
+ globalBufferLock.unlock();
+ }
+ }
+
+ private static class Node {
+ volatile Node next;
+ final Object item;
+
+ private Node(Object item) {
+ this.item = item;
+ }
+
+ static final AtomicReferenceFieldUpdater<Node, Node> nextUpdater = AtomicReferenceFieldUpdater
+ .newUpdater(Node.class, Node.class, "next");
+
+ void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
+ assert false : "sentinel item must never be applied";
+ }
+
+ boolean casNext(Node cmp, Node val) {
+ return nextUpdater.compareAndSet(this, cmp, val);
+ }
+ }
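Node runs its CAS through an AtomicReferenceFieldUpdater on the volatile next field rather than wrapping the pointer in an AtomicReference, saving one object allocation per queue node. The idiom in isolation:

    import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;

    final class Cell {
      volatile Cell next;

      static final AtomicReferenceFieldUpdater<Cell, Cell> NEXT_UPDATER =
          AtomicReferenceFieldUpdater.newUpdater(Cell.class, Cell.class, "next");

      boolean casNext(Cell expect, Cell update) {
        // CAS directly on the volatile field; no per-node AtomicReference needed
        return NEXT_UPDATER.compareAndSet(this, expect, update);
      }
    }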
+
+ private static final class TermNode extends Node {
+
+ TermNode(Term term) {
+ super(term);
+ }
+
+ @Override
+ void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
+ bufferedDeletes.addTerm((Term) item, docIDUpto);
+ }
+ }
+
+ private static final class QueryArrayNode extends Node {
+ QueryArrayNode(Query[] query) {
+ super(query);
+ }
+
+ @Override
+ void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
+ final Query[] queries = (Query[]) item;
+ for (Query query : queries) {
+ bufferedDeletes.addQuery(query, docIDUpto);
+ }
+ }
+ }
+
+ private static final class TermArrayNode extends Node {
+ TermArrayNode(Term[] term) {
+ super(term);
+ }
+
+ @Override
+ void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
+ final Term[] terms = (Term[]) item;
+ for (Term term : terms) {
+ bufferedDeletes.addTerm(term, docIDUpto);
+ }
+ }
+ }
+
+
+ private boolean forceApplyGlobalSlice() {
+ globalBufferLock.lock();
+ final Node currentTail = tail;
+ try {
+ if (globalSlice.sliceTail != currentTail) {
+ globalSlice.sliceTail = currentTail;
+ globalSlice.apply(globalBufferedDeletes, BufferedDeletes.MAX_INT);
+ }
+ return globalBufferedDeletes.any();
+ } finally {
+ globalBufferLock.unlock();
+ }
+ }
+
+ public int getBufferedDeleteTermsSize() {
+ globalBufferLock.lock();
+ try {
+ forceApplyGlobalSlice();
+ return globalBufferedDeletes.terms.size();
+ } finally {
+ globalBufferLock.unlock();
+ }
+ }
+
+ public long bytesUsed() {
+ return globalBufferedDeletes.bytesUsed.get();
+ }
+
+ @Override
+ public String toString() {
+ return "DWDQ: [ generation: " + generation + " ]";
+ }
+
+
+}
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
new file mode 100644
index 0000000..443df51
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
@@ -0,0 +1,394 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Queue;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
+import org.apache.lucene.util.ThreadInterruptedException;
+
+/**
+ * This class controls {@link DocumentsWriterPerThread} flushing during
+ * indexing. It tracks the memory consumption per
+ * {@link DocumentsWriterPerThread} and uses a configured {@link FlushPolicy} to
+ * decide if a {@link DocumentsWriterPerThread} must flush.
+ * <p>
+ * In addition to the {@link FlushPolicy} the flush control might also set a
+ * {@link DocumentsWriterPerThread} as flush pending if that
+ * {@link DocumentsWriterPerThread} exceeds the
+ * {@link IndexWriterConfig#getRAMPerThreadHardLimitMB()}, to prevent address
+ * space exhaustion.
+ */
+public final class DocumentsWriterFlushControl {
+
+ private final long hardMaxBytesPerDWPT;
+ private long activeBytes = 0;
+ private long flushBytes = 0;
+ private volatile int numPending = 0;
+ private volatile int numFlushing = 0;
+ final AtomicBoolean flushDeletes = new AtomicBoolean(false);
+ private boolean fullFlush = false;
+ private Queue<DocumentsWriterPerThread> flushQueue = new LinkedList<DocumentsWriterPerThread>();
+ // only for safety reasons if a DWPT is close to the RAM limit
+ private Queue<DocumentsWriterPerThread> blockedFlushes = new LinkedList<DocumentsWriterPerThread>();
+
+
+  long peakActiveBytes = 0; // only with assert
+  long peakFlushBytes = 0;  // only with assert
+  long peakNetBytes = 0;    // only with assert
+ private final Healthiness healthiness;
+ private final DocumentsWriterPerThreadPool perThreadPool;
+ private final FlushPolicy flushPolicy;
+ private boolean closed = false;
+ private final HashMap<DocumentsWriterPerThread, Long> flushingWriters = new HashMap<DocumentsWriterPerThread, Long>();
+ private final DocumentsWriter documentsWriter;
+
+ DocumentsWriterFlushControl(DocumentsWriter documentsWriter,
+ Healthiness healthiness, long hardMaxBytesPerDWPT) {
+ this.healthiness = healthiness;
+ this.perThreadPool = documentsWriter.perThreadPool;
+ this.flushPolicy = documentsWriter.flushPolicy;
+ this.hardMaxBytesPerDWPT = hardMaxBytesPerDWPT;
+ this.documentsWriter = documentsWriter;
+ }
+
+ public synchronized long activeBytes() {
+ return activeBytes;
+ }
+
+ public synchronized long flushBytes() {
+ return flushBytes;
+ }
+
+ public synchronized long netBytes() {
+ return flushBytes + activeBytes;
+ }
+
+ private void commitPerThreadBytes(ThreadState perThread) {
+ final long delta = perThread.perThread.bytesUsed()
+ - perThread.bytesUsed;
+ perThread.bytesUsed += delta;
+ /*
+     * We need to differentiate here whether we are pending, since setFlushPending
+     * moves the perThread memory to the flushBytes and we could have been set to
+     * pending during a delete
+ */
+ if (perThread.flushPending) {
+ flushBytes += delta;
+ } else {
+ activeBytes += delta;
+ }
+ assert updatePeaks(delta);
+ }
+
+ // only for asserts
+ private boolean updatePeaks(long delta) {
+ peakActiveBytes = Math.max(peakActiveBytes, activeBytes);
+ peakFlushBytes = Math.max(peakFlushBytes, flushBytes);
+ peakNetBytes = Math.max(peakNetBytes, netBytes());
+ return true;
+ }
+
+ synchronized DocumentsWriterPerThread doAfterDocument(ThreadState perThread,
+ boolean isUpdate) {
+ commitPerThreadBytes(perThread);
+ if (!perThread.flushPending) {
+ if (isUpdate) {
+ flushPolicy.onUpdate(this, perThread);
+ } else {
+ flushPolicy.onInsert(this, perThread);
+ }
+ if (!perThread.flushPending && perThread.bytesUsed > hardMaxBytesPerDWPT) {
+ // Safety check to prevent a single DWPT exceeding its RAM limit. This
+        // is super important since we cannot address more than 2048 MB per DWPT
+ setFlushPending(perThread);
+ if (fullFlush) {
+ DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread, false);
+ assert toBlock != null;
+ blockedFlushes.add(toBlock);
+ }
+ }
+ }
+ final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread, false);
+ healthiness.updateStalled(this);
+ return flushingDWPT;
+ }
+
+ synchronized void doAfterFlush(DocumentsWriterPerThread dwpt) {
+ assert flushingWriters.containsKey(dwpt);
+ try {
+ numFlushing--;
+ Long bytes = flushingWriters.remove(dwpt);
+ flushBytes -= bytes.longValue();
+ perThreadPool.recycle(dwpt);
+ healthiness.updateStalled(this);
+ } finally {
+ notifyAll();
+ }
+ }
+
+ public synchronized boolean anyFlushing() {
+ return numFlushing != 0;
+ }
+
+ public synchronized void waitForFlush() {
+ if (numFlushing != 0) {
+ try {
+ this.wait();
+ } catch (InterruptedException e) {
+ throw new ThreadInterruptedException(e);
+ }
+ }
+ }
+
+ /**
+ * Sets flush pending state on the given {@link ThreadState}. The
+   * {@link ThreadState} must have indexed at least one document and must not
+   * already be pending.
+ */
+ public synchronized void setFlushPending(ThreadState perThread) {
+ assert !perThread.flushPending;
+ if (perThread.perThread.getNumDocsInRAM() > 0) {
+ perThread.flushPending = true; // write access synced
+ final long bytes = perThread.bytesUsed;
+ flushBytes += bytes;
+ activeBytes -= bytes;
+ numPending++; // write access synced
+    } // don't assert on numDocs since we could hit an abort exception while selecting that dwpt for flushing
+
+ }
+
+ synchronized void doOnAbort(ThreadState state) {
+ if (state.flushPending) {
+ flushBytes -= state.bytesUsed;
+ } else {
+ activeBytes -= state.bytesUsed;
+ }
+    // Take it out of the loop; this DWPT is stale
+ perThreadPool.replaceForFlush(state, closed);
+ healthiness.updateStalled(this);
+ }
+
+ synchronized DocumentsWriterPerThread tryCheckoutForFlush(
+ ThreadState perThread, boolean setPending) {
+ if (fullFlush) {
+ return null;
+ }
+ return internalTryCheckOutForFlush(perThread, setPending);
+ }
+
+ private DocumentsWriterPerThread internalTryCheckOutForFlush(
+ ThreadState perThread, boolean setPending) {
+ if (setPending && !perThread.flushPending) {
+ setFlushPending(perThread);
+ }
+ if (perThread.flushPending) {
+ // We are pending so all memory is already moved to flushBytes
+ if (perThread.tryLock()) {
+ try {
+ if (perThread.isActive()) {
+ assert perThread.isHeldByCurrentThread();
+ final DocumentsWriterPerThread dwpt;
+          final long bytes = perThread.bytesUsed; // read this before replace!
+ dwpt = perThreadPool.replaceForFlush(perThread, closed);
+ assert !flushingWriters.containsKey(dwpt) : "DWPT is already flushing";
+ // Record the flushing DWPT to reduce flushBytes in doAfterFlush
+ flushingWriters.put(dwpt, Long.valueOf(bytes));
+ numPending--; // write access synced
+ numFlushing++;
+ return dwpt;
+ }
+ } finally {
+ perThread.unlock();
+ }
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public String toString() {
+ return "DocumentsWriterFlushControl [activeBytes=" + activeBytes
+ + ", flushBytes=" + flushBytes + "]";
+ }
+
+ DocumentsWriterPerThread nextPendingFlush() {
+ synchronized (this) {
+ DocumentsWriterPerThread poll = flushQueue.poll();
+ if (poll != null) {
+ return poll;
+ }
+ }
+ if (numPending > 0) {
+ final Iterator<ThreadState> allActiveThreads = perThreadPool
+ .getActivePerThreadsIterator();
+ while (allActiveThreads.hasNext() && numPending > 0) {
+ ThreadState next = allActiveThreads.next();
+ if (next.flushPending) {
+ final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next, false);
+ if (dwpt != null) {
+ return dwpt;
+ }
+ }
+ }
+ }
+ return null;
+ }
+
+ synchronized void setClosed() {
+ // set by DW to signal that we should not release new DWPT after close
+ this.closed = true;
+ }
+
+ /**
+ * Returns an iterator that provides access to all currently active {@link ThreadState}s
+ */
+ public Iterator<ThreadState> allActiveThreads() {
+ return perThreadPool.getActivePerThreadsIterator();
+ }
+
+ synchronized void doOnDelete() {
+ // pass null this is a global delete no update
+ flushPolicy.onDelete(this, null);
+ }
+
+ /**
+ * Returns the number of delete terms in the global pool
+ */
+ public int getNumGlobalTermDeletes() {
+ return documentsWriter.deleteQueue.numGlobalTermDeletes();
+ }
+
+ int numFlushingDWPT() {
+ return numFlushing;
+ }
+
+ public boolean doApplyAllDeletes() {
+ return flushDeletes.getAndSet(false);
+ }
+
+ public void setApplyAllDeletes() {
+ flushDeletes.set(true);
+ }
+
+ int numActiveDWPT() {
+ return this.perThreadPool.getMaxThreadStates();
+ }
+
+ void markForFullFlush() {
+ final DocumentsWriterDeleteQueue flushingQueue;
+ synchronized (this) {
+ assert !fullFlush;
+ fullFlush = true;
+ flushingQueue = documentsWriter.deleteQueue;
+ // Set a new delete queue - all subsequent DWPT will use this queue until
+ // we do another full flush
+ DocumentsWriterDeleteQueue newQueue = new DocumentsWriterDeleteQueue(flushingQueue.generation+1);
+ documentsWriter.deleteQueue = newQueue;
+ }
+ final Iterator<ThreadState> allActiveThreads = perThreadPool
+ .getActivePerThreadsIterator();
+ final ArrayList<DocumentsWriterPerThread> toFlush = new ArrayList<DocumentsWriterPerThread>();
+ while (allActiveThreads.hasNext()) {
+ final ThreadState next = allActiveThreads.next();
+ next.lock();
+ try {
+ if (!next.isActive()) {
+ continue;
+ }
+ assert next.perThread.deleteQueue == flushingQueue
+ || next.perThread.deleteQueue == documentsWriter.deleteQueue : " flushingQueue: "
+ + flushingQueue
+ + " currentqueue: "
+ + documentsWriter.deleteQueue
+ + " perThread queue: "
+ + next.perThread.deleteQueue
+ + " numDocsInRam: " + next.perThread.getNumDocsInRAM();
+ if (next.perThread.deleteQueue != flushingQueue) {
+ // this one is already a new DWPT
+ continue;
+ }
+ if (next.perThread.getNumDocsInRAM() > 0 ) {
+ final DocumentsWriterPerThread dwpt = next.perThread; // just for assert
+ final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next, true);
+ assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
+ assert dwpt == flushingDWPT : "flushControl returned different DWPT";
+ toFlush.add(flushingDWPT);
+ } else {
+ // get the new delete queue from DW
+ next.perThread.initialize();
+ }
+ } finally {
+ next.unlock();
+ }
+ }
+ synchronized (this) {
+ assert assertBlockedFlushes(flushingQueue);
+ flushQueue.addAll(blockedFlushes);
+ blockedFlushes.clear();
+ flushQueue.addAll(toFlush);
+ }
+ }
+
+ synchronized void finishFullFlush() {
+ assert fullFlush;
+ assert flushQueue.isEmpty();
+ try {
+ if (!blockedFlushes.isEmpty()) {
+ assert assertBlockedFlushes(documentsWriter.deleteQueue);
+ flushQueue.addAll(blockedFlushes);
+ blockedFlushes.clear();
+ }
+ } finally {
+ fullFlush = false;
+ }
+ }
+
+ boolean assertBlockedFlushes(DocumentsWriterDeleteQueue flushingQueue) {
+ Queue<DocumentsWriterPerThread> flushes = this.blockedFlushes;
+ for (DocumentsWriterPerThread documentsWriterPerThread : flushes) {
+ assert documentsWriterPerThread.deleteQueue == flushingQueue;
+ }
+ return true;
+ }
+
+ synchronized void abortFullFlushes() {
+ try {
+ for (DocumentsWriterPerThread dwpt : flushQueue) {
+ doAfterFlush(dwpt);
+ }
+ for (DocumentsWriterPerThread dwpt : blockedFlushes) {
+ doAfterFlush(dwpt);
+ }
+
+ } finally {
+ fullFlush = false;
+ flushQueue.clear();
+ blockedFlushes.clear();
+ }
+ }
+
+ synchronized boolean isFullFlush() {
+ return fullFlush;
+ }
+}
\ No newline at end of file
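DocumentsWriterFlushControl defers the actual trigger decision to the configured FlushPolicy (via onInsert/onUpdate/onDelete above) and only performs the checkout and accounting itself. The core rule a RAM-based policy applies can be stated compactly; this standalone helper is an illustration of that rule, not the FlushPolicy API (FlushByRamOrCountsPolicy appears later in this patch):

    final class RamFlushRule {
      /** true once the tracked active bytes cross the configured RAM buffer size. */
      static boolean shouldMarkPending(long activeBytes, double ramBufferSizeMB) {
        if (ramBufferSizeMB == IndexWriterConfig.DISABLE_AUTO_FLUSH) {
          return false; // RAM-based flushing disabled
        }
        return activeBytes > (long) (ramBufferSizeMB * 1024 * 1024);
      }
    }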
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
new file mode 100644
index 0000000..4ffb1e0
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@@ -0,0 +1,496 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.text.NumberFormat;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
+import org.apache.lucene.search.SimilarityProvider;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BitVector;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
+import org.apache.lucene.util.RamUsageEstimator;
+
+public class DocumentsWriterPerThread {
+
+ /**
+ * The IndexingChain must define the {@link #getChain(DocumentsWriter)} method
+ * which returns the DocConsumer that the DocumentsWriter calls to process the
+ * documents.
+ */
+ abstract static class IndexingChain {
+ abstract DocConsumer getChain(DocumentsWriterPerThread documentsWriterPerThread);
+ }
+
+
+ static final IndexingChain defaultIndexingChain = new IndexingChain() {
+
+ @Override
+ DocConsumer getChain(DocumentsWriterPerThread documentsWriterPerThread) {
+ /*
+ This is the current indexing chain:
+
+ DocConsumer / DocConsumerPerThread
+ --> code: DocFieldProcessor / DocFieldProcessorPerThread
+ --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField
+ --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField
+ --> code: DocInverter / DocInverterPerThread / DocInverterPerField
+ --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+ --> code: TermsHash / TermsHashPerThread / TermsHashPerField
+ --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField
+ --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField
+ --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField
+ --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField
+ --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField
+ --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField
+ */
+
+ // Build up indexing chain:
+
+ final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(documentsWriterPerThread);
+ final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter();
+
+ final InvertedDocConsumer termsHash = new TermsHash(documentsWriterPerThread, freqProxWriter, true,
+ new TermsHash(documentsWriterPerThread, termVectorsWriter, false, null));
+ final NormsWriter normsWriter = new NormsWriter();
+ final DocInverter docInverter = new DocInverter(documentsWriterPerThread.docState, termsHash, normsWriter);
+ return new DocFieldProcessor(documentsWriterPerThread, docInverter);
+ }
+ };
+
+ static class DocState {
+ final DocumentsWriterPerThread docWriter;
+ Analyzer analyzer;
+ PrintStream infoStream;
+ SimilarityProvider similarityProvider;
+ int docID;
+ Document doc;
+ String maxTermPrefix;
+
+ DocState(DocumentsWriterPerThread docWriter) {
+ this.docWriter = docWriter;
+ }
+
+ // Only called by asserts
+ public boolean testPoint(String name) {
+ return docWriter.writer.testPoint(name);
+ }
+
+ public void clear() {
+ // don't hold onto doc nor analyzer, in case it is
+ // largish:
+ doc = null;
+ analyzer = null;
+ }
+ }
+
+ static class FlushedSegment {
+ final SegmentInfo segmentInfo;
+ final BufferedDeletes segmentDeletes;
+ final BitVector deletedDocuments;
+
+ private FlushedSegment(SegmentInfo segmentInfo,
+ BufferedDeletes segmentDeletes, BitVector deletedDocuments) {
+ this.segmentInfo = segmentInfo;
+ this.segmentDeletes = segmentDeletes;
+ this.deletedDocuments = deletedDocuments;
+ }
+ }
+
+ /** Called if we hit an exception at a bad time (when
+ * updating the index files) and must discard all
+ * currently buffered docs. This resets our state,
+ * discarding any docs added since last flush. */
+ void abort() throws IOException {
+ hasAborted = aborting = true;
+ try {
+ if (infoStream != null) {
+ message("docWriter: now abort");
+ }
+ try {
+ consumer.abort();
+ } catch (Throwable t) {
+ }
+
+ pendingDeletes.clear();
+ deleteSlice = deleteQueue.newSlice();
+ // Reset all postings data
+ doAfterFlush();
+
+ } finally {
+ aborting = false;
+ if (infoStream != null) {
+ message("docWriter: done abort");
+ }
+ }
+ }
+
+ final DocumentsWriter parent;
+ final IndexWriter writer;
+ final Directory directory;
+ final DocState docState;
+ final DocConsumer consumer;
+ final AtomicLong bytesUsed;
+
+ SegmentWriteState flushState;
+ //Deletes for our still-in-RAM (to be flushed next) segment
+ BufferedDeletes pendingDeletes;
+ String segment; // Current segment we are working on
+ boolean aborting = false; // True if an abort is pending
+  boolean hasAborted = false; // True if the last exception thrown by #updateDocument was aborting
+
+ private FieldInfos fieldInfos;
+ private final PrintStream infoStream;
+ private int numDocsInRAM;
+ private int flushedDocCount;
+ DocumentsWriterDeleteQueue deleteQueue;
+ DeleteSlice deleteSlice;
+ private final NumberFormat nf = NumberFormat.getInstance();
+
+
+ public DocumentsWriterPerThread(Directory directory, DocumentsWriter parent,
+ FieldInfos fieldInfos, IndexingChain indexingChain) {
+ this.directory = directory;
+ this.parent = parent;
+ this.fieldInfos = fieldInfos;
+ this.writer = parent.indexWriter;
+ this.infoStream = parent.indexWriter.getInfoStream();
+ this.docState = new DocState(this);
+ this.docState.similarityProvider = parent.indexWriter.getConfig()
+ .getSimilarityProvider();
+
+ consumer = indexingChain.getChain(this);
+ bytesUsed = new AtomicLong(0);
+ pendingDeletes = new BufferedDeletes(false);
+ initialize();
+ }
+
+ public DocumentsWriterPerThread(DocumentsWriterPerThread other, FieldInfos fieldInfos) {
+ this(other.directory, other.parent, fieldInfos, other.parent.chain);
+ }
+
+ void initialize() {
+ deleteQueue = parent.deleteQueue;
+ assert numDocsInRAM == 0 : "num docs " + numDocsInRAM;
+ pendingDeletes.clear();
+ deleteSlice = null;
+ }
+
+ void setAborting() {
+ aborting = true;
+ }
+
+ boolean checkAndResetHasAborted() {
+ final boolean retval = hasAborted;
+ hasAborted = false;
+ return retval;
+ }
+
+ public void updateDocument(Document doc, Analyzer analyzer, Term delTerm) throws IOException {
+ assert writer.testPoint("DocumentsWriterPerThread addDocument start");
+ assert deleteQueue != null;
+ docState.doc = doc;
+ docState.analyzer = analyzer;
+ docState.docID = numDocsInRAM;
+ if (segment == null) {
+ // this call is synchronized on IndexWriter.segmentInfos
+ segment = writer.newSegmentName();
+ assert numDocsInRAM == 0;
+ }
+
+ boolean success = false;
+ try {
+ try {
+ consumer.processDocument(fieldInfos);
+ } finally {
+ docState.clear();
+ }
+ success = true;
+ } finally {
+ if (!success) {
+ if (!aborting) {
+ // mark document as deleted
+ deleteDocID(docState.docID);
+ numDocsInRAM++;
+ } else {
+ abort();
+ }
+ }
+ }
+ success = false;
+ try {
+ consumer.finishDocument();
+ success = true;
+ } finally {
+ if (!success) {
+ abort();
+ }
+ }
+ finishDocument(delTerm);
+ }
+
+ private void finishDocument(Term delTerm) throws IOException {
+ /*
+ * here we actually finish the document in two steps 1. push the delete into
+ * the queue and update our slice. 2. increment the DWPT private document
+ * id.
+ *
+ * the updated slice we get from 1. holds all the deletes that have occurred
+ * since we updated the slice the last time.
+ */
+ if (deleteSlice == null) {
+ deleteSlice = deleteQueue.newSlice();
+ if (delTerm != null) {
+ deleteQueue.add(delTerm, deleteSlice);
+ deleteSlice.reset();
+ }
+
+ } else {
+ if (delTerm != null) {
+ deleteQueue.add(delTerm, deleteSlice);
+ assert deleteSlice.isTailItem(delTerm) : "expected the delete term as the tail item";
+ deleteSlice.apply(pendingDeletes, numDocsInRAM);
+ } else if (deleteQueue.updateSlice(deleteSlice)) {
+ deleteSlice.apply(pendingDeletes, numDocsInRAM);
+ }
+ }
+ ++numDocsInRAM;
+ }
+
+ // Buffer a specific docID for deletion. Currently only
+  // used when we hit an exception while adding a document
+ void deleteDocID(int docIDUpto) {
+ pendingDeletes.addDocID(docIDUpto);
+ // NOTE: we do not trigger flush here. This is
+ // potentially a RAM leak, if you have an app that tries
+ // to add docs but every single doc always hits a
+ // non-aborting exception. Allowing a flush here gets
+ // very messy because we are only invoked when handling
+    // exceptions; so to do this properly, while handling an
+    // exception we'd have to go off and flush new deletes,
+    // which is risky (we would likely hit some other
+ // confounding exception).
+ }
+
+ /**
+ * Returns the number of delete terms in this {@link DocumentsWriterPerThread}
+ */
+ public int numDeleteTerms() {
+ // public for FlushPolicy
+ return pendingDeletes.numTermDeletes.get();
+ }
+
+ /**
+ * Returns the number of RAM resident documents in this {@link DocumentsWriterPerThread}
+ */
+ public int getNumDocsInRAM() {
+ // public for FlushPolicy
+ return numDocsInRAM;
+ }
+
+ SegmentCodecs getCodec() {
+ return flushState.segmentCodecs;
+ }
+
+ /** Reset after a flush */
+ private void doAfterFlush() throws IOException {
+ segment = null;
+ consumer.doAfterFlush();
+ fieldInfos = new FieldInfos(fieldInfos);
+ parent.subtractFlushedNumDocs(numDocsInRAM);
+ numDocsInRAM = 0;
+ }
+
+ /**
+ * Prepares this DWPT for flushing. This method will freeze and return the
+ * {@link DocumentsWriterDeleteQueue}s global buffer and apply all pending
+ * deletes to this DWPT.
+ */
+ FrozenBufferedDeletes prepareFlush() {
+ assert numDocsInRAM > 0;
+ final FrozenBufferedDeletes globalDeletes = deleteQueue.freezeGlobalBuffer(deleteSlice);
+    /* deleteSlice can possibly be null if we have hit non-aborting exceptions
+     * during indexing and never succeeded adding a document. */
+ if (deleteSlice != null) {
+ // apply all deletes before we flush and release the delete slice
+ deleteSlice.apply(pendingDeletes, numDocsInRAM);
+ assert deleteSlice.isEmpty();
+ deleteSlice = null;
+ }
+ return globalDeletes;
+ }
+
+ /** Flush all pending docs to a new segment */
+ FlushedSegment flush() throws IOException {
+ assert numDocsInRAM > 0;
+ assert deleteSlice == null : "all deletes must be applied in prepareFlush";
+ flushState = new SegmentWriteState(infoStream, directory, segment, fieldInfos,
+ numDocsInRAM, writer.getConfig().getTermIndexInterval(),
+ fieldInfos.buildSegmentCodecs(true), pendingDeletes);
+ final double startMBUsed = parent.flushControl.netBytes() / 1024. / 1024.;
+    // Apply delete-by-docID now (delete-by-docID only
+    // happens when an exception is hit processing that
+    // doc, e.g. if the analyzer has some problem with the text):
+ if (pendingDeletes.docIDs.size() > 0) {
+ flushState.deletedDocs = new BitVector(numDocsInRAM);
+ for(int delDocID : pendingDeletes.docIDs) {
+ flushState.deletedDocs.set(delDocID);
+ }
+ pendingDeletes.bytesUsed.addAndGet(-pendingDeletes.docIDs.size() * BufferedDeletes.BYTES_PER_DEL_DOCID);
+ pendingDeletes.docIDs.clear();
+ }
+
+ if (infoStream != null) {
+ message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM);
+ }
+
+ if (aborting) {
+ if (infoStream != null) {
+ message("flush: skip because aborting is set");
+ }
+ return null;
+ }
+
+ boolean success = false;
+
+ try {
+
+ SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos);
+ consumer.flush(flushState);
+ pendingDeletes.terms.clear();
+ newSegment.setHasVectors(flushState.hasVectors);
+
+ if (infoStream != null) {
+ message("new segment has " + (flushState.deletedDocs == null ? 0 : flushState.deletedDocs.count()) + " deleted docs");
+ message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors"));
+ message("flushedFiles=" + newSegment.files());
+ message("flushed codecs=" + newSegment.getSegmentCodecs());
+ }
+ flushedDocCount += flushState.numDocs;
+
+ final BufferedDeletes segmentDeletes;
+ if (pendingDeletes.queries.isEmpty()) {
+ pendingDeletes.clear();
+ segmentDeletes = null;
+ } else {
+ segmentDeletes = pendingDeletes;
+ pendingDeletes = new BufferedDeletes(false);
+ }
+
+ if (infoStream != null) {
+ final double newSegmentSizeNoStore = newSegment.sizeInBytes(false)/1024./1024.;
+ final double newSegmentSize = newSegment.sizeInBytes(true)/1024./1024.;
+ message("flushed: segment=" + newSegment +
+ " ramUsed=" + nf.format(startMBUsed) + " MB" +
+ " newFlushedSize=" + nf.format(newSegmentSize) + " MB" +
+ " (" + nf.format(newSegmentSizeNoStore) + " MB w/o doc stores)" +
+ " docs/MB=" + nf.format(flushedDocCount / newSegmentSize) +
+ " new/old=" + nf.format(100.0 * newSegmentSizeNoStore / startMBUsed) + "%");
+ }
+ doAfterFlush();
+ success = true;
+
+ return new FlushedSegment(newSegment, segmentDeletes, flushState.deletedDocs);
+ } finally {
+ if (!success) {
+ if (segment != null) {
+ synchronized(parent.indexWriter) {
+ parent.indexWriter.deleter.refresh(segment);
+ }
+ }
+ abort();
+ }
+ }
+ }
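The asserts in prepareFlush and flush pin down their relative order; a condensed sketch of the sequence the owning DocumentsWriter is expected to drive (the doFlush driver itself is not shown in this hunk):

    // 1. freeze the global delete buffer and drain this DWPT's private slice
    FrozenBufferedDeletes globalDeletes = dwpt.prepareFlush();
    // 2. write the in-RAM docs as a new segment; segment-private deletes and
    //    deleted docs ride along in the returned FlushedSegment
    DocumentsWriterPerThread.FlushedSegment segment = dwpt.flush();
    // 3. the caller then pairs (globalDeletes, segment) into a FlushTicket and
    //    publishes it in arrival order through the ticket queue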
+
+ /** Get current segment name we are writing. */
+ String getSegment() {
+ return segment;
+ }
+
+ long bytesUsed() {
+ return bytesUsed.get() + pendingDeletes.bytesUsed.get();
+ }
+
+ FieldInfos getFieldInfos() {
+ return fieldInfos;
+ }
+
+ void message(String message) {
+ writer.message("DWPT: " + message);
+ }
+
+  /* Initial chunk size of the shared byte[] blocks used to
+ store postings data */
+ final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK;
+
+  /* if you increase this, you must fix the field cache impl:
+   * getTerms/getTermsIndex requires terms <= 32768 */
+ final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2;
+
+  /* Initial chunk size of the shared int[] blocks used to
+ store postings data */
+ final static int INT_BLOCK_SHIFT = 13;
+ final static int INT_BLOCK_SIZE = 1 << INT_BLOCK_SHIFT;
+ final static int INT_BLOCK_MASK = INT_BLOCK_SIZE - 1;
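+  // With INT_BLOCK_SHIFT = 13, each shared int block holds 1 << 13 = 8192 ints, i.e. 32 KB at 4 bytes per int.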
+
+ /* Allocate another int[] from the shared pool */
+ int[] getIntBlock() {
+ int[] b = new int[INT_BLOCK_SIZE];
+ bytesUsed.addAndGet(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT);
+ return b;
+ }
+
+ void recycleIntBlocks(int[][] blocks, int offset, int length) {
+ bytesUsed.addAndGet(-(length *(INT_BLOCK_SIZE*RamUsageEstimator.NUM_BYTES_INT)));
+ }
+
+ final Allocator byteBlockAllocator = new DirectTrackingAllocator();
+
+
+ private class DirectTrackingAllocator extends Allocator {
+ public DirectTrackingAllocator() {
+ this(BYTE_BLOCK_SIZE);
+ }
+
+ public DirectTrackingAllocator(int blockSize) {
+ super(blockSize);
+ }
+
+ public byte[] getByteBlock() {
+ bytesUsed.addAndGet(blockSize);
+ return new byte[blockSize];
+ }
+ @Override
+ public void recycleByteBlocks(byte[][] blocks, int start, int end) {
+ bytesUsed.addAndGet(-((end-start)* blockSize));
+ for (int i = start; i < end; i++) {
+ blocks[i] = null;
+ }
+ }
+
+ };
+}
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java
new file mode 100644
index 0000000..00be816
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThreadPool.java
@@ -0,0 +1,272 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Iterator;
+import java.util.concurrent.locks.ReentrantLock;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
+import org.apache.lucene.index.SegmentCodecs.SegmentCodecsBuilder;
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.util.SetOnce;
+
+/**
+ * {@link DocumentsWriterPerThreadPool} controls {@link ThreadState} instances
+ * and their thread assignments during indexing. Each {@link ThreadState} holds
+ * a reference to a {@link DocumentsWriterPerThread} that is, once the
+ * {@link ThreadState} is obtained from the pool, used exclusively for indexing a
+ * single document by the obtaining thread. Each indexing thread must obtain
+ * such a {@link ThreadState} to make progress. Depending on the
+ * {@link DocumentsWriterPerThreadPool} implementation {@link ThreadState}
+ * assignments might differ from document to document.
+ * <p>
+ * Once a {@link DocumentsWriterPerThread} is selected for flush the thread pool
+ * reuses the flushing {@link DocumentsWriterPerThread}'s ThreadState by resetting
+ * it with a new {@link DocumentsWriterPerThread} instance.
+ * </p>
+ */
+public abstract class DocumentsWriterPerThreadPool {
+ /** The maximum number of simultaneous threads that may be
+ * indexing documents at once in IndexWriter; if more
+ * than this many threads arrive they will wait for
+ * others to finish. */
+ public final static int DEFAULT_MAX_THREAD_STATES = 8;
+
+ /**
+ * {@link ThreadState} references and guards a
+ * {@link DocumentsWriterPerThread} instance that is used during indexing to
+   * build an in-memory index segment. {@link ThreadState} also holds all flush
+   * related per-thread data controlled by {@link DocumentsWriterFlushControl}.
+   * <p>
+   * A {@link ThreadState}, its methods and members should only be accessed by one
+   * thread at a time. Users must acquire the lock via {@link ThreadState#lock()}
+ * and release the lock in a finally block via {@link ThreadState#unlock()}
+ * before accessing the state.
+ */
+ @SuppressWarnings("serial")
+ public final static class ThreadState extends ReentrantLock {
+ // package private for FlushPolicy
+ DocumentsWriterPerThread perThread;
+ // write access guarded by DocumentsWriterFlushControl
+ volatile boolean flushPending = false;
+ // write access guarded by DocumentsWriterFlushControl
+ long bytesUsed = 0;
+ // guarded by Reentrant lock
+ private boolean isActive = true;
+
+ ThreadState(DocumentsWriterPerThread perThread) {
+ this.perThread = perThread;
+ }
+
+ /**
+ * Resets the internal {@link DocumentsWriterPerThread} with the given one.
+     * If the given DWPT is <code>null</code>, this ThreadState is marked as inactive and should not be used
+ * for indexing anymore.
+ * @see #isActive()
+ */
+ void resetWriter(DocumentsWriterPerThread perThread) {
+ assert this.isHeldByCurrentThread();
+ if (perThread == null) {
+ isActive = false;
+ }
+ this.perThread = perThread;
+ this.bytesUsed = 0;
+ this.flushPending = false;
+ }
+
+ /**
+ * Returns <code>true</code> if this ThreadState is still open. This will
+     * only return <code>false</code> if the DW has been closed and this
+ * ThreadState is already checked out for flush.
+ */
+ boolean isActive() {
+ assert this.isHeldByCurrentThread();
+ return isActive;
+ }
+
+ /**
+ * Returns the number of currently active bytes in this ThreadState's
+ * {@link DocumentsWriterPerThread}
+ */
+ public long getBytesUsedPerThread() {
+ assert this.isHeldByCurrentThread();
+ // public for FlushPolicy
+ return bytesUsed;
+ }
+
+ /**
+ * Returns this {@link ThreadState}s {@link DocumentsWriterPerThread}
+ */
+ public DocumentsWriterPerThread getDocumentsWriterPerThread() {
+ assert this.isHeldByCurrentThread();
+ // public for FlushPolicy
+ return perThread;
+ }
+
+ /**
+ * Returns <code>true</code> iff this {@link ThreadState} is marked as flush
+ * pending otherwise <code>false</code>
+ */
+ public boolean isFlushPending() {
+ return flushPending;
+ }
+ }
+
+ private final ThreadState[] perThreads;
+ private volatile int numThreadStatesActive;
+ private CodecProvider codecProvider;
+ private FieldNumberBiMap globalFieldMap;
+ private final SetOnce<DocumentsWriter> documentsWriter = new SetOnce<DocumentsWriter>();
+
+ /**
+ * Creates a new {@link DocumentsWriterPerThreadPool} with max.
+ * {@link #DEFAULT_MAX_THREAD_STATES} thread states.
+ */
+ public DocumentsWriterPerThreadPool() {
+ this(DEFAULT_MAX_THREAD_STATES);
+ }
+
+ public DocumentsWriterPerThreadPool(int maxNumPerThreads) {
+ maxNumPerThreads = (maxNumPerThreads < 1) ? DEFAULT_MAX_THREAD_STATES : maxNumPerThreads;
+ perThreads = new ThreadState[maxNumPerThreads];
+ numThreadStatesActive = 0;
+ }
+
+ public void initialize(DocumentsWriter documentsWriter, FieldNumberBiMap globalFieldMap, IndexWriterConfig config) {
+ this.documentsWriter.set(documentsWriter); // thread pool is bound to DW
+ final CodecProvider codecs = config.getCodecProvider();
+ this.codecProvider = codecs;
+ this.globalFieldMap = globalFieldMap;
+ for (int i = 0; i < perThreads.length; i++) {
+ final FieldInfos infos = globalFieldMap.newFieldInfos(SegmentCodecsBuilder.create(codecs));
+ perThreads[i] = new ThreadState(new DocumentsWriterPerThread(documentsWriter.directory, documentsWriter, infos, documentsWriter.chain));
+ }
+ }
+
+ /**
+ * Returns the max number of {@link ThreadState} instances available in this
+ * {@link DocumentsWriterPerThreadPool}
+ */
+ public int getMaxThreadStates() {
+ return perThreads.length;
+ }
+
+ /**
+   * Returns a new {@link ThreadState} iff any new state is available, otherwise
+ * <code>null</code>.
+ * <p>
+ * NOTE: the returned {@link ThreadState} is already locked iff non-
+ * <code>null</code>.
+ *
+   * @return a new {@link ThreadState} iff any new state is available, otherwise
+ * <code>null</code>
+ */
+ public synchronized ThreadState newThreadState() {
+ if (numThreadStatesActive < perThreads.length) {
+ final ThreadState threadState = perThreads[numThreadStatesActive];
+ threadState.lock(); // lock so nobody else will get this ThreadState
+ numThreadStatesActive++; // increment will publish the ThreadState
+ threadState.perThread.initialize();
+ return threadState;
+ }
+ return null;
+ }
+
+ protected DocumentsWriterPerThread replaceForFlush(ThreadState threadState, boolean closed) {
+ assert threadState.isHeldByCurrentThread();
+ final DocumentsWriterPerThread dwpt = threadState.perThread;
+ if (!closed) {
+ final FieldInfos infos = globalFieldMap.newFieldInfos(SegmentCodecsBuilder.create(codecProvider));
+ final DocumentsWriterPerThread newDwpt = new DocumentsWriterPerThread(dwpt, infos);
+ newDwpt.initialize();
+ threadState.resetWriter(newDwpt);
+ } else {
+ threadState.resetWriter(null);
+ }
+ return dwpt;
+ }
+
+ public void recycle(DocumentsWriterPerThread dwpt) {
+ // don't recycle DWPT by default
+ }
+
+ public abstract ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc);
+
+ //public abstract void clearThreadBindings(ThreadState perThread);
+
+ //public abstract void clearAllThreadBindings();
+
+ /**
+ * Returns an iterator providing access to all {@link ThreadState}
+ * instances.
+ */
+ // TODO: new Iterator per indexed doc is overkill...?
+ public Iterator<ThreadState> getAllPerThreadsIterator() {
+ return getPerThreadsIterator(this.perThreads.length);
+ }
+
+ /**
+ * Returns an iterator providing access to all active {@link ThreadState}
+ * instances.
+ * <p>
+   * Note: The returned iterator will only iterate over
+   * {@link ThreadState}s that were active at the point in time when this method
+   * was called.
+ *
+ */
+ // TODO: new Iterator per indexed doc is overkill...?
+ public Iterator<ThreadState> getActivePerThreadsIterator() {
+ return getPerThreadsIterator(numThreadStatesActive);
+ }
+
+ private Iterator<ThreadState> getPerThreadsIterator(final int upto) {
+ return new Iterator<ThreadState>() {
+ int i = 0;
+
+ public boolean hasNext() {
+ return i < upto;
+ }
+
+ public ThreadState next() {
+ return perThreads[i++];
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException("remove() not supported.");
+ }
+ };
+ }
+
+ /**
+ * Returns the ThreadState with the minimum estimated number of threads
+ * waiting to acquire its lock or <code>null</code> if no {@link ThreadState}
+ * is yet visible to the calling thread.
+ */
+ protected ThreadState minContendedThreadState() {
+ ThreadState minThreadState = null;
+ // TODO: new Iterator per indexed doc is overkill...?
+ final Iterator<ThreadState> it = getActivePerThreadsIterator();
+ while (it.hasNext()) {
+ final ThreadState state = it.next();
+ if (minThreadState == null || state.getQueueLength() < minThreadState.getQueueLength()) {
+ minThreadState = state;
+ }
+ }
+ return minThreadState;
+ }
+}
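A hedged sketch of what a concrete subclass's getAndLock could look like, built only from the hooks defined above (newThreadState, minContendedThreadState); the thread-affinity pool that accompanies this change is more elaborate than this:

    final class SimplePerThreadPool extends DocumentsWriterPerThreadPool {
      @Override
      public ThreadState getAndLock(Thread requestingThread,
          DocumentsWriter documentsWriter, Document doc) {
        // prefer a fresh ThreadState; it comes back already locked if non-null
        ThreadState state = newThreadState();
        if (state == null) {
          // all states are handed out: queue up on the least contended one
          state = minContendedThreadState();
          state.lock(); // blocks until the current owner releases it
        }
        return state;
      }
    }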
diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java
deleted file mode 100644
index 611098a..0000000
--- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterThreadState.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-/** Used by DocumentsWriter to maintain per-thread state.
- * We keep a separate Posting hash and other state for each
- * thread and then merge postings hashes from all threads
- * when writing the segment. */
-final class DocumentsWriterThreadState {
-
- boolean isIdle = true; // false if this is currently in use by a thread
- int numThreads = 1; // Number of threads that share this instance
- final DocConsumerPerThread consumer;
- final DocumentsWriter.DocState docState;
-
- final DocumentsWriter docWriter;
-
- public DocumentsWriterThreadState(DocumentsWriter docWriter) throws IOException {
- this.docWriter = docWriter;
- docState = new DocumentsWriter.DocState();
- docState.infoStream = docWriter.infoStream;
- docState.similarityProvider = docWriter.similarityProvider;
- docState.docWriter = docWriter;
- consumer = docWriter.consumer.addThread(this);
- }
-
- void doAfterFlush() {
- numThreads = 0;
- }
-}
diff --git a/lucene/src/java/org/apache/lucene/index/FieldsWriter.java b/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
index f694bb4..303aa91 100644
--- a/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
@@ -2,13 +2,13 @@
/**
* Copyright 2004 The Apache Software Foundation
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@@ -22,15 +22,14 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.RAMOutputStream;
-import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
final class FieldsWriter {
static final byte FIELD_IS_TOKENIZED = 0x1;
static final byte FIELD_IS_BINARY = 0x2;
-
+
// Lucene 3.0: Removal of compressed fields
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
@@ -38,7 +37,7 @@
// than the current one, and always change this if you
// switch to a new format!
static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
-
+
// when removing support for old versions, leave the last supported version here
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
@@ -83,10 +82,9 @@
// and adds a new entry for this document into the index
// stream. This assumes the buffer was already written
// in the correct fields format.
- void flushDocument(int numStoredFields, RAMOutputStream buffer) throws IOException {
+ void startDocument(int numStoredFields) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
fieldsStream.writeVInt(numStoredFields);
- buffer.writeTo(fieldsStream);
}
void skipDocument() throws IOException {
@@ -121,8 +119,8 @@
}
}
- final void writeField(FieldInfo fi, Fieldable field) throws IOException {
- fieldsStream.writeVInt(fi.number);
+ final void writeField(int fieldNumber, Fieldable field) throws IOException {
+ fieldsStream.writeVInt(fieldNumber);
byte bits = 0;
if (field.isTokenized())
bits |= FieldsWriter.FIELD_IS_TOKENIZED;
@@ -175,10 +173,9 @@
fieldsStream.writeVInt(storedCount);
-
for (Fieldable field : fields) {
if (field.isStored())
- writeField(fieldInfos.fieldInfo(field.name()), field);
+ writeField(fieldInfos.fieldNumber(field.name()), field);
}
}
}
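
The startDocument() change above drops the per-document RAMOutputStream buffer: the index stream records where each document begins in the fields stream, and fields are then appended directly. A minimal sketch of that two-stream bookkeeping, with java.io streams standing in for Lucene's IndexOutput (all names hypothetical):

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    // Hypothetical sketch of the fdx/fdt bookkeeping implied by startDocument():
    // the index stream records each document's start offset in the fields
    // stream, so fields can be appended in place instead of buffered first.
    final class TwoStreamSketch {
      final ByteArrayOutputStream fieldsBytes = new ByteArrayOutputStream();
      final DataOutputStream fieldsStream = new DataOutputStream(fieldsBytes);
      final DataOutputStream indexStream =
          new DataOutputStream(new ByteArrayOutputStream());

      void startDocument(int numStoredFields) throws IOException {
        indexStream.writeLong(fieldsBytes.size()); // pointer into fields stream
        fieldsStream.writeInt(numStoredFields);
      }

      void writeField(int fieldNumber, byte[] value) throws IOException {
        fieldsStream.writeInt(fieldNumber);
        fieldsStream.writeInt(value.length);
        fieldsStream.write(value);
      }
    }
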
diff --git a/lucene/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java b/lucene/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java
new file mode 100644
index 0000000..e684cb3
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/FlushByRamOrCountsPolicy.java
@@ -0,0 +1,128 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
+
+/**
+ * Default {@link FlushPolicy} implementation that flushes based on RAM used,
+ * document count, and number of buffered delete terms, depending on the IndexWriter's
+ * {@link IndexWriterConfig}.
+ *
+ * <ul>
+ * <li>{@link #onDelete(DocumentsWriterFlushControl, ThreadState)} - flushes
+ * based on the global number of buffered delete terms iff
+ * {@link IndexWriterConfig#getMaxBufferedDeleteTerms()} is enabled</li>
+ * <li>{@link #onInsert(DocumentsWriterFlushControl, ThreadState)} - flushes
+ * either on the number of documents per {@link DocumentsWriterPerThread} (
+ * {@link DocumentsWriterPerThread#getNumDocsInRAM()}) or on the global active
+ * memory consumption in the current indexing session iff
+ * {@link IndexWriterConfig#getMaxBufferedDocs()} or
+ * {@link IndexWriterConfig#getRAMBufferSizeMB()} is enabled respectively</li>
+ * <li>{@link #onUpdate(DocumentsWriterFlushControl, ThreadState)} - calls
+ * {@link #onInsert(DocumentsWriterFlushControl, ThreadState)} and
+ * {@link #onDelete(DocumentsWriterFlushControl, ThreadState)} in order</li>
+ * </ul>
+ * All relevant {@link IndexWriterConfig} settings are read live, so changes
+ * made to them during an indexing session are reflected in subsequent
+ * decisions to mark a {@link DocumentsWriterPerThread} as flush pending.
+ * <p>
+ * If {@link IndexWriterConfig#setRAMBufferSizeMB(double)} is enabled, the
+ * largest RAM-consuming {@link DocumentsWriterPerThread} will be marked as
+ * pending iff the global active RAM consumption is >= the configured max RAM
+ * buffer.
+ */
+public class FlushByRamOrCountsPolicy extends FlushPolicy {
+
+ @Override
+ public void onDelete(DocumentsWriterFlushControl control, ThreadState state) {
+ if (flushOnDeleteTerms()) {
+ // Flush this state by num del terms
+ final int maxBufferedDeleteTerms = indexWriterConfig
+ .getMaxBufferedDeleteTerms();
+ if (control.getNumGlobalTermDeletes() >= maxBufferedDeleteTerms) {
+ control.setApplyAllDeletes();
+ }
+ }
+ final DocumentsWriter writer = this.writer.get();
+ // If deletes alone are consuming > 1/2 our RAM
+ // buffer, force them all to apply now. This is to
+ // prevent too-frequent flushing of a long tail of
+ // tiny segments:
+ if ((flushOnRAM() &&
+ writer.deleteQueue.bytesUsed() > (1024*1024*indexWriterConfig.getRAMBufferSizeMB()/2))) {
+ control.setApplyAllDeletes();
+ if (writer.infoStream != null) {
+ writer.message("force apply deletes bytesUsed=" + writer.deleteQueue.bytesUsed() + " vs ramBuffer=" + (1024*1024*indexWriterConfig.getRAMBufferSizeMB()));
+ }
+ }
+ }
+
+ @Override
+ public void onInsert(DocumentsWriterFlushControl control, ThreadState state) {
+ if (flushOnDocCount()
+ && state.perThread.getNumDocsInRAM() >= indexWriterConfig
+ .getMaxBufferedDocs()) {
+ // Flush this state by num docs
+ control.setFlushPending(state);
+ } else if (flushOnRAM()) {// flush by RAM
+ final long limit = (long) (indexWriterConfig.getRAMBufferSizeMB() * 1024.d * 1024.d);
+ final long totalRam = control.activeBytes();
+ if (totalRam >= limit) {
+ markLargestWriterPending(control, state, totalRam);
+ }
+ }
+ }
+
+ /**
+ * Marks the most RAM-consuming active {@link DocumentsWriterPerThread} as
+ * flush pending.
+ */
+ protected void markLargestWriterPending(DocumentsWriterFlushControl control,
+ ThreadState perThreadState, final long currentBytesPerThread) {
+ control
+ .setFlushPending(findLargestNonPendingWriter(control, perThreadState));
+ }
+
+ /**
+ * Returns <code>true</code> if this {@link FlushPolicy} flushes on
+ * {@link IndexWriterConfig#getMaxBufferedDocs()}, otherwise
+ * <code>false</code>.
+ */
+ protected boolean flushOnDocCount() {
+ return indexWriterConfig.getMaxBufferedDocs() != IndexWriterConfig.DISABLE_AUTO_FLUSH;
+ }
+
+ /**
+ * Returns <code>true</code> if this {@link FlushPolicy} flushes on
+ * {@link IndexWriterConfig#getMaxBufferedDeleteTerms()}, otherwise
+ * <code>false</code>.
+ */
+ protected boolean flushOnDeleteTerms() {
+ return indexWriterConfig.getMaxBufferedDeleteTerms() != IndexWriterConfig.DISABLE_AUTO_FLUSH;
+ }
+
+ /**
+ * Returns <code>true</code> if this {@link FlushPolicy} flushes on
+ * {@link IndexWriterConfig#getRAMBufferSizeMB()}, otherwise
+ * <code>false</code>.
+ */
+ protected boolean flushOnRAM() {
+ return indexWriterConfig.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH;
+ }
+}
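
For reference, a configuration sketch of the three triggers this policy consults (the Version constant and the helper class are assumptions, not part of this patch). Flushing by RAM alone, with the other triggers disabled, is the usual recommendation:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.util.Version;

    // Sketch: enable the RAM trigger and disable the doc-count and
    // delete-term triggers that FlushByRamOrCountsPolicy checks.
    class FlushTriggerExample {
      static IndexWriterConfig ramOnly(Analyzer analyzer) {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        conf.setRAMBufferSizeMB(64.0);                                 // flush by RAM
        conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // not by doc count
        conf.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH); // not by deletes
        return conf;
      }
    }
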
diff --git a/lucene/src/java/org/apache/lucene/index/FlushPolicy.java b/lucene/src/java/org/apache/lucene/index/FlushPolicy.java
new file mode 100644
index 0000000..c5a3008
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/FlushPolicy.java
@@ -0,0 +1,131 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Iterator;
+
+import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.SetOnce;
+
+/**
+ * {@link FlushPolicy} controls when segments are flushed from a RAM resident
+ * internal data structure to the {@link IndexWriter}'s {@link Directory}.
+ * <p>
+ * Segments are traditionally flushed by:
+ * <ul>
+ * <li>RAM consumption - configured via
+ * {@link IndexWriterConfig#setRAMBufferSizeMB(double)}</li>
+ * <li>Number of RAM resident documents - configured via
+ * {@link IndexWriterConfig#setMaxBufferedDocs(int)}</li>
+ * <li>Number of buffered delete terms/queries - configured via
+ * {@link IndexWriterConfig#setMaxBufferedDeleteTerms(int)}</li>
+ * </ul>
+ *
+ * The {@link IndexWriter} consults a provided {@link FlushPolicy} to control the
+ * flushing process. The policy is informed of each added or
+ * updated document as well as of each delete term. Based on the
+ * information provided via {@link ThreadState} and
+ * {@link DocumentsWriterFlushControl}, the {@link FlushPolicy} decides whether a
+ * {@link DocumentsWriterPerThread} needs flushing and marks it as
+ * flush-pending via
+ * {@link DocumentsWriterFlushControl#setFlushPending(ThreadState)}.
+ *
+ * @see ThreadState
+ * @see DocumentsWriterFlushControl
+ * @see DocumentsWriterPerThread
+ * @see IndexWriterConfig#setFlushPolicy(FlushPolicy)
+ */
+public abstract class FlushPolicy {
+ protected final SetOnce<DocumentsWriter> writer = new SetOnce<DocumentsWriter>();
+ protected IndexWriterConfig indexWriterConfig;
+
+ /**
+ * Called for each delete term. If the delete was triggered by an update,
+ * the given {@link ThreadState} is non-null.
+ * <p>
+ * Note: This method is called synchronized on the given
+ * {@link DocumentsWriterFlushControl} and it is guaranteed that the calling
+ * thread holds the lock on the given {@link ThreadState}
+ */
+ public abstract void onDelete(DocumentsWriterFlushControl control,
+ ThreadState state);
+
+ /**
+ * Called for each document update on the given {@link ThreadState}'s
+ * {@link DocumentsWriterPerThread}.
+ * <p>
+ * Note: This method is called synchronized on the given
+ * {@link DocumentsWriterFlushControl} and it is guaranteed that the calling
+ * thread holds the lock on the given {@link ThreadState}
+ */
+ public void onUpdate(DocumentsWriterFlushControl control, ThreadState state) {
+ onInsert(control, state);
+ if (!state.flushPending) {
+ onDelete(control, state);
+ }
+ }
+
+ /**
+ * Called for each document addition on the given {@link ThreadState}'s
+ * {@link DocumentsWriterPerThread}.
+ * <p>
+ * Note: This method is called synchronized on the given
+ * {@link DocumentsWriterFlushControl} and it is guaranteed that the calling
+ * thread holds the lock on the given {@link ThreadState}
+ */
+ public abstract void onInsert(DocumentsWriterFlushControl control,
+ ThreadState state);
+
+ /**
+ * Called by {@link DocumentsWriter} to initialize the FlushPolicy
+ */
+ protected synchronized void init(DocumentsWriter docsWriter) {
+ writer.set(docsWriter);
+ indexWriterConfig = docsWriter.indexWriter.getConfig();
+ }
+
+ /**
+ * Returns the currently most RAM-consuming non-pending {@link ThreadState} with
+ * at least one indexed document.
+ * <p>
+ * This method will never return <code>null</code>
+ */
+ protected ThreadState findLargestNonPendingWriter(
+ DocumentsWriterFlushControl control, ThreadState perThreadState) {
+ assert perThreadState.perThread.getNumDocsInRAM() > 0;
+ long maxRamSoFar = perThreadState.bytesUsed;
+ // the dwpt which needs to be flushed eventually
+ ThreadState maxRamUsingThreadState = perThreadState;
+ assert !perThreadState.flushPending : "DWPT should have flushed";
+ Iterator<ThreadState> activePerThreadsIterator = control.allActiveThreads();
+ while (activePerThreadsIterator.hasNext()) {
+ ThreadState next = activePerThreadsIterator.next();
+ if (!next.flushPending) {
+ final long nextRam = next.bytesUsed;
+ if (nextRam > maxRamSoFar && next.perThread.getNumDocsInRAM() > 0) {
+ maxRamSoFar = nextRam;
+ maxRamUsingThreadState = next;
+ }
+ }
+ }
+ assert writer.get().message(
+ "set largest ram consuming thread pending on lower watermark");
+ return maxRamUsingThreadState;
+ }
+
+}
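
For illustration, a hypothetical FlushPolicy subclass that marks a ThreadState flush pending once its private buffer exceeds a fixed budget. This is a sketch only: FlushPolicy, DocumentsWriterFlushControl and ThreadState's fields are package-private, so a real implementation would have to live in org.apache.lucene.index:

    import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;

    // Hypothetical policy: flush a DWPT once its own buffer passes a fixed
    // per-thread budget, ignoring deletes and global RAM accounting.
    class PerThreadBudgetFlushPolicy extends FlushPolicy {
      private final long budgetBytes = 32 * 1024 * 1024; // 32 MB, arbitrary

      @Override
      public void onInsert(DocumentsWriterFlushControl control, ThreadState state) {
        if (!state.flushPending && state.bytesUsed > budgetBytes) {
          control.setFlushPending(state);
        }
      }

      @Override
      public void onDelete(DocumentsWriterFlushControl control, ThreadState state) {
        // no delete-driven flushing in this sketch
      }
    }
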
diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java b/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java
deleted file mode 100644
index de2a8cc..0000000
--- a/lucene/src/java/org/apache/lucene/index/FreqProxFieldMergeState.java
+++ /dev/null
@@ -1,115 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import java.util.Comparator;
-
-import org.apache.lucene.util.ByteBlockPool;
-import org.apache.lucene.util.BytesRef;
-
-import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
-
-// TODO FI: some of this is "generic" to TermsHash* so we
-// should factor it out so other consumers don't have to
-// duplicate this code
-
-/** Used by DocumentsWriter to merge the postings from
- * multiple ThreadStates when creating a segment */
-final class FreqProxFieldMergeState {
-
- final FreqProxTermsWriterPerField field;
- final int numPostings;
- private final ByteBlockPool bytePool;
- final int[] termIDs;
- final FreqProxPostingsArray postings;
- int currentTermID;
-
- final BytesRef text = new BytesRef();
-
- private int postingUpto = -1;
-
- final ByteSliceReader freq = new ByteSliceReader();
- final ByteSliceReader prox = new ByteSliceReader();
-
- int docID;
- int termFreq;
-
- public FreqProxFieldMergeState(FreqProxTermsWriterPerField field, Comparator<BytesRef> termComp) {
- this.field = field;
- this.numPostings = field.termsHashPerField.bytesHash.size();
- this.bytePool = field.perThread.termsHashPerThread.bytePool;
- this.termIDs = field.termsHashPerField.sortPostings(termComp);
- this.postings = (FreqProxPostingsArray) field.termsHashPerField.postingsArray;
- }
-
- boolean nextTerm() throws IOException {
- postingUpto++;
- if (postingUpto == numPostings) {
- return false;
- }
-
- currentTermID = termIDs[postingUpto];
- docID = 0;
-
- // Get BytesRef
- final int textStart = postings.textStarts[currentTermID];
- bytePool.setBytesRef(text, textStart);
-
- field.termsHashPerField.initReader(freq, currentTermID, 0);
- if (!field.fieldInfo.omitTermFreqAndPositions) {
- field.termsHashPerField.initReader(prox, currentTermID, 1);
- }
-
- // Should always be true
- boolean result = nextDoc();
- assert result;
-
- return true;
- }
-
- public boolean nextDoc() throws IOException {
- if (freq.eof()) {
- if (postings.lastDocCodes[currentTermID] != -1) {
- // Return last doc
- docID = postings.lastDocIDs[currentTermID];
- if (!field.omitTermFreqAndPositions)
- termFreq = postings.docFreqs[currentTermID];
- postings.lastDocCodes[currentTermID] = -1;
- return true;
- } else
- // EOF
- return false;
- }
-
- final int code = freq.readVInt();
- if (field.omitTermFreqAndPositions)
- docID += code;
- else {
- docID += code >>> 1;
- if ((code & 1) != 0)
- termFreq = 1;
- else
- termFreq = freq.readVInt();
- }
-
- assert docID != postings.lastDocIDs[currentTermID];
-
- return true;
- }
-}
diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
index ee49c8a..0622fc6 100644
--- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
@@ -19,55 +19,35 @@
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.codecs.FieldsConsumer;
-import org.apache.lucene.index.codecs.PostingsConsumer;
-import org.apache.lucene.index.codecs.TermStats;
-import org.apache.lucene.index.codecs.TermsConsumer;
-import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CollectionUtil;
final class FreqProxTermsWriter extends TermsHashConsumer {
@Override
- public TermsHashConsumerPerThread addThread(TermsHashPerThread perThread) {
- return new FreqProxTermsWriterPerThread(perThread);
- }
-
- @Override
void abort() {}
- private int flushedDocCount;
-
// TODO: would be nice to factor out more of this, eg the
// FreqProxFieldMergeState, and code to visit all Fields
// under the same FieldInfo together, up into TermsHash*.
// Other writers would presumably share a lot of this...
@Override
- public void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
+ public void flush(Map<FieldInfo, TermsHashConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
// Gather all FieldData's that have postings, across all
// ThreadStates
List<FreqProxTermsWriterPerField> allFields = new ArrayList<FreqProxTermsWriterPerField>();
-
- flushedDocCount = state.numDocs;
- for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
-
- Collection<TermsHashConsumerPerField> fields = entry.getValue();
-
-
- for (final TermsHashConsumerPerField i : fields) {
- final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) i;
- if (perField.termsHashPerField.bytesHash.size() > 0)
+ for (TermsHashConsumerPerField f : fieldsToFlush.values()) {
+ final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f;
+ if (perField.termsHashPerField.bytesHash.size() > 0) {
allFields.add(perField);
- }
+ }
}
final int numAllFields = allFields.size();
@@ -77,6 +57,8 @@
final FieldsConsumer consumer = state.segmentCodecs.codec().fieldsConsumer(state);
+ TermsHash termsHash = null;
+
/*
Current writer chain:
FieldsConsumer
@@ -89,257 +71,48 @@
-> IMPL: FormatPostingsPositionsWriter
*/
- int start = 0;
- while(start < numAllFields) {
- final FieldInfo fieldInfo = allFields.get(start).fieldInfo;
- final String fieldName = fieldInfo.name;
+ for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) {
+ final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo;
- int end = start+1;
- while(end < numAllFields && allFields.get(end).fieldInfo.name.equals(fieldName))
- end++;
-
- FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end-start];
- for(int i=start;i<end;i++) {
- fields[i-start] = allFields.get(i);
+ final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber);
- // Aggregate the storePayload as seen by the same
- // field across multiple threads
- if (!fieldInfo.omitTermFreqAndPositions) {
- fieldInfo.storePayloads |= fields[i-start].hasPayloads;
- }
+ // Aggregate the storePayload as seen by the same
+ // field across multiple threads
+ if (!fieldInfo.omitTermFreqAndPositions) {
+ fieldInfo.storePayloads |= fieldWriter.hasPayloads;
}
// If this field has postings then add them to the
// segment
- appendPostings(fieldName, state, fields, consumer);
+ fieldWriter.flush(fieldInfo.name, consumer, state);
- for(int i=0;i<fields.length;i++) {
- TermsHashPerField perField = fields[i].termsHashPerField;
- int numPostings = perField.bytesHash.size();
- perField.reset();
- perField.shrinkHash(numPostings);
- fields[i].reset();
- }
-
- start = end;
+ TermsHashPerField perField = fieldWriter.termsHashPerField;
+ assert termsHash == null || termsHash == perField.termsHash;
+ termsHash = perField.termsHash;
+ int numPostings = perField.bytesHash.size();
+ perField.reset();
+ perField.shrinkHash(numPostings);
+ fieldWriter.reset();
}
- for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
- FreqProxTermsWriterPerThread perThread = (FreqProxTermsWriterPerThread) entry.getKey();
- perThread.termsHashPerThread.reset(true);
+ if (termsHash != null) {
+ termsHash.reset();
}
consumer.close();
}
BytesRef payload;
- /* Walk through all unique text tokens (Posting
- * instances) found in this field and serialize them
- * into a single RAM segment. */
- void appendPostings(String fieldName, SegmentWriteState state,
- FreqProxTermsWriterPerField[] fields,
- FieldsConsumer consumer)
- throws CorruptIndexException, IOException {
+ @Override
+ public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) {
+ return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo);
+ }
- int numFields = fields.length;
+ @Override
+ void finishDocument(TermsHash termsHash) throws IOException {
+ }
- final BytesRef text = new BytesRef();
-
- final FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields];
-
- final TermsConsumer termsConsumer = consumer.addField(fields[0].fieldInfo);
- final Comparator<BytesRef> termComp = termsConsumer.getComparator();
-
- for(int i=0;i<numFields;i++) {
- FreqProxFieldMergeState fms = mergeStates[i] = new FreqProxFieldMergeState(fields[i], termComp);
-
- assert fms.field.fieldInfo == fields[0].fieldInfo;
-
- // Should always be true
- boolean result = fms.nextTerm();
- assert result;
- }
-
- final Term protoTerm = new Term(fieldName);
-
- FreqProxFieldMergeState[] termStates = new FreqProxFieldMergeState[numFields];
-
- final boolean currentFieldOmitTermFreqAndPositions = fields[0].fieldInfo.omitTermFreqAndPositions;
- //System.out.println("flush terms field=" + fields[0].fieldInfo.name);
-
- final Map<Term,Integer> segDeletes;
- if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
- segDeletes = state.segDeletes.terms;
- } else {
- segDeletes = null;
- }
-
- // TODO: really TermsHashPerField should take over most
- // of this loop, including merge sort of terms from
- // multiple threads and interacting with the
- // TermsConsumer, only calling out to us (passing us the
- // DocsConsumer) to handle delivery of docs/positions
- long sumTotalTermFreq = 0;
- while(numFields > 0) {
-
- // Get the next term to merge
- termStates[0] = mergeStates[0];
- int numToMerge = 1;
-
- // TODO: pqueue
- for(int i=1;i<numFields;i++) {
- final int cmp = termComp.compare(mergeStates[i].text, termStates[0].text);
- if (cmp < 0) {
- termStates[0] = mergeStates[i];
- numToMerge = 1;
- } else if (cmp == 0) {
- termStates[numToMerge++] = mergeStates[i];
- }
- }
-
- // Need shallow copy here because termStates[0].text
- // changes by the time we call finishTerm
- text.bytes = termStates[0].text.bytes;
- text.offset = termStates[0].text.offset;
- text.length = termStates[0].text.length;
-
- //System.out.println(" term=" + text.toUnicodeString());
- //System.out.println(" term=" + text.toString());
-
- final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
-
- final int delDocLimit;
- if (segDeletes != null) {
- final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text));
- if (docIDUpto != null) {
- delDocLimit = docIDUpto;
- } else {
- delDocLimit = 0;
- }
- } else {
- delDocLimit = 0;
- }
-
- // Now termStates has numToMerge FieldMergeStates
- // which all share the same term. Now we must
- // interleave the docID streams.
- int numDocs = 0;
- long totTF = 0;
- while(numToMerge > 0) {
-
- FreqProxFieldMergeState minState = termStates[0];
- for(int i=1;i<numToMerge;i++) {
- if (termStates[i].docID < minState.docID) {
- minState = termStates[i];
- }
- }
-
- final int termDocFreq = minState.termFreq;
- numDocs++;
-
- assert minState.docID < flushedDocCount: "doc=" + minState.docID + " maxDoc=" + flushedDocCount;
-
- // NOTE: we could check here if the docID was
- // deleted, and skip it. However, this is somewhat
- // dangerous because it can yield non-deterministic
- // behavior since we may see the docID before we see
- // the term that caused it to be deleted. This
- // would mean some (but not all) of its postings may
- // make it into the index, which'd alter the docFreq
- // for those terms. We could fix this by doing two
- // passes, ie first sweep marks all del docs, and
- // 2nd sweep does the real flush, but I suspect
- // that'd add too much time to flush.
-
- postingsConsumer.startDoc(minState.docID, termDocFreq);
- if (minState.docID < delDocLimit) {
- // Mark it deleted. TODO: we could also skip
- // writing its postings; this would be
- // deterministic (just for this Term's docs).
- if (state.deletedDocs == null) {
- state.deletedDocs = new BitVector(state.numDocs);
- }
- state.deletedDocs.set(minState.docID);
- }
-
- final ByteSliceReader prox = minState.prox;
-
- // Carefully copy over the prox + payload info,
- // changing the format to match Lucene's segment
- // format.
- if (!currentFieldOmitTermFreqAndPositions) {
- // omitTermFreqAndPositions == false so we do write positions &
- // payload
- int position = 0;
- totTF += termDocFreq;
- for(int j=0;j<termDocFreq;j++) {
- final int code = prox.readVInt();
- position += code >> 1;
- //System.out.println(" pos=" + position);
-
- final int payloadLength;
- final BytesRef thisPayload;
-
- if ((code & 1) != 0) {
- // This position has a payload
- payloadLength = prox.readVInt();
-
- if (payload == null) {
- payload = new BytesRef();
- payload.bytes = new byte[payloadLength];
- } else if (payload.bytes.length < payloadLength) {
- payload.grow(payloadLength);
- }
-
- prox.readBytes(payload.bytes, 0, payloadLength);
- payload.length = payloadLength;
- thisPayload = payload;
-
- } else {
- payloadLength = 0;
- thisPayload = null;
- }
-
- postingsConsumer.addPosition(position, thisPayload);
- } //End for
-
- postingsConsumer.finishDoc();
- }
-
- if (!minState.nextDoc()) {
-
- // Remove from termStates
- int upto = 0;
- // TODO: inefficient O(N) where N = number of
- // threads that had seen this term:
- for(int i=0;i<numToMerge;i++) {
- if (termStates[i] != minState) {
- termStates[upto++] = termStates[i];
- }
- }
- numToMerge--;
- assert upto == numToMerge;
-
- // Advance this state to the next term
-
- if (!minState.nextTerm()) {
- // OK, no more terms, so remove from mergeStates
- // as well
- upto = 0;
- for(int i=0;i<numFields;i++)
- if (mergeStates[i] != minState)
- mergeStates[upto++] = mergeStates[i];
- numFields--;
- assert upto == numFields;
- }
- }
- }
-
- assert numDocs > 0;
- termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
- sumTotalTermFreq += totTF;
- }
-
- termsConsumer.finish(sumTotalTermFreq);
+ @Override
+ void startDocument() throws IOException {
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
index b504f15..639786c 100644
--- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
+++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java
@@ -18,9 +18,17 @@
*/
import java.io.IOException;
+import java.util.Comparator;
+import java.util.Map;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.codecs.FieldsConsumer;
+import org.apache.lucene.index.codecs.PostingsConsumer;
+import org.apache.lucene.index.codecs.TermStats;
+import org.apache.lucene.index.codecs.TermsConsumer;
+import org.apache.lucene.util.BitVector;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
// TODO: break into separate freq and prox writers as
@@ -28,17 +36,17 @@
// be configured as any number of files 1..N
final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implements Comparable<FreqProxTermsWriterPerField> {
- final FreqProxTermsWriterPerThread perThread;
+ final FreqProxTermsWriter parent;
final TermsHashPerField termsHashPerField;
final FieldInfo fieldInfo;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
boolean omitTermFreqAndPositions;
PayloadAttribute payloadAttribute;
- public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) {
+ public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriter parent, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
- this.perThread = perThread;
+ this.parent = parent;
this.fieldInfo = fieldInfo;
docState = termsHashPerField.docState;
fieldState = termsHashPerField.fieldState;
@@ -78,8 +86,8 @@
if (fields[i].isIndexed())
return true;
return false;
- }
-
+ }
+
@Override
void start(Fieldable f) {
if (fieldState.attributeSource.hasAttribute(PayloadAttribute.class)) {
@@ -96,18 +104,18 @@
} else {
payload = payloadAttribute.getPayload();
}
-
+
if (payload != null && payload.length > 0) {
termsHashPerField.writeVInt(1, (proxCode<<1)|1);
termsHashPerField.writeVInt(1, payload.length);
termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length);
- hasPayloads = true;
+ hasPayloads = true;
} else
termsHashPerField.writeVInt(1, proxCode<<1);
-
+
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
postings.lastPositions[termID] = fieldState.position;
-
+
}
@Override
@@ -115,7 +123,7 @@
// First time we're seeing this term since the last
// flush
assert docState.testPoint("FreqProxTermsWriterPerField.newTerm start");
-
+
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
postings.lastDocIDs[termID] = docState.docID;
if (omitTermFreqAndPositions) {
@@ -132,9 +140,9 @@
void addTerm(final int termID) {
assert docState.testPoint("FreqProxTermsWriterPerField.addTerm start");
-
+
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
-
+
assert omitTermFreqAndPositions || postings.docFreqs[termID] > 0;
if (omitTermFreqAndPositions) {
@@ -169,7 +177,7 @@
}
}
}
-
+
@Override
ParallelPostingsArray createPostingsArray(int size) {
return new FreqProxPostingsArray(size);
@@ -212,7 +220,180 @@
return ParallelPostingsArray.BYTES_PER_POSTING + 4 * RamUsageEstimator.NUM_BYTES_INT;
}
}
-
+
public void abort() {}
+
+ BytesRef payload;
+
+ /* Walk through all unique text tokens (Posting
+ * instances) found in this field and serialize them
+ * into a single RAM segment. */
+ void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state)
+ throws CorruptIndexException, IOException {
+
+ final TermsConsumer termsConsumer = consumer.addField(fieldInfo);
+ final Comparator<BytesRef> termComp = termsConsumer.getComparator();
+
+ final Term protoTerm = new Term(fieldName);
+
+ final boolean currentFieldOmitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
+
+ final Map<Term,Integer> segDeletes;
+ if (state.segDeletes != null && state.segDeletes.terms.size() > 0) {
+ segDeletes = state.segDeletes.terms;
+ } else {
+ segDeletes = null;
+ }
+
+ final int[] termIDs = termsHashPerField.sortPostings(termComp);
+ final int numTerms = termsHashPerField.bytesHash.size();
+ final BytesRef text = new BytesRef();
+ final FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
+ final ByteSliceReader freq = new ByteSliceReader();
+ final ByteSliceReader prox = new ByteSliceReader();
+
+ long sumTotalTermFreq = 0;
+ for (int i = 0; i < numTerms; i++) {
+ final int termID = termIDs[i];
+ // Get BytesRef
+ final int textStart = postings.textStarts[termID];
+ termsHashPerField.bytePool.setBytesRef(text, textStart);
+
+ termsHashPerField.initReader(freq, termID, 0);
+ if (!fieldInfo.omitTermFreqAndPositions) {
+ termsHashPerField.initReader(prox, termID, 1);
+ }
+
+ // TODO: really TermsHashPerField should take over most
+ // of this loop, including merge sort of terms from
+ // multiple threads and interacting with the
+ // TermsConsumer, only calling out to us (passing us the
+ // DocsConsumer) to handle delivery of docs/positions
+
+ final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
+
+ final int delDocLimit;
+ if (segDeletes != null) {
+ final Integer docIDUpto = segDeletes.get(protoTerm.createTerm(text));
+ if (docIDUpto != null) {
+ delDocLimit = docIDUpto;
+ } else {
+ delDocLimit = 0;
+ }
+ } else {
+ delDocLimit = 0;
+ }
+
+ // Walk this term's docID stream, copying the
+ // freq and prox data from the in-memory byte
+ // slices into the codec's postings consumer.
+ int numDocs = 0;
+ long totTF = 0;
+ int docID = 0;
+ int termFreq = 0;
+
+ while(true) {
+ if (freq.eof()) {
+ if (postings.lastDocCodes[termID] != -1) {
+ // Return last doc
+ docID = postings.lastDocIDs[termID];
+ if (!omitTermFreqAndPositions) {
+ termFreq = postings.docFreqs[termID];
+ }
+ postings.lastDocCodes[termID] = -1;
+ } else {
+ // EOF
+ break;
+ }
+ } else {
+ final int code = freq.readVInt();
+ if (omitTermFreqAndPositions) {
+ docID += code;
+ } else {
+ docID += code >>> 1;
+ if ((code & 1) != 0) {
+ termFreq = 1;
+ } else {
+ termFreq = freq.readVInt();
+ }
+ }
+
+ assert docID != postings.lastDocIDs[termID];
+ }
+
+ numDocs++;
+ assert docID < state.numDocs: "doc=" + docID + " maxDoc=" + state.numDocs;
+ final int termDocFreq = termFreq;
+
+ // NOTE: we could check here if the docID was
+ // deleted, and skip it. However, this is somewhat
+ // dangerous because it can yield non-deterministic
+ // behavior since we may see the docID before we see
+ // the term that caused it to be deleted. This
+ // would mean some (but not all) of its postings may
+ // make it into the index, which'd alter the docFreq
+ // for those terms. We could fix this by doing two
+ // passes, ie first sweep marks all del docs, and
+ // 2nd sweep does the real flush, but I suspect
+ // that'd add too much time to flush.
+ postingsConsumer.startDoc(docID, termDocFreq);
+ if (docID < delDocLimit) {
+ // Mark it deleted. TODO: we could also skip
+ // writing its postings; this would be
+ // deterministic (just for this Term's docs).
+ if (state.deletedDocs == null) {
+ state.deletedDocs = new BitVector(state.numDocs);
+ }
+ state.deletedDocs.set(docID);
+ }
+
+ // Carefully copy over the prox + payload info,
+ // changing the format to match Lucene's segment
+ // format.
+ if (!currentFieldOmitTermFreqAndPositions) {
+ // omitTermFreqAndPositions == false so we do write positions &
+ // payload
+ int position = 0;
+ totTF += termDocFreq;
+ for(int j=0;j<termDocFreq;j++) {
+ final int code = prox.readVInt();
+ position += code >> 1;
+
+ final int payloadLength;
+ final BytesRef thisPayload;
+
+ if ((code & 1) != 0) {
+ // This position has a payload
+ payloadLength = prox.readVInt();
+
+ if (payload == null) {
+ payload = new BytesRef();
+ payload.bytes = new byte[payloadLength];
+ } else if (payload.bytes.length < payloadLength) {
+ payload.grow(payloadLength);
+ }
+
+ prox.readBytes(payload.bytes, 0, payloadLength);
+ payload.length = payloadLength;
+ thisPayload = payload;
+
+ } else {
+ payloadLength = 0;
+ thisPayload = null;
+ }
+
+ postingsConsumer.addPosition(position, thisPayload);
+ }
+
+ postingsConsumer.finishDoc();
+ }
+ }
+ termsConsumer.finishTerm(text, new TermStats(numDocs, totTF));
+ sumTotalTermFreq += totTF;
+ }
+
+ termsConsumer.finish(sumTotalTermFreq);
+ }
+
}
diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java
deleted file mode 100644
index 87af860..0000000
--- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerThread.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-final class FreqProxTermsWriterPerThread extends TermsHashConsumerPerThread {
- final TermsHashPerThread termsHashPerThread;
- final DocumentsWriter.DocState docState;
-
- public FreqProxTermsWriterPerThread(TermsHashPerThread perThread) {
- docState = perThread.docState;
- termsHashPerThread = perThread;
- }
-
- @Override
- public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) {
- return new FreqProxTermsWriterPerField(termsHashPerField, this, fieldInfo);
- }
-
- @Override
- void startDocument() {
- }
-
- @Override
- DocumentsWriter.DocWriter finishDocument() {
- return null;
- }
-
- @Override
- public void abort() {}
-}
diff --git a/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java b/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
index b542139..8ff3142 100644
--- a/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
+++ b/lucene/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
@@ -52,9 +52,15 @@
final int[] queryLimits;
final int bytesUsed;
final int numTermDeletes;
- final long gen;
+ private long gen = -1; // assigned by BufferedDeletesStream once pushed
+ final boolean isSegmentPrivate; // set to true iff this frozen packet represents
+ // segment-private deletes; in that case it should
+ // only have Queries
- public FrozenBufferedDeletes(BufferedDeletes deletes, long gen) {
+
+ public FrozenBufferedDeletes(BufferedDeletes deletes, boolean isSegmentPrivate) {
+ this.isSegmentPrivate = isSegmentPrivate;
+ assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private packet should only have del queries";
terms = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
queries = new Query[deletes.queries.size()];
queryLimits = new int[deletes.queries.size()];
@@ -66,8 +72,17 @@
}
bytesUsed = terms.length * BYTES_PER_DEL_TERM + queries.length * BYTES_PER_DEL_QUERY;
numTermDeletes = deletes.numTermDeletes.get();
+ }
+
+ public void setDelGen(long gen) {
+ assert this.gen == -1;
this.gen = gen;
}
+
+ public long delGen() {
+ assert gen != -1;
+ return gen;
+ }
public Iterable<Term> termsIterable() {
return new Iterable<Term>() {
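
The gen change above turns a constructor argument into an assert-guarded write-once field, since the generation is only known once BufferedDeletesStream pushes the packet. The pattern in isolation (hypothetical names):

    // Hypothetical sketch of the assert-guarded write-once pattern used for
    // delGen: created unset, assigned exactly once at push time, read-only after.
    final class WriteOnceGen {
      private long gen = -1; // -1 == not yet assigned

      void set(long gen) {
        assert this.gen == -1 : "gen already set";
        this.gen = gen;
      }

      long get() {
        assert gen != -1 : "gen not yet set";
        return gen;
      }
    }
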
diff --git a/lucene/src/java/org/apache/lucene/index/Healthiness.java b/lucene/src/java/org/apache/lucene/index/Healthiness.java
new file mode 100644
index 0000000..dcb9868
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/Healthiness.java
@@ -0,0 +1,121 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.concurrent.locks.AbstractQueuedSynchronizer;
+
+import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
+
+/**
+ * Controls the health status of a {@link DocumentsWriter} session. This class
+ * is used to block incoming indexing threads if flushing is significantly
+ * slower than indexing, to ensure the {@link DocumentsWriter}'s healthiness.
+ * If flushing is significantly slower than indexing, the net memory used
+ * within an {@link IndexWriter} session can increase very quickly and easily
+ * exceed the JVM's available memory.
+ * <p>
+ * To prevent OOM errors and ensure IndexWriter's stability, this class blocks
+ * incoming threads from indexing once 2 x the number of available
+ * {@link ThreadState}s in {@link DocumentsWriterPerThreadPool} is exceeded.
+ * Once flushing catches up and the number of flushing DWPT is equal to or
+ * lower than the number of active {@link ThreadState}s, threads are released
+ * and can continue indexing.
+ */
+//TODO: rename this to DocumentsWriterStallControl (or something like that)?
+final class Healthiness {
+
+ @SuppressWarnings("serial")
+ private static final class Sync extends AbstractQueuedSynchronizer {
+ volatile boolean hasBlockedThreads = false; // only with assert
+
+ Sync() {
+ setState(0);
+ }
+
+ boolean isHealthy() {
+ return getState() == 0;
+ }
+
+ boolean trySetStalled() {
+ int state = getState();
+ return compareAndSetState(state, state + 1);
+ }
+
+ boolean tryReset() {
+ final int oldState = getState();
+ if (oldState == 0)
+ return true;
+ if (compareAndSetState(oldState, 0)) {
+ releaseShared(0);
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public int tryAcquireShared(int acquires) {
+ assert maybeSetHasBlocked(getState());
+ return getState() == 0 ? 1 : -1;
+ }
+
+ // only used for testing
+ private boolean maybeSetHasBlocked(int state) {
+ hasBlockedThreads |= getState() != 0;
+ return true;
+ }
+
+ @Override
+ public boolean tryReleaseShared(int newState) {
+ return (getState() == 0);
+ }
+ }
+
+ private final Sync sync = new Sync();
+ volatile boolean wasStalled = false; // only with asserts
+
+ boolean anyStalledThreads() {
+ return !sync.isHealthy();
+ }
+
+ /**
+ * Updates the stalled flag status. This method sets the stalled flag to
+ * <code>true</code> iff the number of flushing
+ * {@link DocumentsWriterPerThread}s is greater than the number of active
+ * {@link DocumentsWriterPerThread}s. Otherwise it resets the
+ * {@link Healthiness} to healthy and releases all threads waiting on
+ * {@link #waitIfStalled()}.
+ */
+ void updateStalled(DocumentsWriterFlushControl flushControl) {
+ do {
+ // if we have more flushing DWPT than numActiveDWPT we stall!
+ while (flushControl.numActiveDWPT() < flushControl.numFlushingDWPT()) {
+ if (sync.trySetStalled()) {
+ assert wasStalled = true;
+ return;
+ }
+ }
+ } while (!sync.tryReset());
+ }
+
+ void waitIfStalled() {
+ sync.acquireShared(0);
+ }
+
+ boolean hasBlocked() {
+ return sync.hasBlockedThreads;
+ }
+}
\ No newline at end of file
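
The Sync class above uses AbstractQueuedSynchronizer as a shared gate: state 0 is healthy and lets every acquirer through, any positive state blocks them until reset. The same idiom in isolation (a sketch under those assumptions, not the patched class):

    import java.util.concurrent.locks.AbstractQueuedSynchronizer;

    // Minimal standalone sketch of the AQS gate idiom: open at state 0,
    // blocking all shared acquirers while stalled (state > 0).
    @SuppressWarnings("serial")
    final class StallGate extends AbstractQueuedSynchronizer {
      StallGate() { setState(0); }

      @Override protected int tryAcquireShared(int ignored) {
        return getState() == 0 ? 1 : -1; // pass iff healthy
      }

      @Override protected boolean tryReleaseShared(int ignored) {
        return getState() == 0; // wake waiters once reset back to healthy
      }

      void stall()   { compareAndSetState(0, 1); }
      void release() { if (compareAndSetState(1, 0)) releaseShared(0); }
      void await()   { acquireShared(0); } // blocks while stalled
    }
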
diff --git a/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java b/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
index 5d2f959..ecf41ba 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
@@ -21,7 +21,13 @@
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.PrintStream;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.store.Directory;
@@ -49,12 +55,12 @@
* (IndexDeletionPolicy) is consulted on creation (onInit)
* and once per commit (onCommit), to decide when a commit
* should be removed.
- *
+ *
* It is the business of the IndexDeletionPolicy to choose
* when to delete commit points. The actual mechanics of
* file deletion, retrying, etc, derived from the deletion
* of commit points is the business of the IndexFileDeleter.
- *
+ *
* The current default deletion policy is {@link
* KeepOnlyLastCommitDeletionPolicy}, which removes all
* prior commits when a new commit has completed. This
@@ -72,7 +78,7 @@
* so we will retry them again later: */
private List<String> deletable;
- /* Reference count for all files in the index.
+ /* Reference count for all files in the index.
* Counts how many existing commits reference a file.
**/
private Map<String, RefCount> refCounts = new HashMap<String, RefCount>();
@@ -88,7 +94,7 @@
* non-commit checkpoint: */
private List<Collection<String>> lastFiles = new ArrayList<Collection<String>>();
- /* Commits that the IndexDeletionPolicy have decided to delete: */
+ /* Commits that the IndexDeletionPolicy have decided to delete: */
private List<CommitPoint> commitsToDelete = new ArrayList<CommitPoint>();
private PrintStream infoStream;
@@ -108,7 +114,7 @@
message("setInfoStream deletionPolicy=" + policy);
}
}
-
+
private void message(String message) {
infoStream.println("IFD [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message);
}
@@ -139,12 +145,12 @@
// counts:
long currentGen = segmentInfos.getGeneration();
indexFilenameFilter = new IndexFileNameFilter(codecs);
-
+
CommitPoint currentCommitPoint = null;
String[] files = null;
try {
files = directory.listAll();
- } catch (NoSuchDirectoryException e) {
+ } catch (NoSuchDirectoryException e) {
// it means the directory is empty, so ignore it.
files = new String[0];
}
@@ -152,7 +158,7 @@
for (String fileName : files) {
if ((indexFilenameFilter.accept(null, fileName)) && !fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
-
+
// Add this file to refCounts with initial count 0:
getRefCount(fileName);
@@ -233,7 +239,7 @@
// Now delete anything with ref count at 0. These are
// presumably abandoned files eg due to crash of
// IndexWriter.
- for(Map.Entry<String, RefCount> entry : refCounts.entrySet() ) {
+ for(Map.Entry<String, RefCount> entry : refCounts.entrySet() ) {
RefCount rc = entry.getValue();
final String fileName = entry.getKey();
if (0 == rc.count) {
@@ -253,7 +259,7 @@
// Always protect the incoming segmentInfos since
// sometime it may not be the most recent commit
checkpoint(segmentInfos, false);
-
+
startingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.isDeleted();
deleteCommits();
@@ -327,7 +333,7 @@
segmentPrefix1 = null;
segmentPrefix2 = null;
}
-
+
for(int i=0;i<files.length;i++) {
String fileName = files[i];
if ((segmentName == null || fileName.startsWith(segmentPrefix1) || fileName.startsWith(segmentPrefix2)) &&
@@ -379,7 +385,7 @@
deleteCommits();
}
}
-
+
public void deletePendingFiles() throws IOException {
if (deletable != null) {
List<String> oldDeletable = deletable;
@@ -397,7 +403,7 @@
/**
* For definition of "check point" see IndexWriter comments:
* "Clarification: Check Points (and commits)".
- *
+ *
* Writer calls this when it has made a "consistent
* change" to the index, meaning new files are written to
* the index and the in-memory SegmentInfos have been
@@ -417,7 +423,7 @@
public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException {
if (infoStream != null) {
- message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
+ message("now checkpoint \"" + segmentInfos + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]");
}
// Try again now to delete any previously un-deletable
diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
index 99d9b10..478674a 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
@@ -35,6 +35,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
@@ -46,6 +47,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.ThreadInterruptedException;
@@ -54,17 +56,16 @@
/**
An <code>IndexWriter</code> creates and maintains an index.
- <p>The <code>create</code> argument to the {@link
- #IndexWriter(Directory, IndexWriterConfig) constructor} determines
+ <p>The {@link OpenMode} option on
+ {@link IndexWriterConfig#setOpenMode(OpenMode)} determines
whether a new index is created, or whether an existing index is
- opened. Note that you can open an index with <code>create=true</code>
- even while readers are using the index. The old readers will
+ opened. Note that you can open an index with {@link OpenMode#CREATE}
+ even while readers are using the index. The old readers will
continue to search the "point in time" snapshot they had opened,
- and won't see the newly created index until they re-open. There are
- also {@link #IndexWriter(Directory, IndexWriterConfig) constructors}
- with no <code>create</code> argument which will create a new index
- if there is not already an index at the provided path and otherwise
- open the existing index.</p>
+ and won't see the newly created index until they re-open. If
+ {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a
+ new index if there is not already an index at the provided path
+ and otherwise open the existing index.</p>
<p>In either case, documents are added with {@link #addDocument(Document)
addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
@@ -76,15 +77,19 @@
<a name="flush"></a>
<p>These changes are buffered in memory and periodically
flushed to the {@link Directory} (during the above method
- calls). A flush is triggered when there are enough
- buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms})
- or enough added documents since the last flush, whichever
- is sooner. For the added documents, flushing is triggered
- either by RAM usage of the documents (see {@link
- IndexWriterConfig#setRAMBufferSizeMB}) or the number of added documents.
- The default is to flush when RAM usage hits 16 MB. For
+ calls). A flush is triggered when there are enough added documents
+ since the last flush. Flushing is triggered either by RAM usage of the
+ documents (see {@link IndexWriterConfig#setRAMBufferSizeMB}) or the
+ number of added documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}).
+ The default is to flush when RAM usage hits
+ {@value IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For
best indexing speed you should flush by RAM usage with a
- large RAM buffer. Note that flushing just moves the
+ large RAM buffer. Additionally, if IndexWriter reaches the configured number of
+ buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms})
+ the deleted terms and queries are flushed and applied to existing segments.
+ In contrast to the other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and
+ {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms
+ won't trigger a segment flush. Note that flushing just moves the
internal buffered state in IndexWriter into the index, but
these changes are not visible to IndexReader until either
{@link #commit()} or {@link #close} is called. A flush may
@@ -165,21 +170,21 @@
/*
* Clarification: Check Points (and commits)
* IndexWriter writes new index files to the directory without writing a new segments_N
- * file which references these new files. It also means that the state of
+ * file which references these new files. It also means that the state of
* the in memory SegmentInfos object is different than the most recent
* segments_N file written to the directory.
- *
- * Each time the SegmentInfos is changed, and matches the (possibly
- * modified) directory files, we have a new "check point".
- * If the modified/new SegmentInfos is written to disk - as a new
- * (generation of) segments_N file - this check point is also an
+ *
+ * Each time the SegmentInfos is changed, and matches the (possibly
+ * modified) directory files, we have a new "check point".
+ * If the modified/new SegmentInfos is written to disk - as a new
+ * (generation of) segments_N file - this check point is also an
* IndexCommit.
- *
- * A new checkpoint always replaces the previous checkpoint and
- * becomes the new "front" of the index. This allows the IndexFileDeleter
+ *
+ * A new checkpoint always replaces the previous checkpoint and
+ * becomes the new "front" of the index. This allows the IndexFileDeleter
* to delete files that are referenced only by stale checkpoints.
* (files that were created since the last commit, but are no longer
- * referenced by the "front" of the index). For this, IndexFileDeleter
+ * referenced by the "front" of the index). For this, IndexFileDeleter
* keeps track of the last non commit checkpoint.
*/
public class IndexWriter implements Closeable {
@@ -195,7 +200,7 @@
* printed to infoStream, if set (see {@link
* #setInfoStream}).
*/
- public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH_UTF8;
+ public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8;
// The normal read buffer size defaults to 1024, but
// increasing this during merging seems to yield
@@ -225,7 +230,7 @@
final FieldNumberBiMap globalFieldNumberMap;
private DocumentsWriter docWriter;
- private IndexFileDeleter deleter;
+ final IndexFileDeleter deleter;
private Set<SegmentInfo> segmentsToOptimize = new HashSet<SegmentInfo>(); // used by optimize to note those needing optimization
private int optimizeMaxNumSegments;
@@ -247,12 +252,12 @@
private long mergeGen;
private boolean stopMerges;
- private final AtomicInteger flushCount = new AtomicInteger();
- private final AtomicInteger flushDeletesCount = new AtomicInteger();
+ final AtomicInteger flushCount = new AtomicInteger();
+ final AtomicInteger flushDeletesCount = new AtomicInteger();
final ReaderPool readerPool = new ReaderPool();
final BufferedDeletesStream bufferedDeletesStream;
-
+
// This is a "write once" variable (like the organic dye
// on a DVD-R that may or may not be heated by a laser and
// then cooled to permanently record the event): it's
@@ -339,31 +344,58 @@
*/
IndexReader getReader(boolean applyAllDeletes) throws IOException {
ensureOpen();
-
+
final long tStart = System.currentTimeMillis();
if (infoStream != null) {
message("flush at getReader");
}
-
// Do this up front before flushing so that the readers
// obtained during this flush are pooled, the first time
// this method is called:
poolReaders = true;
-
- // Prevent segmentInfos from changing while opening the
- // reader; in theory we could do similar retry logic,
- // just like we do when loading segments_N
- IndexReader r;
- synchronized(this) {
- flush(false, applyAllDeletes);
- r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes);
- if (infoStream != null) {
- message("return reader version=" + r.getVersion() + " reader=" + r);
+ final IndexReader r;
+ doBeforeFlush();
+ final boolean anySegmentFlushed;
+ /*
+ * for releasing a NRT reader we must ensure that
+ * DW doesn't add any segments or deletes until we are
+ * done with creating the NRT DirectoryReader.
+ * We release the two-stage full flush after we are done opening the
+ * directory reader!
+ */
+ synchronized (fullFlushLock) {
+ boolean success = false;
+ try {
+ anySegmentFlushed = docWriter.flushAllThreads();
+ if (!anySegmentFlushed) {
+ // prevent a double increment since docWriter#doFlush increments flushCount
+ // if we flushed anything.
+ flushCount.incrementAndGet();
+ }
+ success = true;
+ // Prevent segmentInfos from changing while opening the
+ // reader; in theory we could do similar retry logic,
+ // just like we do when loading segments_N
+ synchronized(this) {
+ maybeApplyDeletes(applyAllDeletes);
+ r = new DirectoryReader(this, segmentInfos, config.getReaderTermsIndexDivisor(), codecs, applyAllDeletes);
+ if (infoStream != null) {
+ message("return reader version=" + r.getVersion() + " reader=" + r);
+ }
+ }
+ } finally {
+ if (!success && infoStream != null) {
+ message("hit exception during while NRT reader");
+ }
+ // Done: finish the full flush!
+ docWriter.finishFullFlush(success);
+ doAfterFlush();
}
}
- maybeMerge();
-
+ if (anySegmentFlushed) {
+ maybeMerge();
+ }
if (infoStream != null) {
message("getReader took " + (System.currentTimeMillis() - tStart) + " msec");
}
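The reworked getReader above performs the full flush of all DWPTs under fullFlushLock and only then opens the pooled DirectoryReader, so NRT readers see flushed-but-uncommitted documents. A minimal usage sketch, assuming the public getReader() wrapper on this branch delegates to getReader(boolean); Version.LUCENE_40 and the RAMDirectory are illustrative choices:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class NrtReaderSketch {
      public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
            Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
        Document doc = new Document();
        doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);
        IndexReader reader = writer.getReader(); // flushes, but does not commit
        try {
          System.out.println("numDocs=" + reader.numDocs()); // 1: sees the uncommitted doc
        } finally {
          reader.close();
          writer.close();
        }
      }
    }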
@@ -400,10 +432,10 @@
if (r != null) {
r.hasChanges = false;
}
- }
+ }
}
}
-
+
// used only by asserts
public synchronized boolean infoIsLive(SegmentInfo info) {
int idx = segmentInfos.indexOf(info);
@@ -419,7 +451,7 @@
}
return info;
}
-
+
/**
* Release the segment reader (i.e. decRef it and close if there
* are no more references.
@@ -432,7 +464,7 @@
public synchronized boolean release(SegmentReader sr) throws IOException {
return release(sr, false);
}
-
+
/**
* Release the segment reader (i.e. decRef it and close if there
* are no more references.
@@ -493,7 +525,7 @@
sr.close();
}
}
-
+
/** Remove all our references to readers, and commits
* any pending changes. */
synchronized void close() throws IOException {
@@ -503,7 +535,7 @@
Iterator<Map.Entry<SegmentInfo,SegmentReader>> iter = readerMap.entrySet().iterator();
while (iter.hasNext()) {
-
+
Map.Entry<SegmentInfo,SegmentReader> ent = iter.next();
SegmentReader sr = ent.getValue();
@@ -526,7 +558,7 @@
sr.decRef();
}
}
-
+
/**
* Commit all segment reader in the pool.
* @throws IOException
@@ -550,7 +582,7 @@
}
}
}
-
+
/**
* Returns a ref to a clone. NOTE: this clone is not
* enrolled in the pool, so you should simply close()
@@ -564,7 +596,7 @@
sr.decRef();
}
}
-
+
/**
* Obtain a SegmentReader from the readerPool. The reader
* must be returned by calling {@link #release(SegmentReader)}
@@ -580,7 +612,7 @@
/**
* Obtain a SegmentReader from the readerPool. The reader
* must be returned by calling {@link #release(SegmentReader)}
- *
+ *
* @see #release(SegmentReader)
* @param info
* @param doOpenStores
@@ -638,7 +670,7 @@
return sr;
}
}
-
+
/**
* Obtain the number of deleted docs for a pooled reader.
* If the reader isn't being pooled, the segmentInfo's
@@ -658,7 +690,7 @@
}
}
}
-
+
/**
* Used internally to throw an {@link
* AlreadyClosedException} if this IndexWriter has been
@@ -721,7 +753,7 @@
mergePolicy.setIndexWriter(this);
mergeScheduler = conf.getMergeScheduler();
codecs = conf.getCodecProvider();
-
+
bufferedDeletesStream = new BufferedDeletesStream(messageID);
bufferedDeletesStream.setInfoStream(infoStream);
poolReaders = conf.getReaderPooling();
@@ -790,8 +822,7 @@
// start with previous field numbers, but new FieldInfos
globalFieldNumberMap = segmentInfos.getOrLoadGlobalFieldNumberMap(directory);
- docWriter = new DocumentsWriter(config, directory, this, conf.getIndexingChain(),
- globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)), bufferedDeletesStream);
+ docWriter = new DocumentsWriter(config, directory, this, globalFieldNumberMap, bufferedDeletesStream);
docWriter.setInfoStream(infoStream);
// Default deleter (for backwards compatibility) is
@@ -849,7 +880,7 @@
public IndexWriterConfig getConfig() {
return config;
}
-
+
/** If non-null, this will be the default infoStream used
* by a newly instantiated IndexWriter.
* @see #setInfoStream
@@ -901,7 +932,7 @@
public boolean verbose() {
return infoStream != null;
}
-
+
/**
* Commits all changes to an index and closes all
* associated files. Note that this may be a costly
@@ -916,7 +947,7 @@
* even though part of it (flushing buffered documents)
* may have succeeded, so the write lock will still be
* held.</p>
- *
+ *
* <p> If you can correct the underlying cause (eg free up
* some disk space) then you can call close() again.
* Failing that, if you want to force the write lock to be
@@ -1036,7 +1067,7 @@
if (infoStream != null)
message("now call final commit()");
-
+
if (!hitOOM) {
commitInternal(null);
}
@@ -1049,7 +1080,7 @@
docWriter = null;
deleter.close();
}
-
+
if (writeLock != null) {
writeLock.release(); // release write lock
writeLock = null;
@@ -1072,7 +1103,7 @@
}
/** Returns the Directory used by this index. */
- public Directory getDirectory() {
+ public Directory getDirectory() {
// Pass false because the flush during closing calls getDirectory
ensureOpen(false);
return directory;
@@ -1196,22 +1227,7 @@
* @throws IOException if there is a low-level IO error
*/
public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
- ensureOpen();
- boolean doFlush = false;
- boolean success = false;
- try {
- try {
- doFlush = docWriter.updateDocument(doc, analyzer, null);
- success = true;
- } finally {
- if (!success && infoStream != null)
- message("hit exception adding document");
- }
- if (doFlush)
- flush(true, false);
- } catch (OutOfMemoryError oom) {
- handleOOM(oom, "addDocument");
- }
+ updateDocument(null, doc, analyzer);
}
/**
@@ -1228,9 +1244,7 @@
public void deleteDocuments(Term term) throws CorruptIndexException, IOException {
ensureOpen();
try {
- if (docWriter.deleteTerm(term, false)) {
- flush(true, false);
- }
+ docWriter.deleteTerms(term);
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Term)");
}
@@ -1238,7 +1252,8 @@
/**
* Deletes the document(s) containing any of the
- * terms. All deletes are flushed at the same time.
+ * terms. All given deletes are applied and flushed atomically
+ * at the same time.
*
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
@@ -1252,9 +1267,7 @@
public void deleteDocuments(Term... terms) throws CorruptIndexException, IOException {
ensureOpen();
try {
- if (docWriter.deleteTerms(terms)) {
- flush(true, false);
- }
+ docWriter.deleteTerms(terms);
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Term..)");
}
@@ -1274,9 +1287,7 @@
public void deleteDocuments(Query query) throws CorruptIndexException, IOException {
ensureOpen();
try {
- if (docWriter.deleteQuery(query)) {
- flush(true, false);
- }
+ docWriter.deleteQueries(query);
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Query)");
}
@@ -1284,7 +1295,7 @@
/**
* Deletes the document(s) matching any of the provided queries.
- * All deletes are flushed at the same time.
+ * All given deletes are applied and flushed atomically at the same time.
*
* <p><b>NOTE</b>: if this method hits an OutOfMemoryError
* you should immediately close the writer. See <a
@@ -1298,9 +1309,7 @@
public void deleteDocuments(Query... queries) throws CorruptIndexException, IOException {
ensureOpen();
try {
- if (docWriter.deleteQueries(queries)) {
- flush(true, false);
- }
+ docWriter.deleteQueries(queries);
} catch (OutOfMemoryError oom) {
handleOOM(oom, "deleteDocuments(Query..)");
}
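After this change the delete methods only buffer their terms and queries; nothing is flushed here, and everything passed to a single call is applied atomically when a flush or commit eventually happens. A fragment in the same spirit, reusing the writer from the NRT sketch above (assumes imports of org.apache.lucene.index.Term and org.apache.lucene.search.TermQuery; field names are illustrative):

    // both terms are buffered and will take effect atomically
    writer.deleteDocuments(new Term("id", "1"), new Term("id", "2"));
    // delete-by-query is buffered the same way, no flush is triggered
    writer.deleteDocuments(new TermQuery(new Term("type", "draft")));
    writer.commit(); // applies the buffered deletes and makes them durable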
@@ -1350,17 +1359,18 @@
throws CorruptIndexException, IOException {
ensureOpen();
try {
- boolean doFlush = false;
boolean success = false;
+ boolean anySegmentFlushed = false;
try {
- doFlush = docWriter.updateDocument(doc, analyzer, term);
+ anySegmentFlushed = docWriter.updateDocument(doc, analyzer, term);
success = true;
} finally {
if (!success && infoStream != null)
message("hit exception updating document");
}
- if (doFlush) {
- flush(true, false);
+
+ if (anySegmentFlushed) {
+ maybeMerge();
}
} catch (OutOfMemoryError oom) {
handleOOM(oom, "updateDocument");
@@ -1546,7 +1556,7 @@
resetMergeExceptions();
segmentsToOptimize = new HashSet<SegmentInfo>(segmentInfos);
optimizeMaxNumSegments = maxNumSegments;
-
+
// Now mark all pending & running merges as optimize
// merge:
for(final MergePolicy.OneMerge merge : pendingMerges) {
@@ -1612,12 +1622,12 @@
if (merge.optimize)
return true;
}
-
+
for (final MergePolicy.OneMerge merge : runningMerges) {
if (merge.optimize)
return true;
}
-
+
return false;
}
@@ -1914,7 +1924,7 @@
/**
* Delete all documents in the index.
*
- * <p>This method will drop all buffered documents and will
+ * <p>This method will drop all buffered documents and will
* remove all segments from the index. This change will not be
* visible until a {@link #commit()} has been called. This method
* can be rolled back using {@link #rollback()}.</p>
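Since deleteAll only changes buffered state, the two outcomes mentioned in the javadoc look like this in practice (same illustrative writer; note that rollback also closes the writer):

    writer.deleteAll(); // drops buffered docs and marks every segment for removal
    writer.commit();    // makes the now-empty index durable
    // or, instead of committing: writer.rollback() restores the last commit point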
@@ -1944,7 +1954,7 @@
deleter.refresh();
// Don't bother saving any changes in our segmentInfos
- readerPool.clear(null);
+ readerPool.clear(null);
// Mark that the index has changed
++changeCount;
@@ -1971,7 +1981,7 @@
mergeFinish(merge);
}
pendingMerges.clear();
-
+
for (final MergePolicy.OneMerge merge : runningMerges) {
if (infoStream != null)
message("now abort running merge " + merge.segString(directory));
@@ -1998,7 +2008,7 @@
message("all running merges have aborted");
} else {
- // waitForMerges() will ensure any running addIndexes finishes.
+ // waitForMerges() will ensure any running addIndexes finishes.
// It's fine if a new one attempts to start because from our
// caller above the call will see that we are in the
// process of closing, and will throw an
@@ -2010,7 +2020,7 @@
/**
* Wait for any currently outstanding merges to finish.
*
- * <p>It is guaranteed that any merges started prior to calling this method
+ * <p>It is guaranteed that any merges started prior to calling this method
* will have completed once this method completes.</p>
*/
public synchronized void waitForMerges() {
@@ -2040,6 +2050,125 @@
deleter.checkpoint(segmentInfos, false);
}
+ /**
+ * Prepares the {@link SegmentInfo} for the new flushed segment and persists
+ * the deleted documents {@link BitVector}. Use
+ * {@link #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes)} to
+ * publish the returned {@link SegmentInfo} together with its segment private
+ * delete packet.
+ *
+ * @see #publishFlushedSegment(SegmentInfo, FrozenBufferedDeletes)
+ */
+ SegmentInfo prepareFlushedSegment(FlushedSegment flushedSegment) throws IOException {
+ assert flushedSegment != null;
+
+ SegmentInfo newSegment = flushedSegment.segmentInfo;
+
+ setDiagnostics(newSegment, "flush");
+
+ boolean success = false;
+ try {
+ if (useCompoundFile(newSegment)) {
+ String compoundFileName = IndexFileNames.segmentFileName(newSegment.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
+ message("creating compound file " + compoundFileName);
+ // Now build compound file
+ CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
+ for(String fileName : newSegment.files()) {
+ cfsWriter.addFile(fileName);
+ }
+
+ // Perform the merge
+ cfsWriter.close();
+ synchronized(this) {
+ deleter.deleteNewFiles(newSegment.files());
+ }
+
+ newSegment.setUseCompoundFile(true);
+ }
+
+ // Must write deleted docs after the CFS so we don't
+ // slurp the del file into CFS:
+ if (flushedSegment.deletedDocuments != null) {
+ final int delCount = flushedSegment.deletedDocuments.count();
+ assert delCount > 0;
+ newSegment.setDelCount(delCount);
+ newSegment.advanceDelGen();
+ final String delFileName = newSegment.getDelFileName();
+ if (infoStream != null) {
+ message("flush: write " + delCount + " deletes to " + delFileName);
+ }
+ boolean success2 = false;
+ try {
+ // TODO: in the NRT case it'd be better to hand
+ // this del vector over to the
+ // shortly-to-be-opened SegmentReader and let it
+ // carry the changes; there's no reason to use
+ // filesystem as intermediary here.
+ flushedSegment.deletedDocuments.write(directory, delFileName);
+ success2 = true;
+ } finally {
+ if (!success2) {
+ try {
+ directory.deleteFile(delFileName);
+ } catch (Throwable t) {
+ // suppress this so we keep throwing the
+ // original exception
+ }
+ }
+ }
+ }
+
+ success = true;
+ } finally {
+ if (!success) {
+ if (infoStream != null) {
+ message("hit exception " +
+ "reating compound file for newly flushed segment " + newSegment.name);
+ }
+
+ synchronized(this) {
+ deleter.refresh(newSegment.name);
+ }
+ }
+ }
+ return newSegment;
+ }
+
+ /**
+ * Atomically adds the segment private delete packet and publishes the flushed
+ * segment's SegmentInfo to the index writer. NOTE: use
+ * {@link #prepareFlushedSegment(FlushedSegment)} to obtain the
+ * {@link SegmentInfo} for the flushed segment.
+ *
+ * @see #prepareFlushedSegment(FlushedSegment)
+ */
+ synchronized void publishFlushedSegment(SegmentInfo newSegment,
+ FrozenBufferedDeletes packet, FrozenBufferedDeletes globalPacket) throws IOException {
+ // Lock order IW -> BDS
+ synchronized (bufferedDeletesStream) {
+ if (globalPacket != null && globalPacket.any()) {
+ bufferedDeletesStream.push(globalPacket);
+ }
+ // Publishing the segment must be synced on IW -> BDS to make sure
+ // that no merge prunes away the segment private delete packet
+ final long nextGen;
+ if (packet != null && packet.any()) {
+ nextGen = bufferedDeletesStream.push(packet);
+ } else {
+ // Since we don't have a delete packet to apply we can get a new
+ // generation right away
+ nextGen = bufferedDeletesStream.getNextGen();
+ }
+ newSegment.setBufferedDeletesGen(nextGen);
+ segmentInfos.add(newSegment);
+ checkpoint();
+ }
+ }
+
+ synchronized boolean useCompoundFile(SegmentInfo segmentInfo) throws IOException {
+ return mergePolicy.useCompoundFile(segmentInfos, segmentInfo);
+ }
+
private synchronized void resetMergeExceptions() {
mergeExceptions = new ArrayList<MergePolicy.OneMerge>();
mergeGen++;
@@ -2088,11 +2217,11 @@
* <p>
* <b>NOTE:</b> this method only copies the segments of the incoming indexes
* and does not merge them. Therefore deleted documents are not removed and
- * the new segments are not merged with the existing ones. Also, the segments
- * are copied as-is, meaning they are not converted to CFS if they aren't,
- * and vice-versa. If you wish to do that, you can call {@link #maybeMerge}
+ * the new segments are not merged with the existing ones. Also, the segments
+ * are copied as-is, meaning they are not converted to CFS if they aren't,
+ * and vice-versa. If you wish to do that, you can call {@link #maybeMerge}
* or {@link #optimize} afterwards.
- *
+ *
* <p>This requires this index not be among those to be added.
*
* <p>
@@ -2129,7 +2258,7 @@
docCount += info.docCount;
String newSegName = newSegmentName();
String dsName = info.getDocStoreSegment();
-
+
if (infoStream != null) {
message("addIndexes: process segment origName=" + info.name + " newName=" + newSegName + " dsName=" + dsName + " info=" + info);
}
@@ -2176,7 +2305,7 @@
infos.add(info);
}
- }
+ }
synchronized (this) {
ensureOpen();
@@ -2225,11 +2354,12 @@
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
mergedName, null, codecs, payloadProcessorProvider,
globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
-
+
for (IndexReader reader : readers) // add new indexes
merger.add(reader);
-
+
int docCount = merger.merge(); // merge 'em
+
final FieldInfos fieldInfos = merger.fieldInfos();
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
@@ -2241,11 +2371,11 @@
synchronized(this) { // Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, info);
}
-
+
// Now create the compound file if needed
if (useCompoundFile) {
merger.createCompoundFile(mergedName + ".cfs", info);
-
+
// delete new non cfs files directly: they were never
// registered with IFD
deleter.deleteNewFiles(info.files());
@@ -2297,7 +2427,7 @@
* #commit()} to finish the commit, or {@link
* #rollback()} to revert the commit and undo all changes
* done since the writer was opened.</p>
- *
+ *
* You can also just call {@link #commit(Map)} directly
* without prepareCommit first in which case that method
* will internally call prepareCommit.
@@ -2441,6 +2571,10 @@
}
}
+ // Ensures only one flush() is actually flushing segments
+ // at a time:
+ private final Object fullFlushLock = new Object();
+
/**
* Flush all in-memory buffered updates (adds and deletes)
* to the Directory.
@@ -2464,116 +2598,104 @@
}
}
- // TODO: this method should not have to be entirely
- // synchronized, ie, merges should be allowed to commit
- // even while a flush is happening
- private synchronized boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException {
-
+ private boolean doFlush(boolean applyAllDeletes) throws CorruptIndexException, IOException {
if (hitOOM) {
throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot flush");
}
doBeforeFlush();
-
assert testPoint("startDoFlush");
-
- // We may be flushing because it was triggered by doc
- // count, del count, ram usage (in which case flush
- // pending is already set), or we may be flushing
- // due to external event eg getReader or commit is
- // called (in which case we now set it, and this will
- // pause all threads):
- flushControl.setFlushPendingNoWait("explicit flush");
-
boolean success = false;
-
try {
if (infoStream != null) {
message(" start flush: applyAllDeletes=" + applyAllDeletes);
message(" index before flush " + segString());
}
-
- final SegmentInfo newSegment = docWriter.flush(this, deleter, mergePolicy, segmentInfos);
- if (newSegment != null) {
- setDiagnostics(newSegment, "flush");
- segmentInfos.add(newSegment);
- checkpoint();
- }
-
- if (!applyAllDeletes) {
- // If deletes alone are consuming > 1/2 our RAM
- // buffer, force them all to apply now. This is to
- // prevent too-frequent flushing of a long tail of
- // tiny segments:
- if (flushControl.getFlushDeletes() ||
- (config.getRAMBufferSizeMB() != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
- bufferedDeletesStream.bytesUsed() > (1024*1024*config.getRAMBufferSizeMB()/2))) {
- applyAllDeletes = true;
- if (infoStream != null) {
- message("force apply deletes bytesUsed=" + bufferedDeletesStream.bytesUsed() + " vs ramBuffer=" + (1024*1024*config.getRAMBufferSizeMB()));
- }
+ final boolean anySegmentFlushed;
+
+ synchronized (fullFlushLock) {
+ try {
+ anySegmentFlushed = docWriter.flushAllThreads();
+ success = true;
+ } finally {
+ docWriter.finishFullFlush(success);
}
}
-
- if (applyAllDeletes) {
- if (infoStream != null) {
- message("apply all deletes during flush");
+ success = false;
+ synchronized(this) {
+ maybeApplyDeletes(applyAllDeletes);
+ doAfterFlush();
+ if (!anySegmentFlushed) {
+ // flushCount is incremented in flushAllThreads
+ flushCount.incrementAndGet();
}
- flushDeletesCount.incrementAndGet();
- final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, segmentInfos);
- if (result.anyDeletes) {
- checkpoint();
- }
- if (!keepFullyDeletedSegments && result.allDeleted != null) {
- if (infoStream != null) {
- message("drop 100% deleted segments: " + result.allDeleted);
- }
- for(SegmentInfo info : result.allDeleted) {
- // If a merge has already registered for this
- // segment, we leave it in the readerPool; the
- // merge will skip merging it and will then drop
- // it once it's done:
- if (!mergingSegments.contains(info)) {
- segmentInfos.remove(info);
- if (readerPool != null) {
- readerPool.drop(info);
- }
- }
- }
- checkpoint();
- }
- bufferedDeletesStream.prune(segmentInfos);
- assert !bufferedDeletesStream.any();
- flushControl.clearDeletes();
- } else if (infoStream != null) {
- message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
+ success = true;
+ return anySegmentFlushed;
}
-
- doAfterFlush();
- flushCount.incrementAndGet();
-
- success = true;
-
- return newSegment != null;
-
} catch (OutOfMemoryError oom) {
handleOOM(oom, "doFlush");
// never hit
return false;
} finally {
- flushControl.clearFlushPending();
if (!success && infoStream != null)
message("hit exception during flush");
}
}
+
+ final synchronized void maybeApplyDeletes(boolean applyAllDeletes) throws IOException {
+ if (applyAllDeletes) {
+ if (infoStream != null) {
+ message("apply all deletes during flush");
+ }
+ applyAllDeletes();
+ } else if (infoStream != null) {
+ message("don't apply deletes now delTermCount=" + bufferedDeletesStream.numTerms() + " bytesUsed=" + bufferedDeletesStream.bytesUsed());
+ }
+
+ }
+
+ final synchronized void applyAllDeletes() throws IOException {
+ flushDeletesCount.incrementAndGet();
+ final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream
+ .applyDeletes(readerPool, segmentInfos);
+ if (result.anyDeletes) {
+ checkpoint();
+ }
+ if (!keepFullyDeletedSegments && result.allDeleted != null) {
+ if (infoStream != null) {
+ message("drop 100% deleted segments: " + result.allDeleted);
+ }
+ for (SegmentInfo info : result.allDeleted) {
+ // If a merge has already registered for this
+ // segment, we leave it in the readerPool; the
+ // merge will skip merging it and will then drop
+ // it once it's done:
+ if (!mergingSegments.contains(info)) {
+ segmentInfos.remove(info);
+ if (readerPool != null) {
+ readerPool.drop(info);
+ }
+ }
+ }
+ checkpoint();
+ }
+ bufferedDeletesStream.prune(segmentInfos);
+ }
/** Expert: Return the total size of all index files currently cached in memory.
* Useful for size management with flushRamDocs()
*/
public final long ramSizeInBytes() {
ensureOpen();
- return docWriter.bytesUsed() + bufferedDeletesStream.bytesUsed();
+ return docWriter.flushControl.netBytes() + bufferedDeletesStream.bytesUsed();
+ }
+
+ // for testing only
+ DocumentsWriter getDocsWriter() {
+ boolean test = false;
+ assert test = true;
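+ // the assignment only evaluates when assertions are enabled (-ea), so production callers get null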
+ return test ? docWriter : null;
}
/** Expert: Return the number of documents currently
@@ -2709,7 +2831,7 @@
}
commitMergedDeletes(merge, mergedReader);
-
+
// If the doc store we are using has been closed and
// is in now compound format (but wasn't when we
// started), then we will switch to the compound
@@ -2723,7 +2845,7 @@
message("merged segment " + merge.info + " is 100% deleted" + (keepFullyDeletedSegments ? "" : "; skipping insert"));
}
- final Set mergedAway = new HashSet<SegmentInfo>(merge.segments);
+ final Set<SegmentInfo> mergedAway = new HashSet<SegmentInfo>(merge.segments);
int segIdx = 0;
int newSegIdx = 0;
boolean inserted = false;
@@ -2770,15 +2892,15 @@
// them so that they don't bother writing them to
// disk, updating SegmentInfo, etc.:
readerPool.clear(merge.segments);
-
+
if (merge.optimize) {
// cascade the optimize:
segmentsToOptimize.add(merge.info);
}
-
+
return true;
}
-
+
final private void handleMergeException(Throwable t, MergePolicy.OneMerge merge) throws IOException {
if (infoStream != null) {
@@ -2867,7 +2989,7 @@
/** Hook that's called when the specified merge is complete. */
void mergeSuccess(MergePolicy.OneMerge merge) {
}
-
+
/** Checks whether this merge involves any segments
* already participating in a merge. If not, this merge
* is "registered", meaning we record that its segments
@@ -2998,7 +3120,6 @@
// Lock order: IW -> BD
bufferedDeletesStream.prune(segmentInfos);
-
Map<String,String> details = new HashMap<String,String>();
details.put("optimize", Boolean.toString(merge.optimize));
details.put("mergeFactor", Integer.toString(merge.segments.size()));
@@ -3019,11 +3140,11 @@
mergingSegments.add(merge.info);
}
- private void setDiagnostics(SegmentInfo info, String source) {
+ static void setDiagnostics(SegmentInfo info, String source) {
setDiagnostics(info, source, null);
}
- private void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
+ private static void setDiagnostics(SegmentInfo info, String source, Map<String,String> details) {
Map<String,String> diagnostics = new HashMap<String,String>();
diagnostics.put("source", source);
diagnostics.put("lucene.version", Constants.LUCENE_VERSION);
@@ -3041,7 +3162,7 @@
/** Does fininishing for a merge, which is fast but holds
* the synchronized lock on IndexWriter instance. */
final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException {
-
+
// Optimize, addIndexes or finishMerges may be waiting
// on merges to finish.
notifyAll();
@@ -3113,11 +3234,11 @@
* instance */
private int mergeMiddle(MergePolicy.OneMerge merge)
throws CorruptIndexException, IOException {
-
+
merge.checkAborted(directory);
final String mergedName = merge.info.name;
-
+
int mergedDocCount = 0;
SegmentInfos sourceSegments = merge.segments;
@@ -3191,7 +3312,7 @@
message("merge store matchedCount=" + merger.getMatchedSubReaderCount() + " vs " + merge.readers.size());
}
anyNonBulkMerges |= merger.getAnyNonBulkMerges();
-
+
assert mergedDocCount == totDocCount: "mergedDocCount=" + mergedDocCount + " vs " + totDocCount;
// Very important to do this before opening the reader
@@ -3325,12 +3446,12 @@
// For test purposes.
final int getBufferedDeleteTermsSize() {
- return docWriter.getPendingDeletes().terms.size();
+ return docWriter.getBufferedDeleteTermsSize();
}
// For test purposes.
final int getNumBufferedDeleteTerms() {
- return docWriter.getPendingDeletes().numTermDeletes.get();
+ return docWriter.getNumBufferedDeleteTerms();
}
// utility routines for tests
@@ -3445,17 +3566,17 @@
assert lastCommitChangeCount <= changeCount;
myChangeCount = changeCount;
-
+
if (changeCount == lastCommitChangeCount) {
if (infoStream != null)
message(" skip startCommit(): no changes pending");
return;
}
-
+
// First, we clone & incref the segmentInfos we intend
// to sync, then, without locking, we sync() all files
// referenced by toSync, in the background.
-
+
if (infoStream != null)
message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount);
@@ -3463,10 +3584,10 @@
toSync = (SegmentInfos) segmentInfos.clone();
assert filesExist(toSync);
-
+
if (commitUserData != null)
toSync.setUserData(commitUserData);
-
+
// This protects the segmentInfos we are now going
// to commit. This is important in case, eg, while
// we are trying to sync all referenced files, a
@@ -3598,7 +3719,7 @@
/** Expert: remove any index files that are no longer
* used.
- *
+ *
* <p> IndexWriter normally deletes unused files itself,
* during indexing. However, on Windows, which disallows
* deletion of open files, if there is a reader open on
@@ -3647,7 +3768,7 @@
public void setPayloadProcessorProvider(PayloadProcessorProvider pcp) {
payloadProcessorProvider = pcp;
}
-
+
/**
* Returns the {@link PayloadProcessorProvider} that is used during segment
* merges to process payloads.
@@ -3655,124 +3776,4 @@
public PayloadProcessorProvider getPayloadProcessorProvider() {
return payloadProcessorProvider;
}
-
- // decides when flushes happen
- final class FlushControl {
-
- private boolean flushPending;
- private boolean flushDeletes;
- private int delCount;
- private int docCount;
- private boolean flushing;
-
- private synchronized boolean setFlushPending(String reason, boolean doWait) {
- if (flushPending || flushing) {
- if (doWait) {
- while(flushPending || flushing) {
- try {
- wait();
- } catch (InterruptedException ie) {
- throw new ThreadInterruptedException(ie);
- }
- }
- }
- return false;
- } else {
- if (infoStream != null) {
- message("now trigger flush reason=" + reason);
- }
- flushPending = true;
- return flushPending;
- }
- }
-
- public synchronized void setFlushPendingNoWait(String reason) {
- setFlushPending(reason, false);
- }
-
- public synchronized boolean getFlushPending() {
- return flushPending;
- }
-
- public synchronized boolean getFlushDeletes() {
- return flushDeletes;
- }
-
- public synchronized void clearFlushPending() {
- if (infoStream != null) {
- message("clearFlushPending");
- }
- flushPending = false;
- flushDeletes = false;
- docCount = 0;
- notifyAll();
- }
-
- public synchronized void clearDeletes() {
- delCount = 0;
- }
-
- public synchronized boolean waitUpdate(int docInc, int delInc) {
- return waitUpdate(docInc, delInc, false);
- }
-
- public synchronized boolean waitUpdate(int docInc, int delInc, boolean skipWait) {
- while(flushPending) {
- try {
- wait();
- } catch (InterruptedException ie) {
- throw new ThreadInterruptedException(ie);
- }
- }
-
- // skipWait is only used when a thread is BOTH adding
- // a doc and buffering a del term, and, the adding of
- // the doc already triggered a flush
- if (skipWait) {
- docCount += docInc;
- delCount += delInc;
- return false;
- }
-
- final int maxBufferedDocs = config.getMaxBufferedDocs();
- if (maxBufferedDocs != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
- (docCount+docInc) >= maxBufferedDocs) {
- return setFlushPending("maxBufferedDocs", true);
- }
- docCount += docInc;
-
- final int maxBufferedDeleteTerms = config.getMaxBufferedDeleteTerms();
- if (maxBufferedDeleteTerms != IndexWriterConfig.DISABLE_AUTO_FLUSH &&
- (delCount+delInc) >= maxBufferedDeleteTerms) {
- flushDeletes = true;
- return setFlushPending("maxBufferedDeleteTerms", true);
- }
- delCount += delInc;
-
- return flushByRAMUsage("add delete/doc");
- }
-
- public synchronized boolean flushByRAMUsage(String reason) {
- final double ramBufferSizeMB = config.getRAMBufferSizeMB();
- if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH) {
- final long limit = (long) (ramBufferSizeMB*1024*1024);
- long used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
- if (used >= limit) {
-
- // DocumentsWriter may be able to free up some
- // RAM:
- // Lock order: FC -> DW
- docWriter.balanceRAM();
-
- used = bufferedDeletesStream.bytesUsed() + docWriter.bytesUsed();
- if (used >= limit) {
- return setFlushPending("ram full: " + reason, false);
- }
- }
- }
- return false;
- }
- }
-
- final FlushControl flushControl = new FlushControl();
}
diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
index d7aec02..72002bc 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
@@ -18,7 +18,7 @@
*/
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.DocumentsWriter.IndexingChain;
+import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.IndexSearcher;
@@ -41,7 +41,7 @@
* IndexWriterConfig conf = new IndexWriterConfig(analyzer);
* conf.setter1().setter2();
* </pre>
- *
+ *
* @since 3.1
*/
public final class IndexWriterConfig implements Cloneable {
@@ -56,7 +56,7 @@
* </ul>
*/
public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
-
+
/** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
@@ -77,23 +77,19 @@
/**
* Default value for the write lock timeout (1,000 ms).
- *
+ *
* @see #setDefaultWriteLockTimeout(long)
*/
public static long WRITE_LOCK_TIMEOUT = 1000;
- /** The maximum number of simultaneous threads that may be
- * indexing documents at once in IndexWriter; if more
- * than this many threads arrive they will wait for
- * others to finish. */
- public final static int DEFAULT_MAX_THREAD_STATES = 8;
-
/** Default setting for {@link #setReaderPooling}. */
public final static boolean DEFAULT_READER_POOLING = false;
/** Default value is 1. Change using {@link #setReaderTermsIndexDivisor(int)}. */
public static final int DEFAULT_READER_TERMS_INDEX_DIVISOR = IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;
+ /** Default value is 1945. Change using {@link #setRAMPerThreadHardLimitMB(int)}. */
+ public static final int DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB = 1945;
/**
* Sets the default (for any instance) maximum time to wait for a write lock
* (in milliseconds).
@@ -105,7 +101,7 @@
/**
* Returns the default write lock timeout for newly instantiated
* IndexWriterConfigs.
- *
+ *
* @see #setDefaultWriteLockTimeout(long)
*/
public static long getDefaultWriteLockTimeout() {
@@ -127,10 +123,12 @@
private volatile IndexReaderWarmer mergedSegmentWarmer;
private volatile CodecProvider codecProvider;
private volatile MergePolicy mergePolicy;
- private volatile int maxThreadStates;
+ private volatile DocumentsWriterPerThreadPool indexerThreadPool;
private volatile boolean readerPooling;
private volatile int readerTermsIndexDivisor;
-
+ private volatile FlushPolicy flushPolicy;
+ private volatile int perThreadHardLimitMB;
+
private Version matchVersion;
/**
@@ -153,15 +151,16 @@
maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB;
maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS;
- indexingChain = DocumentsWriter.defaultIndexingChain;
+ indexingChain = DocumentsWriterPerThread.defaultIndexingChain;
mergedSegmentWarmer = null;
codecProvider = CodecProvider.getDefault();
mergePolicy = new TieredMergePolicy();
- maxThreadStates = DEFAULT_MAX_THREAD_STATES;
readerPooling = DEFAULT_READER_POOLING;
+ indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool();
readerTermsIndexDivisor = DEFAULT_READER_TERMS_INDEX_DIVISOR;
+ perThreadHardLimitMB = DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
}
-
+
@Override
public Object clone() {
// Shallow clone is the only thing that's possible, since parameters like
@@ -186,7 +185,7 @@
this.openMode = openMode;
return this;
}
-
+
/** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */
public OpenMode getOpenMode() {
return openMode;
@@ -261,7 +260,7 @@
public SimilarityProvider getSimilarityProvider() {
return similarityProvider;
}
-
+
/**
* Expert: set the interval between indexed terms. Large values cause less
* memory to be used by IndexReader, but slow random-access to terms. Small
@@ -281,7 +280,7 @@
* In particular, <code>numUniqueTerms/interval</code> terms are read into
* memory by an IndexReader, and, on average, <code>interval/2</code> terms
* must be scanned for each random term access.
- *
+ *
* @see #DEFAULT_TERM_INDEX_INTERVAL
*
* <p>Takes effect immediately, but only applies to newly
@@ -293,7 +292,7 @@
/**
* Returns the interval between indexed terms.
- *
+ *
* @see #setTermIndexInterval(int)
*/
public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here
@@ -331,10 +330,10 @@
this.writeLockTimeout = writeLockTimeout;
return this;
}
-
+
/**
* Returns allowed timeout when acquiring the write lock.
- *
+ *
* @see #setWriteLockTimeout(long)
*/
public long getWriteLockTimeout() {
@@ -343,15 +342,16 @@
/**
* Determines the minimal number of delete terms required before the buffered
- * in-memory delete terms are applied and flushed. If there are documents
- * buffered in memory at the time, they are merged and a new segment is
- * created.
-
- * <p>Disabled by default (writer flushes by RAM usage).
+ * in-memory delete terms and queries are applied and flushed.
+ * <p>Disabled by default (writer flushes by RAM usage).</p>
+ * <p>
+ * NOTE: This setting won't trigger a segment flush.
+ * </p>
*
* @throws IllegalArgumentException if maxBufferedDeleteTerms
* is enabled but smaller than 1
* @see #setRAMBufferSizeMB
+ * @see #setFlushPolicy(FlushPolicy)
*
* <p>Takes effect immediately, but only the next time a
* document is added, updated or deleted.
@@ -366,9 +366,9 @@
}
/**
- * Returns the number of buffered deleted terms that will trigger a flush if
- * enabled.
- *
+ * Returns the number of buffered deleted terms that will trigger a flush of all
+ * buffered deletes if enabled.
+ *
* @see #setMaxBufferedDeleteTerms(int)
*/
public int getMaxBufferedDeleteTerms() {
@@ -380,45 +380,50 @@
* and deletions before they are flushed to the Directory. Generally for
* faster indexing performance it's best to flush by RAM usage instead of
* document count and use as large a RAM buffer as you can.
- *
* <p>
* When this is set, the writer will flush whenever buffered documents and
* deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent
* triggering a flush due to RAM usage. Note that if flushing by document
* count is also enabled, then the flush will be triggered by whichever comes
* first.
- *
+ * <p>
+ * The maximum RAM limit is inherently determined by the JVM's available memory.
+ * Yet, an {@link IndexWriter} session can consume a significantly larger amount
+ * of memory than the given RAM limit since this limit is just an indicator of when
+ * to flush memory resident documents to the Directory. Flushes are likely to happen
+ * concurrently while other threads are adding documents to the writer. For application
+ * stability the available memory in the JVM should be significantly larger than
+ * the RAM buffer used for indexing.
* <p>
* <b>NOTE</b>: the account of RAM usage for pending deletions is only
* approximate. Specifically, if you delete by Query, Lucene currently has no
* way to measure the RAM usage of individual Queries so the accounting will
* under-estimate and you should compensate by either calling commit()
* periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)}
- * to flush by count instead of RAM usage (each buffered delete Query counts
- * as one).
- *
+ * to flush and apply buffered deletes by count instead of RAM usage
+ * (for each buffered delete Query a constant number of bytes is used to estimate
+ * RAM usage). Note that enabling {@link #setMaxBufferedDeleteTerms(int)} will
+ * not trigger any segment flushes.
* <p>
- * <b>NOTE</b>: because IndexWriter uses <code>int</code>s when managing its
- * internal storage, the absolute maximum value for this setting is somewhat
- * less than 2048 MB. The precise limit depends on various factors, such as
- * how large your documents are, how many fields have norms, etc., so it's
- * best to set this value comfortably under 2048.
- *
+ * <b>NOTE</b>: It's not guaranteed that all memory resident documents are flushed
+ * once this limit is exceeded. Depending on the configured {@link FlushPolicy} only a
+ * subset of the buffered documents is flushed and therefore only part of the RAM
+ * buffer is released.
* <p>
+ *
* The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.
- *
+ * @see #setFlushPolicy(FlushPolicy)
+ * @see #setRAMPerThreadHardLimitMB(int)
+ *
* <p>Takes effect immediately, but only the next time a
* document is added, updated or deleted.
*
* @throws IllegalArgumentException
* if ramBufferSize is enabled but non-positive, or it disables
* ramBufferSize when maxBufferedDocs is already disabled
+ *
*/
public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) {
- if (ramBufferSizeMB > 2048.0) {
- throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB
- + " is too large; should be comfortably less than 2048");
- }
if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0)
throw new IllegalArgumentException(
"ramBufferSize should be > 0.0 MB when enabled");
@@ -438,22 +443,22 @@
* Determines the minimal number of documents required before the buffered
* in-memory documents are flushed as a new Segment. Large values generally
* give faster indexing.
- *
+ *
* <p>
* When this is set, the writer will flush every maxBufferedDocs added
* documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a
* flush due to number of buffered documents. Note that if flushing by RAM
* usage is also enabled, then the flush will be triggered by whichever comes
* first.
- *
+ *
* <p>
* Disabled by default (writer flushes by RAM usage).
- *
+ *
* <p>Takes effect immediately, but only the next time a
* document is added, updated or deleted.
*
* @see #setRAMBufferSizeMB(double)
- *
+ * @see #setFlushPolicy(FlushPolicy)
* @throws IllegalArgumentException
* if maxBufferedDocs is enabled but smaller than 2, or it disables
* maxBufferedDocs when ramBufferSize is already disabled
@@ -473,7 +478,7 @@
/**
* Returns the number of buffered added documents that will trigger a flush if
* enabled.
- *
+ *
* @see #setMaxBufferedDocs(int)
*/
public int getMaxBufferedDocs() {
@@ -519,32 +524,43 @@
return codecProvider;
}
-
+
/**
* Returns the current MergePolicy in use by this writer.
- *
+ *
* @see #setMergePolicy(MergePolicy)
*/
public MergePolicy getMergePolicy() {
return mergePolicy;
}
- /**
- * Sets the max number of simultaneous threads that may be indexing documents
- * at once in IndexWriter. Values < 1 are invalid and if passed
- * <code>maxThreadStates</code> will be set to
- * {@link #DEFAULT_MAX_THREAD_STATES}.
- *
- * <p>Only takes effect when IndexWriter is first created. */
- public IndexWriterConfig setMaxThreadStates(int maxThreadStates) {
- this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates;
+ /** Expert: Sets the {@link DocumentsWriterPerThreadPool} instance used by the
+ * IndexWriter to assign thread-states to incoming indexing threads. If no
+ * {@link DocumentsWriterPerThreadPool} is set, {@link IndexWriter} will use
+ * {@link ThreadAffinityDocumentsWriterThreadPool} with max number of
+ * thread-states set to {@value DocumentsWriterPerThreadPool#DEFAULT_MAX_THREAD_STATES} (see
+ * {@link DocumentsWriterPerThreadPool#DEFAULT_MAX_THREAD_STATES}).
+ * </p>
+ * <p>
+ * NOTE: The given {@link DocumentsWriterPerThreadPool} instance must not be used with
+ * other {@link IndexWriter} instances once it has been initialized / associated with an
+ * {@link IndexWriter}.
+ * </p>
+ * <p>
+ * NOTE: This only takes effect when IndexWriter is first created.</p>*/
+ public IndexWriterConfig setIndexerThreadPool(DocumentsWriterPerThreadPool threadPool) {
+ if (threadPool == null) {
+ throw new IllegalArgumentException("DocumentsWriterPerThreadPool must not be null");
+ }
+ this.indexerThreadPool = threadPool;
return this;
}
- /** Returns the max number of simultaneous threads that
- * may be indexing documents at once in IndexWriter. */
- public int getMaxThreadStates() {
- return maxThreadStates;
+ /** Returns the configured {@link DocumentsWriterPerThreadPool} instance.
+ * @see #setIndexerThreadPool(DocumentsWriterPerThreadPool)
+ * @return the configured {@link DocumentsWriterPerThreadPool} instance.*/
+ public DocumentsWriterPerThreadPool getIndexerThreadPool() {
+ return this.indexerThreadPool;
}
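The thread-pool setter replaces setMaxThreadStates, so the concurrency cap now travels with the pool instance. A sketch, assuming ThreadAffinityDocumentsWriterThreadPool exposes an int constructor for the maximum thread-state count (the default pool named in the javadoc above):

    // at most 4 DWPTs index concurrently; additional threads wait for a free state
    conf.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(4));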
/** By default, IndexWriter does not pool the
@@ -572,10 +588,10 @@
*
* <p>Only takes effect when IndexWriter is first created. */
IndexWriterConfig setIndexingChain(IndexingChain indexingChain) {
- this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain;
+ this.indexingChain = indexingChain == null ? DocumentsWriterPerThread.defaultIndexingChain : indexingChain;
return this;
}
-
+
/** Returns the indexing chain set on {@link #setIndexingChain(IndexingChain)}. */
IndexingChain getIndexingChain() {
return indexingChain;
@@ -604,6 +620,53 @@
return readerTermsIndexDivisor;
}
+ /**
+ * Expert: Controls when segments are flushed to disk during indexing.
+ * The {@link FlushPolicy} is initialized during {@link IndexWriter} instantiation; once initialized,
+ * the given instance is bound to this {@link IndexWriter} and should not be used with another writer.
+ * @see #setMaxBufferedDeleteTerms(int)
+ * @see #setMaxBufferedDocs(int)
+ * @see #setRAMBufferSizeMB(double)
+ */
+ public IndexWriterConfig setFlushPolicy(FlushPolicy flushPolicy) {
+ this.flushPolicy = flushPolicy;
+ return this;
+ }
+
+ /**
+ * Expert: Sets the maximum memory consumption per thread triggering a forced
+ * flush if exceeded. A {@link DocumentsWriterPerThread} is forcefully flushed
+ * once it exceeds this limit even if the {@link #getRAMBufferSizeMB()} has
+ * not been exceeded. This is a safety limit to prevent a
+ * {@link DocumentsWriterPerThread} from exhausting its address space due to its
+ * internal 32-bit signed integer based memory addressing.
+ * The given value must be less than 2GB (2048MB).
+ *
+ * @see #DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB
+ */
+ public IndexWriterConfig setRAMPerThreadHardLimitMB(int perThreadHardLimitMB) {
+ if (perThreadHardLimitMB <= 0 || perThreadHardLimitMB >= 2048) {
+ throw new IllegalArgumentException("PerThreadHardLimit must be greater than 0 and less than 2048MB");
+ }
+ this.perThreadHardLimitMB = perThreadHardLimitMB;
+ return this;
+ }
+
+ /**
+ * Returns the max amount of memory each {@link DocumentsWriterPerThread} can
+ * consume until forcefully flushed.
+ * @see #setRAMPerThreadHardLimitMB(int)
+ */
+ public int getRAMPerThreadHardLimitMB() {
+ return perThreadHardLimitMB;
+ }
+ /**
+ * @see #setFlushPolicy(FlushPolicy)
+ */
+ public FlushPolicy getFlushPolicy() {
+ return flushPolicy;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
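Together the two new expert knobs look like this (conf as above; the FlushPolicy subclass is hypothetical, since FlushPolicy itself is abstract):

    conf.setRAMPerThreadHardLimitMB(256); // any single DWPT is force-flushed past 256MB
    // conf.setFlushPolicy(new MyFlushPolicy()); // custom policy, bound to one writer only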
@@ -623,9 +686,13 @@
sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n");
sb.append("codecProvider=").append(codecProvider).append("\n");
sb.append("mergePolicy=").append(mergePolicy).append("\n");
- sb.append("maxThreadStates=").append(maxThreadStates).append("\n");
+ sb.append("indexerThreadPool=").append(indexerThreadPool).append("\n");
sb.append("readerPooling=").append(readerPooling).append("\n");
sb.append("readerTermsIndexDivisor=").append(readerTermsIndexDivisor).append("\n");
+ sb.append("flushPolicy=").append(flushPolicy).append("\n");
+ sb.append("perThreadHardLimitMB=").append(perThreadHardLimitMB).append("\n");
+
return sb.toString();
}
+
}
diff --git a/lucene/src/java/org/apache/lucene/index/IntBlockPool.java b/lucene/src/java/org/apache/lucene/index/IntBlockPool.java
index 013c7b3..16093a5 100644
--- a/lucene/src/java/org/apache/lucene/index/IntBlockPool.java
+++ b/lucene/src/java/org/apache/lucene/index/IntBlockPool.java
@@ -1,5 +1,7 @@
package org.apache.lucene.index;
+import java.util.Arrays;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -22,24 +24,24 @@
public int[][] buffers = new int[10][];
int bufferUpto = -1; // Which buffer we are upto
- public int intUpto = DocumentsWriter.INT_BLOCK_SIZE; // Where we are in head buffer
+ public int intUpto = DocumentsWriterPerThread.INT_BLOCK_SIZE; // Where we are in head buffer
public int[] buffer; // Current head buffer
- public int intOffset = -DocumentsWriter.INT_BLOCK_SIZE; // Current head offset
+ public int intOffset = -DocumentsWriterPerThread.INT_BLOCK_SIZE; // Current head offset
- final private DocumentsWriter docWriter;
+ final private DocumentsWriterPerThread docWriter;
- public IntBlockPool(DocumentsWriter docWriter) {
+ public IntBlockPool(DocumentsWriterPerThread docWriter) {
this.docWriter = docWriter;
}
public void reset() {
if (bufferUpto != -1) {
- if (bufferUpto > 0)
- // Recycle all but the first buffer
- docWriter.recycleIntBlocks(buffers, 1, 1+bufferUpto);
-
// Reuse first buffer
+ if (bufferUpto > 0) {
+ docWriter.recycleIntBlocks(buffers, 1, bufferUpto-1);
+ Arrays.fill(buffers, 1, bufferUpto, null);
+ }
bufferUpto = 0;
intUpto = 0;
intOffset = 0;
@@ -57,7 +59,7 @@
bufferUpto++;
intUpto = 0;
- intOffset += DocumentsWriter.INT_BLOCK_SIZE;
+ intOffset += DocumentsWriterPerThread.INT_BLOCK_SIZE;
}
}
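The rewritten reset() nulls the recycled slots so a pooled DocumentsWriterPerThread no longer pins every int block it ever allocated; note that Arrays.fill uses a half-open range. A standalone illustration of that range semantics (sizes illustrative):

    import java.util.Arrays;

    public class FillRange {
      public static void main(String[] args) {
        int[][] buffers = new int[10][];
        for (int i = 0; i <= 3; i++) {
          buffers[i] = new int[128]; // buffers 0..3 in use, as if bufferUpto == 3
        }
        Arrays.fill(buffers, 1, 3, null);       // clears indices 1 and 2 only
        System.out.println(buffers[0] != null); // true: first buffer kept for reuse
        System.out.println(buffers[3] != null); // true: toIndex is exclusive
      }
    }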
diff --git a/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java b/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
index 76ca1d7..5f4a840 100644
--- a/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/InvertedDocConsumer.java
@@ -17,20 +17,22 @@
* limitations under the License.
*/
-import java.util.Collection;
-import java.util.Map;
import java.io.IOException;
+import java.util.Map;
abstract class InvertedDocConsumer {
- /** Add a new thread */
- abstract InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread);
-
/** Abort (called after hitting AbortException) */
abstract void abort();
/** Flush a new segment */
- abstract void flush(Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
+ abstract void flush(Map<FieldInfo, InvertedDocConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
+
+ abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+
+ abstract void startDocument() throws IOException;
+
+ abstract void finishDocument() throws IOException;
/** Attempt to free RAM, returning true if any RAM was
* freed */
diff --git a/lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java b/lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java
deleted file mode 100644
index 8501360..0000000
--- a/lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerThread.java
+++ /dev/null
@@ -1,27 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-abstract class InvertedDocConsumerPerThread {
- abstract void startDocument() throws IOException;
- abstract InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
- abstract DocumentsWriter.DocWriter finishDocument() throws IOException;
- abstract void abort();
-}
diff --git a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java b/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
index 351529f..2477cef 100644
--- a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumer.java
@@ -17,12 +17,13 @@
* limitations under the License.
*/
-import java.util.Collection;
-import java.util.Map;
import java.io.IOException;
+import java.util.Map;
abstract class InvertedDocEndConsumer {
- abstract InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread);
- abstract void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
+ abstract void flush(Map<FieldInfo, InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
abstract void abort();
+ abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
+ abstract void startDocument() throws IOException;
+ abstract void finishDocument() throws IOException;
}
diff --git a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java b/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java
deleted file mode 100644
index 4b3119f..0000000
--- a/lucene/src/java/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-abstract class InvertedDocEndConsumerPerThread {
- abstract void startDocument();
- abstract InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo);
- abstract void finishDocument();
- abstract void abort();
-}
diff --git a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
index 1be4f26..7d4170d 100644
--- a/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
+++ b/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
@@ -75,7 +75,7 @@
protected double noCFSRatio = DEFAULT_NO_CFS_RATIO;
protected boolean calibrateSizeByDeletes = true;
-
+
protected boolean useCompoundFile = true;
public LogMergePolicy() {
@@ -103,7 +103,7 @@
}
this.noCFSRatio = noCFSRatio;
}
-
+
protected void message(String message) {
if (verbose())
writer.get().message("LMP: " + message);
@@ -169,7 +169,7 @@
this.calibrateSizeByDeletes = calibrateSizeByDeletes;
}
- /** Returns true if the segment size should be calibrated
+ /** Returns true if the segment size should be calibrated
* by the number of deletes when choosing segments for merge. */
public boolean getCalibrateSizeByDeletes() {
return calibrateSizeByDeletes;
@@ -189,7 +189,7 @@
return info.docCount;
}
}
-
+
protected long sizeBytes(SegmentInfo info) throws IOException {
long byteSize = info.sizeInBytes(true);
if (calibrateSizeByDeletes) {
@@ -201,7 +201,7 @@
return byteSize;
}
}
-
+
protected boolean isOptimized(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
final int numSegments = infos.size();
int numToOptimize = 0;
@@ -273,7 +273,7 @@
return spec.merges.size() == 0 ? null : spec;
}
-
+
/**
* Returns the merges necessary to optimize the index. This method constraints
* the returned merges only by the {@code maxNumSegments} parameter, and
@@ -281,7 +281,7 @@
*/
private MergeSpecification findMergesForOptimizeMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last) throws IOException {
MergeSpecification spec = new MergeSpecification();
-
+
// First, enroll all "full" merges (size
// mergeFactor) to potentially be run concurrently:
while (last - maxNumSegments + 1 >= mergeFactor) {
@@ -331,7 +331,7 @@
}
return spec.merges.size() == 0 ? null : spec;
}
-
+
/** Returns the merges necessary to optimize the index.
* This merge policy defines "optimized" to mean only the
* requested number of segments is left in the index, and
@@ -379,7 +379,7 @@
}
return null;
}
-
+
// There is only one segment already, and it is optimized
if (maxNumSegments == 1 && last == 1 && isOptimized(infos.info(0))) {
if (verbose()) {
@@ -397,7 +397,7 @@
break;
}
}
-
+
if (anyTooLarge) {
return findMergesForOptimizeSizeLimit(infos, maxNumSegments, last);
} else {
@@ -409,7 +409,7 @@
* Finds merges necessary to expunge all deletes from the
* index. We simply merge adjacent segments that have
* deletes, up to mergeFactor at a time.
- */
+ */
@Override
public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos)
throws CorruptIndexException, IOException {
@@ -462,7 +462,7 @@
SegmentInfo info;
float level;
int index;
-
+
public SegmentInfoAndLevel(SegmentInfo info, float level, int index) {
this.info = info;
this.level = level;
@@ -658,5 +658,5 @@
sb.append("]");
return sb.toString();
}
-
+
}
diff --git a/lucene/src/java/org/apache/lucene/index/NormsWriter.java b/lucene/src/java/org/apache/lucene/index/NormsWriter.java
index e0cff83d..5064a47 100644
--- a/lucene/src/java/org/apache/lucene/index/NormsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/NormsWriter.java
@@ -19,11 +19,7 @@
import java.io.IOException;
import java.util.Collection;
-import java.util.Iterator;
-import java.util.HashMap;
import java.util.Map;
-import java.util.List;
-import java.util.ArrayList;
import org.apache.lucene.store.IndexOutput;
@@ -36,10 +32,6 @@
final class NormsWriter extends InvertedDocEndConsumer {
- @Override
- public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
- return new NormsWriterPerThread(docInverterPerThread, this);
- }
@Override
public void abort() {}
@@ -50,40 +42,11 @@
/** Produce _X.nrm if any document had a field with norms
* not disabled */
@Override
- public void flush(Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
-
- final Map<FieldInfo,List<NormsWriterPerField>> byField = new HashMap<FieldInfo,List<NormsWriterPerField>>();
-
+ public void flush(Map<FieldInfo,InvertedDocEndConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
if (!state.fieldInfos.hasNorms()) {
return;
}
- // Typically, each thread will have encountered the same
- // field. So first we collate by field, ie, all
- // per-thread field instances that correspond to the
- // same FieldInfo
- for (final Map.Entry<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> entry : threadsAndFields.entrySet()) {
- final Collection<InvertedDocEndConsumerPerField> fields = entry.getValue();
- final Iterator<InvertedDocEndConsumerPerField> fieldsIt = fields.iterator();
-
- while (fieldsIt.hasNext()) {
- final NormsWriterPerField perField = (NormsWriterPerField) fieldsIt.next();
-
- if (perField.upto > 0) {
- // It has some norms
- List<NormsWriterPerField> l = byField.get(perField.fieldInfo);
- if (l == null) {
- l = new ArrayList<NormsWriterPerField>();
- byField.put(perField.fieldInfo, l);
- }
- l.add(perField);
- } else
- // Remove this field since we haven't seen it
- // since the previous flush
- fieldsIt.remove();
- }
- }
-
final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
IndexOutput normsOut = state.directory.createOutput(normsFileName);
@@ -93,60 +56,25 @@
int normCount = 0;
for (FieldInfo fi : state.fieldInfos) {
- final List<NormsWriterPerField> toMerge = byField.get(fi);
+ final NormsWriterPerField toWrite = (NormsWriterPerField) fieldsToFlush.get(fi);
int upto = 0;
- if (toMerge != null) {
-
- final int numFields = toMerge.size();
-
+ if (toWrite != null && toWrite.upto > 0) {
normCount++;
- final NormsWriterPerField[] fields = new NormsWriterPerField[numFields];
- int[] uptos = new int[numFields];
-
- for(int j=0;j<numFields;j++)
- fields[j] = toMerge.get(j);
-
- int numLeft = numFields;
-
- while(numLeft > 0) {
-
- assert uptos[0] < fields[0].docIDs.length : " uptos[0]=" + uptos[0] + " len=" + (fields[0].docIDs.length);
-
- int minLoc = 0;
- int minDocID = fields[0].docIDs[uptos[0]];
-
- for(int j=1;j<numLeft;j++) {
- final int docID = fields[j].docIDs[uptos[j]];
- if (docID < minDocID) {
- minDocID = docID;
- minLoc = j;
- }
- }
-
- assert minDocID < state.numDocs;
-
- // Fill hole
- for(;upto<minDocID;upto++)
+ int docID = 0;
+ for (; docID < state.numDocs; docID++) {
+ if (upto < toWrite.upto && toWrite.docIDs[upto] == docID) {
+ normsOut.writeByte(toWrite.norms[upto]);
+ upto++;
+ } else {
normsOut.writeByte((byte) 0);
-
- normsOut.writeByte(fields[minLoc].norms[uptos[minLoc]]);
- (uptos[minLoc])++;
- upto++;
-
- if (uptos[minLoc] == fields[minLoc].upto) {
- fields[minLoc].reset();
- if (minLoc != numLeft-1) {
- fields[minLoc] = fields[numLeft-1];
- uptos[minLoc] = uptos[numLeft-1];
- }
- numLeft--;
}
}
-
- // Fill final hole with defaultNorm
- for(;upto<state.numDocs;upto++)
- normsOut.writeByte((byte) 0);
+
+ // we should have consumed every norm
+ assert upto == toWrite.upto;
+
+ toWrite.reset();
} else if (fi.isIndexed && !fi.omitNorms) {
normCount++;
// Fill entire field with default norm:
@@ -161,4 +89,16 @@
normsOut.close();
}
}
+
+ @Override
+ void finishDocument() throws IOException {}
+
+ @Override
+ void startDocument() throws IOException {}
+
+ @Override
+ InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField,
+ FieldInfo fieldInfo) {
+ return new NormsWriterPerField(docInverterPerField, fieldInfo);
+ }
}
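
Note: the rewritten flush above replaces the old k-way merge of per-thread docID streams. With one NormsWriter per DocumentsWriterPerThread, each field holds a single docID-sorted buffer and the write degenerates to one gap-filling pass. The core loop in isolation (toWrite, state and normsOut as in the hunk):

  // Single-pass norms write for one field; docIDs[0..upto) is ascending.
  int upto = 0;
  for (int docID = 0; docID < state.numDocs; docID++) {
    if (upto < toWrite.upto && toWrite.docIDs[upto] == docID) {
      normsOut.writeByte(toWrite.norms[upto]);  // this doc indexed the field
      upto++;
    } else {
      normsOut.writeByte((byte) 0);             // default norm fills the gap
    }
  }
  assert upto == toWrite.upto;                  // every buffered norm consumed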
diff --git a/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
index 05cf2fb..8b9cc3b 100644
--- a/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
+++ b/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
@@ -27,9 +27,8 @@
final class NormsWriterPerField extends InvertedDocEndConsumerPerField implements Comparable<NormsWriterPerField> {
- final NormsWriterPerThread perThread;
final FieldInfo fieldInfo;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final Similarity similarity;
// Holds all docID/norm pairs we've seen
@@ -46,10 +45,9 @@
upto = 0;
}
- public NormsWriterPerField(final DocInverterPerField docInverterPerField, final NormsWriterPerThread perThread, final FieldInfo fieldInfo) {
- this.perThread = perThread;
+ public NormsWriterPerField(final DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
this.fieldInfo = fieldInfo;
- docState = perThread.docState;
+ docState = docInverterPerField.docState;
fieldState = docInverterPerField.fieldState;
similarity = docState.similarityProvider.get(fieldInfo.name);
}
diff --git a/lucene/src/java/org/apache/lucene/index/NormsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/NormsWriterPerThread.java
deleted file mode 100644
index fb57104..0000000
--- a/lucene/src/java/org/apache/lucene/index/NormsWriterPerThread.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-final class NormsWriterPerThread extends InvertedDocEndConsumerPerThread {
- final NormsWriter normsWriter;
- final DocumentsWriter.DocState docState;
-
- public NormsWriterPerThread(DocInverterPerThread docInverterPerThread, NormsWriter normsWriter) {
- this.normsWriter = normsWriter;
- docState = docInverterPerThread.docState;
- }
-
- @Override
- InvertedDocEndConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
- return new NormsWriterPerField(docInverterPerField, this, fieldInfo);
- }
-
- @Override
- void abort() {}
-
- @Override
- void startDocument() {}
- @Override
- void finishDocument() {}
-
- boolean freeRAM() {
- return false;
- }
-}
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
index 15ad137..3313a04 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
@@ -37,14 +37,14 @@
/**
* Information about a segment such as its name, directory, and files related
* to the segment.
- *
+ *
* @lucene.experimental
*/
public final class SegmentInfo {
static final int NO = -1; // e.g. no norms; no deletes;
static final int YES = 1; // e.g. have norms; have deletes;
- static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
+ static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
public String name; // unique name in dir
public int docCount; // number of docs in seg
@@ -56,7 +56,7 @@
* - YES or higher if there are deletes at generation N
*/
private long delGen;
-
+
/*
* Current generation of each field's norm file. If this array is null,
* means no separate norms. If this array is not null, its values mean:
@@ -65,7 +65,7 @@
*/
private Map<Integer,Long> normGen;
- private boolean isCompoundFile;
+ private boolean isCompoundFile;
private volatile List<String> files; // cached list of files that this segment uses
// in the Directory
@@ -73,10 +73,13 @@
private volatile long sizeInBytesNoStore = -1; // total byte size of all but the store files (computed on demand)
private volatile long sizeInBytesWithStore = -1; // total byte size of all of our files (computed on demand)
+ //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
private int docStoreOffset; // if this segment shares stored fields & vectors, this
// offset is where in that file this segment's docs begin
+ //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
private String docStoreSegment; // name used to derive fields/vectors file we share with
// other segments
+ //TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
private int delCount; // How many deleted docs in this segment
@@ -91,9 +94,9 @@
private Map<String,String> diagnostics;
- // Tracks the Lucene version this segment was created with, since 3.1. Null
+ // Tracks the Lucene version this segment was created with, since 3.1. Null
// indicates an index older than 3.0, and it's used to detect a too-old index.
- // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
+ // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and
// specific versions afterwards ("3.0", "3.1" etc.).
// see Constants.LUCENE_MAIN_VERSION.
private String version;
@@ -101,7 +104,7 @@
// NOTE: only used in-RAM by IW to track buffered deletes;
// this is never written to/read from the Directory
private long bufferedDeletesGen;
-
+
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
this.name = name;
@@ -182,11 +185,13 @@
docStoreSegment = name;
docStoreIsCompoundFile = false;
}
+
if (format > DefaultSegmentInfosWriter.FORMAT_4_0) {
// pre-4.0 indexes write a byte if there is a single norms file
byte b = input.readByte();
assert 1 == b;
}
+
int numNormGen = input.readInt();
if (numNormGen == NO) {
normGen = null;
@@ -207,7 +212,7 @@
assert delCount <= docCount;
hasProx = input.readByte() == YES;
-
+
// System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
segmentCodecs = new SegmentCodecs(codecs, input);
@@ -217,7 +222,7 @@
segmentCodecs = new SegmentCodecs(codecs, new Codec[] { codecs.lookup("PreFlex")});
}
diagnostics = input.readStringStringMap();
-
+
if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
hasVectors = input.readByte() == 1;
} else {
@@ -366,7 +371,7 @@
// against this segment
return null;
} else {
- return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
+ return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
}
}
@@ -432,7 +437,7 @@
if (hasSeparateNorms(number)) {
return IndexFileNames.fileNameFromGeneration(name, "s" + number, normGen.get(number));
} else {
- // single file for all norms
+ // single file for all norms
return IndexFileNames.fileNameFromGeneration(name, IndexFileNames.NORMS_EXTENSION, WITHOUT_GEN);
}
}
@@ -465,39 +470,74 @@
assert delCount <= docCount;
}
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
public int getDocStoreOffset() {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
return docStoreOffset;
}
-
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
public boolean getDocStoreIsCompoundFile() {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
return docStoreIsCompoundFile;
}
-
- void setDocStoreIsCompoundFile(boolean v) {
- docStoreIsCompoundFile = v;
- clearFilesCache();
- }
-
- public String getDocStoreSegment() {
- return docStoreSegment;
- }
-
- public void setDocStoreSegment(String segment) {
- docStoreSegment = segment;
- }
-
- void setDocStoreOffset(int offset) {
- docStoreOffset = offset;
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ public void setDocStoreIsCompoundFile(boolean docStoreIsCompoundFile) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ this.docStoreIsCompoundFile = docStoreIsCompoundFile;
clearFilesCache();
}
- void setDocStore(int offset, String segment, boolean isCompoundFile) {
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ void setDocStore(int offset, String segment, boolean isCompoundFile) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
docStoreOffset = offset;
docStoreSegment = segment;
docStoreIsCompoundFile = isCompoundFile;
clearFilesCache();
}
-
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ public String getDocStoreSegment() {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ return docStoreSegment;
+ }
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ void setDocStoreOffset(int offset) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ docStoreOffset = offset;
+ clearFilesCache();
+ }
+
+ /**
+ * @deprecated shared doc stores are not supported in >= 4.0
+ */
+ @Deprecated
+ public void setDocStoreSegment(String docStoreSegment) {
+ // TODO: LUCENE-2555: remove once we don't need to support shared doc stores (pre 4.0)
+ this.docStoreSegment = docStoreSegment;
+ }
+
/** Save this segment's info. */
public void write(IndexOutput output)
throws IOException {
@@ -507,12 +547,14 @@
output.writeString(name);
output.writeInt(docCount);
output.writeLong(delGen);
+
output.writeInt(docStoreOffset);
if (docStoreOffset != -1) {
output.writeString(docStoreSegment);
output.writeByte((byte) (docStoreIsCompoundFile ? 1:0));
}
+
if (normGen == null) {
output.writeInt(NO);
} else {
@@ -522,7 +564,7 @@
output.writeLong(entry.getValue());
}
}
-
+
output.writeByte((byte) (isCompoundFile ? YES : NO));
output.writeInt(delCount);
output.writeByte((byte) (hasProx ? 1:0));
@@ -570,9 +612,9 @@
// Already cached:
return files;
}
-
+
Set<String> fileSet = new HashSet<String>();
-
+
boolean useCompoundFile = getUseCompoundFile();
if (useCompoundFile) {
@@ -606,7 +648,7 @@
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
- }
+ }
}
String delFileName = IndexFileNames.fileNameFromGeneration(name, IndexFileNames.DELETES_EXTENSION, delGen);
@@ -644,7 +686,7 @@
}
/** Used for debugging. Format may suddenly change.
- *
+ *
* <p>Current format looks like
* <code>_a(3.1):c45/4->_1</code>, which means the segment's
* name is <code>_a</code>; it was created with Lucene 3.1 (or
@@ -674,7 +716,7 @@
if (delCount != 0) {
s.append('/').append(delCount);
}
-
+
if (docStoreOffset != -1) {
s.append("->").append(docStoreSegment);
if (docStoreIsCompoundFile) {
@@ -714,13 +756,13 @@
* <b>NOTE:</b> this method is used for internal purposes only - you should
* not modify the version of a SegmentInfo, or it may result in unexpected
* exceptions thrown when you attempt to open the index.
- *
+ *
* @lucene.internal
*/
public void setVersion(String version) {
this.version = version;
}
-
+
/** Returns the version of the code which wrote the segment. */
public String getVersion() {
return version;
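
Note: all shared-doc-store accessors above are now @Deprecated and carry the same LUCENE-2555 TODO; the layout survives only so pre-4.0 segments stay readable. A hypothetical caller that must handle both layouts would branch on the offset, roughly like this (illustration only, not from the patch):

  // Hypothetical compatibility branch over a SegmentInfo 'info'.
  if (info.getDocStoreOffset() != -1) {
    // Pre-4.0 shared store: fields/vectors files derive from the shared
    // segment name, and this segment's docs start at the given offset.
    String storeSegment = info.getDocStoreSegment();
    boolean storeIsCompound = info.getDocStoreIsCompoundFile();
    // ... open the shared files and seek to the offset ...
  } else {
    // 4.0 layout: the segment owns its stored fields and vectors files.
  }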
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
index 8873029..67e279b 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -39,24 +39,24 @@
/**
* The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}),
- * into a single Segment. After adding the appropriate readers, call the merge method to combine the
+ * into a single Segment. After adding the appropriate readers, call the merge method to combine the
* segments.
- *
+ *
* @see #merge
* @see #add
*/
final class SegmentMerger {
-
+
/** norms header placeholder */
- static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
-
+ static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1};
+
private Directory directory;
private String segment;
private int termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL;
private List<IndexReader> readers = new ArrayList<IndexReader>();
private final FieldInfos fieldInfos;
-
+
private int mergedDocs;
private final MergeState.CheckAbort checkAbort;
@@ -64,13 +64,13 @@
/** Maximum number of contiguous documents to bulk-copy
when merging stored fields */
private final static int MAX_RAW_MERGE_DOCS = 4192;
-
+
private final CodecProvider codecs;
private Codec codec;
private SegmentWriteState segmentWriteState;
private PayloadProcessorProvider payloadProcessorProvider;
-
+
SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
this.payloadProcessorProvider = payloadProcessorProvider;
directory = dir;
@@ -135,10 +135,10 @@
for (String file : files) {
cfsWriter.addFile(file);
}
-
+
// Perform the merge
cfsWriter.close();
-
+
return files;
}
@@ -196,13 +196,12 @@
}
/**
- *
+ *
* @return The number of documents in all of the readers
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
private int mergeFields() throws CorruptIndexException, IOException {
-
for (IndexReader reader : readers) {
if (reader instanceof SegmentReader) {
SegmentReader segmentReader = (SegmentReader) reader;
@@ -265,7 +264,7 @@
throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + " file=" + fileName + " file exists?=" + directory.fileExists(fileName) + "; now aborting this merge to prevent index corruption");
segmentWriteState = new SegmentWriteState(null, directory, segment, fieldInfos, docCount, termIndexInterval, codecInfo, null);
-
+
return docCount;
}
@@ -283,7 +282,7 @@
++j;
continue;
}
- // We can optimize this case (doing a bulk byte copy) since the field
+ // We can optimize this case (doing a bulk byte copy) since the field
// numbers are identical
int start = j, numDocs = 0;
do {
@@ -295,7 +294,7 @@
break;
}
} while(numDocs < MAX_RAW_MERGE_DOCS);
-
+
IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs);
fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs);
docCount += numDocs;
@@ -349,7 +348,7 @@
* @throws IOException
*/
private final void mergeVectors() throws IOException {
- TermVectorsWriter termVectorsWriter =
+ TermVectorsWriter termVectorsWriter =
new TermVectorsWriter(directory, segment, fieldInfos);
try {
@@ -369,7 +368,7 @@
copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, reader);
} else {
copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, reader);
-
+
}
}
} finally {
@@ -402,7 +401,7 @@
++docNum;
continue;
}
- // We can optimize this case (doing a bulk byte copy) since the field
+ // We can optimize this case (doing a bulk byte copy) since the field
// numbers are identical
int start = docNum, numDocs = 0;
do {
@@ -414,7 +413,7 @@
break;
}
} while(numDocs < MAX_RAW_MERGE_DOCS);
-
+
matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
checkAbort.work(300 * numDocs);
@@ -425,7 +424,7 @@
// skip deleted docs
continue;
}
-
+
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
TermFreqVector[] vectors = reader.getTermFreqVectors(docNum);
@@ -434,7 +433,7 @@
}
}
}
-
+
private void copyVectorsNoDeletions(final TermVectorsWriter termVectorsWriter,
final TermVectorsReader matchingVectorsReader,
final IndexReader reader)
@@ -470,7 +469,7 @@
// Let CodecProvider decide which codec will be used to write
// the new segment:
-
+
int docBase = 0;
final List<Fields> fields = new ArrayList<Fields>();
@@ -498,7 +497,7 @@
mergeState.readerCount = readers.size();
mergeState.fieldInfos = fieldInfos;
mergeState.mergedDocCount = mergedDocs;
-
+
// Remap docIDs
mergeState.delCounts = new int[mergeState.readerCount];
mergeState.docMaps = new int[mergeState.readerCount][];
@@ -536,7 +535,7 @@
}
assert delCount == mergeState.delCounts[i]: "reader delCount=" + mergeState.delCounts[i] + " vs recomputed delCount=" + delCount;
}
-
+
if (payloadProcessorProvider != null) {
mergeState.dirPayloadProcessor[i] = payloadProcessorProvider.getDirProcessor(reader.directory());
}
@@ -549,7 +548,7 @@
// apart when we step through the docs enums in
// MultiDocsEnum.
mergeState.multiDeletedDocs = new MultiBits(bits, bitsStarts);
-
+
try {
consumer.merge(mergeState,
new MultiFields(fields.toArray(Fields.EMPTY_ARRAY),
@@ -568,7 +567,7 @@
int[] getDelCounts() {
return mergeState.delCounts;
}
-
+
public boolean getAnyNonBulkMerges() {
assert matchedCount <= readers.size();
return matchedCount != readers.size();
@@ -579,7 +578,7 @@
try {
for (FieldInfo fi : fieldInfos) {
if (fi.isIndexed && !fi.omitNorms) {
- if (output == null) {
+ if (output == null) {
output = directory.createOutput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
output.writeBytes(NORMS_HEADER,NORMS_HEADER.length);
}
@@ -610,7 +609,7 @@
}
}
} finally {
- if (output != null) {
+ if (output != null) {
output.close();
}
}
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
index e44462c..759337d 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
@@ -65,7 +65,7 @@
this.segmentCodecs = segmentCodecs;
codecId = "";
}
-
+
/**
* Create a shallow {@link SegmentWriteState} copy with a given codec ID
*/
diff --git a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java b/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java
index 9f04dcb..c3aa5c8 100644
--- a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java
@@ -18,7 +18,8 @@
*/
import java.io.IOException;
-import org.apache.lucene.store.RAMOutputStream;
+
+import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@@ -26,22 +27,38 @@
final class StoredFieldsWriter {
FieldsWriter fieldsWriter;
- final DocumentsWriter docWriter;
+ final DocumentsWriterPerThread docWriter;
int lastDocID;
- PerDoc[] docFreeList = new PerDoc[1];
int freeCount;
- public StoredFieldsWriter(DocumentsWriter docWriter) {
+ final DocumentsWriterPerThread.DocState docState;
+
+ public StoredFieldsWriter(DocumentsWriterPerThread docWriter) {
this.docWriter = docWriter;
+ this.docState = docWriter.docState;
}
- public StoredFieldsWriterPerThread addThread(DocumentsWriter.DocState docState) throws IOException {
- return new StoredFieldsWriterPerThread(docState, this);
+ private int numStoredFields;
+ private Fieldable[] storedFields;
+ private int[] fieldNumbers;
+
+ public void reset() {
+ numStoredFields = 0;
+ storedFields = new Fieldable[1];
+ fieldNumbers = new int[1];
}
- synchronized public void flush(SegmentWriteState state) throws IOException {
- if (state.numDocs > lastDocID) {
+ public void startDocument() {
+ reset();
+ }
+
+ public void flush(SegmentWriteState state) throws IOException {
+
+ if (state.numDocs > 0) {
+ // It's possible that all documents seen in this segment
+ // hit non-aborting exceptions, in which case we will
+ // not have yet init'd the FieldsWriter:
initFieldsWriter();
fill(state.numDocs);
}
@@ -67,23 +84,9 @@
int allocCount;
- synchronized PerDoc getPerDoc() {
- if (freeCount == 0) {
- allocCount++;
- if (allocCount > docFreeList.length) {
- // Grow our free list up front to make sure we have
- // enough space to recycle all outstanding PerDoc
- // instances
- assert allocCount == 1+docFreeList.length;
- docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
- }
- return new PerDoc();
- } else {
- return docFreeList[--freeCount];
- }
- }
+ void abort() {
+ reset();
- synchronized void abort() {
if (fieldsWriter != null) {
fieldsWriter.abort();
fieldsWriter = null;
@@ -101,53 +104,40 @@
}
}
- synchronized void finishDocument(PerDoc perDoc) throws IOException {
+ void finishDocument() throws IOException {
assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument start");
+
initFieldsWriter();
+ fill(docState.docID);
- fill(perDoc.docID);
+ if (fieldsWriter != null && numStoredFields > 0) {
+ fieldsWriter.startDocument(numStoredFields);
+ for (int i = 0; i < numStoredFields; i++) {
+ fieldsWriter.writeField(fieldNumbers[i], storedFields[i]);
+ }
+ lastDocID++;
+ }
- // Append stored fields to the real FieldsWriter:
- fieldsWriter.flushDocument(perDoc.numStoredFields, perDoc.fdt);
- lastDocID++;
- perDoc.reset();
- free(perDoc);
+ reset();
assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end");
}
- synchronized void free(PerDoc perDoc) {
- assert freeCount < docFreeList.length;
- assert 0 == perDoc.numStoredFields;
- assert 0 == perDoc.fdt.length();
- assert 0 == perDoc.fdt.getFilePointer();
- docFreeList[freeCount++] = perDoc;
- }
-
- class PerDoc extends DocumentsWriter.DocWriter {
- final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
- RAMOutputStream fdt = new RAMOutputStream(buffer);
- int numStoredFields;
-
- void reset() {
- fdt.reset();
- buffer.recycle();
- numStoredFields = 0;
+ public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException {
+ if (numStoredFields == storedFields.length) {
+ int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ Fieldable[] newArray = new Fieldable[newSize];
+ System.arraycopy(storedFields, 0, newArray, 0, numStoredFields);
+ storedFields = newArray;
}
- @Override
- void abort() {
- reset();
- free(this);
+ if (numStoredFields == fieldNumbers.length) {
+ fieldNumbers = ArrayUtil.grow(fieldNumbers);
}
- @Override
- public long sizeInBytes() {
- return buffer.getSizeInBytes();
- }
+ storedFields[numStoredFields] = field;
+ fieldNumbers[numStoredFields] = fieldInfo.number;
+ numStoredFields++;
- @Override
- public void finish() throws IOException {
- finishDocument(this);
- }
+ assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField");
}
}
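
Note: StoredFieldsWriter no longer recycles PerDoc buffers through a free list; it accumulates one document's fields in two parallel arrays (storedFields/fieldNumbers) and writes them in finishDocument. The per-document call order the patch assumes, sketched with a hypothetical driver loop:

  // Per-document lifecycle of the new StoredFieldsWriter (sketch).
  storedFieldsWriter.startDocument();              // resets the parallel buffers
  for (Fieldable field : storedFieldsOfDoc) {      // hypothetical iteration
    storedFieldsWriter.addField(field, fieldInfo); // buffers field + its number
  }
  storedFieldsWriter.finishDocument();             // fills docID gaps, writes
                                                   // numStoredFields, then each field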
diff --git a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
deleted file mode 100644
index 85c6b57..0000000
--- a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriterPerThread.java
+++ /dev/null
@@ -1,79 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.document.Fieldable;
-
-final class StoredFieldsWriterPerThread {
-
- final FieldsWriter localFieldsWriter;
- final StoredFieldsWriter storedFieldsWriter;
- final DocumentsWriter.DocState docState;
-
- StoredFieldsWriter.PerDoc doc;
-
- public StoredFieldsWriterPerThread(DocumentsWriter.DocState docState, StoredFieldsWriter storedFieldsWriter) throws IOException {
- this.storedFieldsWriter = storedFieldsWriter;
- this.docState = docState;
- localFieldsWriter = new FieldsWriter((IndexOutput) null, (IndexOutput) null);
- }
-
- public void startDocument() {
- if (doc != null) {
- // Only happens if previous document hit non-aborting
- // exception while writing stored fields into
- // localFieldsWriter:
- doc.reset();
- doc.docID = docState.docID;
- }
- }
-
- public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException {
- if (doc == null) {
- doc = storedFieldsWriter.getPerDoc();
- doc.docID = docState.docID;
- localFieldsWriter.setFieldsStream(doc.fdt);
- assert doc.numStoredFields == 0: "doc.numStoredFields=" + doc.numStoredFields;
- assert 0 == doc.fdt.length();
- assert 0 == doc.fdt.getFilePointer();
- }
-
- localFieldsWriter.writeField(fieldInfo, field);
- assert docState.testPoint("StoredFieldsWriterPerThread.processFields.writeField");
- doc.numStoredFields++;
- }
-
- public DocumentsWriter.DocWriter finishDocument() {
- // If there were any stored fields in this doc, doc will
- // be non-null; else it's null.
- try {
- return doc;
- } finally {
- doc = null;
- }
- }
-
- public void abort() {
- if (doc != null) {
- doc.abort();
- doc = null;
- }
- }
-}
diff --git a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
index a5d631e..da43f3a 100644
--- a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriter.java
@@ -17,49 +17,48 @@
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Map;
+
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
-import java.io.IOException;
-import java.util.Collection;
-
-import java.util.Map;
-
final class TermVectorsTermsWriter extends TermsHashConsumer {
- final DocumentsWriter docWriter;
- PerDoc[] docFreeList = new PerDoc[1];
+ final DocumentsWriterPerThread docWriter;
int freeCount;
IndexOutput tvx;
IndexOutput tvd;
IndexOutput tvf;
int lastDocID;
+
+ final DocumentsWriterPerThread.DocState docState;
+ final BytesRef flushTerm = new BytesRef();
+
+ // Used by perField when serializing the term vectors
+ final ByteSliceReader vectorSliceReader = new ByteSliceReader();
boolean hasVectors;
- public TermVectorsTermsWriter(DocumentsWriter docWriter) {
+ public TermVectorsTermsWriter(DocumentsWriterPerThread docWriter) {
this.docWriter = docWriter;
+ docState = docWriter.docState;
}
@Override
- public TermsHashConsumerPerThread addThread(TermsHashPerThread termsHashPerThread) {
- return new TermVectorsTermsWriterPerThread(termsHashPerThread, this);
- }
-
- @Override
- synchronized void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
+ void flush(Map<FieldInfo, TermsHashConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
if (tvx != null) {
// At least one doc in this run had term vectors enabled
fill(state.numDocs);
+ assert state.segmentName != null;
+ String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
tvx.close();
tvf.close();
tvd.close();
tvx = tvd = tvf = null;
- assert state.segmentName != null;
- String idxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.VECTORS_INDEX_EXTENSION);
- if (4 + ((long) state.numDocs) * 16 != state.directory.fileLength(idxName)) {
+ if (4+((long) state.numDocs)*16 != state.directory.fileLength(idxName)) {
throw new RuntimeException("after flush: tvx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(idxName) + " length in bytes of " + idxName + " file exists?=" + state.directory.fileExists(idxName));
}
@@ -68,33 +67,10 @@
hasVectors = false;
}
- for (Map.Entry<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> entry : threadsAndFields.entrySet()) {
- for (final TermsHashConsumerPerField field : entry.getValue() ) {
- TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
- perField.termsHashPerField.reset();
- perField.shrinkHash();
- }
-
- TermVectorsTermsWriterPerThread perThread = (TermVectorsTermsWriterPerThread) entry.getKey();
- perThread.termsHashPerThread.reset(true);
- }
- }
-
- int allocCount;
-
- synchronized PerDoc getPerDoc() {
- if (freeCount == 0) {
- allocCount++;
- if (allocCount > docFreeList.length) {
- // Grow our free list up front to make sure we have
- // enough space to recycle all outstanding PerDoc
- // instances
- assert allocCount == 1+docFreeList.length;
- docFreeList = new PerDoc[ArrayUtil.oversize(allocCount, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
- }
- return new PerDoc();
- } else {
- return docFreeList[--freeCount];
+ for (final TermsHashConsumerPerField field : fieldsToFlush.values() ) {
+ TermVectorsTermsWriterPerField perField = (TermVectorsTermsWriterPerField) field;
+ perField.termsHashPerField.reset();
+ perField.shrinkHash();
}
}
@@ -112,18 +88,17 @@
}
}
- synchronized void initTermVectorsWriter() throws IOException {
+ private final void initTermVectorsWriter() throws IOException {
if (tvx == null) {
// If we hit an exception while init'ing the term
// vector output files, we must abort this segment
// because those files will be in an unknown
// state:
- hasVectors = true;
tvx = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_INDEX_EXTENSION));
tvd = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
tvf = docWriter.directory.createOutput(IndexFileNames.segmentFileName(docWriter.getSegment(), "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
-
+
tvx.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvd.writeInt(TermVectorsReader.FORMAT_CURRENT);
tvf.writeInt(TermVectorsReader.FORMAT_CURRENT);
@@ -132,39 +107,44 @@
}
}
- synchronized void finishDocument(PerDoc perDoc) throws IOException {
+ @Override
+ void finishDocument(TermsHash termsHash) throws IOException {
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument start");
- initTermVectorsWriter();
-
- fill(perDoc.docID);
-
- // Append term vectors to the real outputs:
- tvx.writeLong(tvd.getFilePointer());
- tvx.writeLong(tvf.getFilePointer());
- tvd.writeVInt(perDoc.numVectorFields);
- if (perDoc.numVectorFields > 0) {
- for(int i=0;i<perDoc.numVectorFields;i++) {
- tvd.writeVInt(perDoc.fieldNumbers[i]);
- }
- assert 0 == perDoc.fieldPointers[0];
- long lastPos = perDoc.fieldPointers[0];
- for(int i=1;i<perDoc.numVectorFields;i++) {
- long pos = perDoc.fieldPointers[i];
- tvd.writeVLong(pos-lastPos);
- lastPos = pos;
- }
- perDoc.perDocTvf.writeTo(tvf);
- perDoc.numVectorFields = 0;
+ if (!hasVectors) {
+ return;
}
- assert lastDocID == perDoc.docID;
+ initTermVectorsWriter();
+
+ fill(docState.docID);
+
+ // Append term vectors to the real outputs:
+ long pointer = tvd.getFilePointer();
+ tvx.writeLong(pointer);
+ tvx.writeLong(tvf.getFilePointer());
+ tvd.writeVInt(numVectorFields);
+ if (numVectorFields > 0) {
+ for(int i=0;i<numVectorFields;i++) {
+ tvd.writeVInt(perFields[i].fieldInfo.number);
+ }
+ long lastPos = tvf.getFilePointer();
+ perFields[0].finishDocument();
+ for(int i=1;i<numVectorFields;i++) {
+ long pos = tvf.getFilePointer();
+ tvd.writeVLong(pos-lastPos);
+ lastPos = pos;
+ perFields[i].finishDocument();
+ }
+ }
+
+ assert lastDocID == docState.docID;
lastDocID++;
- perDoc.reset();
- free(perDoc);
+ termsHash.reset();
+ reset();
assert docWriter.writer.testPoint("TermVectorsTermsWriter.finishDocument end");
}
@@ -189,55 +169,58 @@
}
tvx = tvd = tvf = null;
lastDocID = 0;
+
+ reset();
}
- synchronized void free(PerDoc doc) {
- assert freeCount < docFreeList.length;
- docFreeList[freeCount++] = doc;
+ int numVectorFields;
+
+ TermVectorsTermsWriterPerField[] perFields;
+
+ void reset() {
+ numVectorFields = 0;
+ perFields = new TermVectorsTermsWriterPerField[1];
}
- class PerDoc extends DocumentsWriter.DocWriter {
+ @Override
+ public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) {
+ return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
+ }
- final DocumentsWriter.PerDocBuffer buffer = docWriter.newPerDocBuffer();
- RAMOutputStream perDocTvf = new RAMOutputStream(buffer);
-
- int numVectorFields;
-
- int[] fieldNumbers = new int[1];
- long[] fieldPointers = new long[1];
-
- void reset() {
- perDocTvf.reset();
- buffer.recycle();
- numVectorFields = 0;
+ void addFieldToFlush(TermVectorsTermsWriterPerField fieldToFlush) {
+ if (numVectorFields == perFields.length) {
+ int newSize = ArrayUtil.oversize(numVectorFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ TermVectorsTermsWriterPerField[] newArray = new TermVectorsTermsWriterPerField[newSize];
+ System.arraycopy(perFields, 0, newArray, 0, numVectorFields);
+ perFields = newArray;
}
- @Override
- void abort() {
- reset();
- free(this);
- }
+ perFields[numVectorFields++] = fieldToFlush;
+ }
- void addField(final int fieldNumber) {
- if (numVectorFields == fieldNumbers.length) {
- fieldNumbers = ArrayUtil.grow(fieldNumbers);
- }
- if (numVectorFields == fieldPointers.length) {
- fieldPointers = ArrayUtil.grow(fieldPointers);
- }
- fieldNumbers[numVectorFields] = fieldNumber;
- fieldPointers[numVectorFields] = perDocTvf.getFilePointer();
- numVectorFields++;
- }
+ @Override
+ void startDocument() throws IOException {
+ assert clearLastVectorFieldName();
+ reset();
+ }
- @Override
- public long sizeInBytes() {
- return buffer.getSizeInBytes();
- }
+ // Called only by assert
+ final boolean clearLastVectorFieldName() {
+ lastVectorFieldName = null;
+ return true;
+ }
- @Override
- public void finish() throws IOException {
- finishDocument(this);
+ // Called only by assert
+ String lastVectorFieldName;
+ final boolean vectorFieldsInOrder(FieldInfo fi) {
+ try {
+ if (lastVectorFieldName != null)
+ return lastVectorFieldName.compareTo(fi.name) < 0;
+ else
+ return true;
+ } finally {
+ lastVectorFieldName = fi.name;
}
}
+
}
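
Note: startDocument() above runs "assert clearLastVectorFieldName();", a boolean-returning mutator that executes only under -ea, so the vectorFieldsInOrder check costs nothing in production. The idiom reduced to its essentials (a self-contained sketch, not patch code):

  // Assert-only state: mutations and checks vanish when asserts are off.
  final class OrderChecker {
    String lastName;               // debug-only state, touched under asserts

    boolean clear() {              // always true, so the assert never fires
      lastName = null;
      return true;
    }

    boolean inOrder(String name) { // true if names arrive in sorted order
      try {
        return lastName == null || lastName.compareTo(name) < 0;
      } finally {
        lastName = name;
      }
    }

    void startItem()        { assert clear(); }
    void addItem(String n)  { assert inOrder(n) : "out of order: " + n; }
  }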
diff --git a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
index 2b4e35e..731df7f 100644
--- a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
+++ b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java
@@ -28,11 +28,10 @@
final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
- final TermVectorsTermsWriterPerThread perThread;
final TermsHashPerField termsHashPerField;
final TermVectorsTermsWriter termsWriter;
final FieldInfo fieldInfo;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
boolean doVectors;
@@ -41,11 +40,10 @@
int maxNumPostings;
OffsetAttribute offsetAttribute = null;
-
- public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriterPerThread perThread, FieldInfo fieldInfo) {
+
+ public TermVectorsTermsWriterPerField(TermsHashPerField termsHashPerField, TermVectorsTermsWriter termsWriter, FieldInfo fieldInfo) {
this.termsHashPerField = termsHashPerField;
- this.perThread = perThread;
- this.termsWriter = perThread.termsWriter;
+ this.termsWriter = termsWriter;
this.fieldInfo = fieldInfo;
docState = termsHashPerField.docState;
fieldState = termsHashPerField.fieldState;
@@ -72,22 +70,12 @@
}
if (doVectors) {
- if (perThread.doc == null) {
- perThread.doc = termsWriter.getPerDoc();
- perThread.doc.docID = docState.docID;
- assert perThread.doc.numVectorFields == 0;
- assert 0 == perThread.doc.perDocTvf.length();
- assert 0 == perThread.doc.perDocTvf.getFilePointer();
- }
-
- assert perThread.doc.docID == docState.docID;
-
+ termsWriter.hasVectors = true;
if (termsHashPerField.bytesHash.size() != 0) {
// Only necessary if previous doc hit a
// non-aborting exception while writing vectors in
// this field:
termsHashPerField.reset();
- perThread.termsHashPerThread.reset(false);
}
}
@@ -95,42 +83,42 @@
//perThread.postingsCount = 0;
return doVectors;
- }
+ }
public void abort() {}
/** Called once per field per document if term vectors
* are enabled, to write the vectors to
* RAMOutputStream, which is then quickly flushed to
- * the real term vectors files in the Directory. */
- @Override
+ * the real term vectors files in the Directory. */
+ @Override
void finish() throws IOException {
+ if (!doVectors || termsHashPerField.bytesHash.size() == 0)
+ return;
+ termsWriter.addFieldToFlush(this);
+ }
+
+ void finishDocument() throws IOException {
assert docState.testPoint("TermVectorsTermsWriterPerField.finish start");
final int numPostings = termsHashPerField.bytesHash.size();
- final BytesRef flushTerm = perThread.flushTerm;
+ final BytesRef flushTerm = termsWriter.flushTerm;
assert numPostings >= 0;
- if (!doVectors || numPostings == 0)
- return;
-
if (numPostings > maxNumPostings)
maxNumPostings = numPostings;
- final IndexOutput tvf = perThread.doc.perDocTvf;
-
// This is called once, after inverting all occurrences
// of a given field in the doc. At this point we flush
// our hash into the DocWriter.
assert fieldInfo.storeTermVector;
- assert perThread.vectorFieldsInOrder(fieldInfo);
+ assert termsWriter.vectorFieldsInOrder(fieldInfo);
- perThread.doc.addField(termsHashPerField.fieldInfo.number);
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
+ final IndexOutput tvf = termsWriter.tvf;
// TODO: we may want to make this sort in same order
// as Codec's terms dict?
@@ -140,21 +128,21 @@
byte bits = 0x0;
if (doVectorPositions)
bits |= TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
- if (doVectorOffsets)
+ if (doVectorOffsets)
bits |= TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
tvf.writeByte(bits);
int lastLen = 0;
byte[] lastBytes = null;
int lastStart = 0;
-
- final ByteSliceReader reader = perThread.vectorSliceReader;
- final ByteBlockPool termBytePool = perThread.termsHashPerThread.termBytePool;
+
+ final ByteSliceReader reader = termsWriter.vectorSliceReader;
+ final ByteBlockPool termBytePool = termsHashPerField.termBytePool;
for(int j=0;j<numPostings;j++) {
final int termID = termIDs[j];
final int freq = postings.freqs[termID];
-
+
// Get BytesRef
termBytePool.setBytesRef(flushTerm, postings.textStarts[termID]);
@@ -192,20 +180,13 @@
}
termsHashPerField.reset();
-
- // NOTE: we clear, per-field, at the thread level,
- // because term vectors fully write themselves on each
- // field; this saves RAM (eg if large doc has two large
- // fields w/ term vectors on) because we recycle/reuse
- // all RAM after each field:
- perThread.termsHashPerThread.reset(false);
}
void shrinkHash() {
termsHashPerField.shrinkHash(maxNumPostings);
maxNumPostings = 0;
}
-
+
@Override
void start(Fieldable f) {
if (doVectorOffsets) {
@@ -225,7 +206,7 @@
if (doVectorOffsets) {
int startOffset = fieldState.offset + offsetAttribute.startOffset();
int endOffset = fieldState.offset + offsetAttribute.endOffset();
-
+
termsHashPerField.writeVInt(1, startOffset);
termsHashPerField.writeVInt(1, endOffset - startOffset);
postings.lastOffsets[termID] = endOffset;
@@ -243,13 +224,13 @@
assert docState.testPoint("TermVectorsTermsWriterPerField.addTerm start");
TermVectorsPostingsArray postings = (TermVectorsPostingsArray) termsHashPerField.postingsArray;
-
+
postings.freqs[termID]++;
if (doVectorOffsets) {
int startOffset = fieldState.offset + offsetAttribute.startOffset();
int endOffset = fieldState.offset + offsetAttribute.endOffset();
-
+
termsHashPerField.writeVInt(1, startOffset - postings.lastOffsets[termID]);
termsHashPerField.writeVInt(1, endOffset - startOffset);
postings.lastOffsets[termID] = endOffset;
@@ -280,7 +261,7 @@
int[] freqs; // How many times this term occurred in the current doc
int[] lastOffsets; // Last offset we saw
int[] lastPositions; // Last position where this term occurred
-
+
@Override
ParallelPostingsArray newInstance(int size) {
return new TermVectorsPostingsArray(size);
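
Note: finish() now only registers the field via termsWriter.addFieldToFlush(this); serialization is deferred to finishDocument(), which writes straight into the shared tvf stream instead of a per-document RAMOutputStream. The writer's finishDocument (earlier hunk) derives the tvd field offsets from tvf file-pointer deltas around those deferred calls:

  // Offset bookkeeping around the deferred per-field writes (tvf, tvd,
  // perFields and numVectorFields as in the TermVectorsTermsWriter hunk).
  long lastPos = tvf.getFilePointer();
  perFields[0].finishDocument();           // field 0 serializes itself to tvf
  for (int i = 1; i < numVectorFields; i++) {
    long pos = tvf.getFilePointer();
    tvd.writeVLong(pos - lastPos);         // delta = size of the previous field
    lastPos = pos;
    perFields[i].finishDocument();
  }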
diff --git a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java
deleted file mode 100644
index bf81fd6..0000000
--- a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java
+++ /dev/null
@@ -1,89 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.BytesRef;
-
-final class TermVectorsTermsWriterPerThread extends TermsHashConsumerPerThread {
-
- final TermVectorsTermsWriter termsWriter;
- final TermsHashPerThread termsHashPerThread;
- final DocumentsWriter.DocState docState;
- final BytesRef flushTerm = new BytesRef();
-
- TermVectorsTermsWriter.PerDoc doc;
-
- public TermVectorsTermsWriterPerThread(TermsHashPerThread termsHashPerThread, TermVectorsTermsWriter termsWriter) {
- this.termsWriter = termsWriter;
- this.termsHashPerThread = termsHashPerThread;
- docState = termsHashPerThread.docState;
- }
-
- // Used by perField when serializing the term vectors
- final ByteSliceReader vectorSliceReader = new ByteSliceReader();
-
- @Override
- public void startDocument() {
- assert clearLastVectorFieldName();
- if (doc != null) {
- doc.reset();
- doc.docID = docState.docID;
- }
- }
-
- @Override
- public DocumentsWriter.DocWriter finishDocument() {
- try {
- return doc;
- } finally {
- doc = null;
- }
- }
-
- @Override
- public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo) {
- return new TermVectorsTermsWriterPerField(termsHashPerField, this, fieldInfo);
- }
-
- @Override
- public void abort() {
- if (doc != null) {
- doc.abort();
- doc = null;
- }
- }
-
- // Called only by assert
- final boolean clearLastVectorFieldName() {
- lastVectorFieldName = null;
- return true;
- }
-
- // Called only by assert
- String lastVectorFieldName;
- final boolean vectorFieldsInOrder(FieldInfo fi) {
- try {
- if (lastVectorFieldName != null)
- return lastVectorFieldName.compareTo(fi.name) < 0;
- else
- return true;
- } finally {
- lastVectorFieldName = fi.name;
- }
- }
-}
diff --git a/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java b/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java
index 5789f00..b00a528 100644
--- a/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/TermVectorsWriter.java
@@ -20,12 +20,13 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import java.io.IOException;
final class TermVectorsWriter {
-
+
private IndexOutput tvx = null, tvd = null, tvf = null;
private FieldInfos fieldInfos;
@@ -46,7 +47,7 @@
/**
* Add a complete document specified by all its term vectors. If the document
* has no term vectors, an entry is still written to tvx.
- *
+ *
* @param vectors
* @throws IOException
*/
@@ -99,7 +100,7 @@
final int[] freqs = vectors[i].getTermFrequencies();
for (int j=0; j<numTerms; j++) {
-
+
int start = j == 0 ? 0 : StringHelper.bytesDifference(terms[j-1].bytes,
terms[j-1].length,
terms[j].bytes,
@@ -181,30 +182,11 @@
assert tvd.getFilePointer() == tvdPosition;
assert tvf.getFilePointer() == tvfPosition;
}
-
+
/** Close all streams. */
final void close() throws IOException {
// make an effort to close all streams we can but remember and re-throw
// the first exception encountered in this process
- IOException keep = null;
- if (tvx != null)
- try {
- tvx.close();
- } catch (IOException e) {
- keep = e;
- }
- if (tvd != null)
- try {
- tvd.close();
- } catch (IOException e) {
- if (keep == null) keep = e;
- }
- if (tvf != null)
- try {
- tvf.close();
- } catch (IOException e) {
- if (keep == null) keep = e;
- }
- if (keep != null) throw (IOException) keep.fillInStackTrace();
+ IOUtils.closeSafely(tvx, tvd, tvf);
}
}
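
Note: IOUtils.closeSafely replaces roughly twenty lines of close-each-stream-and-rethrow-the-first-IOException boilerplate. What such a helper does, sketched as a generic utility (assumed behavior; the real implementation lives in org.apache.lucene.util.IOUtils):

  // Sketch: close all streams, remember the first failure, rethrow it last.
  static void closeAll(java.io.Closeable... closeables) throws java.io.IOException {
    java.io.IOException first = null;
    for (java.io.Closeable c : closeables) {
      if (c == null) continue;           // tolerate never-opened streams
      try {
        c.close();
      } catch (java.io.IOException e) {
        if (first == null) first = e;    // later failures are dropped
      }
    }
    if (first != null) throw first;
  }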
diff --git a/lucene/src/java/org/apache/lucene/index/TermsHash.java b/lucene/src/java/org/apache/lucene/index/TermsHash.java
index 2c3bc81..af51996 100644
--- a/lucene/src/java/org/apache/lucene/index/TermsHash.java
+++ b/lucene/src/java/org/apache/lucene/index/TermsHash.java
@@ -18,12 +18,12 @@
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
import java.util.Map;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+
/** This class implements {@link InvertedDocConsumer}, which
* is passed each token produced by the analyzer on each
* field. It stores these tokens in a hash table, and
@@ -36,78 +36,118 @@
final TermsHashConsumer consumer;
final TermsHash nextTermsHash;
- final DocumentsWriter docWriter;
+ final DocumentsWriterPerThread docWriter;
- boolean trackAllocations;
+ final IntBlockPool intPool;
+ final ByteBlockPool bytePool;
+ ByteBlockPool termBytePool;
- public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) {
+ final boolean primary;
+ final DocumentsWriterPerThread.DocState docState;
+
+ // Used when comparing postings via termRefComp, in TermsHashPerField
+ final BytesRef tr1 = new BytesRef();
+ final BytesRef tr2 = new BytesRef();
+
+ // Used by perField to obtain terms from the analysis chain
+ final BytesRef termBytesRef = new BytesRef(10);
+
+ final boolean trackAllocations;
+
+
+ public TermsHash(final DocumentsWriterPerThread docWriter, final TermsHashConsumer consumer, boolean trackAllocations, final TermsHash nextTermsHash) {
+ this.docState = docWriter.docState;
this.docWriter = docWriter;
this.consumer = consumer;
+ this.trackAllocations = trackAllocations;
this.nextTermsHash = nextTermsHash;
- this.trackAllocations = trackAllocations;
- }
+ intPool = new IntBlockPool(docWriter);
+ bytePool = new ByteBlockPool(docWriter.byteBlockAllocator);
- @Override
- InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
- return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null);
- }
-
- TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) {
- return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread);
+ if (nextTermsHash != null) {
+ // We are primary
+ primary = true;
+ termBytePool = bytePool;
+ nextTermsHash.termBytePool = bytePool;
+ } else {
+ primary = false;
+ }
}
@Override
public void abort() {
- consumer.abort();
- if (nextTermsHash != null)
- nextTermsHash.abort();
- }
-
- @Override
- synchronized void flush(Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException {
- Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> childThreadsAndFields = new HashMap<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>>();
- Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> nextThreadsAndFields;
-
- if (nextTermsHash != null)
- nextThreadsAndFields = new HashMap<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>>();
- else
- nextThreadsAndFields = null;
-
- for (final Map.Entry<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> entry : threadsAndFields.entrySet()) {
-
- TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey();
-
- Collection<InvertedDocConsumerPerField> fields = entry.getValue();
-
- Iterator<InvertedDocConsumerPerField> fieldsIt = fields.iterator();
- Collection<TermsHashConsumerPerField> childFields = new HashSet<TermsHashConsumerPerField>();
- Collection<InvertedDocConsumerPerField> nextChildFields;
-
- if (nextTermsHash != null)
- nextChildFields = new HashSet<InvertedDocConsumerPerField>();
- else
- nextChildFields = null;
-
- while(fieldsIt.hasNext()) {
- TermsHashPerField perField = (TermsHashPerField) fieldsIt.next();
- childFields.add(perField.consumer);
- if (nextTermsHash != null)
- nextChildFields.add(perField.nextPerField);
+ reset();
+ try {
+ consumer.abort();
+ } finally {
+ if (nextTermsHash != null) {
+ nextTermsHash.abort();
}
-
- childThreadsAndFields.put(perThread.consumer, childFields);
- if (nextTermsHash != null)
- nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields);
}
-
- consumer.flush(childThreadsAndFields, state);
+ }
- if (nextTermsHash != null)
- nextTermsHash.flush(nextThreadsAndFields, state);
+ // Clear all state
+ void reset() {
+ intPool.reset();
+ bytePool.reset();
+
+ if (primary) {
+ bytePool.reset();
+ }
}
@Override
- synchronized public boolean freeRAM() {
+ void flush(Map<FieldInfo,InvertedDocConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException {
+ Map<FieldInfo,TermsHashConsumerPerField> childFields = new HashMap<FieldInfo,TermsHashConsumerPerField>();
+ Map<FieldInfo,InvertedDocConsumerPerField> nextChildFields;
+
+ if (nextTermsHash != null) {
+ nextChildFields = new HashMap<FieldInfo,InvertedDocConsumerPerField>();
+ } else {
+ nextChildFields = null;
+ }
+
+ for (final Map.Entry<FieldInfo,InvertedDocConsumerPerField> entry : fieldsToFlush.entrySet()) {
+ TermsHashPerField perField = (TermsHashPerField) entry.getValue();
+ childFields.put(entry.getKey(), perField.consumer);
+ if (nextTermsHash != null) {
+ nextChildFields.put(entry.getKey(), perField.nextPerField);
+ }
+ }
+
+ consumer.flush(childFields, state);
+
+ if (nextTermsHash != null) {
+ nextTermsHash.flush(nextChildFields, state);
+ }
+ }
+
+ @Override
+ InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
+ return new TermsHashPerField(docInverterPerField, this, nextTermsHash, fieldInfo);
+ }
+
+ @Override
+ public boolean freeRAM() {
return false;
}
+
+ @Override
+ void finishDocument() throws IOException {
+ try {
+ consumer.finishDocument(this);
+ } finally {
+ if (nextTermsHash != null) {
+ nextTermsHash.consumer.finishDocument(nextTermsHash);
+ }
+ }
+ }
+
+ @Override
+ void startDocument() throws IOException {
+ consumer.startDocument();
+ if (nextTermsHash != null) {
+ nextTermsHash.consumer.startDocument();
+ }
+ }
}
diff --git a/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java b/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java
index 6488d33..3ec6ec2 100644
--- a/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java
+++ b/lucene/src/java/org/apache/lucene/index/TermsHashConsumer.java
@@ -18,11 +18,12 @@
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.Map;
abstract class TermsHashConsumer {
- abstract TermsHashConsumerPerThread addThread(TermsHashPerThread perThread);
- abstract void flush(Map<TermsHashConsumerPerThread,Collection<TermsHashConsumerPerField>> threadsAndFields, final SegmentWriteState state) throws IOException;
+ abstract void flush(Map<FieldInfo, TermsHashConsumerPerField> fieldsToFlush, final SegmentWriteState state) throws IOException;
abstract void abort();
- }
+ abstract void startDocument() throws IOException;
+ abstract void finishDocument(TermsHash termsHash) throws IOException;
+ abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
+}
diff --git a/lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java b/lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java
deleted file mode 100644
index 3949cf7..0000000
--- a/lucene/src/java/org/apache/lucene/index/TermsHashConsumerPerThread.java
+++ /dev/null
@@ -1,27 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-abstract class TermsHashConsumerPerThread {
- abstract void startDocument() throws IOException;
- abstract DocumentsWriter.DocWriter finishDocument() throws IOException;
- abstract public TermsHashConsumerPerField addField(TermsHashPerField termsHashPerField, FieldInfo fieldInfo);
- abstract public void abort();
-}
diff --git a/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java b/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
index 0b3ec24..f3d705e 100644
--- a/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
+++ b/lucene/src/java/org/apache/lucene/index/TermsHashPerField.java
@@ -34,9 +34,10 @@
final TermsHashConsumerPerField consumer;
+ final TermsHash termsHash;
+
final TermsHashPerField nextPerField;
- final TermsHashPerThread perThread;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
TermToBytesRefAttribute termAtt;
BytesRef termBytesRef;
@@ -52,27 +53,27 @@
final FieldInfo fieldInfo;
final BytesRefHash bytesHash;
-
+
ParallelPostingsArray postingsArray;
private final AtomicLong bytesUsed;
- public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) {
- this.perThread = perThread;
- intPool = perThread.intPool;
- bytePool = perThread.bytePool;
- termBytePool = perThread.termBytePool;
- docState = perThread.docState;
- bytesUsed = perThread.termsHash.trackAllocations?perThread.termsHash.docWriter.bytesUsed:new AtomicLong();
-
+ public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHash termsHash, final TermsHash nextTermsHash, final FieldInfo fieldInfo) {
+ intPool = termsHash.intPool;
+ bytePool = termsHash.bytePool;
+ termBytePool = termsHash.termBytePool;
+ docState = termsHash.docState;
+ this.termsHash = termsHash;
+ bytesUsed = termsHash.trackAllocations ? termsHash.docWriter.bytesUsed
+ : new AtomicLong();
fieldState = docInverterPerField.fieldState;
- this.consumer = perThread.consumer.addField(this, fieldInfo);
+ this.consumer = termsHash.consumer.addField(this, fieldInfo);
PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
- bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
+ bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
streamCount = consumer.getStreamCount();
numPostingInt = 2*streamCount;
this.fieldInfo = fieldInfo;
- if (nextPerThread != null)
- nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo);
+ if (nextTermsHash != null)
+ nextPerField = (TermsHashPerField) nextTermsHash.addField(docInverterPerField, fieldInfo);
else
nextPerField = null;
}
@@ -80,7 +81,7 @@
void shrinkHash(int targetSize) {
// Fully free the bytesHash on each flush but keep the pool untouched
// bytesHash.clear will clear the ByteStartArray and in turn the ParallelPostingsArray too
- bytesHash.clear(false);
+ bytesHash.clear(false);
}
public void reset() {
@@ -90,7 +91,7 @@
}
@Override
- synchronized public void abort() {
+ public void abort() {
reset();
if (nextPerField != null)
nextPerField.abort();
@@ -99,14 +100,13 @@
public void initReader(ByteSliceReader reader, int termID, int stream) {
assert stream < streamCount;
int intStart = postingsArray.intStarts[termID];
- final int[] ints = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
- final int upto = intStart & DocumentsWriter.INT_BLOCK_MASK;
+ final int[] ints = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
+ final int upto = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
reader.init(bytePool,
postingsArray.byteStarts[termID]+stream*ByteBlockPool.FIRST_LEVEL_SIZE,
ints[upto+stream]);
}
-
/** Collapse the hash table & sort in-place. */
public int[] sortPostings(Comparator<BytesRef> termComp) {
return bytesHash.sort(termComp);
@@ -124,7 +124,7 @@
nextPerField.start(f);
}
}
-
+
@Override
boolean start(Fieldable[] fields, int count) throws IOException {
doCall = consumer.start(fields, count);
@@ -143,11 +143,12 @@
// First time we are seeing this token since we last
// flushed the hash.
// Init stream slices
- if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE)
+ if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE)
intPool.nextBuffer();
- if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE)
+ if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) {
bytePool.nextBuffer();
+ }
intUptos = intPool.buffer;
intUptoStart = intPool.intUpto;
@@ -166,8 +167,8 @@
} else {
termID = (-termID)-1;
int intStart = postingsArray.intStarts[termID];
- intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
- intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK;
+ intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
+ intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
consumer.addTerm(termID);
}
}
@@ -192,7 +193,7 @@
if (docState.maxTermPrefix == null) {
final int saved = termBytesRef.length;
try {
- termBytesRef.length = Math.min(30, DocumentsWriter.MAX_TERM_LENGTH_UTF8);
+ termBytesRef.length = Math.min(30, DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8);
docState.maxTermPrefix = termBytesRef.toString();
} finally {
termBytesRef.length = saved;
@@ -204,7 +205,7 @@
if (termID >= 0) {// New posting
bytesHash.byteStart(termID);
// Init stream slices
- if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) {
+ if (numPostingInt + intPool.intUpto > DocumentsWriterPerThread.INT_BLOCK_SIZE) {
intPool.nextBuffer();
}
@@ -229,8 +230,8 @@
} else {
termID = (-termID)-1;
final int intStart = postingsArray.intStarts[termID];
- intUptos = intPool.buffers[intStart >> DocumentsWriter.INT_BLOCK_SHIFT];
- intUptoStart = intStart & DocumentsWriter.INT_BLOCK_MASK;
+ intUptos = intPool.buffers[intStart >> DocumentsWriterPerThread.INT_BLOCK_SHIFT];
+ intUptoStart = intStart & DocumentsWriterPerThread.INT_BLOCK_MASK;
consumer.addTerm(termID);
}
@@ -278,7 +279,7 @@
if (nextPerField != null)
nextPerField.finish();
}
-
+
private static final class PostingsBytesStartArray extends BytesStartArray {
private final TermsHashPerField perField;
@@ -289,10 +290,10 @@
this.perField = perField;
this.bytesUsed = bytesUsed;
}
-
+
@Override
public int[] init() {
- if(perField.postingsArray == null) {
+ if(perField.postingsArray == null) {
perField.postingsArray = perField.consumer.createPostingsArray(2);
bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
}
@@ -312,7 +313,7 @@
@Override
public int[] clear() {
if(perField.postingsArray != null) {
- bytesUsed.addAndGet(-perField.postingsArray.size * perField.postingsArray.bytesPerPosting());
+ bytesUsed.addAndGet(-(perField.postingsArray.size * perField.postingsArray.bytesPerPosting()));
perField.postingsArray = null;
}
return null;
diff --git a/lucene/src/java/org/apache/lucene/index/TermsHashPerThread.java b/lucene/src/java/org/apache/lucene/index/TermsHashPerThread.java
deleted file mode 100644
index 1a0c429..0000000
--- a/lucene/src/java/org/apache/lucene/index/TermsHashPerThread.java
+++ /dev/null
@@ -1,96 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.util.ByteBlockPool;
-
-import java.io.IOException;
-
-final class TermsHashPerThread extends InvertedDocConsumerPerThread {
-
- final TermsHash termsHash;
- final TermsHashConsumerPerThread consumer;
- final TermsHashPerThread nextPerThread; // the secondary is currently consumed by TermVectorsWriter
- // see secondary entry point in TermsHashPerField#add(int)
-
- final IntBlockPool intPool;
- final ByteBlockPool bytePool;
- final ByteBlockPool termBytePool;
-
- final boolean primary;
- final DocumentsWriter.DocState docState;
-
- public TermsHashPerThread(DocInverterPerThread docInverterPerThread, final TermsHash termsHash, final TermsHash nextTermsHash, final TermsHashPerThread primaryPerThread) {
- docState = docInverterPerThread.docState;
-
- this.termsHash = termsHash;
- this.consumer = termsHash.consumer.addThread(this);
-
- intPool = new IntBlockPool(termsHash.docWriter);
- bytePool = new ByteBlockPool(termsHash.docWriter.byteBlockAllocator); // use the allocator from the docWriter which tracks the used bytes
- primary = nextTermsHash != null;
- if (primary) {
- // We are primary
- termBytePool = bytePool;
- nextPerThread = nextTermsHash.addThread(docInverterPerThread, this); // this will be the primaryPerThread in the secondary
- assert nextPerThread != null;
- } else {
- assert primaryPerThread != null;
- termBytePool = primaryPerThread.bytePool; // we are secondary and share the byte pool with the primary
- nextPerThread = null;
- }
- }
-
- @Override
- InvertedDocConsumerPerField addField(DocInverterPerField docInverterPerField, final FieldInfo fieldInfo) {
- return new TermsHashPerField(docInverterPerField, this, nextPerThread, fieldInfo);
- }
-
- @Override
- synchronized public void abort() {
- reset(true);
- consumer.abort();
- if (primary)
- nextPerThread.abort();
- }
-
- @Override
- public void startDocument() throws IOException {
- consumer.startDocument();
- if (primary)
- nextPerThread.consumer.startDocument();
- }
-
- @Override
- public DocumentsWriter.DocWriter finishDocument() throws IOException {
- final DocumentsWriter.DocWriter doc = consumer.finishDocument();
- final DocumentsWriter.DocWriter docFromSecondary = primary? nextPerThread.consumer.finishDocument():null;
- if (doc == null)
- return docFromSecondary;
- else {
- doc.setNext(docFromSecondary);
- return doc;
- }
- }
-
- // Clear all state
- void reset(boolean recyclePostings) {
- intPool.reset();
- bytePool.reset();
- }
-}
diff --git a/lucene/src/java/org/apache/lucene/index/ThreadAffinityDocumentsWriterThreadPool.java b/lucene/src/java/org/apache/lucene/index/ThreadAffinityDocumentsWriterThreadPool.java
new file mode 100644
index 0000000..9df6b5a
--- /dev/null
+++ b/lucene/src/java/org/apache/lucene/index/ThreadAffinityDocumentsWriterThreadPool.java
@@ -0,0 +1,96 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.lucene.document.Document;
+
+/**
+ * A {@link DocumentsWriterPerThreadPool} implementation that tries to assign an
+ * indexing thread to the same {@link ThreadState} each time the thread tries to
+ * obtain a {@link ThreadState}. Once a new {@link ThreadState} is created it is
+ * associated with the creating thread. Subsequently, if the thread's associated
+ * {@link ThreadState} is not in use it will be associated with the requesting
+ * thread again. Otherwise, if the {@link ThreadState} is held by another thread,
+ * {@link ThreadAffinityDocumentsWriterThreadPool} tries to find the currently
+ * least contended {@link ThreadState}.
+ */
+public class ThreadAffinityDocumentsWriterThreadPool extends DocumentsWriterPerThreadPool {
+ private Map<Thread, ThreadState> threadBindings = new ConcurrentHashMap<Thread, ThreadState>();
+
+ /**
+ * Creates a new {@link DocumentsWriterPerThreadPool} with at most
+ * {@link #DEFAULT_MAX_THREAD_STATES} thread states.
+ */
+ public ThreadAffinityDocumentsWriterThreadPool() {
+ this(DEFAULT_MAX_THREAD_STATES);
+ }
+
+ public ThreadAffinityDocumentsWriterThreadPool(int maxNumPerThreads) {
+ super(maxNumPerThreads);
+ assert getMaxThreadStates() >= 1;
+ }
+
+ @Override
+ public ThreadState getAndLock(Thread requestingThread, DocumentsWriter documentsWriter, Document doc) {
+ ThreadState threadState = threadBindings.get(requestingThread);
+ if (threadState != null) {
+ if (threadState.tryLock()) {
+ return threadState;
+ }
+ }
+ ThreadState minThreadState = null;
+
+ /* TODO -- another thread could lock the minThreadState we just found;
+ we should somehow prevent this. */
+ // Find the state that has minimum number of threads waiting
+ minThreadState = minContendedThreadState();
+ if (minThreadState == null || minThreadState.hasQueuedThreads()) {
+ final ThreadState newState = newThreadState(); // state is already locked if non-null
+ if (newState != null) {
+ assert newState.isHeldByCurrentThread();
+ threadBindings.put(requestingThread, newState);
+ return newState;
+ } else if (minThreadState == null) {
+ /*
+ * No new ThreadState is available, so we just take the least
+ * contended one. This must return a valid ThreadState since we
+ * accessed the synced context in newThreadState() above.
+ */
+ minThreadState = minContendedThreadState();
+ }
+ }
+ assert minThreadState != null: "ThreadState is null";
+
+ minThreadState.lock();
+ return minThreadState;
+ }
+
+ /*
+ @Override
+ public void clearThreadBindings(ThreadState perThread) {
+ threadBindings.clear();
+ }
+
+ @Override
+ public void clearAllThreadBindings() {
+ threadBindings.clear();
+ }
+ */
+}
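
A rough usage sketch for the new pool, assuming the setIndexerThreadPool()
setter that the LuceneTestCase change further below already exercises
(directory and analyzer setup are illustrative only):

    // allow up to 8 ThreadStates; each indexing thread then tends to
    // re-acquire the same DocumentsWriterPerThread for every document
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(8));
    IndexWriter writer = new IndexWriter(dir, conf);
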
diff --git a/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java b/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
index 29dbf23..d18aa2b 100644
--- a/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
+++ b/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexReaderBase.java
@@ -36,7 +36,7 @@
* indexed terms (many pairs of CharSequence text + long
* fileOffset), and then this reader must be able to
* retrieve the nearest index term to a provided term
- * text.
+ * text.
* @lucene.experimental */
public abstract class TermsIndexReaderBase implements Closeable {
diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
index 4e1c33e..13cff83 100644
--- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
+++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
@@ -330,14 +330,14 @@
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) throws IOException {
- final String seedFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecId, SEED_EXT);
+ final String seedFileName = IndexFileNames.segmentFileName(segmentInfo.name, codecId, SEED_EXT);
files.add(seedFileName);
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
StandardPostingsReader.files(dir, segmentInfo, codecId, files);
BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
-
+
// hackish!
Iterator<String> it = files.iterator();
while(it.hasNext()) {
diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
index d5d45bf..6b20c9f 100644
--- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
+++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
@@ -68,7 +68,7 @@
return in.readVInt();
}
}
-
+
class Index extends IntIndexInput.Index {
private long fp;
diff --git a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java
index 46c4cf2..736788d 100644
--- a/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java
+++ b/lucene/src/test-framework/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexOutput.java
@@ -25,7 +25,7 @@
/** Writes ints directly to the file (not in blocks) as
* vInt.
- *
+ *
* @lucene.experimental
*/
public class MockSingleIntIndexOutput extends IntIndexOutput {
@@ -77,7 +77,7 @@
}
lastFP = fp;
}
-
+
@Override
public String toString() {
return Long.toString(fp);
diff --git a/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java b/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java
index b993bac..8969110 100644
--- a/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java
+++ b/lucene/src/test-framework/org/apache/lucene/search/QueryUtils.java
@@ -2,13 +2,14 @@
import java.io.IOException;
import java.util.Random;
+import java.lang.reflect.Method;
import junit.framework.Assert;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiReader;
diff --git a/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java b/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
index 0829755..17b62a6 100644
--- a/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
+++ b/lucene/src/test-framework/org/apache/lucene/store/MockDirectoryWrapper.java
@@ -34,6 +34,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ThrottledIndexOutput;
import org.apache.lucene.util._TestUtil;
/**
@@ -69,6 +70,7 @@
private Set<String> createdFiles;
Set<String> openFilesForWrite = new HashSet<String>();
volatile boolean crashed;
+ private ThrottledIndexOutput throttledOutput;
// use this for tracking files for crash.
// additionally: provides debugging information in case you leave one open
@@ -114,6 +116,10 @@
public void setPreventDoubleWrite(boolean value) {
preventDoubleWrite = value;
}
+
+ public void setThrottledIndexOutput(ThrottledIndexOutput throttledOutput) {
+ this.throttledOutput = throttledOutput;
+ }
@Override
public synchronized void sync(Collection<String> names) throws IOException {
@@ -348,7 +354,7 @@
IndexOutput io = new MockIndexOutputWrapper(this, delegate.createOutput(name), name);
openFileHandles.put(io, new RuntimeException("unclosed IndexOutput"));
openFilesForWrite.add(name);
- return io;
+ return throttledOutput == null ? io : throttledOutput.newFromDelegate(io);
}
@Override
@@ -578,4 +584,5 @@
maybeYield();
delegate.copy(to, src, dest);
}
+
}
diff --git a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
index 47cf19b..66f7e90 100644
--- a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
+++ b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java
@@ -116,7 +116,7 @@
* If this is set, it is the only method that should run.
*/
static final String TEST_METHOD;
-
+
/** Create indexes in this directory, optimally use a subdir, named after the test */
public static final File TEMP_DIR;
static {
@@ -163,11 +163,11 @@
* multiply it by the number of iterations
*/
public static final int RANDOM_MULTIPLIER = Integer.parseInt(System.getProperty("tests.multiplier", "1"));
-
+
private int savedBoolMaxClauseCount;
private volatile Thread.UncaughtExceptionHandler savedUncaughtExceptionHandler = null;
-
+
/** Used to track if setUp and tearDown are called correctly from subclasses */
private boolean setup;
@@ -189,28 +189,28 @@
private static class UncaughtExceptionEntry {
public final Thread thread;
public final Throwable exception;
-
+
public UncaughtExceptionEntry(Thread thread, Throwable exception) {
this.thread = thread;
this.exception = exception;
}
}
private List<UncaughtExceptionEntry> uncaughtExceptions = Collections.synchronizedList(new ArrayList<UncaughtExceptionEntry>());
-
+
// saves default codec: we do this statically as many build indexes in @beforeClass
private static String savedDefaultCodec;
// default codec: not set when we use a per-field provider.
private static Codec codec;
// default codec provider
private static CodecProvider savedCodecProvider;
-
+
private static Locale locale;
private static Locale savedLocale;
private static TimeZone timeZone;
private static TimeZone savedTimeZone;
-
+
private static Map<MockDirectoryWrapper,StackTraceElement[]> stores;
-
+
private static final String[] TEST_CODECS = new String[] {"MockSep", "MockFixedIntBlock", "MockVariableIntBlock", "MockRandom"};
private static void swapCodec(Codec c, CodecProvider cp) {
@@ -288,7 +288,7 @@
// randomly picks from core and test codecs
static String pickRandomCodec(Random rnd) {
- int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length +
+ int idx = rnd.nextInt(CodecProvider.CORE_CODECS.length +
TEST_CODECS.length);
if (idx < CodecProvider.CORE_CODECS.length) {
return CodecProvider.CORE_CODECS[idx];
@@ -321,7 +321,7 @@
/** @deprecated (4.0) until we fix no-fork problems in solr tests */
@Deprecated
private static List<String> testClassesRun = new ArrayList<String>();
-
+
@BeforeClass
public static void beforeClassLuceneTestCaseJ4() {
staticSeed = "random".equals(TEST_SEED) ? seedRand.nextLong() : TwoLongs.fromString(TEST_SEED).l1;
@@ -347,7 +347,7 @@
TimeZone.setDefault(timeZone);
testsFailed = false;
}
-
+
@AfterClass
public static void afterClassLuceneTestCaseJ4() {
if (! "false".equals(TEST_CLEAN_THREADS)) {
@@ -363,12 +363,12 @@
if ("randomPerField".equals(TEST_CODEC)) {
if (cp instanceof RandomCodecProvider)
codecDescription = cp.toString();
- else
+ else
codecDescription = "PreFlex";
} else {
codecDescription = codec.toString();
}
-
+
if (CodecProvider.getDefault() == savedCodecProvider)
removeTestCodecs(codec, CodecProvider.getDefault());
CodecProvider.setDefault(savedCodecProvider);
@@ -398,14 +398,14 @@
stores = null;
// if verbose or tests failed, report some information back
if (VERBOSE || testsFailed)
- System.err.println("NOTE: test params are: codec=" + codecDescription +
- ", locale=" + locale +
+ System.err.println("NOTE: test params are: codec=" + codecDescription +
+ ", locale=" + locale +
", timezone=" + (timeZone == null ? "(null)" : timeZone.getID()));
if (testsFailed) {
System.err.println("NOTE: all tests run in this JVM:");
System.err.println(Arrays.toString(testClassesRun.toArray()));
- System.err.println("NOTE: " + System.getProperty("os.name") + " "
- + System.getProperty("os.version") + " "
+ System.err.println("NOTE: " + System.getProperty("os.name") + " "
+ + System.getProperty("os.version") + " "
+ System.getProperty("os.arch") + "/"
+ System.getProperty("java.vendor") + " "
+ System.getProperty("java.version") + " "
@@ -428,7 +428,7 @@
}
private static boolean testsFailed; /* true if any tests failed */
-
+
// This is how we get control when errors occur.
// Think of this as start/end/success/failed
// events.
@@ -463,7 +463,7 @@
LuceneTestCase.this.name = method.getName();
super.starting(method);
}
-
+
};
@Before
@@ -481,7 +481,7 @@
savedUncaughtExceptionHandler.uncaughtException(t, e);
}
});
-
+
savedBoolMaxClauseCount = BooleanQuery.getMaxClauseCount();
}
@@ -513,7 +513,7 @@
if ("perMethod".equals(TEST_CLEAN_THREADS)) {
int rogueThreads = threadCleanup("test method: '" + getName() + "'");
if (rogueThreads > 0) {
- System.err.println("RESOURCE LEAK: test method: '" + getName()
+ System.err.println("RESOURCE LEAK: test method: '" + getName()
+ "' left " + rogueThreads + " thread(s) running");
// TODO: fail, but print seed for now.
if (!testsFailed && uncaughtExceptions.isEmpty()) {
@@ -535,18 +535,18 @@
fail("Some threads threw uncaught exceptions!");
}
- // calling assertSaneFieldCaches here isn't as useful as having test
- // classes call it directly from the scope where the index readers
- // are used, because they could be gc'ed just before this tearDown
+ // calling assertSaneFieldCaches here isn't as useful as having test
+ // classes call it directly from the scope where the index readers
+ // are used, because they could be gc'ed just before this tearDown
// method is called.
//
// But it's better than nothing.
//
- // If you are testing functionality that you know for a fact
- // "violates" FieldCache sanity, then you should either explicitly
+ // If you are testing functionality that you know for a fact
+ // "violates" FieldCache sanity, then you should either explicitly
// call purgeFieldCache at the end of your test method, or refactor
- // your Test class so that the inconsistant FieldCache usages are
- // isolated in distinct test methods
+ // your Test class so that the inconsistent FieldCache usages are
+ // isolated in distinct test methods
assertSaneFieldCaches(getTestLabel());
} finally {
@@ -557,14 +557,14 @@
private final static int THREAD_STOP_GRACE_MSEC = 50;
// jvm-wide list of 'rogue threads' we found, so they only get reported once.
private final static IdentityHashMap<Thread,Boolean> rogueThreads = new IdentityHashMap<Thread,Boolean>();
-
+
static {
// just a hack for things like eclipse test-runner threads
for (Thread t : Thread.getAllStackTraces().keySet()) {
rogueThreads.put(t, true);
}
}
-
+
/**
* Looks for leftover running threads, trying to kill them off,
* so they don't fail future tests.
@@ -575,20 +575,20 @@
Thread[] stillRunning = new Thread[Thread.activeCount()+1];
int threadCount = 0;
int rogueCount = 0;
-
+
if ((threadCount = Thread.enumerate(stillRunning)) > 1) {
while (threadCount == stillRunning.length) {
// truncated response
stillRunning = new Thread[stillRunning.length*2];
threadCount = Thread.enumerate(stillRunning);
}
-
+
for (int i = 0; i < threadCount; i++) {
Thread t = stillRunning[i];
-
- if (t.isAlive() &&
- !rogueThreads.containsKey(t) &&
- t != Thread.currentThread() &&
+
+ if (t.isAlive() &&
+ !rogueThreads.containsKey(t) &&
+ t != Thread.currentThread() &&
/* its ok to keep your searcher across test cases */
(t.getName().startsWith("LuceneTestCase") && context.startsWith("test method")) == false) {
System.err.println("WARNING: " + context + " left thread running: " + t);
@@ -613,7 +613,7 @@
}
return rogueCount;
}
-
+
/**
* Asserts that FieldCacheSanityChecker does not detect any
* problems with FieldCache.DEFAULT.
@@ -656,13 +656,13 @@
}
}
-
+
// @deprecated (4.0) These deprecated methods should be removed soon, when all tests using no Epsilon are fixed:
@Deprecated
static public void assertEquals(double expected, double actual) {
assertEquals(null, expected, actual);
}
-
+
@Deprecated
static public void assertEquals(String message, double expected, double actual) {
assertEquals(message, Double.valueOf(expected), Double.valueOf(actual));
@@ -677,18 +677,18 @@
static public void assertEquals(String message, float expected, float actual) {
assertEquals(message, Float.valueOf(expected), Float.valueOf(actual));
}
-
+
// Replacement for Assume jUnit class, so we can add a message with explanation:
-
+
private static final class TestIgnoredException extends RuntimeException {
TestIgnoredException(String msg) {
super(msg);
}
-
+
TestIgnoredException(String msg, Throwable t) {
super(msg, t);
}
-
+
@Override
public String getMessage() {
StringBuilder sb = new StringBuilder(super.getMessage());
@@ -696,7 +696,7 @@
sb.append(" - ").append(getCause());
return sb.toString();
}
-
+
// only this one is called by our code, exception is not used outside this class:
@Override
public void printStackTrace(PrintStream s) {
@@ -708,19 +708,19 @@
}
}
}
-
+
public static void assumeTrue(String msg, boolean b) {
Assume.assumeNoException(b ? null : new TestIgnoredException(msg));
}
-
+
public static void assumeFalse(String msg, boolean b) {
assumeTrue(msg, !b);
}
-
+
public static void assumeNoException(String msg, Exception e) {
Assume.assumeNoException(e == null ? null : new TestIgnoredException(msg, e));
}
-
+
public static <T> Set<T> asSet(T... args) {
return new HashSet<T>(Arrays.asList(args));
}
@@ -778,7 +778,7 @@
c.setTermIndexInterval(_TestUtil.nextInt(r, 1, 1000));
}
if (r.nextBoolean()) {
- c.setMaxThreadStates(_TestUtil.nextInt(r, 1, 20));
+ c.setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(_TestUtil.nextInt(r, 1, 20)));
}
if (r.nextBoolean()) {
@@ -864,7 +864,7 @@
public static MockDirectoryWrapper newDirectory() throws IOException {
return newDirectory(random);
}
-
+
/**
* Returns a new Directory instance, using the specified random.
* See {@link #newDirectory()} for more information.
@@ -875,7 +875,7 @@
stores.put(dir, Thread.currentThread().getStackTrace());
return dir;
}
-
+
/**
* Returns a new Directory instance, with contents copied from the
* provided directory. See {@link #newDirectory()} for more
@@ -884,23 +884,23 @@
public static MockDirectoryWrapper newDirectory(Directory d) throws IOException {
return newDirectory(random, d);
}
-
+
/** Returns a new FSDirectory instance over the given file, which must be a folder. */
public static MockDirectoryWrapper newFSDirectory(File f) throws IOException {
return newFSDirectory(f, null);
}
-
+
/** Returns a new FSDirectory instance over the given file, which must be a folder. */
public static MockDirectoryWrapper newFSDirectory(File f, LockFactory lf) throws IOException {
String fsdirClass = TEST_DIRECTORY;
if (fsdirClass.equals("random")) {
fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)];
}
-
+
if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store
fsdirClass = "org.apache.lucene.store." + fsdirClass;
}
-
+
Class<? extends FSDirectory> clazz;
try {
try {
@@ -908,11 +908,11 @@
} catch (ClassCastException e) {
// TEST_DIRECTORY is not a sub-class of FSDirectory, so draw one at random
fsdirClass = FS_DIRECTORIES[random.nextInt(FS_DIRECTORIES.length)];
-
+
if (fsdirClass.indexOf(".") == -1) {// if not fully qualified, assume .store
fsdirClass = "org.apache.lucene.store." + fsdirClass;
}
-
+
clazz = Class.forName(fsdirClass).asSubclass(FSDirectory.class);
}
MockDirectoryWrapper dir = new MockDirectoryWrapper(random, newFSDirectoryImpl(clazz, f, lf));
@@ -922,7 +922,7 @@
throw new RuntimeException(e);
}
}
-
+
/**
* Returns a new Directory instance, using the specified random
* with contents copied from the provided directory. See
@@ -980,44 +980,44 @@
public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) {
if (!index.isIndexed())
return new Field(name, value, store, index);
-
+
if (!store.isStored() && random.nextBoolean())
store = Store.YES; // randomly store it
-
+
tv = randomTVSetting(random, tv);
-
+
return new Field(name, value, store, index, tv);
}
-
- static final TermVector tvSettings[] = {
- TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS,
- TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS
+
+ static final TermVector tvSettings[] = {
+ TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS,
+ TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS
};
-
+
private static TermVector randomTVSetting(Random random, TermVector minimum) {
switch(minimum) {
case NO: return tvSettings[_TestUtil.nextInt(random, 0, tvSettings.length-1)];
case YES: return tvSettings[_TestUtil.nextInt(random, 1, tvSettings.length-1)];
- case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS
+ case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS
: TermVector.WITH_POSITIONS_OFFSETS;
- case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS
+ case WITH_POSITIONS: return random.nextBoolean() ? TermVector.WITH_POSITIONS
: TermVector.WITH_POSITIONS_OFFSETS;
default: return TermVector.WITH_POSITIONS_OFFSETS;
}
}
-
+
/** return a random Locale from the available locales on the system */
public static Locale randomLocale(Random random) {
Locale locales[] = Locale.getAvailableLocales();
return locales[random.nextInt(locales.length)];
}
-
+
/** return a random TimeZone from the available timezones on the system */
public static TimeZone randomTimeZone(Random random) {
String tzIds[] = TimeZone.getAvailableIDs();
return TimeZone.getTimeZone(tzIds[random.nextInt(tzIds.length)]);
}
-
+
/** return a Locale object equivalent to its programmatic name */
public static Locale localeForName(String localeName) {
String elements[] = localeName.split("\\_");
@@ -1039,7 +1039,7 @@
"RAMDirectory",
FS_DIRECTORIES[0], FS_DIRECTORIES[1], FS_DIRECTORIES[2]
};
-
+
public static String randomDirectory(Random random) {
if (random.nextInt(10) == 0) {
return CORE_DIRECTORIES[random.nextInt(CORE_DIRECTORIES.length)];
@@ -1064,7 +1064,7 @@
return FSDirectory.open(file);
}
}
-
+
static Directory newDirectoryImpl(Random random, String clazzName) {
if (clazzName.equals("random"))
clazzName = randomDirectory(random);
@@ -1085,9 +1085,9 @@
return clazz.newInstance();
} catch (Exception e) {
throw new RuntimeException(e);
- }
+ }
}
-
+
/** create a new searcher over the reader.
* This searcher might randomly use threads. */
public static IndexSearcher newSearcher(IndexReader r) throws IOException {
@@ -1095,8 +1095,8 @@
return new IndexSearcher(r);
} else {
int threads = 0;
- final ExecutorService ex = (random.nextBoolean()) ? null
- : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8),
+ final ExecutorService ex = (random.nextBoolean()) ? null
+ : Executors.newFixedThreadPool(threads = _TestUtil.nextInt(random, 1, 8),
new NamedThreadFactory("LuceneTestCase"));
if (ex != null && VERBOSE) {
System.out.println("NOTE: newSearcher using ExecutorService with " + threads + " threads");
@@ -1121,12 +1121,12 @@
public String getName() {
return this.name;
}
-
+
/** Gets a resource from the classpath as {@link File}. This method should only be used,
* if a real file is needed. To get a stream, code should prefer
* {@link Class#getResourceAsStream} using {@code this.getClass()}.
*/
-
+
protected File getDataFile(String name) throws IOException {
try {
return new File(this.getClass().getResource(name).toURI());
@@ -1137,11 +1137,11 @@
// We get here from InterceptTestCaseEvents on the 'failed' event....
public void reportAdditionalFailureInfo() {
- System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName()
+ System.err.println("NOTE: reproduce with: ant test -Dtestcase=" + getClass().getSimpleName()
+ " -Dtestmethod=" + getName() + " -Dtests.seed=" + new TwoLongs(staticSeed, seed)
+ reproduceWithExtraParams());
}
-
+
// extra params that were overridden needed to reproduce the command
private String reproduceWithExtraParams() {
StringBuilder sb = new StringBuilder();
@@ -1157,12 +1157,12 @@
private static long staticSeed;
// seed for individual test methods, changed in @before
private long seed;
-
+
private static final Random seedRand = new Random();
protected static final Random random = new Random(0);
private String name = "<unknown>";
-
+
/**
* Annotation for tests that should only be run during nightly builds.
*/
@@ -1170,7 +1170,7 @@
@Inherited
@Retention(RetentionPolicy.RUNTIME)
public @interface Nightly {}
-
+
/** optionally filters the tests to be run by TEST_METHOD */
public static class LuceneTestCaseRunner extends BlockJUnit4ClassRunner {
private List<FrameworkMethod> testMethods;
@@ -1200,11 +1200,11 @@
testMethods.add(new FrameworkMethod(m));
}
}
-
+
if (testMethods.isEmpty()) {
throw new RuntimeException("No runnable methods!");
}
-
+
if (TEST_NIGHTLY == false) {
if (getTestClass().getJavaClass().isAnnotationPresent(Nightly.class)) {
/* the test class is annotated with nightly, remove all methods */
@@ -1265,9 +1265,9 @@
@Override
public boolean shouldRun(Description d) {
return TEST_METHOD == null || d.getMethodName().equals(TEST_METHOD);
- }
+ }
};
-
+
try {
f.apply(this);
} catch (NoTestsRemainException e) {
@@ -1275,12 +1275,12 @@
}
}
}
-
+
private static class RandomCodecProvider extends CodecProvider {
private List<Codec> knownCodecs = new ArrayList<Codec>();
private Map<String,Codec> previousMappings = new HashMap<String,Codec>();
private final int perFieldSeed;
-
+
RandomCodecProvider(Random random) {
this.perFieldSeed = random.nextInt();
register(new StandardCodec());
@@ -1312,13 +1312,13 @@
}
return codec.name;
}
-
+
@Override
public synchronized String toString() {
return "RandomCodecProvider: " + previousMappings.toString();
}
}
-
+
@Ignore("just a hack")
public final void alwaysIgnoredTestMethod() {}
}
diff --git a/lucene/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java b/lucene/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java
new file mode 100644
index 0000000..52333bd
--- /dev/null
+++ b/lucene/src/test-framework/org/apache/lucene/util/ThrottledIndexOutput.java
@@ -0,0 +1,147 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.store.IndexOutput;
+
+public class ThrottledIndexOutput extends IndexOutput {
+ public static final int DEFAULT_MIN_WRITTEN_BYTES = 1024;
+ private final int bytesPerSecond;
+ private IndexOutput delegate;
+ private long flushDelayMillis;
+ private long closeDelayMillis;
+ private long seekDelayMillis;
+ private long pendingBytes;
+ private long minBytesWritten;
+ private long timeElapsed;
+ private final byte[] bytes = new byte[1];
+
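+ // MockDirectoryWrapper keeps a single prototype instance and calls
+ // newFromDelegate for each IndexOutput it hands out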
+ public ThrottledIndexOutput newFromDelegate(IndexOutput output) {
+ return new ThrottledIndexOutput(bytesPerSecond, flushDelayMillis,
+ closeDelayMillis, seekDelayMillis, minBytesWritten, output);
+ }
+
+ public ThrottledIndexOutput(int bytesPerSecond, long delayInMillis,
+ IndexOutput delegate) {
+ this(bytesPerSecond, delayInMillis, delayInMillis, delayInMillis,
+ DEFAULT_MIN_WRITTEN_BYTES, delegate);
+ }
+
+ public ThrottledIndexOutput(int bytesPerSecond, long delays,
+ int minBytesWritten, IndexOutput delegate) {
+ this(bytesPerSecond, delays, delays, delays, minBytesWritten, delegate);
+ }
+
+ public static final int mBitsToBytes(int mbits) {
+ return mbits * 125000;
+ }
+
+ public ThrottledIndexOutput(int bytesPerSecond, long flushDelayMillis,
+ long closeDelayMillis, long seekDelayMillis, long minBytesWritten,
+ IndexOutput delegate) {
+ assert bytesPerSecond > 0;
+ this.delegate = delegate;
+ this.bytesPerSecond = bytesPerSecond;
+ this.flushDelayMillis = flushDelayMillis;
+ this.closeDelayMillis = closeDelayMillis;
+ this.seekDelayMillis = seekDelayMillis;
+ this.minBytesWritten = minBytesWritten;
+ }
+
+ @Override
+ public void flush() throws IOException {
+ sleep(flushDelayMillis);
+ delegate.flush();
+ }
+
+ @Override
+ public void close() throws IOException {
+ sleep(closeDelayMillis + getDelay(true));
+ delegate.close();
+ }
+
+ @Override
+ public long getFilePointer() {
+ return delegate.getFilePointer();
+ }
+
+ @Override
+ public void seek(long pos) throws IOException {
+ sleep(seekDelayMillis);
+ delegate.seek(pos);
+ }
+
+ @Override
+ public long length() throws IOException {
+ return delegate.length();
+ }
+
+ @Override
+ public void writeByte(byte b) throws IOException {
+ bytes[0] = b;
+ writeBytes(bytes, 0, 1);
+ }
+
+ @Override
+ public void writeBytes(byte[] b, int offset, int length) throws IOException {
+ final long before = System.nanoTime();
+ delegate.writeBytes(b, offset, length);
+ timeElapsed += System.nanoTime() - before;
+ pendingBytes += length;
+ sleep(getDelay(false));
+ }
+
+ protected long getDelay(boolean closing) {
+ if (pendingBytes > 0 && (closing || pendingBytes > minBytesWritten)) {
+ // bytes/sec actually achieved since the last delay; timeElapsed is in
+ // nanos, and a 0ns elapsed time counts as "too fast"
+ long actualBps = timeElapsed > 0 ? pendingBytes * 1000000000l / timeElapsed : Long.MAX_VALUE;
+ if (actualBps > bytesPerSecond) {
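+ // pay back the difference between how long pendingBytes should have
+ // taken at the target rate (in ms) and the time actually spent writing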
+ long expected = pendingBytes * 1000l / bytesPerSecond;
+ final long delay = expected - (timeElapsed / 1000000l);
+ pendingBytes = 0;
+ timeElapsed = 0;
+ return delay;
+ }
+ }
+ return 0;
+ }
+
+ private static final void sleep(long ms) {
+ if (ms <= 0)
+ return;
+ try {
+ Thread.sleep(ms);
+ } catch (InterruptedException e) {
+ throw new ThreadInterruptedException(e);
+ }
+ }
+
+ @Override
+ public void setLength(long length) throws IOException {
+ delegate.setLength(length);
+ }
+
+ @Override
+ public void copyBytes(DataInput input, long numBytes) throws IOException {
+ delegate.copyBytes(input, numBytes);
+ }
+}
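
A minimal sketch of wiring the throttle into a test via the
setThrottledIndexOutput() hook added to MockDirectoryWrapper above; the null
delegate is deliberate, since createOutput() clones the prototype with
newFromDelegate(io) for every file it opens:

    // cap index output at ~40 Mbit/s (mBitsToBytes(40) == 5,000,000 bytes/sec)
    // and add a 5ms delay to each flush, close and seek
    MockDirectoryWrapper dir = newDirectory();
    dir.setThrottledIndexOutput(new ThrottledIndexOutput(
        ThrottledIndexOutput.mBitsToBytes(40), 5, null));
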
diff --git a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
index be1a450..51d80b0 100644
--- a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
+++ b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java
@@ -79,23 +79,23 @@
}
}
- /**
- * Convenience method: Unzip zipName + ".zip" under destDir, removing destDir first
+ /**
+ * Convenience method: Unzip zipName + ".zip" under destDir, removing destDir first
*/
public static void unzip(File zipName, File destDir) throws IOException {
-
+
ZipFile zipFile = new ZipFile(zipName);
-
+
Enumeration<? extends ZipEntry> entries = zipFile.entries();
-
+
rmDir(destDir);
-
+
destDir.mkdir();
LuceneTestCase.tempDirs.add(destDir.getAbsolutePath());
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
-
+
InputStream in = zipFile.getInputStream(entry);
File targetFile = new File(destDir, entry.getName());
if (entry.isDirectory()) {
@@ -105,24 +105,24 @@
if (targetFile.getParentFile()!=null) {
// be on the safe side: do not rely on directories always being extracted
// before their children (this seems sensible, but is it guaranteed?)
- targetFile.getParentFile().mkdirs();
+ targetFile.getParentFile().mkdirs();
}
OutputStream out = new BufferedOutputStream(new FileOutputStream(targetFile));
-
+
byte[] buffer = new byte[8192];
int len;
while((len = in.read(buffer)) >= 0) {
out.write(buffer, 0, len);
}
-
+
in.close();
out.close();
}
}
-
+
zipFile.close();
}
-
+
public static void syncConcurrentMerges(IndexWriter writer) {
syncConcurrentMerges(writer.getConfig().getMergeScheduler());
}
@@ -138,7 +138,7 @@
public static CheckIndex.Status checkIndex(Directory dir) throws IOException {
return checkIndex(dir, CodecProvider.getDefault());
}
-
+
/** This runs the CheckIndex tool on the index in. If any
* issues are hit, a RuntimeException is thrown; else,
* true is returned. */
@@ -245,7 +245,7 @@
0x1D200, 0x1D300, 0x1D360, 0x1D400, 0x1F000, 0x1F030, 0x1F100, 0x1F200,
0x20000, 0x2A700, 0x2F800, 0xE0000, 0xE0100, 0xF0000, 0x100000
};
-
+
private static final int[] blockEnds = {
0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF,
0x052F, 0x058F, 0x05FF, 0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x083F,
@@ -271,12 +271,12 @@
0x1D24F, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1F02F, 0x1F09F, 0x1F1FF, 0x1F2FF,
0x2A6DF, 0x2B73F, 0x2FA1F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
};
-
+
/** Returns random string, all codepoints within the same unicode block. */
public static String randomRealisticUnicodeString(Random r) {
return randomRealisticUnicodeString(r, 20);
}
-
+
/** Returns random string, all codepoints within the same unicode block. */
public static String randomRealisticUnicodeString(Random r, int maxLength) {
final int end = r.nextInt(maxLength);
diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
index 097d9c9..24a49e1 100755
--- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
+++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
@@ -42,7 +42,7 @@
import org.apache.lucene.util._TestUtil;
public class TestAddIndexes extends LuceneTestCase {
-
+
public void testSimpleCase() throws IOException {
// main directory
Directory dir = newDirectory();
@@ -204,9 +204,9 @@
doc.add(newField("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
writer.updateDocument(new Term("id", "" + (i%10)), doc);
}
-
+
writer.addIndexes(aux);
-
+
// Deletes one of the 10 added docs, leaving 9:
PhraseQuery q = new PhraseQuery();
q.add(new Term("content", "bbb"));
@@ -619,7 +619,7 @@
doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
}
-
+
private abstract class RunAddIndexesThreads {
Directory dir, dir2;
@@ -646,7 +646,7 @@
writer2 = new IndexWriter(dir2, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
writer2.setInfoStream(VERBOSE ? System.out : null);
writer2.commit();
-
+
readers = new IndexReader[NUM_COPY];
for(int i=0;i<NUM_COPY;i++)
@@ -754,7 +754,7 @@
}
}
}
-
+
// LUCENE-1335: test simultaneous addIndexes & commits
// from multiple threads
public void testAddIndexesWithThreads() throws Throwable {
@@ -1069,9 +1069,9 @@
w.addDocument(d);
w.close();
}
-
+
IndexReader[] readers = new IndexReader[] { IndexReader.open(dirs[0]), IndexReader.open(dirs[1]) };
-
+
Directory dir = new RAMDirectory();
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy());
LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
@@ -1083,5 +1083,5 @@
// we should now see segments_X, segments.gen, _Y.cfs, _Z.fnx
assertEquals("Only one compound segment should exist", 4, dir.listAll().length);
}
-
+
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestByteSlices.java b/lucene/src/test/org/apache/lucene/index/TestByteSlices.java
index 0da34c9..1bf7ad7 100644
--- a/lucene/src/test/org/apache/lucene/index/TestByteSlices.java
+++ b/lucene/src/test/org/apache/lucene/index/TestByteSlices.java
@@ -39,7 +39,7 @@
starts[stream] = -1;
counters[stream] = 0;
}
-
+
int num = 10000 * RANDOM_MULTIPLIER;
for (int iter = 0; iter < num; iter++) {
int stream = random.nextInt(NUM_STREAM);
@@ -67,7 +67,7 @@
if (VERBOSE)
System.out.println(" addr now " + uptos[stream]);
}
-
+
for(int stream=0;stream<NUM_STREAM;stream++) {
if (VERBOSE)
System.out.println(" stream=" + stream + " count=" + counters[stream]);
@@ -76,7 +76,7 @@
reader.init(pool, starts[stream], uptos[stream]);
for(int j=0;j<counters[stream];j++) {
reader.readVInt();
- assertEquals(j, reader.readVInt());
+ assertEquals(j, reader.readVInt());
}
}
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
index bae4eb4..e665b24 100644
--- a/lucene/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
@@ -381,7 +381,7 @@
this.register(new MockSepCodec());
this.setDefaultFieldCodec("MockSep");
}
-
+
}
private class Verify extends Thread {
diff --git a/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java b/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
index 90a318f..c7e5927 100644
--- a/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
+++ b/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
@@ -50,7 +50,7 @@
boolean isClose = false;
StackTraceElement[] trace = new Exception().getStackTrace();
for (int i = 0; i < trace.length; i++) {
- if ("doFlush".equals(trace[i].getMethodName())) {
+ if ("flush".equals(trace[i].getMethodName())) {
isDoFlush = true;
}
if ("close".equals(trace[i].getMethodName())) {
diff --git a/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java b/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
index 8e71ca8..662904c 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
@@ -40,7 +40,7 @@
*/
public class TestDeletionPolicy extends LuceneTestCase {
-
+
private void verifyCommitOrder(List<? extends IndexCommit> commits) throws IOException {
final IndexCommit firstCommit = commits.get(0);
long last = SegmentInfos.generationFromSegmentsFileName(firstCommit.getSegmentsFileName());
@@ -135,7 +135,7 @@
verifyCommitOrder(commits);
doDeletes(commits, true);
}
-
+
private void doDeletes(List<? extends IndexCommit> commits, boolean isCommit) {
// Assert that we really are only called for each new
@@ -248,7 +248,7 @@
// seconds of the last one's mod time, and, that I can
// open a reader on each:
long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
-
+
String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
"",
gen);
@@ -276,7 +276,7 @@
// OK
break;
}
-
+
dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
gen--;
}
@@ -449,7 +449,7 @@
// Now 8 because we made another commit
assertEquals(7, IndexReader.listCommits(dir).size());
-
+
r = IndexReader.open(dir, true);
// Not optimized because we rolled it back, and now only
// 10 docs
@@ -471,7 +471,7 @@
// but this time keeping only the last commit:
writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexCommit(lastCommit));
assertEquals(10, writer.numDocs());
-
+
// Reader still sees optimized index, because writer
// opened on the prior commit has not yet committed:
r = IndexReader.open(dir, true);
@@ -626,7 +626,7 @@
}
IndexWriter writer = new IndexWriter(dir, conf);
writer.close();
- Term searchTerm = new Term("content", "aaa");
+ Term searchTerm = new Term("content", "aaa");
Query query = new TermQuery(searchTerm);
for(int i=0;i<N+1;i++) {
@@ -731,7 +731,7 @@
* around, through creates.
*/
public void testKeepLastNDeletionPolicyWithCreates() throws IOException {
-
+
final int N = 10;
for(int pass=0;pass<2;pass++) {
@@ -751,7 +751,7 @@
}
IndexWriter writer = new IndexWriter(dir, conf);
writer.close();
- Term searchTerm = new Term("content", "aaa");
+ Term searchTerm = new Term("content", "aaa");
Query query = new TermQuery(searchTerm);
for(int i=0;i<N+1;i++) {
@@ -833,7 +833,7 @@
}
gen--;
}
-
+
dir.close();
}
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestDoc.java b/lucene/src/test/org/apache/lucene/index/TestDoc.java
index 874df62..ed576fa 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDoc.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDoc.java
@@ -111,7 +111,7 @@
public void testIndexAndMerge() throws Exception {
StringWriter sw = new StringWriter();
PrintWriter out = new PrintWriter(sw, true);
-
+
Directory directory = newFSDirectory(indexDir);
IndexWriter writer = new IndexWriter(
directory,
@@ -136,7 +136,7 @@
SegmentInfo siMerge3 = merge(siMerge, siMerge2, "merge3", false);
printSegment(out, siMerge3);
-
+
directory.close();
out.close();
sw.close();
@@ -170,7 +170,7 @@
siMerge3 = merge(siMerge, siMerge2, "merge3", true);
printSegment(out, siMerge3);
-
+
directory.close();
out.close();
sw.close();
@@ -207,11 +207,11 @@
final SegmentInfo info = new SegmentInfo(merged, si1.docCount + si2.docCount, si1.dir,
false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
fieldInfos.hasVectors(), fieldInfos);
-
+
if (useCompoundFile) {
Collection<String> filesToDelete = merger.createCompoundFile(merged + ".cfs", info);
info.setUseCompoundFile(true);
- for (final String fileToDelete : filesToDelete)
+ for (final String fileToDelete : filesToDelete)
si1.dir.deleteFile(fileToDelete);
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java b/lucene/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java
new file mode 100644
index 0000000..4c52726
--- /dev/null
+++ b/lucene/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java
@@ -0,0 +1,218 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ThreadInterruptedException;
+
+/**
+ * Unit test for {@link DocumentsWriterDeleteQueue}
+ */
+public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
+
+ public void testUpdateDeleteSlices() {
+ DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
+ final int size = 200 + random.nextInt(500) * RANDOM_MULTIPLIER;
+ Integer[] ids = new Integer[size];
+ for (int i = 0; i < ids.length; i++) {
+ ids[i] = random.nextInt();
+ }
+ Term template = new Term("id");
+ DeleteSlice slice1 = queue.newSlice();
+ DeleteSlice slice2 = queue.newSlice();
+ BufferedDeletes bd1 = new BufferedDeletes(false);
+ BufferedDeletes bd2 = new BufferedDeletes(false);
+ int last1 = 0;
+ int last2 = 0;
+ Set<Term> uniqueValues = new HashSet<Term>();
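+ // advance each slice at random intervals; after an update the slice's tail
+ // must be the delete just added, and applying it must cover every id since the last apply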
+ for (int j = 0; j < ids.length; j++) {
+ Integer i = ids[j];
+ // create an array here since we compare identity below against tailItem
+ Term[] term = new Term[] {template.createTerm(i.toString())};
+ uniqueValues.add(term[0]);
+ queue.addDelete(term);
+ if (random.nextInt(20) == 0 || j == ids.length - 1) {
+ queue.updateSlice(slice1);
+ assertTrue(slice1.isTailItem(term));
+ slice1.apply(bd1, j);
+ assertAllBetween(last1, j, bd1, ids);
+ last1 = j + 1;
+ }
+ if (random.nextInt(10) == 5 || j == ids.length - 1) {
+ queue.updateSlice(slice2);
+ assertTrue(slice2.isTailItem(term));
+ slice2.apply(bd2, j);
+ assertAllBetween(last2, j, bd2, ids);
+ last2 = j + 1;
+ }
+ assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes());
+ }
+ assertEquals(uniqueValues, bd1.terms.keySet());
+ assertEquals(uniqueValues, bd2.terms.keySet());
+ assertEquals(uniqueValues, new HashSet<Term>(Arrays.asList(queue
+ .freezeGlobalBuffer(null).terms)));
+ assertEquals("num deletes must be 0 after freeze", 0, queue
+ .numGlobalTermDeletes());
+ }
+
+ private void assertAllBetween(int start, int end, BufferedDeletes deletes,
+ Integer[] ids) {
+ Term template = new Term("id");
+ for (int i = start; i <= end; i++) {
+ assertEquals(Integer.valueOf(end), deletes.terms.get(template
+ .createTerm(ids[i].toString())));
+ }
+ }
+
+ public void testClear() {
+ DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
+ Term template = new Term("id");
+ assertFalse(queue.anyChanges());
+ queue.clear();
+ assertFalse(queue.anyChanges());
+ final int size = 200 + random.nextInt(500) * RANDOM_MULTIPLIER;
+ int termsSinceFreeze = 0;
+ int queriesSinceFreeze = 0;
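+ // interleave term and query deletes; after clear() the queue must report no pending changes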
+ for (int i = 0; i < size; i++) {
+ Term term = template.createTerm("" + i);
+ if (random.nextInt(10) == 0) {
+ queue.addDelete(new TermQuery(term));
+ queriesSinceFreeze++;
+ } else {
+ queue.addDelete(term);
+ termsSinceFreeze++;
+ }
+ assertTrue(queue.anyChanges());
+ if (random.nextInt(10) == 0) {
+ queue.clear();
+ queue.tryApplyGlobalSlice();
+ assertFalse(queue.anyChanges());
+ }
+ }
+
+ }
+
+ public void testAnyChanges() {
+ DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
+ Term template = new Term("id");
+ final int size = 200 + random.nextInt(500) * RANDOM_MULTIPLIER;
+ int termsSinceFreeze = 0;
+ int queriesSinceFreeze = 0;
+ for (int i = 0; i < size; i++) {
+ Term term = template.createTerm("" + i);
+ if (random.nextInt(10) == 0) {
+ queue.addDelete(new TermQuery(term));
+ queriesSinceFreeze++;
+ } else {
+ queue.addDelete(term);
+ termsSinceFreeze++;
+ }
+ assertTrue(queue.anyChanges());
+ if (random.nextInt(5) == 0) {
+ FrozenBufferedDeletes freezeGlobalBuffer = queue
+ .freezeGlobalBuffer(null);
+ assertEquals(termsSinceFreeze, freezeGlobalBuffer.terms.length);
+ assertEquals(queriesSinceFreeze, freezeGlobalBuffer.queries.length);
+ queriesSinceFreeze = 0;
+ termsSinceFreeze = 0;
+ assertFalse(queue.anyChanges());
+ }
+ }
+ }
+
+ public void testStressDeleteQueue() throws InterruptedException {
+ DocumentsWriterDeleteQueue queue = new DocumentsWriterDeleteQueue();
+ Set<Term> uniqueValues = new HashSet<Term>();
+ final int size = 10000 + random.nextInt(500) * RANDOM_MULTIPLIER;
+ Integer[] ids = new Integer[size];
+ Term template = new Term("id");
+ for (int i = 0; i < ids.length; i++) {
+ ids[i] = random.nextInt();
+ uniqueValues.add(template.createTerm(ids[i].toString()));
+ }
+ CountDownLatch latch = new CountDownLatch(1);
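+ // gate all updater threads on one latch so they hit the queue concurrently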
+ AtomicInteger index = new AtomicInteger(0);
+ final int numThreads = 2 + random.nextInt(5);
+ UpdateThread[] threads = new UpdateThread[numThreads];
+ for (int i = 0; i < threads.length; i++) {
+ threads[i] = new UpdateThread(queue, index, ids, latch);
+ threads[i].start();
+ }
+ latch.countDown();
+ for (int i = 0; i < threads.length; i++) {
+ threads[i].join();
+ }
+
+ for (UpdateThread updateThread : threads) {
+ DeleteSlice slice = updateThread.slice;
+ queue.updateSlice(slice);
+ BufferedDeletes deletes = updateThread.deletes;
+ slice.apply(deletes, BufferedDeletes.MAX_INT);
+ assertEquals(uniqueValues, deletes.terms.keySet());
+ }
+ queue.tryApplyGlobalSlice();
+ assertEquals(uniqueValues, new HashSet<Term>(Arrays.asList(queue
+ .freezeGlobalBuffer(null).terms)));
+ assertEquals("num deletes must be 0 after freeze", 0, queue
+ .numGlobalTermDeletes());
+ }
+
+ private static class UpdateThread extends Thread {
+ final DocumentsWriterDeleteQueue queue;
+ final AtomicInteger index;
+ final Integer[] ids;
+ final DeleteSlice slice;
+ final BufferedDeletes deletes;
+ final CountDownLatch latch;
+
+ protected UpdateThread(DocumentsWriterDeleteQueue queue,
+ AtomicInteger index, Integer[] ids, CountDownLatch latch) {
+ this.queue = queue;
+ this.index = index;
+ this.ids = ids;
+ this.slice = queue.newSlice();
+ deletes = new BufferedDeletes(false);
+ this.latch = latch;
+ }
+
+ @Override
+ public void run() {
+ try {
+ latch.await();
+ } catch (InterruptedException e) {
+ throw new ThreadInterruptedException(e);
+ }
+ Term template = new Term("id");
+ int i = 0;
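+ // claim the next id from the shared counter and apply this thread's slice after every add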
+ while ((i = index.getAndIncrement()) < ids.length) {
+ Term term = template.createTerm(ids[i].toString());
+ queue.add(term, slice);
+ assertTrue(slice.isTailItem(term));
+ slice.apply(deletes, BufferedDeletes.MAX_INT);
+ }
+ }
+ }
+
+}
diff --git a/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java b/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java
new file mode 100644
index 0000000..40d0f79
--- /dev/null
+++ b/lucene/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java
@@ -0,0 +1,438 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.store.MockDirectoryWrapper;
+import org.apache.lucene.util.LineFileDocs;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ThrottledIndexOutput;
+import org.junit.Before;
+
+public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
+
+ private LineFileDocs lineDocFile;
+
+ @Before
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ lineDocFile = new LineFileDocs(random);
+ }
+
+ public void testFlushByRam() throws CorruptIndexException,
+ LockObtainFailedException, IOException, InterruptedException {
+ int[] numThreads = new int[] { 3 + random.nextInt(12), 1 };
+ for (int i = 0; i < numThreads.length; i++) {
+ runFlushByRam(numThreads[i],
+ 1 + random.nextInt(10) + random.nextDouble(), false);
+ }
+
+ for (int i = 0; i < numThreads.length; i++) {
+ // with a 512 MB RAM buffer we should never stall
+ runFlushByRam(numThreads[i], 512.d, true);
+ }
+ }
+
+ protected void runFlushByRam(int numThreads, double maxRam,
+ boolean ensureNotStalled) throws IOException, CorruptIndexException,
+ LockObtainFailedException, InterruptedException {
+ final int numDocumentsToIndex = 50 + random.nextInt(150);
+ AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
+ Directory dir = newDirectory();
+ MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random)).setFlushPolicy(flushPolicy);
+
+ final int numDWPT = 1 + random.nextInt(8);
+ DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
+ numDWPT);
+ iwc.setIndexerThreadPool(threadPool);
+ iwc.setRAMBufferSizeMB(1 + random.nextInt(10) + random.nextDouble());
+ iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+ iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
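+ // flush is driven by RAM usage only; doc-count and delete-term triggers are disabled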
+ IndexWriter writer = new IndexWriter(dir, iwc);
+ assertFalse(flushPolicy.flushOnDocCount());
+ assertFalse(flushPolicy.flushOnDeleteTerms());
+ assertTrue(flushPolicy.flushOnRAM());
+ DocumentsWriter docsWriter = writer.getDocsWriter();
+ assertNotNull(docsWriter);
+ DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
+ assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes());
+
+ IndexThread[] threads = new IndexThread[numThreads];
+ for (int x = 0; x < threads.length; x++) {
+ threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile,
+ false);
+ threads[x].start();
+ }
+
+ for (int x = 0; x < threads.length; x++) {
+ threads[x].join();
+ }
+ final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.);
+ assertEquals(" all flushes must be due numThreads=" + numThreads, 0,
+ flushControl.flushBytes());
+ assertEquals(numDocumentsToIndex, writer.numDocs());
+ assertEquals(numDocumentsToIndex, writer.maxDoc());
+ assertTrue("peak bytes without flush exceeded watermark",
+ flushPolicy.peakBytesWithoutFlush <= maxRAMBytes);
+ assertActiveBytesAfter(flushControl);
+ if (flushPolicy.hasMarkedPending) {
+ assertTrue(maxRAMBytes < flushControl.peakActiveBytes);
+ }
+ if (ensureNotStalled) {
+ assertFalse(docsWriter.healthiness.wasStalled);
+ }
+ writer.close();
+ assertEquals(0, flushControl.activeBytes());
+ dir.close();
+ }
+
+ public void testFlushDocCount() throws CorruptIndexException,
+ LockObtainFailedException, IOException, InterruptedException {
+ int[] numThreads = new int[] { 3 + random.nextInt(12), 1 };
+ for (int i = 0; i < numThreads.length; i++) {
+
+ final int numDocumentsToIndex = 50 + random.nextInt(150);
+ AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
+ Directory dir = newDirectory();
+ MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random)).setFlushPolicy(flushPolicy);
+
+ final int numDWPT = 1 + random.nextInt(8);
+ DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
+ numDWPT);
+ iwc.setIndexerThreadPool(threadPool);
+ iwc.setMaxBufferedDocs(2 + random.nextInt(50));
+ iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+ iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
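+ // flush is driven by doc count only; RAM and delete-term triggers are disabled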
+ IndexWriter writer = new IndexWriter(dir, iwc);
+ assertTrue(flushPolicy.flushOnDocCount());
+ assertFalse(flushPolicy.flushOnDeleteTerms());
+ assertFalse(flushPolicy.flushOnRAM());
+ DocumentsWriter docsWriter = writer.getDocsWriter();
+ assertNotNull(docsWriter);
+ DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
+ assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes());
+
+ IndexThread[] threads = new IndexThread[numThreads[i]];
+ for (int x = 0; x < threads.length; x++) {
+ threads[x] = new IndexThread(numDocs, numThreads[i], writer,
+ lineDocFile, false);
+ threads[x].start();
+ }
+
+ for (int x = 0; x < threads.length; x++) {
+ threads[x].join();
+ }
+
+ assertEquals(" all flushes must be due numThreads=" + numThreads[i], 0,
+ flushControl.flushBytes());
+ assertEquals(numDocumentsToIndex, writer.numDocs());
+ assertEquals(numDocumentsToIndex, writer.maxDoc());
+ assertTrue("peak doc count without flush exceeded watermark",
+ flushPolicy.peakDocCountWithoutFlush <= iwc.getMaxBufferedDocs());
+ assertActiveBytesAfter(flushControl);
+ writer.close();
+ assertEquals(0, flushControl.activeBytes());
+ dir.close();
+ }
+ }
+
+ public void testRandom() throws IOException, InterruptedException {
+ final int numThreads = 1 + random.nextInt(8);
+ final int numDocumentsToIndex = 100 + random.nextInt(300);
+ AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random));
+ MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
+ iwc.setFlushPolicy(flushPolicy);
+
+ final int numDWPT = 1 + random.nextInt(8);
+ DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
+ numDWPT);
+ iwc.setIndexerThreadPool(threadPool);
+
+ IndexWriter writer = new IndexWriter(dir, iwc);
+ DocumentsWriter docsWriter = writer.getDocsWriter();
+ assertNotNull(docsWriter);
+ DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
+
+ assertEquals(" bytes must be 0 after init", 0, flushControl.flushBytes());
+
+ IndexThread[] threads = new IndexThread[numThreads];
+ for (int x = 0; x < threads.length; x++) {
+ threads[x] = new IndexThread(numDocs, numThreads, writer, lineDocFile,
+ true);
+ threads[x].start();
+ }
+
+ for (int x = 0; x < threads.length; x++) {
+ threads[x].join();
+ }
+ assertEquals(" all flushes must be due", 0, flushControl.flushBytes());
+ assertEquals(numDocumentsToIndex, writer.numDocs());
+ assertEquals(numDocumentsToIndex, writer.maxDoc());
+ if (flushPolicy.flushOnRAM() && !flushPolicy.flushOnDocCount()
+ && !flushPolicy.flushOnDeleteTerms()) {
+ final long maxRAMBytes = (long) (iwc.getRAMBufferSizeMB() * 1024. * 1024.);
+ assertTrue("peak bytes without flush exceeded watermark",
+ flushPolicy.peakBytesWithoutFlush <= maxRAMBytes);
+ if (flushPolicy.hasMarkedPending) {
+ assertTrue("max: " + maxRAMBytes + " " + flushControl.peakActiveBytes,
+ maxRAMBytes <= flushControl.peakActiveBytes);
+ }
+ }
+ assertActiveBytesAfter(flushControl);
+ writer.commit();
+ assertEquals(0, flushControl.activeBytes());
+ IndexReader r = IndexReader.open(dir);
+ assertEquals(numDocumentsToIndex, r.numDocs());
+ assertEquals(numDocumentsToIndex, r.maxDoc());
+ if (!flushPolicy.flushOnRAM()) {
+ assertFalse("never stall if we don't flush on RAM", docsWriter.healthiness.wasStalled);
+ assertFalse("never block if we don't flush on RAM", docsWriter.healthiness.hasBlocked());
+ }
+ r.close();
+ writer.close();
+ dir.close();
+ }
+
+ public void testHealthiness() throws InterruptedException,
+ CorruptIndexException, LockObtainFailedException, IOException {
+
+ int[] numThreads = new int[] { 4 + random.nextInt(8), 1 };
+ final int numDocumentsToIndex = 50 + random.nextInt(50);
+ for (int i = 0; i < numThreads.length; i++) {
+ AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
+ MockDirectoryWrapper dir = newDirectory();
+ // mock a very slow hard disk so that flushing is very slow
+ dir.setThrottledIndexOutput(new ThrottledIndexOutput(ThrottledIndexOutput
+ .mBitsToBytes(40 + random.nextInt(10)), 5 + random.nextInt(5), null));
+ IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random));
+ iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+ iwc.setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
+ FlushPolicy flushPolicy = new FlushByRamOrCountsPolicy();
+ iwc.setFlushPolicy(flushPolicy);
+
+ DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
numThreads[i] == 1 ? 1 : 2);
+ iwc.setIndexerThreadPool(threadPool);
+ // with such a small RAM buffer we should stall quite quickly
+ iwc.setRAMBufferSizeMB(0.25);
+ IndexWriter writer = new IndexWriter(dir, iwc);
+ IndexThread[] threads = new IndexThread[numThreads[i]];
+ for (int x = 0; x < threads.length; x++) {
+ threads[x] = new IndexThread(numDocs, numThreads[i], writer,
+ lineDocFile, false);
+ threads[x].start();
+ }
+
+ for (int x = 0; x < threads.length; x++) {
+ threads[x].join();
+ }
+ DocumentsWriter docsWriter = writer.getDocsWriter();
+ assertNotNull(docsWriter);
+ DocumentsWriterFlushControl flushControl = docsWriter.flushControl;
+ assertEquals(" all flushes must be due", 0, flushControl.flushBytes());
+ assertEquals(numDocumentsToIndex, writer.numDocs());
+ assertEquals(numDocumentsToIndex, writer.maxDoc());
+ if (numThreads[i] == 1) {
+ assertFalse(
+ "single thread must not stall",
+ docsWriter.healthiness.wasStalled);
+ assertFalse(
+ "single thread must not block numThreads: " + numThreads[i],
+ docsWriter.healthiness.hasBlocked());
+ // this assumption is too strict in this test
+// } else {
+// if (docsWriter.healthiness.wasStalled) {
+// // TODO maybe this assumption is too strict
+// assertTrue(" we should have blocked here numThreads: "
+// + numThreads[i], docsWriter.healthiness.hasBlocked());
+// }
+ }
+ assertActiveBytesAfter(flushControl);
+ writer.close(true);
+ dir.close();
+ }
+ }
+
+ protected void assertActiveBytesAfter(DocumentsWriterFlushControl flushControl) {
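+ // flushControl.activeBytes() must equal the sum of bytes used by all active ThreadStates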
+ Iterator<ThreadState> allActiveThreads = flushControl.allActiveThreads();
+ long bytesUsed = 0;
+ while (allActiveThreads.hasNext()) {
+ bytesUsed += allActiveThreads.next().perThread.bytesUsed();
+ }
+ assertEquals(bytesUsed, flushControl.activeBytes());
+ }
+
+ public class IndexThread extends Thread {
+ IndexWriter writer;
+ IndexWriterConfig iwc;
+ LineFileDocs docs;
+ private AtomicInteger pendingDocs;
+ private final boolean doRandomCommit;
+
+ public IndexThread(AtomicInteger pendingDocs, int numThreads,
+ IndexWriter writer, LineFileDocs docs, boolean doRandomCommit) {
+ this.pendingDocs = pendingDocs;
+ this.writer = writer;
+ iwc = writer.getConfig();
+ this.docs = docs;
+ this.doRandomCommit = doRandomCommit;
+ }
+
+ public void run() {
+ try {
+ long ramSize = 0;
+ while (pendingDocs.decrementAndGet() > -1) {
+ Document doc = docs.nextDoc();
+ writer.addDocument(doc);
+ long newRamSize = writer.ramSizeInBytes();
+ if (newRamSize != ramSize) {
+ ramSize = newRamSize;
+ }
+ if (doRandomCommit) {
+ int commit;
+ synchronized (random) {
+ commit = random.nextInt(20);
+ }
+ if (commit == 0) {
+ writer.commit();
+ }
+ }
+ }
+ writer.commit();
+ } catch (Throwable ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+ }
+
+ private static class MockDefaultFlushPolicy extends FlushByRamOrCountsPolicy {
+ long peakBytesWithoutFlush = Integer.MIN_VALUE;
+ long peakDocCountWithoutFlush = Integer.MIN_VALUE;
+ boolean hasMarkedPending = false;
+
+ @Override
+ public void onDelete(DocumentsWriterFlushControl control, ThreadState state) {
+ final ArrayList<ThreadState> pending = new ArrayList<DocumentsWriterPerThreadPool.ThreadState>();
+ final ArrayList<ThreadState> notPending = new ArrayList<DocumentsWriterPerThreadPool.ThreadState>();
+ findPending(control, pending, notPending);
+ final boolean flushCurrent = state.flushPending;
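+ // predict which ThreadState the policy should mark pending, then verify after super.onDelete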
+ final ThreadState toFlush;
+ if (state.flushPending) {
+ toFlush = state;
+ } else if (flushOnDeleteTerms()
+ && state.perThread.pendingDeletes.numTermDeletes.get() >= indexWriterConfig
+ .getMaxBufferedDeleteTerms()) {
+ toFlush = state;
+ } else {
+ toFlush = null;
+ }
+ super.onDelete(control, state);
+ if (toFlush != null) {
+ if (flushCurrent) {
+ assertTrue(pending.remove(toFlush));
+ } else {
+ assertTrue(notPending.remove(toFlush));
+ }
+ assertTrue(toFlush.flushPending);
+ hasMarkedPending = true;
+ }
+
+ for (ThreadState threadState : notPending) {
+ assertFalse(threadState.flushPending);
+ }
+ }
+
+ @Override
+ public void onInsert(DocumentsWriterFlushControl control, ThreadState state) {
+ final ArrayList<ThreadState> pending = new ArrayList<DocumentsWriterPerThreadPool.ThreadState>();
+ final ArrayList<ThreadState> notPending = new ArrayList<DocumentsWriterPerThreadPool.ThreadState>();
+ findPending(control, pending, notPending);
+ final boolean flushCurrent = state.flushPending;
+ long activeBytes = control.activeBytes();
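+ // predict which ThreadState the policy should mark pending, then verify after super.onInsert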
+ final ThreadState toFlush;
+ if (state.flushPending) {
+ toFlush = state;
+ } else if (flushOnDocCount()
+ && state.perThread.getNumDocsInRAM() >= indexWriterConfig
+ .getMaxBufferedDocs()) {
+ toFlush = state;
+ } else if (flushOnRAM()
+ && activeBytes >= (long) (indexWriterConfig.getRAMBufferSizeMB() * 1024. * 1024.)) {
+ toFlush = findLargestNonPendingWriter(control, state);
+ assertFalse(toFlush.flushPending);
+ } else {
+ toFlush = null;
+ }
+ super.onInsert(control, state);
+ if (toFlush != null) {
+ if (flushCurrent) {
+ assertTrue(pending.remove(toFlush));
+ } else {
+ assertTrue(notPending.remove(toFlush));
+ }
+ assertTrue(toFlush.flushPending);
+ hasMarkedPending = true;
+ } else {
+ peakBytesWithoutFlush = Math.max(activeBytes, peakBytesWithoutFlush);
+ peakDocCountWithoutFlush = Math.max(state.perThread.getNumDocsInRAM(),
+ peakDocCountWithoutFlush);
+ }
+
+ for (ThreadState threadState : notPending) {
+ assertFalse(threadState.flushPending);
+ }
+ }
+ }
+
+ static void findPending(DocumentsWriterFlushControl flushControl,
+ ArrayList<ThreadState> pending, ArrayList<ThreadState> notPending) {
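+ // partition all active ThreadStates by their flushPending flag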
+ Iterator<ThreadState> allActiveThreads = flushControl.allActiveThreads();
+ while (allActiveThreads.hasNext()) {
+ ThreadState next = allActiveThreads.next();
+ if (next.flushPending) {
+ pending.add(next);
+ } else {
+ notPending.add(next);
+ }
+ }
+ }
+}
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
index 21525d8..e814893 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java
@@ -38,7 +38,7 @@
*/
public class TestIndexFileDeleter extends LuceneTestCase {
-
+
public void testDeleteLeftoverFiles() throws IOException {
MockDirectoryWrapper dir = newDirectory();
dir.setPreventDoubleWrite(false);
@@ -124,7 +124,7 @@
copyFile(dir, "_2_1." + normSuffix, "_1_1.f" + contentFieldIndex);
// Create a bogus separate del file for a
- // segment that already has a separate del file:
+ // segment that already has a separate del file:
copyFile(dir, "_0_1.del", "_0_2.del");
// Create a bogus separate del file for a
@@ -140,14 +140,14 @@
// Create a bogus fnm file when the CFS already exists:
copyFile(dir, "_0.cfs", "_0.fnm");
-
+
// Create some old segments file:
copyFile(dir, "segments_2", "segments");
copyFile(dir, "segments_2", "segments_1");
// Create a bogus cfs file shadowing a non-cfs segment:
copyFile(dir, "_1.cfs", "_2.cfs");
-
+
String[] filesPre = dir.listAll();
// Open & close a writer: it should delete the above 4
@@ -160,9 +160,9 @@
Arrays.sort(files);
Arrays.sort(files2);
-
+
Set<String> dif = difFiles(files, files2);
-
+
if (!Arrays.equals(files, files2)) {
fail("IndexFileDeleter failed to delete unreferenced extra files: should have deleted " + (filesPre.length-files.length) + " files but only deleted " + (filesPre.length - files2.length) + "; expected files:\n " + asString(files) + "\n actual files:\n " + asString(files2)+"\ndif: "+dif);
}
@@ -172,7 +172,7 @@
Set<String> set1 = new HashSet<String>();
Set<String> set2 = new HashSet<String>();
Set<String> extra = new HashSet<String>();
-
+
for (int x=0; x < files1.length; x++) {
set1.add(files1[x]);
}
@@ -195,7 +195,7 @@
}
return extra;
}
-
+
private String asString(String[] l) {
String s = "";
for(int i=0;i<l.length;i++) {
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
index 7f7fa8b..01722a9 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
@@ -60,13 +60,13 @@
public class TestIndexReader extends LuceneTestCase
{
-
+
public void testCommitUserData() throws Exception {
Directory d = newDirectory();
Map<String,String> commitUserData = new HashMap<String,String>();
commitUserData.put("foo", "fighters");
-
+
// set up writer
IndexWriter writer = new IndexWriter(d, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random))
@@ -74,12 +74,12 @@
for(int i=0;i<27;i++)
addDocumentWithFields(writer);
writer.close();
-
+
IndexReader r = IndexReader.open(d, false);
r.deleteDocument(5);
r.flush(commitUserData);
r.close();
-
+
SegmentInfos sis = new SegmentInfos();
sis.read(d);
IndexReader r2 = IndexReader.open(d, false);
@@ -115,10 +115,10 @@
r3.close();
d.close();
}
-
+
public void testIsCurrent() throws Exception {
Directory d = newDirectory();
- IndexWriter writer = new IndexWriter(d, newIndexWriterConfig(
+ IndexWriter writer = new IndexWriter(d, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
addDocumentWithFields(writer);
writer.close();
@@ -205,7 +205,7 @@
doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
}
-
+
writer.close();
// verify fields again
reader = IndexReader.open(d, false);
@@ -224,10 +224,10 @@
assertTrue(fieldNames.contains("tvposition"));
assertTrue(fieldNames.contains("tvoffset"));
assertTrue(fieldNames.contains("tvpositionoffset"));
-
+
// verify that only indexed fields were returned
fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
- assertEquals(11, fieldNames.size()); // 6 original + the 5 termvector fields
+ assertEquals(11, fieldNames.size()); // 6 original + the 5 termvector fields
assertTrue(fieldNames.contains("keyword"));
assertTrue(fieldNames.contains("text"));
assertTrue(fieldNames.contains("unstored"));
@@ -239,26 +239,26 @@
assertTrue(fieldNames.contains("tvposition"));
assertTrue(fieldNames.contains("tvoffset"));
assertTrue(fieldNames.contains("tvpositionoffset"));
-
+
// verify that only unindexed fields were returned
fieldNames = reader.getFieldNames(IndexReader.FieldOption.UNINDEXED);
assertEquals(2, fieldNames.size()); // the following fields
assertTrue(fieldNames.contains("unindexed"));
assertTrue(fieldNames.contains("unindexed2"));
-
- // verify index term vector fields
+
+ // verify index term vector fields
fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR);
assertEquals(1, fieldNames.size()); // 1 field has term vector only
assertTrue(fieldNames.contains("termvector"));
-
+
fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION);
assertEquals(1, fieldNames.size()); // 1 field has term vector with positions
assertTrue(fieldNames.contains("tvposition"));
-
+
fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET);
assertEquals(1, fieldNames.size()); // 1 field has term vector with offsets
assertTrue(fieldNames.contains("tvoffset"));
-
+
fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
assertEquals(1, fieldNames.size()); // 1 field has term vector with positions and offsets
assertTrue(fieldNames.contains("tvpositionoffset"));
@@ -366,13 +366,13 @@
reader2.close();
dir.close();
}
-
+
public void testBinaryFields() throws IOException {
Directory dir = newDirectory();
byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
-
+
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
-
+
for (int i = 0; i < 10; i++) {
addDoc(writer, "document number " + (i + 1));
addDocumentWithFields(writer);
@@ -589,14 +589,14 @@
reader = IndexReader.open(dir, false);
reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
reader.close();
-
+
// now open reader again & set norm for doc 0 (writes to _0_2.s0)
reader = IndexReader.open(dir, false);
reader.setNorm(0, "content", sim.encodeNormValue(2.0f));
reader.close();
assertFalse("failed to remove first generation norms file on writing second generation",
dir.fileExists("_0_1.s0"));
-
+
dir.close();
}
@@ -619,7 +619,7 @@
}
rmDir(fileDirName);
}*/
-
+
public void testDeleteReaderWriterConflictOptimized() throws IOException{
deleteReaderWriterConflict(true);
}
@@ -802,7 +802,7 @@
// expected exception
}
try {
- IndexWriter.unlock(dir); // this should not be done in the real world!
+ IndexWriter.unlock(dir); // this should not be done in the real world!
} catch (LockReleaseFailedException lrfe) {
writer.close();
}
@@ -866,7 +866,7 @@
public void testDeleteReaderReaderConflictUnoptimized() throws IOException{
deleteReaderReaderConflict(false);
}
-
+
public void testDeleteReaderReaderConflictOptimized() throws IOException{
deleteReaderReaderConflict(true);
}
@@ -880,7 +880,7 @@
Term searchTerm = new Term("content", "aaa");
int START_COUNT = 157;
int END_COUNT = 144;
-
+
// First build up a starting index:
MockDirectoryWrapper startDir = newDirectory();
IndexWriter writer = new IndexWriter(startDir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
@@ -1066,7 +1066,7 @@
}
public void testDocsOutOfOrderJIRA140() throws IOException {
- Directory dir = newDirectory();
+ Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
for(int i=0;i<11;i++) {
addDoc(writer, "aaa");
@@ -1106,7 +1106,7 @@
public void testExceptionReleaseWriteLockJIRA768() throws IOException {
- Directory dir = newDirectory();
+ Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
addDoc(writer, "aaa");
writer.close();
@@ -1157,7 +1157,7 @@
} catch (FileNotFoundException e) {
// expected
}
-
+
dir.close();
}
@@ -1315,10 +1315,10 @@
doc.add(newField("tvoffset","tvoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
doc.add(newField("tvposition","tvposition", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
-
+
writer.addDocument(doc);
}
-
+
private void addDoc(IndexWriter writer, String value) throws IOException {
Document doc = new Document();
doc.add(newField("content", value, Field.Store.NO, Field.Index.ANALYZED));
@@ -1330,7 +1330,7 @@
assertEquals("IndexReaders have different values for maxDoc.", index1.maxDoc(), index2.maxDoc());
assertEquals("Only one IndexReader has deletions.", index1.hasDeletions(), index2.hasDeletions());
assertEquals("Only one index is optimized.", index1.isOptimized(), index2.isOptimized());
-
+
// check field names
Collection<String> fields1 = index1.getFieldNames(FieldOption.ALL);
Collection<String> fields2 = index1.getFieldNames(FieldOption.ALL);
@@ -1340,7 +1340,7 @@
while (it1.hasNext()) {
assertEquals("Different field names.", it1.next(), it2.next());
}
-
+
// check norms
it1 = fields1.iterator();
while (it1.hasNext()) {
@@ -1359,7 +1359,7 @@
assertSame(norms1, norms2);
}
}
-
+
// check deletions
final Bits delDocs1 = MultiFields.getDeletedDocs(index1);
final Bits delDocs2 = MultiFields.getDeletedDocs(index2);
@@ -1368,7 +1368,7 @@
delDocs1 == null || delDocs1.get(i),
delDocs2 == null || delDocs2.get(i));
}
-
+
// check stored fields
for (int i = 0; i < index1.maxDoc(); i++) {
if (delDocs1 == null || !delDocs1.get(i)) {
@@ -1384,10 +1384,10 @@
Field curField2 = (Field) itField2.next();
assertEquals("Different fields names for doc " + i + ".", curField1.name(), curField2.name());
assertEquals("Different field values for doc " + i + ".", curField1.stringValue(), curField2.stringValue());
- }
+ }
}
}
-
+
// check dictionary and posting lists
FieldsEnum fenum1 = MultiFields.getFields(index1).iterator();
FieldsEnum fenum2 = MultiFields.getFields(index1).iterator();
@@ -1467,7 +1467,7 @@
r.close();
r2.close();
d.close();
- }
+ }
public void testReadOnly() throws Throwable {
Directory d = newDirectory();
@@ -1518,7 +1518,7 @@
IndexReader r3 = r2.reopen();
assertFalse(r3 == r2);
r2.close();
-
+
assertFalse(r == r2);
try {
@@ -1602,7 +1602,7 @@
public void testNoDupCommitFileNames() throws Throwable {
Directory dir = newDirectory();
-
+
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random))
.setMaxBufferedDocs(2));
@@ -1610,12 +1610,12 @@
writer.addDocument(createDocument("a"));
writer.addDocument(createDocument("a"));
writer.close();
-
+
Collection<IndexCommit> commits = IndexReader.listCommits(dir);
for (final IndexCommit commit : commits) {
Collection<String> files = commit.getFileNames();
HashSet<String> seen = new HashSet<String>();
- for (final String fileName : files) {
+ for (final String fileName : files) {
assertTrue("file " + fileName + " was duplicated", !seen.contains(fileName));
seen.add(fileName);
}
@@ -1820,7 +1820,7 @@
// LUCENE-2046
public void testPrepareCommitIsCurrent() throws Throwable {
Directory dir = newDirectory();
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
writer.commit();
Document doc = new Document();
@@ -1838,12 +1838,12 @@
r.close();
dir.close();
}
-
+
// LUCENE-2753
public void testListCommits() throws Exception {
Directory dir = newDirectory();
SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, null).setIndexDeletionPolicy(sdp));
writer.addDocument(new Document());
writer.commit();
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java
index c26dd34..11761c1 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java
@@ -34,7 +34,7 @@
* implemented properly
*/
public class TestIndexReaderClone extends LuceneTestCase {
-
+
public void testCloneReadOnlySegmentReader() throws Exception {
final Directory dir1 = newDirectory();
@@ -67,7 +67,7 @@
r2.close();
dir1.close();
}
-
+
// open non-readOnly reader1, clone to non-readOnly
// reader2, make sure we can change reader1
public void testCloneWriteToOrig() throws Exception {
@@ -83,7 +83,7 @@
r2.close();
dir1.close();
}
-
+
// open non-readOnly reader1, clone to non-readOnly
// reader2, make sure we can change reader2
public void testCloneWriteToClone() throws Exception {
@@ -105,7 +105,7 @@
dir1.close();
}
-
+
// create single-segment index, open non-readOnly
// SegmentReader, add docs, reopen to multireader, then do
// delete
@@ -116,7 +116,7 @@
IndexReader reader1 = IndexReader.open(dir1, false);
TestIndexReaderReopen.modifyIndex(5, dir1);
-
+
IndexReader reader2 = reader1.reopen();
assertTrue(reader1 != reader2);
@@ -208,7 +208,7 @@
reader2.close();
dir1.close();
}
-
+
private static boolean deleteWorked(int doc, IndexReader r) {
boolean exception = false;
try {
@@ -219,7 +219,7 @@
}
return !exception;
}
-
+
public void testCloneReadOnlyDirectoryReader() throws Exception {
final Directory dir1 = newDirectory();
@@ -268,7 +268,7 @@
* are not the same on each reader 5. Verify the doc deleted is only in the
* cloned reader 6. Try to delete a document in the original reader, an
* exception should be thrown
- *
+ *
* @param r1 IndexReader to perform tests on
* @throws Exception
*/
@@ -323,7 +323,7 @@
// need to test norms?
dir1.close();
}
-
+
public void testSegmentReaderCloseReferencing() throws Exception {
final Directory dir1 = newDirectory();
TestIndexReaderReopen.createIndex(random, dir1, false);
@@ -343,7 +343,7 @@
clonedSegmentReader.close();
dir1.close();
}
-
+
public void testSegmentReaderDelDocsReferenceCounting() throws Exception {
final Directory dir1 = newDirectory();
TestIndexReaderReopen.createIndex(random, dir1, false);
@@ -454,16 +454,16 @@
private void assertDelDocsRefCountEquals(int refCount, SegmentReader reader) {
assertEquals(refCount, reader.deletedDocsRef.get());
}
-
+
public void testCloneSubreaders() throws Exception {
final Directory dir1 = newDirectory();
-
+
TestIndexReaderReopen.createIndex(random, dir1, true);
IndexReader reader = IndexReader.open(dir1, false);
reader.deleteDocument(1); // acquire write lock
IndexReader[] subs = reader.getSequentialSubReaders();
assert subs.length > 1;
-
+
IndexReader[] clones = new IndexReader[subs.length];
for (int x=0; x < subs.length; x++) {
clones[x] = (IndexReader) subs[x].clone();
@@ -483,9 +483,9 @@
IndexReader r2 = r1.clone(false);
r1.deleteDocument(5);
r1.decRef();
-
+
r1.incRef();
-
+
r2.close();
r1.decRef();
r1.close();
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
index 261a0be..e6f2703 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -148,7 +148,7 @@
writer.addDocument(doc);
}
-
+
public static void assertNoUnreferencedFiles(Directory dir, String message) throws IOException {
String[] startFiles = dir.listAll();
@@ -262,7 +262,7 @@
if (VERBOSE) {
System.out.println("TEST: config1=" + writer.getConfig());
}
-
+
for(int j=0;j<500;j++) {
addDocWithIndex(writer, j);
}
@@ -338,7 +338,7 @@
assertEquals("should be one document", reader2.numDocs(), 1);
reader.close();
reader2.close();
-
+
dir.close();
}
@@ -367,14 +367,14 @@
* these docs until writer is closed.
*/
public void testCommitOnClose() throws IOException {
- Directory dir = newDirectory();
+ Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
for (int i = 0; i < 14; i++) {
addDoc(writer);
}
writer.close();
- Term searchTerm = new Term("content", "aaa");
+ Term searchTerm = new Term("content", "aaa");
IndexSearcher searcher = new IndexSearcher(dir, false);
ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
assertEquals("first number of hits", 14, hits.length);
@@ -415,14 +415,14 @@
* and add docs to it.
*/
public void testCommitOnCloseAbort() throws IOException {
- MockDirectoryWrapper dir = newDirectory();
+ MockDirectoryWrapper dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
for (int i = 0; i < 14; i++) {
addDoc(writer);
}
writer.close();
- Term searchTerm = new Term("content", "aaa");
+ Term searchTerm = new Term("content", "aaa");
IndexSearcher searcher = new IndexSearcher(dir, false);
ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
assertEquals("first number of hits", 14, hits.length);
@@ -450,7 +450,7 @@
hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
assertEquals("saw changes after writer.abort", 14, hits.length);
searcher.close();
-
+
// Now make sure we can re-open the index, add docs,
// and all is good:
writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))
@@ -567,7 +567,7 @@
* and close().
*/
public void testCommitOnCloseOptimize() throws IOException {
- MockDirectoryWrapper dir = newDirectory();
+ MockDirectoryWrapper dir = newDirectory();
// Must disable throwing exc on double-write: this
// test uses IW.rollback which easily results in
// writing to same file more than once
@@ -634,7 +634,7 @@
}
public void testIndexNoDocuments() throws IOException {
- MockDirectoryWrapper dir = newDirectory();
+ MockDirectoryWrapper dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
writer.commit();
writer.close();
@@ -656,7 +656,7 @@
}
public void testManyFields() throws IOException {
- MockDirectoryWrapper dir = newDirectory();
+ MockDirectoryWrapper dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
for(int j=0;j<100;j++) {
Document doc = new Document();
@@ -686,7 +686,7 @@
}
public void testSmallRAMBuffer() throws IOException {
- MockDirectoryWrapper dir = newDirectory();
+ MockDirectoryWrapper dir = newDirectory();
IndexWriter writer = new IndexWriter(
dir,
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).
@@ -782,13 +782,14 @@
writer.deleteDocuments(new Term("field", "aaa" + j));
_TestUtil.syncConcurrentMerges(writer);
int flushCount = writer.getFlushCount();
+
if (j == 1)
lastFlushCount = flushCount;
else if (j < 10) {
// No new files should be created
assertEquals(flushCount, lastFlushCount);
} else if (10 == j) {
- assertTrue(flushCount > lastFlushCount);
+ assertTrue("" + j, flushCount > lastFlushCount);
lastFlushCount = flushCount;
writer.getConfig().setRAMBufferSizeMB(0.000001);
writer.getConfig().setMaxBufferedDeleteTerms(1);
@@ -825,7 +826,7 @@
}
public void testDiverseDocs() throws IOException {
- MockDirectoryWrapper dir = newDirectory();
+ MockDirectoryWrapper dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.5));
for(int i=0;i<3;i++) {
// First, docs where every term is unique (heavy on
@@ -872,12 +873,12 @@
}
public void testEnablingNorms() throws IOException {
- MockDirectoryWrapper dir = newDirectory();
+ MockDirectoryWrapper dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10));
// Enable norms for only 1 doc, pre flush
for(int j=0;j<10;j++) {
Document doc = new Document();
- Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
+ Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
if (j != 8) {
f.setOmitNorms(true);
}
@@ -898,7 +899,7 @@
// Enable norms for only 1 doc, post flush
for(int j=0;j<27;j++) {
Document doc = new Document();
- Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
+ Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED);
if (j != 26) {
f.setOmitNorms(true);
}
@@ -918,7 +919,7 @@
}
public void testHighFreqTerm() throws IOException {
- MockDirectoryWrapper dir = newDirectory();
+ MockDirectoryWrapper dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setRAMBufferSizeMB(0.01));
// Massive doc that has 128 K a's
@@ -968,7 +969,7 @@
return myLockFactory.makeLock(name);
}
}
-
+
Directory dir = new MyRAMDirectory(new RAMDirectory());
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
@@ -976,7 +977,7 @@
addDoc(writer);
}
writer.close();
- Term searchTerm = new Term("content", "aaa");
+ Term searchTerm = new Term("content", "aaa");
IndexSearcher searcher = new IndexSearcher(dir, false);
ScoreDoc[] hits = searcher.search(new TermQuery(searchTerm), null, 1000).scoreDocs;
assertEquals("did not get right number of hits", 100, hits.length);
@@ -1073,7 +1074,7 @@
infos.read(dir);
assertEquals(2, infos.size());
}
- }
+ }
dir.close();
}
@@ -1089,7 +1090,7 @@
Directory dir = newDirectory();
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
-
+
Document document = new Document();
document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES));
iw.addDocument(document);
@@ -1343,7 +1344,7 @@
setMergePolicy(newLogMergePolicy(5))
);
writer.commit();
-
+
for (int i = 0; i < 23; i++)
addDoc(writer);
@@ -1370,12 +1371,12 @@
writer.close();
dir.close();
}
-
+
// LUCENE-325: test expungeDeletes, when 2 singular merges
// are required
public void testExpungeDeletes() throws IOException {
Directory dir = newDirectory();
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random))
.setMaxBufferedDocs(2).setRAMBufferSizeMB(
IndexWriterConfig.DISABLE_AUTO_FLUSH));
@@ -1537,13 +1538,13 @@
public void doAfterFlush() {
afterWasCalled = true;
}
-
+
@Override
protected void doBeforeFlush() throws IOException {
beforeWasCalled = true;
}
}
-
+
// LUCENE-1222
public void testDoBeforeAfterFlush() throws IOException {
@@ -1572,7 +1573,7 @@
}
-
+
final String[] utf8Data = new String[] {
// unpaired low surrogate
"ab\udc17cd", "ab\ufffdcd",
@@ -1642,7 +1643,7 @@
}
UnicodeUtil.UTF16toUTF8(chars, 0, len, utf8);
-
+
String s1 = new String(chars, 0, len);
String s2 = new String(utf8.bytes, 0, utf8.length, "UTF-8");
assertEquals("codepoint " + ch, s1, s2);
@@ -1699,7 +1700,7 @@
expected[i++] = 0xfffd;
expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800);
hasIllegal = true;
- } else
+ } else
expected[i] = buffer[i] = (char) nextInt(0x800, 0xd800);
} else {
expected[i] = buffer[i] = ' ';
@@ -1796,10 +1797,10 @@
final TokenStream tokens = new TokenStream() {
final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-
+
final Iterator<String> terms = Arrays.asList("a","b","c").iterator();
boolean first = true;
-
+
@Override
public boolean incrementToken() {
if (!terms.hasNext()) return false;
@@ -1856,7 +1857,7 @@
setMergePolicy(newLogMergePolicy(5))
);
writer.commit();
-
+
for (int i = 0; i < 23; i++)
addDoc(writer);
@@ -1912,7 +1913,7 @@
setMergePolicy(newLogMergePolicy(5))
);
writer.commit();
-
+
for (int i = 0; i < 23; i++)
addDoc(writer);
@@ -1979,7 +1980,7 @@
byte[] b = new byte[50];
for(int i=0;i<50;i++)
b[i] = (byte) (i+77);
-
+
Document doc = new Document();
Field f = new Field("binary", b, 10, 17);
byte[] bx = f.getBinaryValue();
@@ -2016,7 +2017,7 @@
// commit(Map) never called for this index
assertEquals(0, r.getCommitUserData().size());
r.close();
-
+
w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));
for(int j=0;j<17;j++)
addDoc(w);
@@ -2024,7 +2025,7 @@
data.put("label", "test1");
w.commit(data);
w.close();
-
+
assertEquals("test1", IndexReader.getCommitUserData(dir).get("label"));
r = IndexReader.open(dir, true);
@@ -2036,7 +2037,7 @@
w.close();
assertEquals("test1", IndexReader.getCommitUserData(dir).get("label"));
-
+
dir.close();
}
@@ -2046,7 +2047,7 @@
Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random);
analyzer.setPositionIncrementGap( 100 );
- IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, analyzer));
Document doc = new Document();
Field f = newField("field", "", Field.Store.NO,
@@ -2073,7 +2074,7 @@
// LUCENE-1468 -- make sure opening an IndexWriter with
// create=true does not remove non-index files
-
+
public void testOtherFiles() throws Throwable {
Directory dir = newDirectory();
try {
@@ -2132,7 +2133,7 @@
@Override
public void run() {
// LUCENE-2239: won't work with NIOFS/MMAP
- Directory dir = new MockDirectoryWrapper(random, new RAMDirectory());
+ Directory dir = new MockDirectoryWrapper(random, new RAMDirectory());
IndexWriter w = null;
while(!finish) {
try {
@@ -2141,7 +2142,7 @@
if (w != null) {
w.close();
}
- IndexWriterConfig conf = newIndexWriterConfig(
+ IndexWriterConfig conf = newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2);
w = new IndexWriter(dir, conf);
@@ -2208,10 +2209,10 @@
e.printStackTrace(System.out);
}
}
- try {
+ try {
dir.close();
- } catch (IOException e) {
- throw new RuntimeException(e);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
}
}
}
@@ -2226,7 +2227,7 @@
// interrupt arrives while class loader is trying to
// init this class (in servicing a first interrupt):
assertTrue(new ThreadInterruptedException(new InterruptedException()).getCause() instanceof InterruptedException);
-
+
// issue 100 interrupts to child thread
int i = 0;
while(i < 100) {
@@ -2260,12 +2261,12 @@
doc.add(f);
doc.add(f2);
w.addDocument(doc);
-
+
// add 2 docs to test in-memory merging
f.setTokenStream(new MockTokenizer(new StringReader("doc2field1"), MockTokenizer.WHITESPACE, false));
f2.setTokenStream(new MockTokenizer(new StringReader("doc2field2"), MockTokenizer.WHITESPACE, false));
w.addDocument(doc);
-
+
// force segment flush so we can force a segment merge with doc3 later.
w.commit();
@@ -2288,7 +2289,7 @@
assertTrue(ir.document(0).getFieldable("binary").isBinary());
assertTrue(ir.document(1).getFieldable("binary").isBinary());
assertTrue(ir.document(2).getFieldable("binary").isBinary());
-
+
assertEquals("value", ir.document(0).get("string"));
assertEquals("value", ir.document(1).get("string"));
assertEquals("value", ir.document(2).get("string"));
@@ -2359,7 +2360,7 @@
public void testNoDocsIndex() throws Throwable {
Directory dir = newDirectory();
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)));
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
writer.setInfoStream(new PrintStream(bos));
@@ -2369,7 +2370,7 @@
_TestUtil.checkIndex(dir);
dir.close();
}
-
+
// LUCENE-2095: make sure with multiple threads commit
// doesn't return until all changes are in fact in the
// index
@@ -2377,7 +2378,7 @@
final int NUM_THREADS = 5;
final double RUN_SEC = 0.5;
final Directory dir = newDirectory();
- final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(
+ final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
_TestUtil.reduceOpenFiles(w.w);
w.commit();
@@ -2562,7 +2563,7 @@
Field f = newField("field", s.toString(), Field.Store.NO, Field.Index.ANALYZED);
d.add(f);
w.addDocument(d);
-
+
IndexReader r = w.getReader().getSequentialSubReaders()[0];
TermsEnum t = r.fields().terms("field").iterator();
int count = 0;
@@ -2648,10 +2649,10 @@
// in case a deletion policy which holds onto commits is used.
Directory dir = newDirectory();
SnapshotDeletionPolicy sdp = new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random))
.setIndexDeletionPolicy(sdp));
-
+
// First commit
Document doc = new Document();
doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
@@ -2661,7 +2662,7 @@
// Keep that commit
sdp.snapshot("id");
-
+
// Second commit - now KeepOnlyLastCommit cannot delete the prev commit.
doc = new Document();
doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
@@ -2673,25 +2674,13 @@
sdp.release("id");
writer.deleteUnusedFiles();
assertEquals(1, IndexReader.listCommits(dir).size());
-
+
writer.close();
dir.close();
}
-
- private static class FlushCountingIndexWriter extends IndexWriter {
- int flushCount;
- public FlushCountingIndexWriter(Directory dir, IndexWriterConfig iwc) throws IOException {
- super(dir, iwc);
- }
- @Override
- public void doAfterFlush() {
- flushCount++;
- }
- }
public void testIndexingThenDeleting() throws Exception {
final Random r = random;
-
Directory dir = newDirectory();
// note this test explicitly disables payloads
final Analyzer analyzer = new Analyzer() {
@@ -2700,7 +2689,7 @@
return new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
}
};
- FlushCountingIndexWriter w = new FlushCountingIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(-1).setMaxBufferedDeleteTerms(-1));
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setRAMBufferSizeMB(1.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH).setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH));
w.setInfoStream(VERBOSE ? System.out : null);
Document doc = new Document();
doc.add(newField("field", "go 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20", Field.Store.NO, Field.Index.ANALYZED));
@@ -2714,15 +2703,15 @@
}
if (doIndexing) {
// Add docs until a flush is triggered
- final int startFlushCount = w.flushCount;
- while(w.flushCount == startFlushCount) {
+ final int startFlushCount = w.getFlushCount();
+ while(w.getFlushCount() == startFlushCount) {
w.addDocument(doc);
count++;
}
} else {
// Delete docs until a flush is triggered
- final int startFlushCount = w.flushCount;
- while(w.flushCount == startFlushCount) {
+ final int startFlushCount = w.getFlushCount();
+ while(w.getFlushCount() == startFlushCount) {
w.deleteDocuments(new Term("foo", ""+count));
count++;
}
@@ -2732,7 +2721,7 @@
w.close();
dir.close();
}
-
+
public void testNoCommits() throws Exception {
// Tests that if we don't call commit(), the directory has 0 commits. This has
// changed since LUCENE-2386, where before IW would always commit on a fresh
@@ -2753,7 +2742,7 @@
public void testEmptyFSDirWithNoLock() throws Exception {
// Tests that if FSDir is opened w/ a NoLockFactory (or SingleInstanceLF),
- // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed
+ // then IndexWriter ctor succeeds. Previously (LUCENE-2386) it failed
// when listAll() was called in IndexFileDeleter.
Directory dir = newFSDirectory(_TestUtil.getTempDir("emptyFSDirNoLock"), NoLockFactory.getNoLockFactory());
new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))).close();
@@ -2762,10 +2751,10 @@
public void testEmptyDirRollback() throws Exception {
// Tests that if IW is created over an empty Directory, some documents are
- // indexed, flushed (but not committed) and then IW rolls back, then no
+ // indexed, flushed (but not committed) and then IW rolls back, then no
// files are left in the Directory.
Directory dir = newDirectory();
- IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
+ IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random))
.setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy()));
String[] files = dir.listAll();
@@ -2789,7 +2778,7 @@
writer.addDocument(doc);
// Adding just one document does not call flush yet.
assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length);
-
+
doc = new Document();
doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
@@ -2810,17 +2799,17 @@
public void testNoSegmentFile() throws IOException {
Directory dir = newDirectory();
dir.setLockFactory(NoLockFactory.getNoLockFactory());
- IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));
-
+
Document doc = new Document();
doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
w.addDocument(doc);
w.addDocument(doc);
- IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig(
+ IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)
.setOpenMode(OpenMode.CREATE));
-
+
w2.close();
// If we don't do that, the test fails on Windows
w.rollback();
@@ -2859,7 +2848,7 @@
w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE).setIndexCommit(commit));
assertEquals(1, w.numDocs());
-
+
// commit IndexWriter to "third"
w.addDocument(doc);
commitData.put("tag", "third");
@@ -2914,7 +2903,7 @@
}
final int docCount = 200*RANDOM_MULTIPLIER;
final int fieldCount = _TestUtil.nextInt(rand, 1, 5);
-
+
final List<Integer> fieldIDs = new ArrayList<Integer>();
Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
@@ -2924,7 +2913,7 @@
}
final Map<String,Document> docs = new HashMap<String,Document>();
-
+
if (VERBOSE) {
System.out.println("TEST: build index docCount=" + docCount);
}
@@ -3111,7 +3100,7 @@
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random, dir, new StringSplitAnalyzer());
- char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8];
+ char[] chars = new char[DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8];
Arrays.fill(chars, 'x');
Document doc = new Document();
final String bigTerm = new String(chars);
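
The hunks in this file replace the bare -1 sentinels with the named IndexWriterConfig.DISABLE_AUTO_FLUSH constant and read the flush count through getFlushCount() rather than the raw field. A minimal sketch of the resulting configuration, assuming the trunk-era public API this patch targets (the Version constant and WhitespaceAnalyzer are illustrative stand-ins for the test's TEST_VERSION_CURRENT and MockAnalyzer):

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class RamBudgetFlushSketch {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        // Flushing is driven purely by the RAM budget: the doc-count and
        // delete-term triggers are disabled by name, not by a magic -1.
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
            new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
          .setRAMBufferSizeMB(1.0)
          .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setMaxBufferedDeleteTerms(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        IndexWriter w = new IndexWriter(dir, conf);
        Document doc = new Document();
        doc.add(new Field("field", "go 1 2 3", Field.Store.NO, Field.Index.ANALYZED));
        w.addDocument(doc);
        w.close();
        dir.close();
      }
    }
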
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
index 26f800b..689a6ad 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterConfig.java
@@ -24,7 +24,7 @@
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.index.DocumentsWriter.IndexingChain;
+import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DefaultSimilarityProvider;
import org.apache.lucene.search.IndexSearcher;
@@ -36,15 +36,15 @@
private static final class MySimilarityProvider extends DefaultSimilarityProvider {
// Does not implement anything - used only for type checking on IndexWriterConfig.
}
-
+
private static final class MyIndexingChain extends IndexingChain {
// Does not implement anything - used only for type checking on IndexWriterConfig.
@Override
- DocConsumer getChain(DocumentsWriter documentsWriter) {
+ DocConsumer getChain(DocumentsWriterPerThread documentsWriter) {
return null;
}
-
+
}
@Test
@@ -64,12 +64,16 @@
assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, conf.getRAMBufferSizeMB(), 0.0);
assertEquals(IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS, conf.getMaxBufferedDocs());
assertEquals(IndexWriterConfig.DEFAULT_READER_POOLING, conf.getReaderPooling());
- assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());
+ assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain());
assertNull(conf.getMergedSegmentWarmer());
- assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates());
assertEquals(IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR, conf.getReaderTermsIndexDivisor());
assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass());
-
+ assertEquals(ThreadAffinityDocumentsWriterThreadPool.class, conf.getIndexerThreadPool().getClass());
+ assertNull(conf.getFlushPolicy());
+ assertEquals(IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB, conf.getRAMPerThreadHardLimitMB());
+
+
+
// Sanity check - validate that all getters are covered.
Set<String> getters = new HashSet<String>();
getters.add("getAnalyzer");
@@ -91,7 +95,11 @@
getters.add("getMergePolicy");
getters.add("getMaxThreadStates");
getters.add("getReaderPooling");
+ getters.add("getIndexerThreadPool");
getters.add("getReaderTermsIndexDivisor");
+ getters.add("getFlushPolicy");
+ getters.add("getRAMPerThreadHardLimitMB");
+
for (Method m : IndexWriterConfig.class.getDeclaredMethods()) {
if (m.getDeclaringClass() == IndexWriterConfig.class && m.getName().startsWith("get")) {
assertTrue("method " + m.getName() + " is not tested for defaults", getters.contains(m.getName()));
@@ -107,12 +115,12 @@
if (m.getDeclaringClass() == IndexWriterConfig.class
&& m.getName().startsWith("set")
&& !Modifier.isStatic(m.getModifiers())) {
- assertEquals("method " + m.getName() + " does not return IndexWriterConfig",
+ assertEquals("method " + m.getName() + " does not return IndexWriterConfig",
IndexWriterConfig.class, m.getReturnType());
}
}
}
-
+
@Test
public void testConstants() throws Exception {
    // Tests that the values of the constants do not change
@@ -123,10 +131,9 @@
assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS);
assertEquals(16.0, IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, 0.0);
assertEquals(false, IndexWriterConfig.DEFAULT_READER_POOLING);
- assertEquals(8, IndexWriterConfig.DEFAULT_MAX_THREAD_STATES);
assertEquals(IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR);
}
-
+
@Test
public void testToString() throws Exception {
String str = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).toString();
@@ -143,15 +150,15 @@
assertTrue(f.getName() + " not found in toString", str.indexOf(f.getName()) != -1);
}
}
-
+
@Test
public void testClone() throws Exception {
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
IndexWriterConfig clone = (IndexWriterConfig) conf.clone();
-
+
// Clone is shallow since not all parameters are cloneable.
assertTrue(conf.getIndexDeletionPolicy() == clone.getIndexDeletionPolicy());
-
+
conf.setMergeScheduler(new SerialMergeScheduler());
assertEquals(ConcurrentMergeScheduler.class, clone.getMergeScheduler().getClass());
}
@@ -159,14 +166,14 @@
@Test
public void testInvalidValues() throws Exception {
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
-
+
// Test IndexDeletionPolicy
assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
conf.setIndexDeletionPolicy(new SnapshotDeletionPolicy(null));
assertEquals(SnapshotDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
conf.setIndexDeletionPolicy(null);
assertEquals(KeepOnlyLastCommitDeletionPolicy.class, conf.getIndexDeletionPolicy().getClass());
-
+
// Test MergeScheduler
assertEquals(ConcurrentMergeScheduler.class, conf.getMergeScheduler().getClass());
conf.setMergeScheduler(new SerialMergeScheduler());
@@ -183,12 +190,12 @@
assertTrue(IndexSearcher.getDefaultSimilarityProvider() == conf.getSimilarityProvider());
// Test IndexingChain
- assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());
+ assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain());
conf.setIndexingChain(new MyIndexingChain());
assertEquals(MyIndexingChain.class, conf.getIndexingChain().getClass());
conf.setIndexingChain(null);
- assertTrue(DocumentsWriter.defaultIndexingChain == conf.getIndexingChain());
-
+ assertTrue(DocumentsWriterPerThread.defaultIndexingChain == conf.getIndexingChain());
+
try {
conf.setMaxBufferedDeleteTerms(0);
fail("should not have succeeded to set maxBufferedDeleteTerms to 0");
@@ -239,11 +246,19 @@
// this is expected
}
- assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates());
- conf.setMaxThreadStates(5);
- assertEquals(5, conf.getMaxThreadStates());
- conf.setMaxThreadStates(0);
- assertEquals(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES, conf.getMaxThreadStates());
+ try {
+ conf.setRAMPerThreadHardLimitMB(2048);
+ fail("should not have succeeded to set RAMPerThreadHardLimitMB to >= 2048");
+ } catch (IllegalArgumentException e) {
+ // this is expected
+ }
+
+ try {
+ conf.setRAMPerThreadHardLimitMB(0);
+ fail("should not have succeeded to set RAMPerThreadHardLimitMB to 0");
+ } catch (IllegalArgumentException e) {
+ // this is expected
+ }
// Test MergePolicy
assertEquals(TieredMergePolicy.class, conf.getMergePolicy().getClass());
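
The new assertions above also pin down the contract of the per-thread RAM hard limit the patch introduces: values must be greater than 0 and strictly below 2048 MB. A short sketch of that contract, assuming only the setter exercised by this test (the Version constant and analyzer are illustrative):

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.util.Version;

    public class RamPerThreadLimitSketch {
      public static void main(String[] args) {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
            new WhitespaceAnalyzer(Version.LUCENE_CURRENT));
        conf.setRAMPerThreadHardLimitMB(1024);   // legal: 0 < value < 2048
        try {
          conf.setRAMPerThreadHardLimitMB(2048); // rejected, per the test above
        } catch (IllegalArgumentException expected) {
          // a single indexing thread may not be given 2048 MB or more
        }
        try {
          conf.setRAMPerThreadHardLimitMB(0);    // rejected as well
        } catch (IllegalArgumentException expected) {
          // the hard limit must be positive
        }
      }
    }
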
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
index 42f618a..fcc3508 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterDelete.java
@@ -33,7 +33,7 @@
import org.apache.lucene.util._TestUtil;
public class TestIndexWriterDelete extends LuceneTestCase {
-
+
// test the simple case
public void testSimpleCase() throws IOException {
String[] keywords = { "1", "2" };
@@ -124,7 +124,7 @@
writer.close();
dir.close();
}
-
+
// test when delete terms only apply to ram segments
public void testRAMDeletes() throws IOException {
for(int t=0;t<2;t++) {
@@ -220,7 +220,7 @@
IndexReader reader = IndexReader.open(dir, true);
assertEquals(7, reader.numDocs());
reader.close();
-
+
id = 0;
modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
modifier.deleteDocuments(new Term("id", String.valueOf(++id)));
@@ -297,33 +297,33 @@
IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2)
.setMaxBufferedDeleteTerms(2));
-
+
int id = 0;
int value = 100;
-
+
for (int i = 0; i < 7; i++) {
addDoc(modifier, ++id, value);
}
modifier.commit();
-
+
addDoc(modifier, ++id, value);
IndexReader reader = IndexReader.open(dir, true);
assertEquals(7, reader.numDocs());
reader.close();
-
+
// Delete all
- modifier.deleteAll();
+ modifier.deleteAll();
// Roll it back
modifier.rollback();
modifier.close();
-
+
// Validate that the docs are still there
reader = IndexReader.open(dir, true);
assertEquals(7, reader.numDocs());
reader.close();
-
+
dir.close();
}
@@ -334,10 +334,10 @@
IndexWriter modifier = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMaxBufferedDocs(2)
.setMaxBufferedDeleteTerms(2));
-
+
int id = 0;
int value = 100;
-
+
for (int i = 0; i < 7; i++) {
addDoc(modifier, ++id, value);
}
@@ -349,24 +349,24 @@
addDoc(modifier, ++id, value);
addDoc(modifier, ++id, value);
-
+
// Delete all
- modifier.deleteAll();
+ modifier.deleteAll();
reader = modifier.getReader();
assertEquals(0, reader.numDocs());
reader.close();
-
+
// Roll it back
modifier.rollback();
modifier.close();
-
+
// Validate that the docs are still there
reader = IndexReader.open(dir, true);
assertEquals(7, reader.numDocs());
reader.close();
-
+
dir.close();
}
@@ -538,10 +538,13 @@
}
// prevent throwing a random exception here!!
final double randomIOExceptionRate = dir.getRandomIOExceptionRate();
+ final long maxSizeInBytes = dir.getMaxSizeInBytes();
dir.setRandomIOExceptionRate(0.0);
+ dir.setMaxSizeInBytes(0);
if (!success) {
// Must force the close else the writer can have
// open files which cause exc in MockRAMDir.close
+
modifier.rollback();
}
@@ -552,6 +555,7 @@
TestIndexWriter.assertNoUnreferencedFiles(dir, "after writer.close");
}
dir.setRandomIOExceptionRate(randomIOExceptionRate);
+ dir.setMaxSizeInBytes(maxSizeInBytes);
// Finally, verify index is not corrupt, and, if
// we succeeded, we see all docs changed, and if
@@ -622,7 +626,7 @@
  // Tests that buffered deletes are cleared when
// an Exception is hit during flush.
public void testErrorAfterApplyDeletes() throws IOException {
-
+
MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() {
boolean sawMaybe = false;
boolean failed = false;
@@ -786,7 +790,7 @@
// a segment is written are cleaned up if there's an i/o error
public void testErrorInDocsWriterAdd() throws IOException {
-
+
MockDirectoryWrapper.Failure failure = new MockDirectoryWrapper.Failure() {
boolean failed = false;
@Override
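
The hunk above hardens the error-injection test: before force-closing the writer it now neutralizes both failure knobs of MockDirectoryWrapper (random IOExceptions and the simulated disk-full limit) and restores them afterwards. A sketch of that save/neutralize/restore pattern; the writer, directory, and success flag are stand-ins for the test's locals:

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.MockDirectoryWrapper;

    class SafeCloseSketch {
      static void close(MockDirectoryWrapper dir, IndexWriter writer, boolean success) throws IOException {
        final double rate = dir.getRandomIOExceptionRate();
        final long maxSize = dir.getMaxSizeInBytes();
        dir.setRandomIOExceptionRate(0.0); // no synthetic IOExceptions while closing
        dir.setMaxSizeInBytes(0);          // 0 disables the disk-full limit
        try {
          if (!success) {
            // force the close, else the writer can hold open files
            // which make MockDirectoryWrapper.close() throw
            writer.rollback();
          } else {
            writer.close();
          }
        } finally {
          dir.setRandomIOExceptionRate(rate);
          dir.setMaxSizeInBytes(maxSize);
        }
      }
    }
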
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
index 7ac0a23..4769319 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
@@ -223,8 +223,9 @@
threads[i].join();
for(int i=0;i<NUM_THREADS;i++)
- if (threads[i].failure != null)
+ if (threads[i].failure != null) {
fail("thread " + threads[i].getName() + ": hit unexpected failure");
+ }
writer.commit();
@@ -246,7 +247,7 @@
_TestUtil.checkIndex(dir);
dir.close();
}
-
+
// LUCENE-1198
private static final class MockIndexWriter2 extends IndexWriter {
@@ -258,12 +259,12 @@
@Override
boolean testPoint(String name) {
- if (doFail && name.equals("DocumentsWriter.ThreadState.init start"))
+ if (doFail && name.equals("DocumentsWriterPerThread addDocument start"))
throw new RuntimeException("intentionally failing");
return true;
}
}
-
+
private class CrashingFilter extends TokenFilter {
String fieldName;
int count;
@@ -336,7 +337,7 @@
w.addDocument(doc);
w.close();
dir.close();
- }
+ }
private static final class MockIndexWriter3 extends IndexWriter {
@@ -356,7 +357,7 @@
return true;
}
}
-
+
// LUCENE-1210
public void testExceptionOnMergeInit() throws IOException {
@@ -381,7 +382,7 @@
w.close();
dir.close();
}
-
+
// LUCENE-1072
public void testExceptionFromTokenStream() throws IOException {
Directory dir = newDirectory();
@@ -472,9 +473,9 @@
boolean sawAppend = false;
boolean sawFlush = false;
for (int i = 0; i < trace.length; i++) {
- if ("org.apache.lucene.index.FreqProxTermsWriter".equals(trace[i].getClassName()) && "appendPostings".equals(trace[i].getMethodName()))
+ if ("org.apache.lucene.index.FreqProxTermsWriterPerField".equals(trace[i].getClassName()) && "flush".equals(trace[i].getMethodName()))
sawAppend = true;
- if ("doFlush".equals(trace[i].getMethodName()))
+ if ("flush".equals(trace[i].getMethodName()))
sawFlush = true;
}
@@ -683,7 +684,7 @@
for(int t=0;t<NUM_THREAD;t++)
threads[t].join();
-
+
writer.close();
}
@@ -730,7 +731,7 @@
dir.close();
}
}
-
+
// Throws IOException during MockDirectoryWrapper.sync
private static class FailOnlyInSync extends MockDirectoryWrapper.Failure {
boolean didFail;
@@ -747,7 +748,7 @@
}
}
}
-
+
// TODO: these are also in TestIndexWriter... add a simple doc-writing method
// like this to LuceneTestCase?
private void addDoc(IndexWriter writer) throws IOException
@@ -756,7 +757,7 @@
doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
}
-
+
// LUCENE-1044: test exception during sync
public void testExceptionDuringSync() throws IOException {
MockDirectoryWrapper dir = newDirectory();
@@ -792,7 +793,7 @@
reader.close();
dir.close();
}
-
+
private static class FailOnlyInCommit extends MockDirectoryWrapper.Failure {
boolean failOnCommit, failOnDeleteFile;
@@ -835,7 +836,7 @@
}
}
}
-
+
public void testExceptionsDuringCommit() throws Throwable {
FailOnlyInCommit[] failures = new FailOnlyInCommit[] {
// LUCENE-1214
@@ -869,7 +870,7 @@
dir.close();
}
}
-
+
public void testOptimizeExceptions() throws IOException {
Directory startDir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2).setMergePolicy(newLogMergePolicy());
@@ -901,7 +902,7 @@
}
startDir.close();
}
-
+
// LUCENE-1429
public void testOutOfMemoryErrorCausesCloseToFail() throws Exception {
@@ -930,7 +931,7 @@
writer.close();
dir.close();
}
-
+
// LUCENE-1347
private static final class MockIndexWriter4 extends IndexWriter {
@@ -947,7 +948,7 @@
return true;
}
}
-
+
// LUCENE-1347
public void testRollbackExceptionHang() throws Throwable {
Directory dir = newDirectory();
@@ -961,12 +962,12 @@
} catch (RuntimeException re) {
// expected
}
-
+
w.doFail = false;
w.rollback();
dir.close();
}
-
+
// LUCENE-1044: Simulate checksum error in segments_N
public void testSegmentsChecksumError() throws IOException {
Directory dir = newDirectory();
@@ -1005,7 +1006,7 @@
reader.close();
dir.close();
}
-
+
// Simulate a corrupt index by removing last byte of
// latest segments file and make sure we get an
// IOException trying to open the index:
@@ -1053,7 +1054,7 @@
}
dir.close();
}
-
+
// Simulate a corrupt index by removing one of the cfs
// files and make sure we get an IOException trying to
// open the index:
@@ -1102,7 +1103,7 @@
}
dir.close();
}
-
+
// Simulate a writer that crashed while writing segments
// file: make sure we can still open the index (ie,
// gracefully fallback to the previous segments file),
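
Several tests in this file share one idiom: a MockDirectoryWrapper.Failure hook that walks the current stack trace and throws once it spots the method it wants to sabotage (the hunks above merely rename the frames being matched, e.g. FreqProxTermsWriterPerField.flush). A condensed sketch of the idiom, with hypothetical names modeled on FailOnlyInSync above:

    import java.io.IOException;
    import org.apache.lucene.store.MockDirectoryWrapper;

    class FailOnceInSyncSketch extends MockDirectoryWrapper.Failure {
      boolean didFail;

      @Override
      public void eval(MockDirectoryWrapper dir) throws IOException {
        if (didFail) {
          return; // fail only the first time
        }
        for (StackTraceElement frame : new Exception().getStackTrace()) {
          if ("sync".equals(frame.getMethodName())) {
            didFail = true;
            throw new IOException("intentionally failing during sync");
          }
        }
      }
    }
    // installed on the directory with: dir.failOn(new FailOnceInSyncSketch());
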
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java
index f2ecd1e..1cdb76a 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterOnJRECrash.java
@@ -37,7 +37,7 @@
*/
public class TestIndexWriterOnJRECrash extends TestNRTThreads {
private File tempDir;
-
+
@Override
public void setUp() throws Exception {
super.setUp();
@@ -45,13 +45,13 @@
tempDir.delete();
tempDir.mkdir();
}
-
+
@Override
public void testNRTThreads() throws Exception {
String vendor = Constants.JAVA_VENDOR;
- assumeTrue(vendor + " JRE not supported.",
+ assumeTrue(vendor + " JRE not supported.",
vendor.startsWith("Sun") || vendor.startsWith("Apple"));
-
+
// if we are not the fork
if (System.getProperty("tests.crashmode") == null) {
// try up to 10 times to create an index
@@ -81,11 +81,11 @@
}
}
}
-
+
/** fork ourselves in a new jvm. sets -Dtests.crashmode=true */
public void forkTest() throws Exception {
List<String> cmd = new ArrayList<String>();
- cmd.add(System.getProperty("java.home")
+ cmd.add(System.getProperty("java.home")
+ System.getProperty("file.separator")
+ "bin"
+ System.getProperty("file.separator")
@@ -116,7 +116,7 @@
if (VERBOSE) System.err.println("<<< End subprocess output");
p.waitFor();
}
-
+
/**
* Recursively looks for indexes underneath <code>file</code>,
   * and runs checkIndex on them. Returns true if it found any indexes.
@@ -139,7 +139,7 @@
}
return false;
}
-
+
/**
   * Currently, this only works/is tested on Sun and IBM.
*/
diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
index 2a3ce3e..323c050 100644
--- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
+++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
@@ -143,7 +143,7 @@
dir.close();
}
}
-
+
// LUCENE-1130: make sure we can close() even while
// threads are trying to add documents. Strictly
@@ -206,7 +206,7 @@
}
assertTrue(count > 0);
reader.close();
-
+
dir.close();
}
}
@@ -314,7 +314,7 @@
boolean sawClose = false;
for (int i = 0; i < trace.length; i++) {
if ("abort".equals(trace[i].getMethodName()) ||
- "flushDocument".equals(trace[i].getMethodName())) {
+ "finishDocument".equals(trace[i].getMethodName())) {
sawAbortOrFlushDoc = true;
}
if ("close".equals(trace[i].getMethodName())) {
diff --git a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java b/lucene/src/test/org/apache/lucene/index/TestLazyBug.java
index e7cc9e6..da5100a 100755
--- a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java
+++ b/lucene/src/test/org/apache/lucene/index/TestLazyBug.java
@@ -28,7 +28,7 @@
/**
- * Test demonstrating EOF bug on the last field of the last doc
+ * Test demonstrating EOF bug on the last field of the last doc
 * if other docs have already been accessed.
*/
public class TestLazyBug extends LuceneTestCase {
@@ -47,9 +47,9 @@
};
private static Set<String> dataset = asSet(data);
-
+
private static String MAGIC_FIELD = "f"+(NUM_FIELDS/3);
-
+
private static FieldSelector SELECTOR = new FieldSelector() {
public FieldSelectorResult accept(String f) {
if (f.equals(MAGIC_FIELD)) {
@@ -58,22 +58,21 @@
return FieldSelectorResult.LAZY_LOAD;
}
};
-
- private Directory makeIndex() throws Exception {
+
+ private Directory makeIndex() throws Exception {
Directory dir = newDirectory();
try {
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
LogMergePolicy lmp = (LogMergePolicy) writer.getConfig().getMergePolicy();
lmp.setUseCompoundFile(false);
-
for (int d = 1; d <= NUM_DOCS; d++) {
Document doc = new Document();
for (int f = 1; f <= NUM_FIELDS; f++ ) {
- doc.add(newField("f"+f,
- data[f % data.length]
- + '#' + data[random.nextInt(data.length)],
- Field.Store.YES,
+ doc.add(newField("f"+f,
+ data[f % data.length]
+ + '#' + data[random.nextInt(data.length)],
+ Field.Store.YES,
Field.Index.ANALYZED));
}
writer.addDocument(doc);
@@ -84,14 +83,14 @@
}
return dir;
}
-
+
public void doTest(int[] docs) throws Exception {
Directory dir = makeIndex();
IndexReader reader = IndexReader.open(dir, true);
for (int i = 0; i < docs.length; i++) {
Document d = reader.document(docs[i], SELECTOR);
d.get(MAGIC_FIELD);
-
+
List<Fieldable> fields = d.getFields();
for (Iterator<Fieldable> fi = fields.iterator(); fi.hasNext(); ) {
Fieldable f=null;
@@ -101,7 +100,7 @@
String fval = f.stringValue();
assertNotNull(docs[i]+" FIELD: "+fname, fval);
String[] vals = fval.split("#");
- if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) {
+ if (!dataset.contains(vals[0]) || !dataset.contains(vals[1])) {
fail("FIELD:"+fname+",VAL:"+fval);
}
} catch (Exception e) {
@@ -116,7 +115,7 @@
public void testLazyWorks() throws Exception {
doTest(new int[] { 399 });
}
-
+
public void testLazyAlsoWorks() throws Exception {
doTest(new int[] { 399, 150 });
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java b/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
index 331430e..1bf6b37 100755
--- a/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
+++ b/lucene/src/test/org/apache/lucene/index/TestLazyProxSkipping.java
@@ -44,7 +44,7 @@
public class TestLazyProxSkipping extends LuceneTestCase {
private IndexSearcher searcher;
private int seeksCounter = 0;
-
+
private String field = "tokens";
private String term1 = "xx";
private String term2 = "yy";
@@ -64,12 +64,12 @@
}
return ii;
}
-
+
}
-
+
private void createIndex(int numHits) throws IOException {
int numDocs = 500;
-
+
final Analyzer analyzer = new Analyzer() {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
@@ -101,7 +101,7 @@
doc.add(newField(this.field, content, Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);
}
-
+
// make sure the index has only a single segment
writer.optimize();
writer.close();
@@ -110,27 +110,27 @@
this.searcher = newSearcher(reader);
}
-
+
private ScoreDoc[] search() throws IOException {
// create PhraseQuery "term1 term2" and search
PhraseQuery pq = new PhraseQuery();
pq.add(new Term(this.field, this.term1));
pq.add(new Term(this.field, this.term2));
- return this.searcher.search(pq, null, 1000).scoreDocs;
+ return this.searcher.search(pq, null, 1000).scoreDocs;
}
-
+
private void performTest(int numHits) throws IOException {
createIndex(numHits);
this.seeksCounter = 0;
ScoreDoc[] hits = search();
// verify that the right number of docs was found
assertEquals(numHits, hits.length);
-
+
    // check that the number of calls to seek() does not exceed the number of hits
assertTrue(this.seeksCounter > 0);
assertTrue("seeksCounter=" + this.seeksCounter + " numHits=" + numHits, this.seeksCounter <= numHits + 1);
}
-
+
public void testLazySkipping() throws IOException {
assumeFalse("This test cannot run with SimpleText codec", CodecProvider.getDefault().getFieldCodec(this.field).equals("SimpleText"));
// test whether only the minimum amount of seeks()
@@ -140,7 +140,7 @@
performTest(10);
searcher.close();
}
-
+
public void testSeek() throws IOException {
Directory directory = newDirectory();
IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
@@ -149,7 +149,7 @@
doc.add(newField(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED));
writer.addDocument(doc);
}
-
+
writer.close();
IndexReader reader = IndexReader.open(directory, true);
@@ -176,55 +176,55 @@
}
reader.close();
directory.close();
-
+
}
-
+
  // Simply extends IndexInput so that we can count the number
// of invocations of seek()
class SeeksCountingStream extends IndexInput {
- private IndexInput input;
-
-
+ private IndexInput input;
+
+
SeeksCountingStream(IndexInput input) {
this.input = input;
- }
-
+ }
+
@Override
public byte readByte() throws IOException {
return this.input.readByte();
}
-
+
@Override
public void readBytes(byte[] b, int offset, int len) throws IOException {
- this.input.readBytes(b, offset, len);
+ this.input.readBytes(b, offset, len);
}
-
+
@Override
public void close() throws IOException {
this.input.close();
}
-
+
@Override
public long getFilePointer() {
return this.input.getFilePointer();
}
-
+
@Override
public void seek(long pos) throws IOException {
TestLazyProxSkipping.this.seeksCounter++;
this.input.seek(pos);
}
-
+
@Override
public long length() {
return this.input.length();
}
-
+
@Override
public Object clone() {
return new SeeksCountingStream((IndexInput) this.input.clone());
}
-
+
}
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java b/lucene/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java
index 9308846..09abfa5 100644
--- a/lucene/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java
+++ b/lucene/src/test/org/apache/lucene/index/TestNRTReaderWithThreads.java
@@ -44,7 +44,7 @@
indexThreads[x] = new RunThread(x % 2, writer);
indexThreads[x].setName("Thread " + x);
indexThreads[x].start();
- }
+ }
long startTime = System.currentTimeMillis();
long duration = 1000;
while ((System.currentTimeMillis() - startTime) < duration) {
@@ -78,7 +78,7 @@
int addCount = 0;
int type;
final Random r = new Random(random.nextLong());
-
+
public RunThread(int type, IndexWriter writer) {
this.type = type;
this.writer = writer;
diff --git a/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java b/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java
index 587f605..ad4bfc4 100644
--- a/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java
+++ b/lucene/src/test/org/apache/lucene/index/TestNRTThreads.java
@@ -341,7 +341,7 @@
if (VERBOSE) {
System.out.println("TEST: done join [" + (System.currentTimeMillis()-t0) + " ms]; addCount=" + addCount + " delCount=" + delCount);
}
-
+
final IndexReader r2 = writer.getReader();
final IndexSearcher s = newSearcher(r2);
boolean doFail = false;
diff --git a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java
index 680bb87..e3eefc6 100644
--- a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java
+++ b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java
@@ -43,7 +43,7 @@
import org.junit.Test;
/**
- *
+ *
*
*/
public class TestPerFieldCodecSupport extends LuceneTestCase {
@@ -312,4 +312,4 @@
}
dir.close();
}
-}
+}
\ No newline at end of file
diff --git a/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java b/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java
index b5b4c20..7110d1a 100644
--- a/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java
+++ b/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java
@@ -32,13 +32,12 @@
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.Version;
public class TestPerSegmentDeletes extends LuceneTestCase {
public void testDeletes1() throws Exception {
//IndexWriter.debug2 = System.out;
Directory dir = new MockDirectoryWrapper(new Random(random.nextLong()), new RAMDirectory());
- IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT,
+ IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random));
iwc.setMergeScheduler(new SerialMergeScheduler());
iwc.setMaxBufferedDocs(5000);
@@ -66,22 +65,22 @@
writer.addDocument(TestIndexWriterReader.createDocument(x, "3", 2));
//System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
}
-
+
writer.deleteDocuments(new Term("id", "1"));
-
+
writer.deleteDocuments(new Term("id", "11"));
- // flushing without applying deletes means
+ // flushing without applying deletes means
// there will still be deletes in the segment infos
writer.flush(false, false);
assertTrue(writer.bufferedDeletesStream.any());
-
+
    // getReader() flushes pending deletes,
    // so there should not be any more
IndexReader r1 = writer.getReader();
assertFalse(writer.bufferedDeletesStream.any());
r1.close();
-
+
// delete id:2 from the first segment
// merge segments 0 and 1
// which should apply the delete id:2
@@ -91,16 +90,16 @@
fsmp.start = 0;
fsmp.length = 2;
writer.maybeMerge();
-
+
assertEquals(2, writer.segmentInfos.size());
-
+
// id:2 shouldn't exist anymore because
// it's been applied in the merge and now it's gone
IndexReader r2 = writer.getReader();
int[] id2docs = toDocsArray(new Term("id", "2"), null, r2);
assertTrue(id2docs == null);
r2.close();
-
+
/**
// added docs are in the ram buffer
for (int x = 15; x < 20; x++) {
@@ -110,43 +109,43 @@
assertTrue(writer.numRamDocs() > 0);
// delete from the ram buffer
writer.deleteDocuments(new Term("id", Integer.toString(13)));
-
+
Term id3 = new Term("id", Integer.toString(3));
-
+
// delete from the 1st segment
writer.deleteDocuments(id3);
-
+
assertTrue(writer.numRamDocs() > 0);
-
+
//System.out
// .println("segdels1:" + writer.docWriter.deletesToString());
-
+
//assertTrue(writer.docWriter.segmentDeletes.size() > 0);
-
+
// we cause a merge to happen
fsmp.doMerge = true;
fsmp.start = 0;
fsmp.length = 2;
System.out.println("maybeMerge "+writer.segmentInfos);
-
+
SegmentInfo info0 = writer.segmentInfos.get(0);
SegmentInfo info1 = writer.segmentInfos.get(1);
-
+
writer.maybeMerge();
System.out.println("maybeMerge after "+writer.segmentInfos);
// there should be docs in RAM
assertTrue(writer.numRamDocs() > 0);
-
+
// assert we've merged the 1 and 2 segments
// and still have a segment leftover == 2
assertEquals(2, writer.segmentInfos.size());
assertFalse(segThere(info0, writer.segmentInfos));
assertFalse(segThere(info1, writer.segmentInfos));
-
+
//System.out.println("segdels2:" + writer.docWriter.deletesToString());
-
+
//assertTrue(writer.docWriter.segmentDeletes.size() > 0);
-
+
IndexReader r = writer.getReader();
IndexReader r1 = r.getSequentialSubReaders()[0];
printDelDocs(r1.getDeletedDocs());
@@ -155,7 +154,7 @@
// there shouldn't be any docs for id:3
assertTrue(docs == null);
r.close();
-
+
part2(writer, fsmp);
**/
// System.out.println("segdels2:"+writer.docWriter.segmentDeletes.toString());
@@ -163,7 +162,7 @@
writer.close();
dir.close();
}
-
+
/**
static boolean hasPendingDeletes(SegmentInfos infos) {
for (SegmentInfo info : infos) {
@@ -185,42 +184,42 @@
//System.out.println("numRamDocs(" + x + ")" + writer.numRamDocs());
}
writer.flush(false, false);
-
+
//System.out.println("infos3:"+writer.segmentInfos);
-
+
Term delterm = new Term("id", "8");
writer.deleteDocuments(delterm);
//System.out.println("segdels3:" + writer.docWriter.deletesToString());
-
+
fsmp.doMerge = true;
fsmp.start = 1;
fsmp.length = 2;
writer.maybeMerge();
-
- // deletes for info1, the newly created segment from the
+
+ // deletes for info1, the newly created segment from the
// merge should have no deletes because they were applied in
// the merge
//SegmentInfo info1 = writer.segmentInfos.get(1);
//assertFalse(exists(info1, writer.docWriter.segmentDeletes));
-
+
//System.out.println("infos4:"+writer.segmentInfos);
//System.out.println("segdels4:" + writer.docWriter.deletesToString());
}
-
+
boolean segThere(SegmentInfo info, SegmentInfos infos) {
for (SegmentInfo si : infos) {
- if (si.name.equals(info.name)) return true;
+ if (si.name.equals(info.name)) return true;
}
return false;
}
-
+
public static void printDelDocs(Bits bits) {
if (bits == null) return;
for (int x = 0; x < bits.length(); x++) {
System.out.println(x + ":" + bits.get(x));
}
}
-
+
public static int[] toDocsArray(Term term, Bits bits, IndexReader reader)
throws IOException {
Fields fields = MultiFields.getFields(reader);
@@ -233,7 +232,7 @@
}
return null;
}
-
+
public static int[] toArray(DocsEnum docsEnum) throws IOException {
List<Integer> docs = new ArrayList<Integer>();
while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
@@ -242,21 +241,21 @@
}
return ArrayUtil.toIntArray(docs);
}
-
+
public class RangeMergePolicy extends MergePolicy {
boolean doMerge = false;
int start;
int length;
-
+
private final boolean useCompoundFile;
-
+
private RangeMergePolicy(boolean useCompoundFile) {
this.useCompoundFile = useCompoundFile;
}
-
+
@Override
public void close() {}
-
+
@Override
public MergeSpecification findMerges(SegmentInfos segmentInfos)
throws CorruptIndexException, IOException {
@@ -273,20 +272,20 @@
}
return null;
}
-
+
@Override
public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos,
int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize)
throws CorruptIndexException, IOException {
return null;
}
-
+
@Override
public MergeSpecification findMergesToExpungeDeletes(
SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
return null;
}
-
+
@Override
public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) {
return useCompoundFile;
diff --git a/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java b/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java
index e362480..46689a2 100644
--- a/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java
+++ b/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java
@@ -19,6 +19,7 @@
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
+import org.apache.lucene.document.Field.Index;
import org.apache.lucene.store.*;
import org.apache.lucene.util.*;
import org.junit.Test;
@@ -72,4 +73,72 @@
dir.close();
}
+
+
+ public void testUpdateSameDoc() throws Exception {
+ final Directory dir = newDirectory();
+
+ final LineFileDocs docs = new LineFileDocs(random);
+ for (int r = 0; r < 3; r++) {
+ final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+ TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));
+ final int SIZE = 200 * RANDOM_MULTIPLIER;
+ final int numUpdates = (int) (SIZE * (2 + random.nextDouble()));
+ int numThreads = 3 + random.nextInt(12);
+ IndexingThread[] threads = new IndexingThread[numThreads];
+ for (int i = 0; i < numThreads; i++) {
+ threads[i] = new IndexingThread(docs, w, numUpdates);
+ threads[i].start();
+ }
+
+ for (int i = 0; i < numThreads; i++) {
+ threads[i].join();
+ }
+
+ w.close();
+ }
+ IndexReader open = IndexReader.open(dir);
+ assertEquals(1, open.numDocs());
+ open.close();
+ docs.close();
+ dir.close();
+ }
+
+ static class IndexingThread extends Thread {
+ final LineFileDocs docs;
+ final IndexWriter writer;
+ final int num;
+
+ public IndexingThread(LineFileDocs docs, IndexWriter writer, int num) {
+ super();
+ this.docs = docs;
+ this.writer = writer;
+ this.num = num;
+ }
+
+ public void run() {
+ try {
+ IndexReader open = null;
+ for (int i = 0; i < num; i++) {
+ Document doc = new Document();// docs.nextDoc();
+ doc.add(newField("id", "test", Index.NOT_ANALYZED));
+ writer.updateDocument(new Term("id", "test"), doc);
+ if (random.nextInt(10) == 0) {
+ if (open == null)
+ open = IndexReader.open(writer, true);
+ IndexReader reader = open.reopen();
+ if (reader != open) {
+ open.close();
+ open = reader;
+ }
+ assertEquals("iter: " + i + " numDocs: "+ open.numDocs() + " del: " + open.numDeletedDocs() + " max: " + open.maxDoc(), 1, open.numDocs());
+ }
+ }
+      if (open != null) open.close(); // guard: the reader is only opened with 10% probability per iteration
+ } catch (Exception e) {
+ fail(e.getMessage());
+ }
+
+ }
+ }
}
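
The new testUpdateSameDoc above leans on the near-real-time reader pattern: open a reader directly off the writer once, then cheaply reopen it as updates keep arriving. The same logic factored into a helper for clarity (a sketch only; refresh() is not a method the patch adds):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;

    class NrtRefreshSketch {
      // Returns a current NRT reader, reusing 'open' when nothing has changed.
      static IndexReader refresh(IndexWriter writer, IndexReader open) throws IOException {
        if (open == null) {
          return IndexReader.open(writer, true); // true: apply buffered deletes
        }
        IndexReader newer = open.reopen();
        if (newer != open) { // reopen() hands back the same instance if unchanged
          open.close();
          open = newer;
        }
        return open;
      }
    }
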
diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
index d54dd3d..d161e13 100644
--- a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
+++ b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java
@@ -53,7 +53,7 @@
reader1 = SegmentReader.get(true, info1, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
reader2 = SegmentReader.get(true, info2, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
}
-
+
@Override
public void tearDown() throws Exception {
reader1.close();
@@ -71,8 +71,8 @@
assertTrue(reader1 != null);
assertTrue(reader2 != null);
}
-
- public void testMerge() throws IOException {
+
+ public void testMerge() throws IOException {
SegmentMerger merger = new SegmentMerger(mergedDir, IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL, mergedSegment, null, CodecProvider.getDefault(), null, new FieldInfos());
merger.add(reader1);
merger.add(reader2);
@@ -83,7 +83,6 @@
SegmentReader mergedReader = SegmentReader.get(false, mergedDir, new SegmentInfo(mergedSegment, docsMerged, mergedDir, false, fieldInfos.hasProx(),
merger.getSegmentCodecs(), fieldInfos.hasVectors(), fieldInfos),
BufferedIndexInput.BUFFER_SIZE, true, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
-
assertTrue(mergedReader != null);
assertTrue(mergedReader.numDocs() == 2);
Document newDoc1 = mergedReader.document(0);
@@ -93,19 +92,19 @@
Document newDoc2 = mergedReader.document(1);
assertTrue(newDoc2 != null);
assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size());
-
+
DocsEnum termDocs = MultiFields.getTermDocsEnum(mergedReader,
MultiFields.getDeletedDocs(mergedReader),
DocHelper.TEXT_FIELD_2_KEY,
new BytesRef("field"));
assertTrue(termDocs != null);
assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
-
+
Collection<String> stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
assertTrue(stored != null);
//System.out.println("stored size: " + stored.size());
assertTrue("We do not have 3 fields that were indexed with term vector",stored.size() == 3);
-
+
TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(vector != null);
BytesRef [] terms = vector.getTerms();
@@ -116,7 +115,7 @@
assertTrue(freqs != null);
//System.out.println("Freqs size: " + freqs.length);
assertTrue(vector instanceof TermPositionVector == true);
-
+
for (int i = 0; i < terms.length; i++) {
String term = terms[i].utf8ToString();
int freq = freqs[i];
@@ -127,5 +126,5 @@
TestSegmentReader.checkNorms(mergedReader);
mergedReader.close();
- }
+ }
}
diff --git a/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java b/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
index c1d7682..01bc4a8 100644
--- a/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
+++ b/lucene/src/test/org/apache/lucene/index/TestSizeBoundedOptimize.java
@@ -33,7 +33,7 @@
}
writer.commit();
}
-
+
private static IndexWriterConfig newWriterConfig() throws IOException {
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
@@ -42,7 +42,7 @@
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES);
return conf;
}
-
+
public void testByteSizeLimit() throws Exception {
// tests that the max merge size constraint is applied during optimize.
Directory dir = new RAMDirectory();
@@ -65,7 +65,7 @@
LogByteSizeMergePolicy lmp = new LogByteSizeMergePolicy();
lmp.setMaxMergeMBForOptimize((min + 1) / (1 << 20));
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
@@ -91,14 +91,14 @@
addDocs(writer, 3);
addDocs(writer, 3);
addDocs(writer, 3);
-
+
writer.close();
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(3);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
@@ -119,14 +119,14 @@
addDocs(writer, 3);
addDocs(writer, 3);
addDocs(writer, 5);
-
+
writer.close();
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(3);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
@@ -135,120 +135,120 @@
sis.read(dir);
assertEquals(2, sis.size());
}
-
+
public void testFirstSegmentTooLarge() throws Exception {
Directory dir = new RAMDirectory();
-
+
IndexWriterConfig conf = newWriterConfig();
IndexWriter writer = new IndexWriter(dir, conf);
-
+
addDocs(writer, 5);
addDocs(writer, 3);
addDocs(writer, 3);
addDocs(writer, 3);
-
+
writer.close();
-
+
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(3);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
-
+
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
assertEquals(2, sis.size());
}
-
+
public void testAllSegmentsSmall() throws Exception {
Directory dir = new RAMDirectory();
-
+
IndexWriterConfig conf = newWriterConfig();
IndexWriter writer = new IndexWriter(dir, conf);
-
+
addDocs(writer, 3);
addDocs(writer, 3);
addDocs(writer, 3);
addDocs(writer, 3);
-
+
writer.close();
-
+
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(3);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
-
+
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
assertEquals(1, sis.size());
}
-
+
public void testAllSegmentsLarge() throws Exception {
Directory dir = new RAMDirectory();
-
+
IndexWriterConfig conf = newWriterConfig();
IndexWriter writer = new IndexWriter(dir, conf);
-
+
addDocs(writer, 3);
addDocs(writer, 3);
addDocs(writer, 3);
-
+
writer.close();
-
+
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(2);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
-
+
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
assertEquals(3, sis.size());
}
-
+
public void testOneLargeOneSmall() throws Exception {
Directory dir = new RAMDirectory();
-
+
IndexWriterConfig conf = newWriterConfig();
IndexWriter writer = new IndexWriter(dir, conf);
-
+
addDocs(writer, 3);
addDocs(writer, 5);
addDocs(writer, 3);
addDocs(writer, 5);
-
+
writer.close();
-
+
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(3);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
-
+
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
assertEquals(4, sis.size());
}
-
+
public void testMergeFactor() throws Exception {
Directory dir = new RAMDirectory();
-
+
IndexWriterConfig conf = newWriterConfig();
IndexWriter writer = new IndexWriter(dir, conf);
-
+
addDocs(writer, 3);
addDocs(writer, 3);
addDocs(writer, 3);
@@ -256,78 +256,78 @@
addDocs(writer, 5);
addDocs(writer, 3);
addDocs(writer, 3);
-
+
writer.close();
-
+
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(3);
lmp.setMergeFactor(2);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
-
+
// Should only be 4 segments in the index, because of the merge factor and
// max merge docs settings.
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
assertEquals(4, sis.size());
}
-
+
public void testSingleNonOptimizedSegment() throws Exception {
Directory dir = new RAMDirectory();
-
+
IndexWriterConfig conf = newWriterConfig();
IndexWriter writer = new IndexWriter(dir, conf);
-
+
addDocs(writer, 3);
addDocs(writer, 5);
addDocs(writer, 3);
-
+
writer.close();
-
+
    // delete the last document, so that the last segment is eligible for optimize.
IndexReader r = IndexReader.open(dir, false);
r.deleteDocument(r.numDocs() - 1);
r.close();
-
+
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(3);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
-
+
// Verify that the last segment does not have deletions.
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
assertEquals(3, sis.size());
assertFalse(sis.info(2).hasDeletions());
}
-
+
public void testSingleOptimizedSegment() throws Exception {
Directory dir = new RAMDirectory();
-
+
IndexWriterConfig conf = newWriterConfig();
IndexWriter writer = new IndexWriter(dir, conf);
-
+
addDocs(writer, 3);
-
+
writer.close();
-
+
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(3);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
-
+
// Verify that the last segment does not have deletions.
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
@@ -336,28 +336,28 @@
public void testSingleNonOptimizedTooLargeSegment() throws Exception {
Directory dir = new RAMDirectory();
-
+
IndexWriterConfig conf = newWriterConfig();
IndexWriter writer = new IndexWriter(dir, conf);
-
+
addDocs(writer, 5);
-
+
writer.close();
-
+
// delete the last document
IndexReader r = IndexReader.open(dir, false);
r.deleteDocument(r.numDocs() - 1);
r.close();
-
+
conf = newWriterConfig();
LogMergePolicy lmp = new LogDocMergePolicy();
lmp.setMaxMergeDocs(2);
conf.setMergePolicy(lmp);
-
+
writer = new IndexWriter(dir, conf);
writer.optimize();
writer.close();
-
+
// Verify that the last segment does not have deletions.
SegmentInfos sis = new SegmentInfos();
sis.read(dir);
diff --git a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
index 0762d5a..a0fbe6d 100644
--- a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
+++ b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java
@@ -201,7 +201,7 @@
Map<String,Document> docs = new HashMap<String,Document>();
IndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)
- .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setMaxThreadStates(maxThreadStates)
+ .setRAMBufferSizeMB(0.1).setMaxBufferedDocs(maxBufferedDocs).setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates))
.setReaderPooling(doReaderPooling).setMergePolicy(newLogMergePolicy()));
w.setInfoStream(VERBOSE ? System.out : null);
LogMergePolicy lmp = (LogMergePolicy) w.getConfig().getMergePolicy();
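
This one-line change is the migration recipe for the removed setMaxThreadStates knob: the thread-state count now travels with the indexer thread pool. In isolation, and assuming the same test-framework helpers the hunk uses (newIndexWriterConfig, random, maxThreadStates):

    // before: conf.setMaxThreadStates(maxThreadStates);
    // after:  the pool owns the number of DWPT thread states
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
        .setIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates));
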
diff --git a/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java b/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
index c48e571..31630ed 100644
--- a/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
+++ b/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java
@@ -121,7 +121,7 @@
fieldInfos = new FieldInfos(dir, IndexFileNames.segmentFileName(seg, "", IndexFileNames.FIELD_INFOS_EXTENSION));
}
-
+
@Override
public void tearDown() throws Exception {
dir.close();
@@ -130,17 +130,17 @@
private class MyTokenStream extends TokenStream {
int tokenUpto;
-
+
CharTermAttribute termAtt;
PositionIncrementAttribute posIncrAtt;
OffsetAttribute offsetAtt;
-
+
public MyTokenStream() {
termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
}
-
+
@Override
public boolean incrementToken() {
if (tokenUpto >= tokens.length)
diff --git a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java b/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java
index 23370e4..766330b 100644
--- a/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java
+++ b/lucene/src/test/org/apache/lucene/search/TestCachingSpanFilter.java
@@ -67,7 +67,7 @@
// ignore deletions
CachingSpanFilter filter = new CachingSpanFilter(startFilter, CachingWrapperFilter.DeletesMode.IGNORE);
-
+
docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
ConstantScoreQuery constantScore = new ConstantScoreQuery(filter);
@@ -97,7 +97,7 @@
reader = refreshReader(reader);
searcher.close();
searcher = newSearcher(reader);
-
+
docs = searcher.search(new MatchAllDocsQuery(), filter, 1);
assertEquals("[query + filter] Should find a hit...", 1, docs.totalHits);
diff --git a/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java b/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
index 75090b1..180ed1c 100644
--- a/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
+++ b/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
@@ -38,12 +38,12 @@
Set<String> fileExtensions = new HashSet<String>();
fileExtensions.add(IndexFileNames.FIELDS_EXTENSION);
fileExtensions.add(IndexFileNames.FIELDS_INDEX_EXTENSION);
-
+
MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(random, new RAMDirectory());
primaryDir.setCheckIndexOnClose(false); // only part of an index
MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(random, new RAMDirectory());
secondaryDir.setCheckIndexOnClose(false); // only part of an index
-
+
FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true);
IndexWriter writer = new IndexWriter(
fsd,
diff --git a/solr/src/test-files/solr/conf/solrconfig-repeater.xml b/solr/src/test-files/solr/conf/solrconfig-repeater.xml
index e956e7f..e29db07 100644
--- a/solr/src/test-files/solr/conf/solrconfig-repeater.xml
+++ b/solr/src/test-files/solr/conf/solrconfig-repeater.xml
@@ -17,7 +17,7 @@
limitations under the License.
-->
-<!-- $Id$
+<!-- $Id: solrconfig-repeater.xml 1072397 2011-02-19 17:09:45Z hossman $
$Source$
$Name$
-->