| package org.apache.lucene.index; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.index.DocumentsWriter.IndexingChain; |
| import org.apache.lucene.index.IndexWriter.IndexReaderWarmer; |
| import org.apache.lucene.search.Similarity; |
| import org.apache.lucene.util.Version; |
| |
| /** |
 * Holds all the configuration of {@link IndexWriter}. This object is only used
 * while constructing a new IndexWriter. The settings cannot be changed
 * afterwards; to change them, a new IndexWriter must be instantiated.
 * <p>
 * All setter methods return {@link IndexWriterConfig} to allow chaining
 * settings conveniently, so one can write:
 *
 * <pre>
 * IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
 * conf.setter1().setter2();
 * </pre>
| * |
| * @since 3.1 |
| */ |
| public final class IndexWriterConfig implements Cloneable { |
| |
/**
 * Denotes a field length without limit.
 *
 * @see #setMaxFieldLength(int)
 */
public static final int UNLIMITED_FIELD_LENGTH = Integer.MAX_VALUE;
| |
| /** |
| * Specifies the open mode for {@link IndexWriter}: |
 * <ul>
 * <li>{@link #CREATE} - creates a new index or overwrites an existing one.</li>
 * <li>{@link #CREATE_OR_APPEND} - creates a new index if one does not exist,
 * otherwise it opens the index and documents will be appended.</li>
 * <li>{@link #APPEND} - opens an existing index.</li>
| * </ul> |
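 * <p>
 * For example, to always create a fresh index (an illustrative sketch,
 * assuming an existing <code>analyzer</code>):
 * <pre>
 * IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31, analyzer);
 * conf.setOpenMode(OpenMode.CREATE);
 * </pre>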
| */ |
| public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND } |
| |
| /** Default value is 128. Change using {@link #setTermIndexInterval(int)}. */ |
| public static final int DEFAULT_TERM_INDEX_INTERVAL = 128; |
| |
| /** Denotes a flush trigger is disabled. */ |
| public final static int DISABLE_AUTO_FLUSH = -1; |
| |
| /** Disabled by default (because IndexWriter flushes by RAM usage by default). */ |
| public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH; |
| |
| /** Disabled by default (because IndexWriter flushes by RAM usage by default). */ |
| public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH; |
| |
| /** |
| * Default value is 16 MB (which means flush when buffered docs consume |
| * approximately 16 MB RAM). |
| */ |
| public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0; |
| |
| /** |
| * Default value for the write lock timeout (1,000 ms). |
| * |
| * @see #setDefaultWriteLockTimeout(long) |
| */ |
| public static long WRITE_LOCK_TIMEOUT = 1000; |
| |
| /** The maximum number of simultaneous threads that may be |
| * indexing documents at once in IndexWriter; if more |
| * than this many threads arrive they will wait for |
| * others to finish. */ |
| public final static int DEFAULT_MAX_THREAD_STATES = 8; |
| |
| /** |
| * Sets the default (for any instance) maximum time to wait for a write lock |
| * (in milliseconds). |
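 * <p>
 * For example (an illustrative call; only configs created afterwards are
 * affected, since each config reads the default at construction time):
 * <pre>
 * IndexWriterConfig.setDefaultWriteLockTimeout(2000); // 2 seconds
 * </pre>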
| */ |
| public static void setDefaultWriteLockTimeout(long writeLockTimeout) { |
| WRITE_LOCK_TIMEOUT = writeLockTimeout; |
| } |
| |
| /** |
| * Returns the default write lock timeout for newly instantiated |
| * IndexWriterConfigs. |
| * |
| * @see #setDefaultWriteLockTimeout(long) |
| */ |
| public static long getDefaultWriteLockTimeout() { |
| return WRITE_LOCK_TIMEOUT; |
| } |
| |
| private Analyzer analyzer; |
| private IndexDeletionPolicy delPolicy; |
| private IndexCommit commit; |
| private OpenMode openMode; |
| private int maxFieldLength; |
| private Similarity similarity; |
| private int termIndexInterval; |
| private MergeScheduler mergeScheduler; |
| private long writeLockTimeout; |
| private int maxBufferedDeleteTerms; |
| private double ramBufferSizeMB; |
| private int maxBufferedDocs; |
| private IndexingChain indexingChain; |
| private IndexReaderWarmer mergedSegmentWarmer; |
| private MergePolicy mergePolicy; |
| private int maxThreadStates; |
| |
| // required for clone |
| private Version matchVersion; |
| |
| /** |
 * Creates a new config, using the provided {@link Analyzer}, with defaults
 * that match the specified {@link Version}. {@link Version} is a placeholder
 * for future changes. The default settings are relevant to 3.1 and before.
 * In the future, if different settings apply to different versions, they
 * will be documented here.
| */ |
| public IndexWriterConfig(Version matchVersion, Analyzer analyzer) { |
| this.matchVersion = matchVersion; |
| this.analyzer = analyzer; |
| delPolicy = new KeepOnlyLastCommitDeletionPolicy(); |
| commit = null; |
| openMode = OpenMode.CREATE_OR_APPEND; |
| maxFieldLength = UNLIMITED_FIELD_LENGTH; |
| similarity = Similarity.getDefault(); |
| termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; |
| mergeScheduler = new ConcurrentMergeScheduler(); |
| writeLockTimeout = WRITE_LOCK_TIMEOUT; |
| maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS; |
| ramBufferSizeMB = DEFAULT_RAM_BUFFER_SIZE_MB; |
| maxBufferedDocs = DEFAULT_MAX_BUFFERED_DOCS; |
| indexingChain = DocumentsWriter.defaultIndexingChain; |
| mergedSegmentWarmer = null; |
| mergePolicy = new LogByteSizeMergePolicy(); |
| maxThreadStates = DEFAULT_MAX_THREAD_STATES; |
| } |
| |
| @Override |
| public Object clone() { |
// Shallow clone is the only thing that's possible, since parameters like
// analyzer, index commit etc. do not implement Cloneable.
| try { |
| return super.clone(); |
| } catch (CloneNotSupportedException e) { |
| // should not happen |
| throw new RuntimeException(e); |
| } |
| } |
| |
| /** Returns the default analyzer to use for indexing documents. */ |
| public Analyzer getAnalyzer() { |
| return analyzer; |
| } |
| |
/** Specifies the {@link OpenMode} of the index. */
| public IndexWriterConfig setOpenMode(OpenMode openMode) { |
| this.openMode = openMode; |
| return this; |
| } |
| |
| /** Returns the {@link OpenMode} set by {@link #setOpenMode(OpenMode)}. */ |
| public OpenMode getOpenMode() { |
| return openMode; |
| } |
| |
| /** |
| * Expert: allows an optional {@link IndexDeletionPolicy} implementation to be |
| * specified. You can use this to control when prior commits are deleted from |
| * the index. The default policy is {@link KeepOnlyLastCommitDeletionPolicy} |
| * which removes all prior commits as soon as a new commit is done (this |
| * matches behavior before 2.2). Creating your own policy can allow you to |
| * explicitly keep previous "point in time" commits alive in the index for |
| * some time, to allow readers to refresh to the new commit without having the |
| * old commit deleted out from under them. This is necessary on filesystems |
| * like NFS that do not support "delete on last close" semantics, which |
| * Lucene's "point in time" search normally relies on. |
| * <p> |
| * <b>NOTE:</b> the deletion policy cannot be null. If <code>null</code> is |
| * passed, the deletion policy will be set to the default. |
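 * <p>
 * A minimal sketch of a custom policy that keeps the two most recent commits
 * alive (illustrative only; assumes <code>java.util.List</code> is imported):
 * <pre>
 * class KeepLastTwoCommitsDeletionPolicy implements IndexDeletionPolicy {
 *   public void onInit(List&lt;? extends IndexCommit&gt; commits) {
 *     onCommit(commits);
 *   }
 *   public void onCommit(List&lt;? extends IndexCommit&gt; commits) {
 *     // commits are sorted by age; the 0th entry is the oldest commit
 *     for (int i = 0; i &lt; commits.size() - 2; i++) {
 *       commits.get(i).delete();
 *     }
 *   }
 * }
 * </pre>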
| */ |
| public IndexWriterConfig setIndexDeletionPolicy(IndexDeletionPolicy delPolicy) { |
| this.delPolicy = delPolicy == null ? new KeepOnlyLastCommitDeletionPolicy() : delPolicy; |
| return this; |
| } |
| |
| /** |
| * Returns the {@link IndexDeletionPolicy} specified in |
| * {@link #setIndexDeletionPolicy(IndexDeletionPolicy)} or the default |
 * {@link KeepOnlyLastCommitDeletionPolicy}.
| */ |
| public IndexDeletionPolicy getIndexDeletionPolicy() { |
| return delPolicy; |
| } |
| |
| /** |
| * The maximum number of terms that will be indexed for a single field in a |
| * document. This limits the amount of memory required for indexing, so that |
| * collections with very large files will not crash the indexing process by |
| * running out of memory. This setting refers to the number of running terms, |
| * not to the number of different terms. |
| * <p> |
| * <b>NOTE:</b> this silently truncates large documents, excluding from the |
| * index all terms that occur further in the document. If you know your source |
 * documents are large, be sure to set this value high enough to accommodate
| * the expected size. If you set it to {@link #UNLIMITED_FIELD_LENGTH}, then |
| * the only limit is your memory, but you should anticipate an |
| * OutOfMemoryError. |
| * <p> |
| * By default it is set to {@link #UNLIMITED_FIELD_LENGTH}. |
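 * <p>
 * For example, to index at most the first 10,000 terms per field (an
 * illustrative value):
 * <pre>
 * conf.setMaxFieldLength(10000);
 * </pre>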
| */ |
| public IndexWriterConfig setMaxFieldLength(int maxFieldLength) { |
| this.maxFieldLength = maxFieldLength; |
| return this; |
| } |
| |
| /** |
| * Returns the maximum number of terms that will be indexed for a single field |
| * in a document. |
| * |
| * @see #setMaxFieldLength(int) |
| */ |
| public int getMaxFieldLength() { |
| return maxFieldLength; |
| } |
| |
| /** |
 * Expert: allows opening a certain commit point. The default is null, which
| * opens the latest commit point. |
| */ |
| public IndexWriterConfig setIndexCommit(IndexCommit commit) { |
| this.commit = commit; |
| return this; |
| } |
| |
| /** |
| * Returns the {@link IndexCommit} as specified in |
| * {@link #setIndexCommit(IndexCommit)} or the default, <code>null</code> |
| * which specifies to open the latest index commit point. |
| */ |
| public IndexCommit getIndexCommit() { |
| return commit; |
| } |
| |
| /** |
| * Expert: set the {@link Similarity} implementation used by this IndexWriter. |
| * <p> |
| * <b>NOTE:</b> the similarity cannot be null. If <code>null</code> is passed, |
| * the similarity will be set to the default. |
| * |
| * @see Similarity#setDefault(Similarity) |
| */ |
| public IndexWriterConfig setSimilarity(Similarity similarity) { |
| this.similarity = similarity == null ? Similarity.getDefault() : similarity; |
| return this; |
| } |
| |
| /** |
| * Expert: returns the {@link Similarity} implementation used by this |
| * IndexWriter. This defaults to the current value of |
| * {@link Similarity#getDefault()}. |
| */ |
| public Similarity getSimilarity() { |
| return similarity; |
| } |
| |
| /** |
| * Expert: set the interval between indexed terms. Large values cause less |
| * memory to be used by IndexReader, but slow random-access to terms. Small |
| * values cause more memory to be used by an IndexReader, and speed |
| * random-access to terms. |
| * <p> |
| * This parameter determines the amount of computation required per query |
| * term, regardless of the number of documents that contain that term. In |
| * particular, it is the maximum number of other terms that must be scanned |
| * before a term is located and its frequency and position information may be |
| * processed. In a large index with user-entered query terms, query processing |
| * time is likely to be dominated not by term lookup but rather by the |
| * processing of frequency and positional data. In a small index or when many |
| * uncommon query terms are generated (e.g., by wildcard queries) term lookup |
| * may become a dominant cost. |
| * <p> |
| * In particular, <code>numUniqueTerms/interval</code> terms are read into |
| * memory by an IndexReader, and, on average, <code>interval/2</code> terms |
| * must be scanned for each random term access. |
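 * <p>
 * For example, with 1,000,000 unique terms and the default interval of 128,
 * an IndexReader holds roughly 1,000,000/128, or about 7,800 terms in
 * memory, and each random term access scans about 64 terms on average.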
| * |
| * @see #DEFAULT_TERM_INDEX_INTERVAL |
| */ |
| public IndexWriterConfig setTermIndexInterval(int interval) { |
| this.termIndexInterval = interval; |
| return this; |
| } |
| |
| /** |
| * Returns the interval between indexed terms. |
| * |
| * @see #setTermIndexInterval(int) |
| */ |
| public int getTermIndexInterval() { |
| return termIndexInterval; |
| } |
| |
| /** |
| * Expert: sets the merge scheduler used by this writer. The default is |
| * {@link ConcurrentMergeScheduler}. |
| * <p> |
| * <b>NOTE:</b> the merge scheduler cannot be null. If <code>null</code> is |
| * passed, the merge scheduler will be set to the default. |
| */ |
| public IndexWriterConfig setMergeScheduler(MergeScheduler mergeScheduler) { |
| this.mergeScheduler = mergeScheduler == null ? new ConcurrentMergeScheduler() : mergeScheduler; |
| return this; |
| } |
| |
| /** |
| * Returns the {@link MergeScheduler} that was set by |
| * {@link #setMergeScheduler(MergeScheduler)} |
| */ |
| public MergeScheduler getMergeScheduler() { |
| return mergeScheduler; |
| } |
| |
| /** |
| * Sets the maximum time to wait for a write lock (in milliseconds) for this |
| * instance. You can change the default value for all instances by calling |
| * {@link #setDefaultWriteLockTimeout(long)}. |
| */ |
| public IndexWriterConfig setWriteLockTimeout(long writeLockTimeout) { |
| this.writeLockTimeout = writeLockTimeout; |
| return this; |
| } |
| |
| /** |
| * Returns allowed timeout when acquiring the write lock. |
| * |
| * @see #setWriteLockTimeout(long) |
| */ |
| public long getWriteLockTimeout() { |
| return writeLockTimeout; |
| } |
| |
| /** |
| * Determines the minimal number of delete terms required before the buffered |
| * in-memory delete terms are applied and flushed. If there are documents |
| * buffered in memory at the time, they are merged and a new segment is |
 * created.
 *
 * <p>Disabled by default (writer flushes by RAM usage).
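 * <p>
 * For example, to apply and flush buffered deletes after every 1,000 delete
 * terms (an illustrative value):
 * <pre>
 * conf.setMaxBufferedDeleteTerms(1000);
 * </pre>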
| * |
| * @throws IllegalArgumentException if maxBufferedDeleteTerms |
| * is enabled but smaller than 1 |
| * @see #setRAMBufferSizeMB |
| */ |
| public IndexWriterConfig setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { |
| if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH |
| && maxBufferedDeleteTerms < 1) |
| throw new IllegalArgumentException( |
| "maxBufferedDeleteTerms must at least be 1 when enabled"); |
| this.maxBufferedDeleteTerms = maxBufferedDeleteTerms; |
| return this; |
| } |
| |
| /** |
| * Returns the number of buffered deleted terms that will trigger a flush if |
| * enabled. |
| * |
| * @see #setMaxBufferedDeleteTerms(int) |
| */ |
| public int getMaxBufferedDeleteTerms() { |
| return maxBufferedDeleteTerms; |
| } |
| |
| /** |
| * Determines the amount of RAM that may be used for buffering added documents |
| * and deletions before they are flushed to the Directory. Generally for |
| * faster indexing performance it's best to flush by RAM usage instead of |
| * document count and use as large a RAM buffer as you can. |
| * |
| * <p> |
| * When this is set, the writer will flush whenever buffered documents and |
| * deletions use this much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent |
| * triggering a flush due to RAM usage. Note that if flushing by document |
| * count is also enabled, then the flush will be triggered by whichever comes |
| * first. |
| * |
| * <p> |
 * <b>NOTE</b>: the accounting of RAM usage for pending deletions is only
| * approximate. Specifically, if you delete by Query, Lucene currently has no |
| * way to measure the RAM usage of individual Queries so the accounting will |
| * under-estimate and you should compensate by either calling commit() |
| * periodically yourself, or by using {@link #setMaxBufferedDeleteTerms(int)} |
| * to flush by count instead of RAM usage (each buffered delete Query counts |
| * as one). |
| * |
| * <p> |
| * <b>NOTE</b>: because IndexWriter uses <code>int</code>s when managing its |
| * internal storage, the absolute maximum value for this setting is somewhat |
| * less than 2048 MB. The precise limit depends on various factors, such as |
| * how large your documents are, how many fields have norms, etc., so it's |
| * best to set this value comfortably under 2048. |
| * |
| * <p> |
| * The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. |
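 * <p>
 * For example, to flush by RAM usage only, with a 48 MB buffer (illustrative
 * values, assuming a config <code>conf</code>):
 * <pre>
 * conf.setRAMBufferSizeMB(48.0).setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
 * </pre>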
| * |
| * @throws IllegalArgumentException |
 *           if ramBufferSizeMB is enabled but non-positive, or if it disables
 *           ramBufferSizeMB when maxBufferedDocs is already disabled
| */ |
| public IndexWriterConfig setRAMBufferSizeMB(double ramBufferSizeMB) { |
| if (ramBufferSizeMB > 2048.0) { |
| throw new IllegalArgumentException("ramBufferSize " + ramBufferSizeMB |
| + " is too large; should be comfortably less than 2048"); |
| } |
| if (ramBufferSizeMB != DISABLE_AUTO_FLUSH && ramBufferSizeMB <= 0.0) |
| throw new IllegalArgumentException( |
| "ramBufferSize should be > 0.0 MB when enabled"); |
| if (ramBufferSizeMB == DISABLE_AUTO_FLUSH && maxBufferedDocs == DISABLE_AUTO_FLUSH) |
| throw new IllegalArgumentException( |
| "at least one of ramBufferSize and maxBufferedDocs must be enabled"); |
| this.ramBufferSizeMB = ramBufferSizeMB; |
| return this; |
| } |
| |
| /** Returns the value set by {@link #setRAMBufferSizeMB(double)} if enabled. */ |
| public double getRAMBufferSizeMB() { |
| return ramBufferSizeMB; |
| } |
| |
| /** |
| * Determines the minimal number of documents required before the buffered |
| * in-memory documents are flushed as a new Segment. Large values generally |
| * give faster indexing. |
| * |
| * <p> |
| * When this is set, the writer will flush every maxBufferedDocs added |
| * documents. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a |
| * flush due to number of buffered documents. Note that if flushing by RAM |
| * usage is also enabled, then the flush will be triggered by whichever comes |
| * first. |
| * |
| * <p> |
| * Disabled by default (writer flushes by RAM usage). |
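 * <p>
 * For example, to flush every 1,000 added documents instead of by RAM usage
 * (an illustrative value):
 * <pre>
 * conf.setMaxBufferedDocs(1000).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
 * </pre>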
| * |
| * @see #setRAMBufferSizeMB(double) |
| * |
| * @throws IllegalArgumentException |
 *           if maxBufferedDocs is enabled but smaller than 2, or if it disables
 *           maxBufferedDocs when ramBufferSizeMB is already disabled
| */ |
| public IndexWriterConfig setMaxBufferedDocs(int maxBufferedDocs) { |
| if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) |
| throw new IllegalArgumentException( |
| "maxBufferedDocs must at least be 2 when enabled"); |
| if (maxBufferedDocs == DISABLE_AUTO_FLUSH |
| && ramBufferSizeMB == DISABLE_AUTO_FLUSH) |
| throw new IllegalArgumentException( |
| "at least one of ramBufferSize and maxBufferedDocs must be enabled"); |
| this.maxBufferedDocs = maxBufferedDocs; |
| return this; |
| } |
| |
| /** |
| * Returns the number of buffered added documents that will trigger a flush if |
| * enabled. |
| * |
| * @see #setMaxBufferedDocs(int) |
| */ |
| public int getMaxBufferedDocs() { |
| return maxBufferedDocs; |
| } |
| |
| /** Set the merged segment warmer. See {@link IndexReaderWarmer}. */ |
public IndexWriterConfig setMergedSegmentWarmer(IndexReaderWarmer mergedSegmentWarmer) {
  this.mergedSegmentWarmer = mergedSegmentWarmer;
| return this; |
| } |
| |
| /** Returns the current merged segment warmer. See {@link IndexReaderWarmer}. */ |
| public IndexReaderWarmer getMergedSegmentWarmer() { |
| return mergedSegmentWarmer; |
| } |
| |
| /** |
| * Expert: {@link MergePolicy} is invoked whenever there are changes to the |
| * segments in the index. Its role is to select which merges to do, if any, |
| * and return a {@link MergePolicy.MergeSpecification} describing the merges. |
 * It also selects merges to do for optimize(). The default is
 * {@link LogByteSizeMergePolicy}.
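 * <p>
 * For example, to merge segments by document count rather than byte size
 * (an illustrative choice):
 * <pre>
 * conf.setMergePolicy(new LogDocMergePolicy());
 * </pre>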
| */ |
| public IndexWriterConfig setMergePolicy(MergePolicy mergePolicy) { |
| this.mergePolicy = mergePolicy == null ? new LogByteSizeMergePolicy() : mergePolicy; |
| return this; |
| } |
| |
| /** |
| * Sets the max number of simultaneous threads that may be indexing documents |
 * at once in IndexWriter. Values &lt; 1 are invalid and if passed
| * <code>maxThreadStates</code> will be set to |
| * {@link #DEFAULT_MAX_THREAD_STATES}. |
| */ |
| public IndexWriterConfig setMaxThreadStates(int maxThreadStates) { |
| this.maxThreadStates = maxThreadStates < 1 ? DEFAULT_MAX_THREAD_STATES : maxThreadStates; |
| return this; |
| } |
| |
| /** Returns the max number of simultaneous threads that |
| * may be indexing documents at once in IndexWriter. */ |
| public int getMaxThreadStates() { |
| return maxThreadStates; |
| } |
| |
| /** |
| * Returns the current MergePolicy in use by this writer. |
| * |
| * @see #setMergePolicy(MergePolicy) |
| */ |
| public MergePolicy getMergePolicy() { |
| return mergePolicy; |
| } |
| |
| /** Expert: sets the {@link DocConsumer} chain to be used to process documents. */ |
| IndexWriterConfig setIndexingChain(IndexingChain indexingChain) { |
| this.indexingChain = indexingChain == null ? DocumentsWriter.defaultIndexingChain : indexingChain; |
| return this; |
| } |
| |
/** Returns the indexing chain set by {@link #setIndexingChain(IndexingChain)}. */
| IndexingChain getIndexingChain() { |
| return indexingChain; |
| } |
| |
| @Override |
| public String toString() { |
| StringBuilder sb = new StringBuilder(); |
| sb.append("matchVersion=").append(matchVersion).append("\n"); |
| sb.append("analyzer=").append(analyzer.getClass().getName()).append("\n"); |
| sb.append("delPolicy=").append(delPolicy.getClass().getName()).append("\n"); |
| sb.append("commit=").append(commit == null ? "null" : commit.getClass().getName()).append("\n"); |
| sb.append("openMode=").append(openMode).append("\n"); |
| sb.append("maxFieldLength=").append(maxFieldLength).append("\n"); |
| sb.append("similarity=").append(similarity.getClass().getName()).append("\n"); |
| sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); |
| sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n"); |
| sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n"); |
| sb.append("writeLockTimeout=").append(writeLockTimeout).append("\n"); |
| sb.append("maxBufferedDeleteTerms=").append(maxBufferedDeleteTerms).append("\n"); |
| sb.append("ramBufferSizeMB=").append(ramBufferSizeMB).append("\n"); |
| sb.append("maxBufferedDocs=").append(maxBufferedDocs).append("\n"); |
| sb.append("mergedSegmentWarmer=").append(mergedSegmentWarmer).append("\n"); |
| sb.append("mergePolicy=").append(mergePolicy).append("\n"); |
| sb.append("maxThreadStates=").append(maxThreadStates).append("\n"); |
| return sb.toString(); |
| } |
| } |