| Index: common-build.xml |
| =================================================================== |
| --- common-build.xml (revision 414705) |
| +++ common-build.xml (working copy) |
| @@ -28,8 +28,8 @@ |
| |
| <property name="javac.deprecation" value="off"/> |
| <property name="javac.debug" value="on"/> |
| - <property name="javac.source" value="1.4"/> |
| - <property name="javac.target" value="1.4"/> |
| + <property name="javac.source" value="1.5"/> |
| + <property name="javac.target" value="1.5"/> |
| |
| <property name="project.name" value="site"/> <!-- todo: is this used by anakia or something else? --> |
| <property name="build.encoding" value="utf-8"/> |
| Index: src/test/org/apache/lucene/index/TestDocumentWriter.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestDocumentWriter.java (revision 414705) |
| +++ src/test/org/apache/lucene/index/TestDocumentWriter.java (working copy) |
| @@ -16,11 +16,15 @@ |
| * limitations under the License. |
| */ |
| |
| +import java.util.LinkedList; |
| +import java.util.List; |
| import junit.framework.TestCase; |
| import org.apache.lucene.analysis.Analyzer; |
| +import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.WhitespaceTokenizer; |
| +import org.apache.lucene.analysis.TokenSelector; |
| import org.apache.lucene.document.*; |
| import org.apache.lucene.search.Similarity; |
| import org.apache.lucene.store.RAMDirectory; |
| @@ -54,6 +58,16 @@ |
| Analyzer analyzer = new WhitespaceAnalyzer(); |
| Similarity similarity = Similarity.getDefault(); |
| DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50); |
| + writer.setTermVectorTokenSelector(new TokenSelector(){ |
| + public boolean accept(String field, Token t) { |
| + return Character.isLowerCase(t.termText().charAt(0)); |
| + } |
| + }); |
| + writer.setPositionsTokenSelector(new TokenSelector(){ |
| + public boolean accept(String field, Token t) { |
| + return Character.isLowerCase(t.termText().charAt(0)); |
| + } |
| + }); |
| String segName = "test"; |
| writer.addDocument(segName, testDoc); |
| //After adding the document, we should be able to read it back in |
| @@ -84,6 +98,31 @@ |
| fields = doc.getFields(DocHelper.TEXT_FIELD_3_KEY); |
| assertTrue(fields != null && fields.length == 1); |
| assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_3_TEXT)); |
| + |
| + fields = doc.getFields(DocHelper.TEXT_FIELD_UTF2_KEY); |
| + assertTrue(fields != null && fields.length == 1); |
| + assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_UTF2_TEXT)); |
| + assertTrue(fields[0].isTermVectorStored()); |
| + TermFreqVector tv = reader.getTermFreqVector(0, DocHelper.TEXT_FIELD_UTF2_KEY); |
| + assertTrue(tv != null); |
| + String[] words = DocHelper.FIELD_UTF2_TEXT.split("\\s+"); |
| + String[] tvwords = tv.getTerms(); |
| + List uniques = new LinkedList(); |
| + int omitted = 0; |
| + for (int i=0; i<words.length; i++) |
| + if (!uniques.contains(words[i])) { |
| + uniques.add(words[i]); |
| + if (!Character.isLowerCase(words[i].charAt(0))) |
| + omitted++; |
| + } |
| + assertTrue(omitted!=0); |
| + assertTrue(omitted!=uniques.size()); |
| + assertEquals(uniques.size()-omitted, tvwords.length); |
| + for (int i=0; i<uniques.size(); i++) { |
| + for (int j=0; j<tvwords.length; j++) |
| + if (uniques.get(i).equals(tvwords[j])) |
| + assertTrue(Character.isLowerCase(((String)uniques.get(i)).charAt(0))); |
| + } |
| |
| // test that the norm file is not present if omitNorms is true |
| for (int i = 0; i < reader.fieldInfos.size(); i++) { |
| Index: src/test/org/apache/lucene/index/TestParallelWriter.java |
| =================================================================== |
| --- src/test/org/apache/lucene/index/TestParallelWriter.java (revision 0) |
| +++ src/test/org/apache/lucene/index/TestParallelWriter.java (revision 0) |
| @@ -0,0 +1,151 @@ |
| +/* |
| + * TestParallelWriter.java |
| + * JUnit based test |
| + * |
| + * Created on April 30, 2006, 12:34 PM |
| + */ |
| + |
| +package org.apache.lucene.index; |
| + |
| +import java.util.Arrays; |
| +import junit.framework.*; |
| +import java.io.IOException; |
| +import java.io.PrintStream; |
| +import java.util.ArrayList; |
| +import java.util.Enumeration; |
| +import java.util.HashMap; |
| +import java.util.HashSet; |
| +import java.util.List; |
| +import java.util.Map; |
| +import java.util.Set; |
| +import org.apache.lucene.analysis.Analyzer; |
| +import org.apache.lucene.analysis.standard.StandardAnalyzer; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.search.Hits; |
| +import org.apache.lucene.search.IndexSearcher; |
| +import org.apache.lucene.search.Similarity; |
| +import org.apache.lucene.search.TermQuery; |
| +import org.apache.lucene.store.Directory; |
| +import org.apache.lucene.store.RAMDirectory; |
| + |
| +/** |
| + * |
| + * @author Chuck Williams |
| + */ |
| +public class TestParallelWriter extends TestCase { |
| + |
| + ParallelWriter writer; |
| + Directory[] directories; |
| + Map<Directory, List<String>> fieldDirectories = new HashMap<Directory, List<String>>(); |
| + ParallelReader reader; |
| + IndexSearcher searcher; |
| + |
| + public TestParallelWriter(String testName) { |
| + super(testName); |
| + } |
| + |
| + protected void setUp() throws Exception { |
| + directories = new Directory[] { new RAMDirectory(), new RAMDirectory(), new RAMDirectory() }; |
| + fieldDirectories.put(directories[0], Arrays.asList("title", "body")); |
| + fieldDirectories.put(directories[1], Arrays.asList("markup")); |
| + fieldDirectories.put(directories[2], Arrays.asList("meta")); |
| + |
| + openWriter(true); |
| + |
| + Document doc1 = new Document(); |
| + doc1.add(new Field("title", "Foxes", Field.Store.YES, Field.Index.TOKENIZED)); |
| + doc1.add(new Field("body", "The quick brown fox jumped over the lazy dog", Field.Store.NO, Field.Index.TOKENIZED)); |
| + doc1.add(new Field("meta", "Animals", Field.Store.YES, Field.Index.UN_TOKENIZED)); |
| + writer.addDocument(doc1); |
| + |
| + Document doc2 = new Document(); |
| + doc2.add(new Field("title", "Galaxies", Field.Store.YES, Field.Index.TOKENIZED)); |
| + doc2.add(new Field("body", "Once upon a time in a galaxy far far away", Field.Store.NO, Field.Index.TOKENIZED)); |
| + doc2.add(new Field("meta", "Space", Field.Store.YES, Field.Index.UN_TOKENIZED)); |
| + writer.addDocument(doc2); |
| + |
| + closeWriter(); |
| + |
| + openWriter(false); |
| + openReader(); |
| + } |
| + |
| + private void openWriter(boolean create) throws IOException { |
| + writer = new ParallelWriter(fieldDirectories, new StandardAnalyzer(), create); |
| + } |
| + |
| + private void closeWriter() throws IOException { |
| + writer.close(); |
| + } |
| + |
| + private void openReader() throws IOException { |
| + reader = new ParallelReader(); |
| + for (Directory dir : directories) |
| + reader.add(IndexReader.open(dir)); |
| + searcher = new IndexSearcher(reader); |
| + } |
| + |
| + private void closeReader() throws IOException { |
| + searcher.close(); |
| + reader.close(); |
| + } |
| + |
| + protected void tearDown() throws Exception { |
| + writer.close(); |
| + reader.close(); |
| + for (Directory dir : directories) |
| + dir.close(); |
| + } |
| + |
| + public static Test suite() { |
| + TestSuite suite = new TestSuite(TestParallelWriter.class); |
| + |
| + return suite; |
| + } |
| + |
| + /** |
| + * Test of addDocument method, of class org.apache.lucene.index.ParallelWriter. |
| + */ |
| + public void test() throws Exception { |
| + System.out.println("Test ParallelWriter"); |
| + |
| + assertEquals(2, writer.docCount()); |
| + assertEquals(2, reader.numDocs()); |
| + |
| + Hits hits = searcher.search(new TermQuery(new Term("title", "foxes"))); |
| + assertEquals(1, hits.length()); |
| + Document doc = hits.doc(0); |
| + assertEquals("Animals", doc.get("meta")); |
| + |
| + hits = searcher.search(new TermQuery(new Term("body", "galaxy"))); |
| + assertEquals(1, hits.length()); |
| + doc = hits.doc(0); |
| + assertEquals("Galaxies", doc.get("title")); |
| + assertEquals("Space", doc.get("meta")); |
| + |
| + closeWriter(); |
| + reader.deleteDocuments(new Term("title", "foxes")); |
| + closeReader(); |
| + |
| + openWriter(false); |
| + doc = new Document(); |
| + doc.add(new Field("title", "Foxes", Field.Store.YES, Field.Index.TOKENIZED)); |
| + doc.add(new Field("body", "The quick brown fox jumped over the lazy dog", Field.Store.NO, Field.Index.TOKENIZED)); |
| + doc.add(new Field("meta", "Animals", Field.Store.YES, Field.Index.UN_TOKENIZED)); |
| + doc.add(new Field("markup", "Interesting", Field.Store.YES, Field.Index.UN_TOKENIZED)); |
| + writer.addDocument(doc); |
| + |
| + closeWriter(); |
| + openWriter(false); |
| + openReader(); |
| + |
| + hits = searcher.search(new TermQuery(new Term("markup", "Interesting"))); |
| + assertEquals(1, hits.length()); |
| + doc = hits.doc(0); |
| + assertEquals("Animals", doc.get("meta")); |
| + assertEquals("Foxes", doc.get("title")); |
| + assertEquals("Interesting", doc.get("markup")); |
| + } |
| + |
| +} |
| Index: src/java/org/apache/lucene/analysis/TokenSelector.java |
| =================================================================== |
| --- src/java/org/apache/lucene/analysis/TokenSelector.java (revision 0) |
| +++ src/java/org/apache/lucene/analysis/TokenSelector.java (revision 0) |
| @@ -0,0 +1,24 @@ |
| +/* |
| + * TokenSelector.java |
| + * |
| + * Created on June 13, 2006, 12:18 PM |
| + * |
| + */ |
| + |
| +package org.apache.lucene.analysis; |
| + |
| +/** |
| + * An interface for selecting a subset of a token stream |
| + * |
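| + * <p>For example (an illustrative sketch), a selector that accepts only tokens whose |
| + * text begins with a lower-case character: |
| + * <pre> |
| + *   TokenSelector lowerCaseOnly = new TokenSelector() { |
| + *     public boolean accept(String fieldName, Token token) { |
| + *       return Character.isLowerCase(token.termText().charAt(0)); |
| + *     } |
| + *   }; |
| + * </pre> |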
| + * @author Chuck Williams |
| + */ |
| +public interface TokenSelector { |
| + |
| + /** Determine if a token should be selected |
| + * @param fieldName field in which token was found |
| + * @param token a token |
| + * @return true iff token should be selected |
| + */ |
| + public boolean accept(String fieldName, Token token); |
| + |
| +} |
| Index: src/java/org/apache/lucene/analysis/PerFieldTokenSelectorWrapper.java |
| =================================================================== |
| --- src/java/org/apache/lucene/analysis/PerFieldTokenSelectorWrapper.java (revision 0) |
| +++ src/java/org/apache/lucene/analysis/PerFieldTokenSelectorWrapper.java (revision 0) |
| @@ -0,0 +1,44 @@ |
| +/* |
| + * PerFieldTokenSelectorWrapper.java |
| + * |
| + * Created on June 13, 2006, 4:09 PM |
| + * |
| + */ |
| + |
| +package org.apache.lucene.analysis; |
| + |
| +import java.util.HashMap; |
| +import java.util.Map; |
| + |
| +/** |
| + * Expert: TokenSelector that implements a mapping from field names to TokenSelectors |
| + * |
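| + * <p>A minimal usage sketch (the field name and selectors are illustrative): |
| + * <pre> |
| + *   PerFieldTokenSelectorWrapper wrapper = new PerFieldTokenSelectorWrapper(null); // null default accepts all tokens |
| + *   wrapper.addSelector("body", bodySelector);  // bodySelector is some existing TokenSelector |
| + *   indexWriter.setTermVectorTokenSelector(wrapper); |
| + * </pre> |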
| + * @author Chuck Williams |
| + */ |
| +public class PerFieldTokenSelectorWrapper implements TokenSelector { |
| + |
| + private Map selectors = new HashMap(); |
| + private TokenSelector defaultSelector; |
| + |
| + /** Expert: create a PerFieldTokenSelector with given default selector (null means select all) */ |
| + public PerFieldTokenSelectorWrapper(TokenSelector defaultSelector) { |
| + this.defaultSelector = defaultSelector; |
| + } |
| + |
| + /** Add a token selector for the named field */ |
| + public void addSelector(String fieldName, TokenSelector selector) { |
| + selectors.put(fieldName, selector); |
| + } |
| + |
| + /** Determine if token is accepted by fieldName */ |
| + public boolean accept(String fieldName, Token token) { |
| + TokenSelector selector = (TokenSelector) selectors.get(fieldName); |
| + if (selector!=null) |
| + return selector.accept(fieldName, token); |
| + else if (defaultSelector!=null) |
| + return defaultSelector.accept(fieldName, token); |
| + else |
| + return true; |
| + } |
| + |
| +} |
| \ No newline at end of file |
| Index: src/java/org/apache/lucene/index/IndexWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/IndexWriter.java (revision 414705) |
| +++ src/java/org/apache/lucene/index/IndexWriter.java (working copy) |
| @@ -17,6 +17,7 @@ |
| */ |
| |
| import org.apache.lucene.analysis.Analyzer; |
| +import org.apache.lucene.analysis.TokenSelector; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.search.Similarity; |
| import org.apache.lucene.store.Directory; |
| @@ -56,7 +57,7 @@ |
| @see IndexModifier IndexModifier supports the important methods of IndexWriter plus deletion |
| */ |
| |
| -public class IndexWriter { |
| +public class IndexWriter implements Writable { |
| |
| /** |
| * Default value for the write lock timeout (1,000). |
| @@ -100,8 +101,10 @@ |
| */ |
| public final static int DEFAULT_TERM_INDEX_INTERVAL = 128; |
| |
| - private Directory directory; // where this index resides |
| - private Analyzer analyzer; // how to analyze text |
| + private Directory directory; // where this index resides |
| + private Analyzer analyzer; // how to analyze text |
| + private TokenSelector termVectorTokenSelector; // subset of token stream stored in term vectors |
| + private TokenSelector positionsTokenSelector; // subset of token stream for which positions are stored |
| |
| private Similarity similarity = Similarity.getDefault(); // how to normalize |
| |
| @@ -153,6 +156,38 @@ |
| return this.similarity; |
| } |
| |
| + /** Expert: Set the TokenSelector used to determine subset of tokens stored in term vectors. |
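| + * <p>For example (illustrative), to restrict term vectors to tokens that begin with a |
| + * lower-case character: |
| + * <pre> |
| + *   writer.setTermVectorTokenSelector(new TokenSelector() { |
| + *     public boolean accept(String field, Token t) { |
| + *       return Character.isLowerCase(t.termText().charAt(0)); |
| + *     } |
| + *   }); |
| + * </pre> |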
| + * @param selector the term vector TokenSelector |
| + */ |
| + public void setTermVectorTokenSelector(TokenSelector selector) { |
| + this.termVectorTokenSelector = selector; |
| + } |
| + |
| + /** Expert: Get the TokenSelector used to determine the subset of tokens stored in term vectors. |
| + * @return the TokenSelector used to determine term vector tokens |
| + */ |
| + public TokenSelector getTermVectorTokenSelector() { |
| + return termVectorTokenSelector; |
| + } |
| + |
| + /** Expert: Set the TokenSelector used to determine subset of tokens for which positions are stored. |
| + * (At least one position is always stored for each term in each doc to ensure the term stays in |
| + * the index so long as any docs reference it) |
| + * @param selector the positions TokenSelector |
| + */ |
| + public void setPositionsTokenSelector(TokenSelector selector) { |
| + this.positionsTokenSelector = selector; |
| + } |
| + |
| + /** Expert: Get the TokenSelector used to determine the subset of tokens for which freq and positions are stored. |
| + * (At least one position is always stored for each term in each doc to ensure the term stays in |
| + * the index so long as any docs reference it) |
| + * @return the positions TokenSelector |
| + */ |
| + public TokenSelector getPositionsTokenSelector() { |
| + return positionsTokenSelector; |
| + } |
| + |
| /** Expert: Set the interval between indexed terms. Large values cause less |
| * memory to be used by IndexReader, but slow random-access to terms. Small |
| * values cause more memory to be used by an IndexReader, and speed |
| @@ -471,6 +506,8 @@ |
| public void addDocument(Document doc, Analyzer analyzer) throws IOException { |
| DocumentWriter dw = |
| new DocumentWriter(ramDirectory, analyzer, this); |
| + dw.setTermVectorTokenSelector(termVectorTokenSelector); |
| + dw.setPositionsTokenSelector(positionsTokenSelector); |
| dw.setInfoStream(infoStream); |
| String segmentName = newSegmentName(); |
| dw.addDocument(segmentName, doc); |
| Index: src/java/org/apache/lucene/index/Writable.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/Writable.java (revision 0) |
| +++ src/java/org/apache/lucene/index/Writable.java (revision 0) |
| @@ -0,0 +1,262 @@ |
| +/* |
| + * Writable.java |
| + * |
| + * Created on April 28, 2006, 6:10 PM |
| + * |
| + */ |
| + |
| +package org.apache.lucene.index; |
| + |
| +import java.io.IOException; |
| +import java.io.PrintStream; |
| +import org.apache.lucene.analysis.Analyzer; |
| +import org.apache.lucene.analysis.TokenSelector; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.search.Similarity; |
| +import org.apache.lucene.store.Directory; |
| + |
| +/** |
| + * An interface that abstracts index writers (e.g., IndexWriter, ParallelWriter) |
| + * |
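| + * <p>An illustrative sketch of writing against this interface rather than a concrete class |
| + * (the directory, analyzer, and document variables are assumed to exist): |
| + * <pre> |
| + *   Writable writer = new IndexWriter(directory, analyzer, true);  // or a ParallelWriter |
| + *   writer.setUseCompoundFile(true); |
| + *   writer.addDocument(doc); |
| + *   writer.optimize(); |
| + *   writer.close(); |
| + * </pre> |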
| + * @author Chuck Williams |
| + */ |
| +public interface Writable { |
| + |
| + /** |
| + * Adds a document to this index. If the document contains more than |
| + * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are |
| + * discarded. |
| + */ |
| + public void addDocument(Document doc) throws IOException; |
| + |
| + /** |
| + * Adds a document to this index, using the provided analyzer instead of the |
| + * value of {@link #getAnalyzer()}. If the document contains more than |
| + * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are |
| + * discarded. |
| + */ |
| + public void addDocument(Document doc, Analyzer analyzer) throws IOException; |
| + |
| + /** |
| + * Returns the number of documents currently in this index. |
| + */ |
| + public int docCount(); |
| + |
| + /** |
| + * Merges all segments together into a single segment, optimizing an index |
| + * for search. |
| + */ |
| + public void optimize() throws IOException; |
| + |
| + /** |
| + * Flushes all changes to an index and closes all associated files. |
| + */ |
| + public void close() throws IOException; |
| + |
| + /** |
| + * Returns the analyzer used by this index. |
| + */ |
| + public Analyzer getAnalyzer(); |
| + |
| + |
| + /** |
| + * Setting to turn on usage of a compound file. When on, multiple files |
| + * for each segment are merged into a single file once the segment creation |
| + * is finished. This is done regardless of what directory is in use. |
| + */ |
| + public void setUseCompoundFile(boolean value); |
| + |
| + /** |
| + * Get the current setting of whether to use the compound file format. |
| + * Note that this just returns the value you set with setUseCompoundFile(boolean) |
| + * or the default. You cannot use this to query the status of an existing index. |
| + * |
| + * @see #setUseCompoundFile(boolean) |
| + */ |
| + public boolean getUseCompoundFile(); |
| + |
| + /** |
| + * Expert: Set the Similarity implementation used by this IndexWriter. |
| + * |
| + * @see Similarity#setDefault(Similarity) |
| + */ |
| + public void setSimilarity(Similarity similarity); |
| + |
| + /** |
| + * Expert: Return the Similarity implementation used by this IndexWriter. |
| + * |
| + * <p>This defaults to the current value of {@link Similarity#getDefault()}. |
| + */ |
| + public Similarity getSimilarity(); |
| + |
| + /** |
| + * Expert: Set the interval between indexed terms. Large values cause less |
| + * memory to be used by IndexReader, but slow random-access to terms. Small |
| + * values cause more memory to be used by an IndexReader, and speed |
| + * random-access to terms. |
| + * |
| + * This parameter determines the amount of computation required per query |
| + * term, regardless of the number of documents that contain that term. In |
| + * particular, it is the maximum number of other terms that must be |
| + * scanned before a term is located and its frequency and position information |
| + * may be processed. In a large index with user-entered query terms, query |
| + * processing time is likely to be dominated not by term lookup but rather |
| + * by the processing of frequency and positional data. In a small index |
| + * or when many uncommon query terms are generated (e.g., by wildcard |
| + * queries) term lookup may become a dominant cost. |
| + * |
| + * In particular, <code>numUniqueTerms/interval</code> terms are read into |
| + * memory by an IndexReader, and, on average, <code>interval/2</code> terms |
| + * must be scanned for each random term access. |
| + * |
| + * @see #DEFAULT_TERM_INDEX_INTERVAL |
| + */ |
| + public void setTermIndexInterval(int interval); |
| + |
| + /** |
| + * Expert: Return the interval between indexed terms. |
| + * |
| + * @see #setTermIndexInterval(int) |
| + */ |
| + public int getTermIndexInterval(); |
| + |
| + /** |
| + * Determines the minimal number of documents required before the buffered |
| + * in-memory documents are merged and a new Segment is created. |
| + * Since Documents are merged in a {@link org.apache.lucene.store.RAMDirectory}, |
| + * a large value gives faster indexing. At the same time, mergeFactor limits |
| + * the number of files open in a FSDirectory. |
| + * |
| + * <p> The default value is 10. |
| + * |
| + * |
| + * @throws IllegalArgumentException if maxBufferedDocs is smaller than 2 |
| + */ |
| + public void setMaxBufferedDocs(int maxBufferedDocs); |
| + |
| + /** |
| + * |
| + * |
| + * @see #setMaxBufferedDocs |
| + */ |
| + public int getMaxBufferedDocs(); |
| + |
| + /** |
| + * The maximum number of terms that will be indexed for a single field in a |
| + * document. This limits the amount of memory required for indexing, so that |
| + * collections with very large files will not crash the indexing process by |
| + * running out of memory.<p/> |
| + * Note that this effectively truncates large documents, excluding from the |
| + * index terms that occur further in the document. If you know your source |
| + * documents are large, be sure to set this value high enough to accommodate |
| + * the expected size. If you set it to Integer.MAX_VALUE, then the only limit |
| + * is your memory, but you should anticipate an OutOfMemoryError.<p/> |
| + * By default, no more than 10,000 terms will be indexed for a field. |
| + */ |
| + public void setMaxFieldLength(int maxFieldLength); |
| + |
| + /** |
| + * |
| + * |
| + * @see #setMaxFieldLength |
| + */ |
| + public int getMaxFieldLength(); |
| + |
| + /** |
| + * Determines the largest number of documents ever merged by addDocument(). |
| + * Small values (e.g., less than 10,000) are best for interactive indexing, |
| + * as this limits the length of pauses while indexing to a few seconds. |
| + * Larger values are best for batched indexing and speedier searches. |
| + * |
| + * <p>The default value is {@link Integer#MAX_VALUE}. |
| + */ |
| + public void setMaxMergeDocs(int maxMergeDocs); |
| + |
| + /** |
| + * |
| + * |
| + * @see #setMaxMergeDocs |
| + */ |
| + public int getMaxMergeDocs(); |
| + |
| + /** |
| + * Determines how often segment indices are merged by addDocument(). With |
| + * smaller values, less RAM is used while indexing, and searches on |
| + * unoptimized indices are faster, but indexing speed is slower. With larger |
| + * values, more RAM is used during indexing, and while searches on unoptimized |
| + * indices are slower, indexing is faster. Thus larger values (> 10) are best |
| + * for batch index creation, and smaller values (< 10) for indices that are |
| + * interactively maintained. |
| + * |
| + * <p>This must never be less than 2. The default value is 10. |
| + */ |
| + public void setMergeFactor(int mergeFactor); |
| + |
| + /** |
| + * |
| + * |
| + * @see #setMergeFactor |
| + */ |
| + public int getMergeFactor(); |
| + |
| + /** |
| + * Sets the maximum time to wait for a write lock (in milliseconds). |
| + */ |
| + public void setWriteLockTimeout(long writeLockTimeout); |
| + |
| + /** |
| + * |
| + * |
| + * @see #setWriteLockTimeout |
| + */ |
| + public long getWriteLockTimeout(); |
| + |
| + /** |
| + * Sets the maximum time to wait for a commit lock (in milliseconds). |
| + */ |
| + public void setCommitLockTimeout(long commitLockTimeout); |
| + |
| + /** |
| + * |
| + * |
| + * @see #setCommitLockTimeout |
| + */ |
| + public long getCommitLockTimeout(); |
| + |
| + /** Expert: Set the TokenSelector used to determine subset of tokens stored in term vectors. |
| + * @param selector the term vector TokenSelector |
| + */ |
| + public void setTermVectorTokenSelector(TokenSelector selector); |
| + |
| + /** Expert: Get the TokenSelector used to determine the subset of tokens stored in term vectors. |
| + * @return the TokenSelector used to determine term vector tokens |
| + */ |
| + public TokenSelector getTermVectorTokenSelector(); |
| + |
| + /** Expert: Set the TokenSelector used to determine subset of tokens for which positions are stored. |
| + * (At least one position is always stored for each term in each doc to ensure the term stays in |
| + * the index so long as any docs reference it) |
| + * @param selector the positions TokenSelector |
| + */ |
| + public void setPositionsTokenSelector(TokenSelector selector); |
| + |
| + /** Expert: Get the TokenSelector used to determine the subset of tokens for which freq and positions are stored. |
| + * (At least one position is always stored for each term in each doc to ensure the term stays in |
| + * the index so long as any docs reference it) |
| + * @return the positions TokenSelector |
| + */ |
| + public TokenSelector getPositionsTokenSelector(); |
| + |
| + /** If non-null, information about merges and a message when |
| + * maxFieldLength is reached will be printed to this. |
| + */ |
| + public void setInfoStream(PrintStream infoStream); |
| + |
| + /** |
| + * |
| + * |
| + * @see #setInfoStream |
| + */ |
| + public PrintStream getInfoStream(); |
| + |
| +} |
| Index: src/java/org/apache/lucene/index/DocumentWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/DocumentWriter.java (revision 414705) |
| +++ src/java/org/apache/lucene/index/DocumentWriter.java (working copy) |
| @@ -17,6 +17,7 @@ |
| */ |
| |
| import org.apache.lucene.analysis.Analyzer; |
| +import org.apache.lucene.analysis.TokenSelector; |
| import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.document.Document; |
| @@ -35,6 +36,8 @@ |
| |
| final class DocumentWriter { |
| private Analyzer analyzer; |
| + private TokenSelector termVectorTokenSelector; |
| + private TokenSelector positionsTokenSelector; |
| private Directory directory; |
| private Similarity similarity; |
| private FieldInfos fieldInfos; |
| @@ -142,9 +145,9 @@ |
| if (!field.isTokenized()) { // un-tokenized field |
| String stringValue = field.stringValue(); |
| if(field.isStoreOffsetWithTermVector()) |
| - addPosition(fieldName, stringValue, position++, new TermVectorOffsetInfo(offset, offset + stringValue.length())); |
| + addPosition(fieldName, stringValue, position++, new TermVectorOffsetInfo(offset, offset + stringValue.length()), false, false); |
| else |
| - addPosition(fieldName, stringValue, position++, null); |
| + addPosition(fieldName, stringValue, position++, null, false, false); |
| offset += stringValue.length(); |
| length++; |
| } else |
| @@ -165,10 +168,16 @@ |
| for (Token t = stream.next(); t != null; t = stream.next()) { |
| position += (t.getPositionIncrement() - 1); |
| |
| - if(field.isStoreOffsetWithTermVector()) |
| - addPosition(fieldName, t.termText(), position++, new TermVectorOffsetInfo(offset + t.startOffset(), offset + t.endOffset())); |
| - else |
| - addPosition(fieldName, t.termText(), position++, null); |
| + boolean omittv = false, omitpos = false; |
| + if (termVectorTokenSelector!=null && !termVectorTokenSelector.accept(field.name(), t)) |
| + omittv = true; |
| + if (positionsTokenSelector != null && !positionsTokenSelector.accept(field.name(), t)) |
| + omitpos = true; |
| + |
| + addPosition(fieldName, t.termText(), position++, |
| + field.isStoreOffsetWithTermVector() && !omittv ? new TermVectorOffsetInfo(offset + t.startOffset(), offset + t.endOffset()) |
| + : null, |
| + omittv, omitpos); |
| |
| lastToken = t; |
| if (++length > maxFieldLength) { |
| @@ -196,20 +205,24 @@ |
| |
| private final Term termBuffer = new Term("", ""); // avoid consing |
| |
| - private final void addPosition(String field, String text, int position, TermVectorOffsetInfo offset) { |
| + private final void addPosition(String field, String text, int position, TermVectorOffsetInfo offset, |
| + boolean omitFromTermVector, boolean omitPosition) { |
| termBuffer.set(field, text); |
| //System.out.println("Offset: " + offset); |
| Posting ti = (Posting) postingTable.get(termBuffer); |
| if (ti != null) { // word seen before |
| int freq = ti.freq; |
| - if (ti.positions.length == freq) { // positions array is full |
| - int[] newPositions = new int[freq * 2]; // double size |
| - int[] positions = ti.positions; |
| - for (int i = 0; i < freq; i++) // copy old positions to new |
| - newPositions[i] = positions[i]; |
| - ti.positions = newPositions; |
| + |
| + if (!omitPosition) { |
| + if (ti.positions.length == freq) { // positions array is full |
| + int[] newPositions = new int[freq * 2]; // double size |
| + int[] positions = ti.positions; |
| + for (int i = 0; i < freq; i++) // copy old positions to new |
| + newPositions[i] = positions[i]; |
| + ti.positions = newPositions; |
| + } |
| + ti.positions[freq] = position; // add new position |
| } |
| - ti.positions[freq] = position; // add new position |
| |
| if (offset != null) { |
| if (ti.offsets.length == freq){ |
| @@ -223,10 +236,12 @@ |
| } |
| ti.offsets[freq] = offset; |
| } |
| - ti.freq = freq + 1; // update frequency |
| - } else { // word not seen before |
| + |
| + if (!omitPosition) |
| + ti.freq = freq + 1; // update frequency |
| + } else { // word not seen before |
| Term term = new Term(field, text, false); |
| - postingTable.put(term, new Posting(term, position, offset)); |
| + postingTable.put(term, new Posting(term, position, offset, omitFromTermVector)); |
| } |
| } |
| |
| @@ -351,7 +366,7 @@ |
| termVectorWriter.closeField(); |
| } |
| } |
| - if (termVectorWriter != null && termVectorWriter.isFieldOpen()) { |
| + if (termVectorWriter != null && termVectorWriter.isFieldOpen() && !posting.omitFromTermVector) { |
| termVectorWriter.addTerm(posting.term.text(), postingFreq, posting.positions, posting.offsets); |
| } |
| } |
| @@ -390,6 +405,16 @@ |
| this.infoStream = infoStream; |
| } |
| |
| + /** If non-null, this will be used to select which tokens are stored in term vectors */ |
| + void setTermVectorTokenSelector(TokenSelector selector) { |
| + this.termVectorTokenSelector = selector; |
| + } |
| + |
| + /** If non-null, this will be used to select which tokens have positions stored in the index. */ |
| + void setPositionsTokenSelector(TokenSelector selector) { |
| + this.positionsTokenSelector = selector; |
| + } |
| + |
| } |
| |
| final class Posting { // info about a Term in a doc |
| @@ -397,17 +422,17 @@ |
| int freq; // its frequency in doc |
| int[] positions; // positions it occurs at |
| TermVectorOffsetInfo [] offsets; |
| + boolean omitFromTermVector; // if true, omit from term vector |
| |
| - Posting(Term t, int position, TermVectorOffsetInfo offset) { |
| + Posting(Term t, int position, TermVectorOffsetInfo offset, boolean omitFromTermVector) { |
| term = t; |
| freq = 1; |
| positions = new int[1]; |
| positions[0] = position; |
| - if(offset != null){ |
| - offsets = new TermVectorOffsetInfo[1]; |
| - offsets[0] = offset; |
| + if(offset != null) { |
| + offsets = new TermVectorOffsetInfo[1]; |
| + offsets[0] = offset; |
| } |
| - else |
| - offsets = null; |
| + this.omitFromTermVector = omitFromTermVector; |
| } |
| } |
| Index: src/java/org/apache/lucene/index/ParallelWriter.java |
| =================================================================== |
| --- src/java/org/apache/lucene/index/ParallelWriter.java (revision 0) |
| +++ src/java/org/apache/lucene/index/ParallelWriter.java (revision 0) |
| @@ -0,0 +1,345 @@ |
| +/* |
| + * ParallelWriter.java |
| + * |
| + * Created on April 28, 2006, 7:07 PM |
| + * |
| + */ |
| + |
| +package org.apache.lucene.index; |
| + |
| +import java.io.IOException; |
| +import java.io.PrintStream; |
| +import java.util.Enumeration; |
| +import java.util.HashMap; |
| +import java.util.List; |
| +import java.util.Map; |
| +import java.util.concurrent.CountDownLatch; |
| +import org.apache.lucene.analysis.Analyzer; |
| +import org.apache.lucene.analysis.TokenSelector; |
| +import org.apache.lucene.document.Document; |
| +import org.apache.lucene.document.Field; |
| +import org.apache.lucene.search.Similarity; |
| +import org.apache.lucene.store.Directory; |
| + |
| +/** |
| + * ParallelWriter is a companion to ParallelReader, although as with IndexWriter it only supports indexes stored in a Directory. |
| + * The interface is at the field level. A map from directories to lists of fields is provided to create the ParallelWriter, |
| + * which then creates an IndexWriter for each specified directory and operates on each field of a document using the IndexWriter |
| + * for the directory to which that field is mapped. This mapping allows an application to configure its use of parallel sub- |
| + * indexes independently from the rest of its processing. |
| + * |
| + * This implementation single-threads calls to addDocument(), but does the sub-document writes in parallel. Users of this class |
| + * must ensure that the ParallelReader is never reopened while adding a new document, and must deal with recovery if exceptions |
| + * occur while adding a document. |
| + * |
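| + * <p>A minimal usage sketch (the directories, fields, and document are illustrative): |
| + * <pre> |
| + *   Map&lt;Directory,List&lt;String&gt;&gt; directoryFieldsMap = new HashMap&lt;Directory,List&lt;String&gt;&gt;(); |
| + *   directoryFieldsMap.put(dir1, Arrays.asList("title", "body")); |
| + *   directoryFieldsMap.put(dir2, Arrays.asList("meta")); |
| + *   ParallelWriter writer = new ParallelWriter(directoryFieldsMap, new StandardAnalyzer(), true); |
| + *   writer.addDocument(doc); |
| + *   writer.close(); |
| + * </pre> |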
| + * @author Chuck Williams |
| + */ |
| +public class ParallelWriter implements Writable { |
| + |
| + IndexWriter[] writers; // All IndexWriters |
| + IOException exception; // If any writer gets an exception, this is stored here (only one needed) |
| + Map<String,IndexWriter> writerMap; // Field name --> IndexWriter that stores that field |
| + IndexWriter oneWriter; // An arbitrarily chosen IndexWriter -- used to get config info which is the same for all IndexWriters |
| + Analyzer analyzer; // The Analyzer applied to all tokenized field content |
| + |
| + private static final Document EMPTY_DOCUMENT = new Document(); // Empty document used to sync doc id's when a document is added without fields for all indexes |
| + |
| + /** |
| + * Create a new ParallelWriter |
| + * |
| + * @param directoryFieldsMap specifies the directory to use to store each field, multiple directories creating parallel indexes |
| + * @param analyzer applied to all tokenized field content |
| + * @param create create new indexes in directories iff true |
| + * @throws IOException if the IndexWriters cannot be created |
| + */ |
| + public ParallelWriter(Map<Directory,List<String>> directoryFieldsMap, Analyzer analyzer, boolean create) throws IOException { |
| + this.analyzer = analyzer; |
| + writers = new IndexWriter[directoryFieldsMap.size()]; |
| + writerMap = new HashMap<String,IndexWriter>(directoryFieldsMap.size()*5/3); |
| + int i=0; |
| + for (Map.Entry<Directory,List<String>> entry : directoryFieldsMap.entrySet()) { |
| + IndexWriter writer = new IndexWriter(entry.getKey(), analyzer, create); |
| + writers[i++] = oneWriter = writer; |
| + for (String field : entry.getValue()) |
| + writerMap.put(field, writer); |
| + } |
| + } |
| + |
| + /** Invert a directoryFieldsMap |
| + * @param directoryFieldsMap a map from directories to lists of the fields they contain |
| + * @return a map from each field to its directory |
| + */ |
| + public static Map<String, Directory> invertDirectoryFieldsMap(Map<Directory,List<String>> directoryFieldsMap) { |
| + Map<String, Directory> fieldDirectoryMap = new HashMap<String, Directory>(); |
| + for (Map.Entry<Directory, List<String>> entry : directoryFieldsMap.entrySet()) |
| + for (String field : entry.getValue()) |
| + fieldDirectoryMap.put(field, entry.getKey()); |
| + return fieldDirectoryMap; |
| + } |
| + |
| + /** Add document to this index by adding subdocuments with the mapped fields for each parallel index. This method is synchronized because the |
| + * parallel indexes must be maintained such that equal doc id's in different indexes hold fields for the same document. |
| + * This synchronization could have a negative effect on batch indexing performance. Users of this method must ensure that the ParallelReader |
| + * is not re-opened within the scope of this method as it would likely find the sub-indexes out of sync. |
| + * @param doc the document to add |
| + * @throws IOException if there are problems writing the indexes. <strong>WARNING: If this happens it is bad.</strong> The doc-id's in the |
| + * indexes are likely out of sync. This situation requires repair to resync the doc ids in each document set. Possible |
| + * repair actions include rebuilding the indexes or deleting documents at the end to restore equal document sets and then |
| + * optimizing to restore equal doc ids. |
| + * @throws RuntimeException if the threads writing to the sub-indexes are interrupted. |
| + */ |
| + public void addDocument(Document doc) throws IOException { |
| + addDocument(doc, analyzer); |
| + } |
| + |
| + /** Add document to this index by adding subdocuments with the mapped fields for each parallel index. This method is synchronized because the |
| + * parallel indexes must be maintained such that equal doc id's in different indexes hold fields for the same document. |
| + * This synchronization could have a negative effect on batch indexing performance. Users of this method must ensure that the ParallelReader |
| + * is not re-opened within the scope of this method as it would likely find the sub-indexes out of sync. |
| + * @param doc the document to add |
| + * @param analyzer apply special analyzer to this document rather than the one for the index (discouraged -- use addDocument(doc)) |
| + * @throws IOException if there are problems writing the indexes. <strong>WARNING: If this happens it is bad.</strong> The doc-id's in the |
| + * indexes are likely out of sync. This situation requires repair to resync the doc ids in each document set. Possible |
| + * repair actions include rebuilding the indexes or deleting documents at the end to restore equal document sets and then |
| + * optimizing to restore equal doc ids. |
| + * @throws RuntimeException if the threads writing to the sub-indexes are interrupted. |
| + */ |
| + public synchronized void addDocument(Document doc, Analyzer analyzer) throws IOException { |
| + Map<IndexWriter,Document> documentMap = new HashMap<IndexWriter,Document>(writers.length*5/3); |
| + Enumeration<Field> fields = doc.fields(); |
| + while (fields.hasMoreElements()) { |
| + Field field = fields.nextElement(); |
| + IndexWriter writer = writerMap.get(field.name()); |
| + if (writer==null) |
| + throw new RuntimeException(new UnknownFieldException("Unregistered field: " + field.name())); |
| + Document subdoc = documentMap.get(writer); |
| + if (subdoc==null) |
| + documentMap.put(writer, subdoc = new Document()); |
| + subdoc.add(field); |
| + } |
| + CountDownLatch latch = new CountDownLatch(writers.length); |
| + exception = null; |
| + for (IndexWriter writer : writers) { |
| + Document subdoc = documentMap.get(writer); |
| + if (subdoc==null) // Must have a document in each parallel index to sync doc id's |
| + subdoc = EMPTY_DOCUMENT; |
| + new Thread(new WriterWorker(writer, subdoc, latch)).start(); |
| + } |
| + try { |
| + latch.await(); |
| + } catch (InterruptedException e) { |
| + throw new RuntimeException("Interrupted while writing subdocuments!", e); |
| + } |
| + if (exception != null) |
| + throw exception; |
| + } |
| + |
| + // Write a sub-document to a sub-index and record any exception |
| + private class WriterWorker implements Runnable { |
| + |
| + private IndexWriter writer; |
| + private Document document; |
| + private CountDownLatch latch; |
| + |
| + private WriterWorker(IndexWriter writer, Document document, CountDownLatch latch) { |
| + this.writer = writer; |
| + this.document = document; |
| + this.latch = latch; |
| + } |
| + |
| + public void run() { |
| + try { |
| + writer.addDocument(document); |
| + } catch (IOException e) { |
| + exception = e; |
| + } finally { |
| + latch.countDown(); |
| + } |
| + } |
| + |
| + } |
| + |
| + /** Obtain the number of documents in this index, which is the same for each parallel index. */ |
| + public int docCount() { |
| + return oneWriter.docCount(); |
| + } |
| + |
| + /** Optimize all parallel indexes. This is synchronized to keep all index doc-id's synced up */ |
| + public synchronized void optimize() throws IOException { |
| + for (IndexWriter writer : writers) |
| + writer.optimize(); |
| + } |
| + |
| + /** Close all parallel indexes. Note that the provided directories are not closed. Synchronized. */ |
| + public synchronized void close() throws IOException { |
| + for (IndexWriter writer : writers) |
| + writer.close(); |
| + } |
| + |
| + /** Getter for analyzer provided to the constructor */ |
| + public Analyzer getAnalyzer() { |
| + return analyzer; |
| + } |
| + |
| + /** Set whether or not to use compound file format in every parallel index */ |
| + public void setUseCompoundFile(boolean value) { |
| + for (IndexWriter writer : writers) |
| + writer.setUseCompoundFile(value); |
| + } |
| + |
| + /** Get the compound file usage decision, same for every parallel index */ |
| + public boolean getUseCompoundFile() { |
| + return oneWriter.getUseCompoundFile(); |
| + } |
| + |
| + /** Set similarity to use for every parallel index */ |
| + public void setSimilarity(Similarity similarity) { |
| + for (IndexWriter writer : writers) |
| + writer.setSimilarity(similarity); |
| + } |
| + |
| + /** Get similarity, which is used by every parallel index */ |
| + public Similarity getSimilarity() { |
| + return oneWriter.getSimilarity(); |
| + } |
| + |
| + /** Set the termIndexInterval used for every parallel index */ |
| + public void setTermIndexInterval(int interval) { |
| + for (IndexWriter writer : writers) |
| + writer.setTermIndexInterval(interval); |
| + } |
| + |
| + /** Get the termIndexInterval, which is used by every parallel index */ |
| + public int getTermIndexInterval() { |
| + return oneWriter.getTermIndexInterval(); |
| + } |
| + |
| + /** Set maxBufferedDocs for every parallel index */ |
| + public void setMaxBufferedDocs(int maxBufferedDocs) { |
| + for (IndexWriter writer : writers) |
| + writer.setMaxBufferedDocs(maxBufferedDocs); |
| + } |
| + |
| + /** get maxBufferedDocs, same for every parallel index */ |
| + public int getMaxBufferedDocs() { |
| + return oneWriter.getMaxBufferedDocs(); |
| + } |
| + |
| + /** Set maxFieldLength to use for every parallel index */ |
| + public void setMaxFieldLength(int maxFieldLength) { |
| + for (IndexWriter writer : writers) |
| + writer.setMaxFieldLength(maxFieldLength); |
| + } |
| + |
| + /** Get maxFieldLength, same for every parallel index */ |
| + public int getMaxFieldLength() { |
| + return oneWriter.getMaxFieldLength(); |
| + } |
| + |
| + /** Set maxMergeDocs for every parallel index */ |
| + public void setMaxMergeDocs(int maxMergeDocs) { |
| + for (IndexWriter writer : writers) |
| + writer.setMaxMergeDocs(maxMergeDocs); |
| + } |
| + |
| + /** Get max merge docs, same for every parallel index */ |
| + public int getMaxMergeDocs() { |
| + return oneWriter.getMaxMergeDocs(); |
| + } |
| + |
| + /** Set merge factor for every parallel index */ |
| + public void setMergeFactor(int mergeFactor) { |
| + for (IndexWriter writer : writers) |
| + writer.setMergeFactor(mergeFactor); |
| + } |
| + |
| + /** Get merge factor, same for every parallel index */ |
| + public int getMergeFactor() { |
| + return oneWriter.getMergeFactor(); |
| + } |
| + |
| + /** Set write lock timeout (millis) for every parallel index */ |
| + public void setWriteLockTimeout(long writeLockTimeout) { |
| + for (IndexWriter writer : writers) |
| + writer.setWriteLockTimeout(writeLockTimeout); |
| + } |
| + |
| + /** Get write lock timeout, same for every parallel index */ |
| + public long getWriteLockTimeout() { |
| + return oneWriter.getWriteLockTimeout(); |
| + } |
| + |
| + /** Set commit lock timeout for every parallel index */ |
| + public void setCommitLockTimeout(long commitLockTimeout) { |
| + for (IndexWriter writer : writers) |
| + writer.setCommitLockTimeout(commitLockTimeout); |
| + } |
| + |
| + /** Get commit lock timeout, same for every parallel index */ |
| + public long getCommitLockTimeout() { |
| + return oneWriter.getCommitLockTimeout(); |
| + } |
| + |
| + /** Set the term vector TokenSelector for every parallel index */ |
| + public void setTermVectorTokenSelector(TokenSelector selector) { |
| + for (IndexWriter writer : writers) |
| + writer.setTermVectorTokenSelector(selector); |
| + } |
| + |
| + /** Get the term vector TokenSelector, which is the same for every parallel index */ |
| + public TokenSelector getTermVectorTokenSelector() { |
| + return oneWriter.getTermVectorTokenSelector(); |
| + } |
| + |
| + /** Set positions TokenSelector for every parallel index */ |
| + public void setPositionsTokenSelector(TokenSelector selector) { |
| + for (IndexWriter writer : writers) |
| + writer.setPositionsTokenSelector(selector); |
| + } |
| + |
| + /** Get positions TokenSelector, same for every parallel index */ |
| + public TokenSelector getPositionsTokenSelector() { |
| + return oneWriter.getPositionsTokenSelector(); |
| + } |
| + |
| + /** Unsupported. use setInfoStream(field, infoStream) */ |
| + public void setInfoStream(PrintStream infoStream) { |
| + throw new UnsupportedOperationException(); |
| + } |
| + |
| + /** Set an info stream for the IndexWriter managing a specified field. The info stream receives information about field truncations, merges, etc. |
| + * @param field the field whose writer to assign the info stream to |
| + * @param infoStream the info stream |
| + * @throws UnknownFieldException if field has not been associated with an IndexWriter in this index |
| + */ |
| + public void setInfoStream(String field, PrintStream infoStream) throws UnknownFieldException { |
| + IndexWriter writer = writerMap.get(field); |
| + if (writer==null) |
| + throw new UnknownFieldException("Unregistered field: " + field); |
| + writer.setInfoStream(infoStream); |
| + |
| + } |
| + |
| + /** Unsupported. use getInfoStream(field) */ |
| + public PrintStream getInfoStream() { |
| + throw new UnsupportedOperationException(); |
| + } |
| + |
| + /** Getter for info stream associated with field. See setInfoStream(field, infoStream). |
| + */ |
| + public PrintStream getInfoStream(String field) throws UnknownFieldException { |
| + IndexWriter writer = writerMap.get(field); |
| + if (writer==null) |
| + throw new UnknownFieldException("Unregistered field: " + field); |
| + return writer.getInfoStream(); |
| + } |
| + |
| + public static class UnknownFieldException extends Exception { |
| + |
| + private UnknownFieldException(String message) { |
| + super(message); |
| + } |
| + |
| + } |
| + |
| +} |