| Index: lucene/contrib/benchmark/conf/facets.alg |
| =================================================================== |
| --- lucene/contrib/benchmark/conf/facets.alg (revision 0) |
| +++ lucene/contrib/benchmark/conf/facets.alg (revision 0) |
| @@ -0,0 +1,70 @@ |
| +#/** |
| +# * Licensed to the Apache Software Foundation (ASF) under one or more |
| +# * contributor license agreements. See the NOTICE file distributed with |
| +# * this work for additional information regarding copyright ownership. |
| +# * The ASF licenses this file to You under the Apache License, Version 2.0 |
| +# * (the "License"); you may not use this file except in compliance with |
| +# * the License. You may obtain a copy of the License at |
| +# * |
| +# * http://www.apache.org/licenses/LICENSE-2.0 |
| +# * |
| +# * Unless required by applicable law or agreed to in writing, software |
| +# * distributed under the License is distributed on an "AS IS" BASIS, |
| +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| +# * See the License for the specific language governing permissions and |
| +# * limitations under the License. |
| +# */ |
| +# ------------------------------------------------------------------------------------- |
| + |
| +with.facets=facets:true:false |
| + |
| +compound=true |
| +analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer |
| +directory=FSDirectory |
| +taxonomy.directory=FSDirectory |
| + |
| +doc.stored=true |
| +doc.tokenized=true |
| +doc.term.vector=false |
| +log.step=300 |
| + |
| +docs.dir=reuters-out |
| + |
| +content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource |
| + |
| +facet.source=org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource |
| +rand.seed=10 |
| +max.doc.facets=120 |
| +max.facet.length=8 |
| + |
| +query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker |
| + |
| +task.max.depth.log=2 |
| + |
| +#log.queries=true |
| +# ------------------------------------------------------------------------------------- |
| + |
| +{ "Rounds" |
| + ResetSystemErase |
| + { "Populate" |
| + -CreateIndex |
| + -CreateTaxonomyIndex |
| + { "MAddDocs" AddFacetedDoc > : 400 |
| + -Optimize |
| + -CloseIndex |
| + -CloseTaxonomyIndex |
| + } |
| + |
| + OpenReader |
| + { "SearchSameRdr" Search > : 400 |
| + CloseReader |
| + |
| + RepSumByNameRound |
| + ResetSystemErase |
| + NewRound |
| +} : 4 |
| + |
| +RepSumByPrefRound Search |
| +RepSumByPrefRound Populate |
| +RepSumByPrefRound MAddDocs |
| + |
| Index: lucene/contrib/benchmark/CHANGES.txt |
| =================================================================== |
| --- lucene/contrib/benchmark/CHANGES.txt (revision 1180173) |
| +++ lucene/contrib/benchmark/CHANGES.txt (working copy) |
| @@ -5,6 +5,10 @@ |
| For more information on past and future Lucene versions, please see: |
| http://s.apache.org/luceneversions |
| |
| +10/07/2011 |
| + LUCENE-3262: Facet benchmarking - Benchmark tasks and sources were added for indexing |
| + with facets, demonstrated in facets.alg. (Doron Cohen) |
| + |
| 09/25/2011 |
| LUCENE-3457: Upgrade commons-compress to 1.2 (and undo LUCENE-2980's workaround). |
| (Doron Cohen) |
| Index: lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) |
| @@ -38,6 +38,7 @@ |
| import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask; |
| import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask; |
| import org.apache.lucene.collation.CollationKeyAnalyzer; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyReader; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| @@ -773,6 +774,42 @@ |
| } |
| |
| /** |
| + * Test indexing with facets tasks. |
| + */ |
| + public void testIndexingWithFacets() throws Exception { |
| + // 1. alg definition (required in every "logic" test) |
| + String algLines[] = { |
| + "# ----- properties ", |
| + "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| + "docs.file=" + getReuters20LinesFile(), |
| + "content.source.log.step=100", |
| + "content.source.forever=false", |
| + "directory=RAMDirectory", |
| + "doc.stored=false", |
| + "merge.factor=3", |
| + "doc.tokenized=false", |
| + "debug.level=1", |
| + "# ----- alg ", |
| + "ResetSystemErase", |
| + "CreateIndex", |
| + "CreateTaxonomyIndex", |
| + "{ \"AddDocs\" AddFacetedDoc > : * ", |
| + "CloseIndex", |
| + "CloseTaxonomyIndex", |
| + "OpenTaxonomyReader", |
| + }; |
| + |
| + // 2. execute the algorithm (required in every "logic" test) |
| + Benchmark benchmark = execBenchmark(algLines); |
| + PerfRunData runData = benchmark.getRunData(); |
| + assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter()); |
| + TaxonomyReader taxoReader = runData.getTaxonomyReader(); |
| + assertNotNull("taxo reader was not opened", taxoReader); |
| + assertTrue("nothing was added to the taxonomy (expecting root and at least one additional category)",taxoReader.getSize()>1); |
| + taxoReader.close(); |
| + } |
| + |
| + /** |
| * Test that we can call optimize(maxNumSegments). |
| */ |
| public void testOptimizeMaxNumSegments() throws Exception { |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java (revision 0) |
| @@ -0,0 +1,41 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; |
| + |
| +/** |
| + * Commits the Taxonomy Index. |
| + */ |
| +public class CommitTaxonomyIndexTask extends PerfTask { |
| + public CommitTaxonomyIndexTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws Exception { |
| + TaxonomyWriter taxonomyWriter = getRunData().getTaxonomyWriter(); |
| + if (taxonomyWriter != null) { |
| + taxonomyWriter.commit(); |
| + } else { |
| + throw new IllegalStateException("TaxonomyWriter is not currently open"); |
| + } |
| + |
| + return 1; |
| + } |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java (revision 0) |
| @@ -0,0 +1,45 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader; |
| + |
| +/** |
| + * Open a taxonomy index reader. |
| + * <br>Other side effects: taxonomy reader object in perfRunData is set. |
| + */ |
| +public class OpenTaxonomyReaderTask extends PerfTask { |
| + |
| + public OpenTaxonomyReaderTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + PerfRunData runData = getRunData(); |
| + LuceneTaxonomyReader taxoReader = new LuceneTaxonomyReader(runData.getTaxonomyDir()); |
| + runData.setTaxonomyReader(taxoReader); |
| + // We transfer reference to the run data |
| + taxoReader.decRef(); |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java (revision 0) |
| @@ -0,0 +1,46 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyReader; |
| + |
| +/** |
| + * Close taxonomy reader. |
| + * <br>Other side effects: taxonomy reader in perfRunData is nullified. |
| + */ |
| +public class CloseTaxonomyReaderTask extends PerfTask { |
| + |
| + public CloseTaxonomyReaderTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + TaxonomyReader taxoReader = getRunData().getTaxonomyReader(); |
| + getRunData().setTaxonomyReader(null); |
| + if (taxoReader.getRefCount() != 1) { |
| + System.out.println("WARNING: CloseTaxonomyReader: reference count is currently " + taxoReader.getRefCount()); |
| + } |
| + taxoReader.close(); |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java (revision 0) |
| @@ -0,0 +1,42 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter; |
| +import java.io.IOException; |
| + |
| + |
| +/** |
| + * Open a taxonomy index. |
| + * <br>Other side effects: taxonomy writer object in perfRunData is set. |
| + */ |
| +public class OpenTaxonomyIndexTask extends PerfTask { |
| + |
| + public OpenTaxonomyIndexTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + PerfRunData runData = getRunData(); |
| + runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir())); |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java (revision 0) |
| @@ -0,0 +1,44 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter; |
| +import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| + |
| +import java.io.IOException; |
| + |
| + |
| +/** |
| + * Create a taxonomy index. |
| + * <br>Other side effects: taxonomy writer object in perfRunData is set. |
| + */ |
| +public class CreateTaxonomyIndexTask extends PerfTask { |
| + |
| + public CreateTaxonomyIndexTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + PerfRunData runData = getRunData(); |
| + runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir(), OpenMode.CREATE)); |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java (revision 0) |
| @@ -0,0 +1,76 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.index.CategoryContainer; |
| +import org.apache.lucene.facet.index.CategoryDocumentBuilder; |
| + |
| +/** |
| + * Add a faceted document. |
| + * <p> |
| + * Config properties: |
| + * <ul> |
| + * <li><b>with.facets</b>=<optional, tells whether to actually add any facets to the document| Default: true> |
| + * <b>This config property makes it easy to compare the performance of adding docs with and without facets. |
| + * Note that facets are created even when this is false; they are just not added to the document (nor to the taxonomy). |
| + * </ul> |
| + * <p> |
| + * See {@link AddDocTask} for general document parameters and configuration. |
| + */ |
| +public class AddFacetedDocTask extends AddDocTask { |
| + |
| + public AddFacetedDocTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + private CategoryContainer facets = null; |
| + private CategoryDocumentBuilder categoryDocBuilder = null; |
| + private boolean withFacets = true; |
| + |
| + @Override |
| + public void setup() throws Exception { |
| + super.setup(); |
| + // create the facets even if they should not be added - allows to measure the effect of just adding facets |
| + facets = getRunData().getFacetSource().getNextFacets(facets); |
| + withFacets = getRunData().getConfig().get("with.facets", true); |
| + if (withFacets) { |
| + categoryDocBuilder = new CategoryDocumentBuilder(getRunData().getTaxonomyWriter()); |
| + categoryDocBuilder.setCategories(facets); |
| + } |
| + } |
| + |
| + @Override |
| + public void tearDown() throws Exception { |
| + super.tearDown(); |
| + } |
| + |
| + @Override |
| + protected String getLogMessage(int recsCount) { |
| + return "added " + recsCount + " docs with "+(withFacets ? "facets" : "no facets"); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws Exception { |
| + if (withFacets) { |
| + categoryDocBuilder.build(doc); |
| + } |
| + return super.doLogic(); |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java (revision 0) |
| @@ -0,0 +1,43 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.util.IOUtils; |
| + |
| +/** |
| + * Close taxonomy index. |
| + * <br>Other side effects: taxonomy writer object in perfRunData is nullified. |
| + */ |
| +public class CloseTaxonomyIndexTask extends PerfTask { |
| + |
| + public CloseTaxonomyIndexTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + IOUtils.close(getRunData().getTaxonomyWriter()); |
| + getRunData().setTaxonomyWriter(null); |
| + |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (working copy) |
| @@ -22,7 +22,7 @@ |
| import org.apache.lucene.document.Document; |
| |
| /** |
| - * Add a document, optionally with of a certain size. |
| + * Add a document, optionally of a certain size. |
| * <br>Other side effects: none. |
| * <br>Takes optional param: document size. |
| */ |
| @@ -34,9 +34,12 @@ |
| |
| private int docSize = 0; |
| |
| - // volatile data passed between setup(), doLogic(), tearDown(). |
| - private Document doc = null; |
| - |
| + /** |
| + * volatile data passed between setup(), doLogic(), tearDown(). |
| + * the doc is created at setup() and added at doLogic(). |
| + */ |
| + protected Document doc = null; |
| + |
| @Override |
| public void setup() throws Exception { |
| super.setup(); |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java (revision 0) |
| @@ -0,0 +1,45 @@ |
| +package org.apache.lucene.benchmark.byTask.feeds; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.facet.index.CategoryContainer; |
| + |
| +/** |
| + * Source items for facets. |
| + * <p> |
| + * For supported configuration parameters see {@link ContentItemsSource}. |
| + */ |
| +public abstract class FacetSource extends ContentItemsSource { |
| + |
| + /** Returns the next {@link CategoryContainer facets content item}. |
| + * Implementations must account for multi-threading, as multiple threads |
| + * can call this method simultaneously. |
| + */ |
| + public abstract CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException; |
| + |
| + @Override |
| + public void resetInputs() throws IOException { |
| + printStatistics("facets"); |
| + // re-initiate since properties by round may have changed. |
| + setConfig(getConfig()); |
| + super.resetInputs(); |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (revision 0) |
| @@ -0,0 +1,81 @@ |
| +package org.apache.lucene.benchmark.byTask.feeds; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| +import java.util.Random; |
| + |
| +import org.apache.lucene.benchmark.byTask.utils.Config; |
| +import org.apache.lucene.facet.index.CategoryContainer; |
| +import org.apache.lucene.facet.taxonomy.CategoryPath; |
| + |
| +/** |
| + * Simple implementation of a random facet source |
| + * <p> |
| + * Supports the following parameters: |
| + * <ul> |
| + * <li><b>rand.seed</b> - defines the seed to initialize Random with (default: <b>13</b>). |
| + * <li><b>max.doc.facets</b> - maximal #facets per doc (default: <b>200</b>). |
| + * Actual number of facets in a certain doc would be anything between 1 and that number. |
| + * <li><b>max.facet.length</b> - maximal #components in a facet (default: <b>10</b>). |
| + * Actual number of components in a certain facet would be anything between 1 and that number. |
| + * </ul> |
| + */ |
| +public class RandomFacetSource extends FacetSource { |
| + |
| + Random random; |
| + |
| + private int maxDocFacets = 200; |
| + private int maxFacetDepth = 10; |
| + private int maxValue = maxDocFacets * maxFacetDepth; |
| + |
| + @Override |
| + public CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException { |
| + if (facets == null) { |
| + facets = new CategoryContainer(); |
| + } else { |
| + facets.clear(); |
| + } |
| + int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc |
| + for (int i=0; i<numFacets; i++) { |
| + CategoryPath cp = new CategoryPath(); |
| + int length = 1 + random.nextInt(maxFacetDepth-1); // length 0 is not useful |
| + for (int k=0; k<length; k++) { |
| + cp.add(Integer.toString(random.nextInt(maxValue))); |
| + addItem(); |
| + } |
| + facets.addCategory(cp); |
| + addBytes(cp.toString().length()); // very rough approximation |
| + } |
| + return facets; |
| + } |
| + |
| + @Override |
| + public void close() throws IOException { |
| + // nothing to do here |
| + } |
| + |
| + @Override |
| + public void setConfig(Config config) { |
| + super.setConfig(config); |
| + random = new Random(config.get("rand.seed", 13)); |
| + maxDocFacets = config.get("max.doc.facets", 200); |
| + maxFacetDepth = config.get("max.facet.length", 10); |
| + maxValue = maxDocFacets * maxFacetDepth; |
| + } |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (working copy) |
| @@ -289,7 +289,7 @@ |
| // here, everything else is already private to that thread, so we're safe. |
| try { |
| docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType); |
| - addDoc(); |
| + addItem(); |
| } catch (InterruptedException ie) { |
| throw new ThreadInterruptedException(ie); |
| } |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (working copy) |
| @@ -17,13 +17,8 @@ |
| * limitations under the License. |
| */ |
| |
| -import java.io.File; |
| import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.Arrays; |
| |
| -import org.apache.lucene.benchmark.byTask.utils.Config; |
| - |
| /** |
| * Represents content from a specified source, such as TREC, Reuters etc. A |
| * {@link ContentSource} is responsible for creating {@link DocData} objects for |
| @@ -31,119 +26,13 @@ |
| * of various statistics, such as how many documents were generated, size in |
| * bytes etc. |
| * <p> |
| - * Supports the following configuration parameters: |
| - * <ul> |
| - * <li><b>content.source.forever</b> - specifies whether to generate documents |
| - * forever (<b>default=true</b>). |
| - * <li><b>content.source.verbose</b> - specifies whether messages should be |
| - * output by the content source (<b>default=false</b>). |
| - * <li><b>content.source.encoding</b> - specifies which encoding to use when |
| - * reading the files of that content source. Certain implementations may define |
| - * a default value if this parameter is not specified. (<b>default=null</b>). |
| - * <li><b>content.source.log.step</b> - specifies for how many documents a |
| - * message should be logged. If set to 0 it means no logging should occur. |
| - * <b>NOTE:</b> if verbose is set to false, logging should not occur even if |
| - * logStep is not 0 (<b>default=0</b>). |
| - * </ul> |
| + * For supported configuration parameters see {@link ContentItemsSource}. |
| */ |
| -public abstract class ContentSource { |
| +public abstract class ContentSource extends ContentItemsSource { |
| |
| - private long bytesCount; |
| - private long totalBytesCount; |
| - private int docsCount; |
| - private int totalDocsCount; |
| - private Config config; |
| - |
| - protected boolean forever; |
| - protected int logStep; |
| - protected boolean verbose; |
| - protected String encoding; |
| - |
| - /** update count of bytes generated by this source */ |
| - protected final synchronized void addBytes(long numBytes) { |
| - bytesCount += numBytes; |
| - totalBytesCount += numBytes; |
| - } |
| - |
| - /** update count of documents generated by this source */ |
| - protected final synchronized void addDoc() { |
| - ++docsCount; |
| - ++totalDocsCount; |
| - } |
| - |
| - /** |
| - * A convenience method for collecting all the files of a content source from |
| - * a given directory. The collected {@link File} instances are stored in the |
| - * given <code>files</code>. |
| - */ |
| - protected final void collectFiles(File dir, ArrayList<File> files) { |
| - if (!dir.canRead()) { |
| - return; |
| - } |
| - |
| - File[] dirFiles = dir.listFiles(); |
| - Arrays.sort(dirFiles); |
| - for (int i = 0; i < dirFiles.length; i++) { |
| - File file = dirFiles[i]; |
| - if (file.isDirectory()) { |
| - collectFiles(file, files); |
| - } else if (file.canRead()) { |
| - files.add(file); |
| - } |
| - } |
| - } |
| - |
| - /** |
| - * Returns true whether it's time to log a message (depending on verbose and |
| - * the number of documents generated). |
| - */ |
| - protected final boolean shouldLog() { |
| - return verbose && logStep > 0 && docsCount % logStep == 0; |
| - } |
| - |
| - /** Called when reading from this content source is no longer required. */ |
| - public abstract void close() throws IOException; |
| - |
| - /** Returns the number of bytes generated since last reset. */ |
| - public final long getBytesCount() { return bytesCount; } |
| - |
| - /** Returns the number of generated documents since last reset. */ |
| - public final int getDocsCount() { return docsCount; } |
| - |
| - public final Config getConfig() { return config; } |
| - |
| - /** Returns the next {@link DocData} from the content source. */ |
| + /** Returns the next {@link DocData} from the content source. |
| + * Implementations must account for multi-threading, as multiple threads |
| + * can call this method simultaneously. */ |
| public abstract DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException; |
| |
| - /** Returns the total number of bytes that were generated by this source. */ |
| - public final long getTotalBytesCount() { return totalBytesCount; } |
| - |
| - /** Returns the total number of generated documents. */ |
| - public final int getTotalDocsCount() { return totalDocsCount; } |
| - |
| - /** |
| - * Resets the input for this content source, so that the test would behave as |
| - * if it was just started, input-wise. |
| - * <p> |
| - * <b>NOTE:</b> the default implementation resets the number of bytes and |
| - * documents generated since the last reset, so it's important to call |
| - * super.resetInputs in case you override this method. |
| - */ |
| - public void resetInputs() throws IOException { |
| - bytesCount = 0; |
| - docsCount = 0; |
| - } |
| - |
| - /** |
| - * Sets the {@link Config} for this content source. If you override this |
| - * method, you must call super.setConfig. |
| - */ |
| - public void setConfig(Config config) { |
| - this.config = config; |
| - forever = config.get("content.source.forever", true); |
| - logStep = config.get("content.source.log.step", 0); |
| - verbose = config.get("content.source.verbose", false); |
| - encoding = config.get("content.source.encoding", null); |
| - } |
| - |
| } |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java (revision 0) |
| @@ -0,0 +1,180 @@ |
| +package org.apache.lucene.benchmark.byTask.feeds; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.File; |
| +import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.Arrays; |
| + |
| +import org.apache.lucene.benchmark.byTask.utils.Config; |
| +import org.apache.lucene.benchmark.byTask.utils.Format; |
| + |
| +/** |
| + * Base class for a source of data for benchmarking
| + * <p> |
| + * Keeps track of various statistics, such as how many data items were generated, |
| + * size in bytes etc. |
| + * <p> |
| + * Supports the following configuration parameters: |
| + * <ul> |
| + * <li><b>content.source.forever</b> - specifies whether to generate items |
| + * forever (<b>default=true</b>). |
| + * <li><b>content.source.verbose</b> - specifies whether messages should be |
| + * output by the content source (<b>default=false</b>). |
| + * <li><b>content.source.encoding</b> - specifies which encoding to use when |
| + * reading the files of that content source. Certain implementations may define |
| + * a default value if this parameter is not specified. (<b>default=null</b>). |
| + * <li><b>content.source.log.step</b> - specifies for how many items a |
| + * message should be logged. If set to 0 it means no logging should occur. |
| + * <b>NOTE:</b> if verbose is set to false, logging should not occur even if |
| + * logStep is not 0 (<b>default=0</b>). |
| + * </ul> |
| + */ |
| +public abstract class ContentItemsSource { |
| + |
| + private long bytesCount; |
| + private long totalBytesCount; |
| + private int itemCount; |
| + private int totalItemCount; |
| + private Config config; |
| + |
| + private int lastPrintedNumUniqueTexts = 0; |
| + private long lastPrintedNumUniqueBytes = 0; |
| + private int printNum = 0; |
| + |
| + protected boolean forever; |
| + protected int logStep; |
| + protected boolean verbose; |
| + protected String encoding; |
| + |
| + /** update count of bytes generated by this source */ |
| + protected final synchronized void addBytes(long numBytes) { |
| + bytesCount += numBytes; |
| + totalBytesCount += numBytes; |
| + } |
| + |
| + /** update count of items generated by this source */ |
| + protected final synchronized void addItem() { |
| + ++itemCount; |
| + ++totalItemCount; |
| + } |
| + |
| + /** |
| + * A convenience method for collecting all the files of a content source from |
| + * a given directory. The collected {@link File} instances are stored in the |
| + * given <code>files</code>. |
| + */ |
| + protected final void collectFiles(File dir, ArrayList<File> files) { |
| + if (!dir.canRead()) { |
| + return; |
| + } |
| + |
| + File[] dirFiles = dir.listFiles(); |
| + Arrays.sort(dirFiles); |
| + for (int i = 0; i < dirFiles.length; i++) { |
| + File file = dirFiles[i]; |
| + if (file.isDirectory()) { |
| + collectFiles(file, files); |
| + } else if (file.canRead()) { |
| + files.add(file); |
| + } |
| + } |
| + } |
| + |
| + /** |
| + * Returns true if it's time to log a message (depending on verbose and
| + * the number of items generated). |
| + */ |
| + protected final boolean shouldLog() { |
| + return verbose && logStep > 0 && itemCount % logStep == 0; |
| + } |
| + |
| + /** Called when reading from this content source is no longer required. */ |
| + public abstract void close() throws IOException; |
| + |
| + /** Returns the number of bytes generated since last reset. */ |
| + public final long getBytesCount() { return bytesCount; } |
| + |
| + /** Returns the number of generated items since last reset. */ |
| + public final int getItemsCount() { return itemCount; } |
| + |
| + public final Config getConfig() { return config; } |
| + |
| + /** Returns the total number of bytes that were generated by this source. */ |
| + public final long getTotalBytesCount() { return totalBytesCount; } |
| + |
| + /** Returns the total number of generated items. */ |
| + public final int getTotalItemsCount() { return totalItemCount; } |
| + |
| + /** |
| + * Resets the input for this content source, so that the test would behave as |
| + * if it was just started, input-wise. |
| + * <p> |
| + * <b>NOTE:</b> the default implementation resets the number of bytes and |
| + * items generated since the last reset, so it's important to call |
| + * super.resetInputs in case you override this method. |
| + */ |
| + @SuppressWarnings("unused") |
| + public void resetInputs() throws IOException { |
| + bytesCount = 0; |
| + itemCount = 0; |
| + } |
| + |
| + /** |
| + * Sets the {@link Config} for this content source. If you override this |
| + * method, you must call super.setConfig. |
| + */ |
| + public void setConfig(Config config) { |
| + this.config = config; |
| + forever = config.get("content.source.forever", true); |
| + logStep = config.get("content.source.log.step", 0); |
| + verbose = config.get("content.source.verbose", false); |
| + encoding = config.get("content.source.encoding", null); |
| + } |
| + |
| + public void printStatistics(String itemsName) { |
| + boolean print = false; |
| + String col = " "; |
| + StringBuilder sb = new StringBuilder(); |
| + String newline = System.getProperty("line.separator"); |
| + sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); |
| + int nut = getTotalItemsCount(); |
| + if (nut > lastPrintedNumUniqueTexts) { |
| + print = true; |
| + sb.append("total count of "+itemsName+": ").append(Format.format(0,nut,col)).append(newline); |
| + lastPrintedNumUniqueTexts = nut; |
| + } |
| + long nub = getTotalBytesCount(); |
| + if (nub > lastPrintedNumUniqueBytes) { |
| + print = true; |
| + sb.append("total bytes of "+itemsName+": ").append(Format.format(0,nub,col)).append(newline); |
| + lastPrintedNumUniqueBytes = nub; |
| + } |
| + if (getItemsCount() > 0) { |
| + print = true; |
| + sb.append("num "+itemsName+" added since last inputs reset: ").append(Format.format(0,getItemsCount(),col)).append(newline); |
| + sb.append("total bytes added for "+itemsName+" since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline); |
| + } |
| + if (print) { |
| + System.out.println(sb.append(newline).toString()); |
| + printNum++; |
| + } |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (working copy) |
| @@ -31,7 +31,6 @@ |
| import java.text.ParsePosition; |
| |
| import org.apache.lucene.benchmark.byTask.utils.Config; |
| -import org.apache.lucene.benchmark.byTask.utils.Format; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.NumericField; |
| @@ -189,13 +188,8 @@ |
| protected boolean reuseFields; |
| protected boolean indexProperties; |
| |
| - private int lastPrintedNumUniqueTexts = 0; |
| - |
| - private long lastPrintedNumUniqueBytes = 0; |
| private final AtomicInteger numDocsCreated = new AtomicInteger(); |
| |
| - private int printNum = 0; |
| - |
| // create a doc |
| // use only part of the body, modify it to keep the rest (or use all if size==0). |
| // reset the docdata properties so they are not added more than once. |
| @@ -397,38 +391,9 @@ |
| return doc; |
| } |
| |
| - public void printDocStatistics() { |
| - boolean print = false; |
| - String col = " "; |
| - StringBuilder sb = new StringBuilder(); |
| - String newline = System.getProperty("line.separator"); |
| - sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); |
| - int nut = source.getTotalDocsCount(); |
| - if (nut > lastPrintedNumUniqueTexts) { |
| - print = true; |
| - sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline); |
| - lastPrintedNumUniqueTexts = nut; |
| - } |
| - long nub = getTotalBytesCount(); |
| - if (nub > lastPrintedNumUniqueBytes) { |
| - print = true; |
| - sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline); |
| - lastPrintedNumUniqueBytes = nub; |
| - } |
| - if (source.getDocsCount() > 0) { |
| - print = true; |
| - sb.append("num docs added since last inputs reset: ").append(Format.format(0,source.getDocsCount(),col)).append(newline); |
| - sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline); |
| - } |
| - if (print) { |
| - System.out.println(sb.append(newline).toString()); |
| - printNum++; |
| - } |
| - } |
| - |
| /** Reset inputs so that the test run would behave, input wise, as if it just started. */ |
| public synchronized void resetInputs() throws IOException { |
| - printDocStatistics(); |
| + source.printStatistics("docs"); |
| // re-initiate since properties by round may have changed. |
| setConfig(config); |
| source.resetInputs(); |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (working copy) |
| @@ -24,6 +24,7 @@ |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.benchmark.byTask.feeds.DocMaker; |
| +import org.apache.lucene.benchmark.byTask.feeds.FacetSource; |
| import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; |
| import org.apache.lucene.benchmark.byTask.stats.Points; |
| import org.apache.lucene.benchmark.byTask.tasks.ReadTask; |
| @@ -31,12 +32,15 @@ |
| import org.apache.lucene.benchmark.byTask.utils.Config; |
| import org.apache.lucene.benchmark.byTask.utils.FileUtils; |
| import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyReader; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| import org.apache.lucene.store.RAMDirectory; |
| +import org.apache.lucene.util.IOUtils; |
| |
| /** |
| * Data maintained by a performance test run. |
| @@ -45,11 +49,21 @@ |
| * <ul> |
| * <li>Configuration. |
| * <li>Directory, Writer, Reader. |
| - * <li>Docmaker and a few instances of QueryMaker. |
| + * <li>Taxonomy Directory, Writer, Reader. |
| + * <li>DocMaker, FacetSource and a few instances of QueryMaker. |
| * <li>Analyzer. |
| * <li>Statistics data which updated during the run. |
| * </ul> |
| - * Config properties: work.dir=<path to root of docs and index dirs| Default: work> |
| + * Config properties: |
| + * <ul> |
| + * <li><b>work.dir</b>=<path to root of docs and index dirs| Default: work> |
| + * <li><b>analyzer</b>=<class name for analyzer| Default: StandardAnalyzer> |
| + * <li><b>doc.maker</b>=<class name for doc-maker| Default: DocMaker> |
| + * <li><b>facet.source</b>=<class name for facet-source| Default: RandomFacetSource> |
| + * <li><b>query.maker</b>=<class name for query-maker| Default: SimpleQueryMaker> |
| + * <li><b>log.queries</b>=<whether queries should be printed| Default: false> |
| + * <li><b>directory</b>=<type of directory to use for the index| Default: RAMDirectory> |
| + * <li><b>taxonomy.directory</b>=<type of directory for taxonomy index| Default: RAMDirectory> |
| * </ul> |
| */ |
| public class PerfRunData { |
| @@ -62,7 +76,12 @@ |
| private Directory directory; |
| private Analyzer analyzer; |
| private DocMaker docMaker; |
| + private FacetSource facetSource; |
| private Locale locale; |
| + |
| + private Directory taxonomyDir; |
| + private TaxonomyWriter taxonomyWriter; |
| + private TaxonomyReader taxonomyReader; |
| |
| // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately. |
| private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker; |
| @@ -73,6 +92,7 @@ |
| private IndexWriter indexWriter; |
| private Config config; |
| private long startTimeMillis; |
| + |
| |
| // constructor |
| public PerfRunData (Config config) throws Exception { |
| @@ -84,6 +104,10 @@ |
| docMaker = Class.forName(config.get("doc.maker", |
| "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance(); |
| docMaker.setConfig(config); |
| + // facet source |
| + facetSource = Class.forName(config.get("facet.source", |
| + "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance(); |
| + facetSource.setConfig(config); |
| // query makers |
| readTaskQueryMaker = new HashMap<Class<? extends ReadTask>,QueryMaker>(); |
| qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class); |
| @@ -104,30 +128,17 @@ |
| public void reinit(boolean eraseIndex) throws Exception { |
| |
| // cleanup index |
| - if (indexWriter!=null) { |
| - indexWriter.close(); |
| - indexWriter = null; |
| - } |
| - if (indexReader!=null) { |
| - indexReader.close(); |
| - indexReader = null; |
| - } |
| - if (directory!=null) { |
| - directory.close(); |
| - } |
| + IOUtils.close(indexWriter, indexReader, directory); |
| + indexWriter = null; |
| + indexReader = null; |
| + |
| + IOUtils.close(taxonomyWriter, taxonomyReader, taxonomyDir); |
| + taxonomyWriter = null; |
| + taxonomyReader = null; |
| |
| // directory (default is ram-dir). |
| - if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) { |
| - File workDir = new File(config.get("work.dir","work")); |
| - File indexDir = new File(workDir,"index"); |
| - if (eraseIndex && indexDir.exists()) { |
| - FileUtils.fullyDelete(indexDir); |
| - } |
| - indexDir.mkdirs(); |
| - directory = FSDirectory.open(indexDir); |
| - } else { |
| - directory = new RAMDirectory(); |
| - } |
| + directory = createDirectory(eraseIndex, "index", "directory"); |
| + taxonomyDir = createDirectory(eraseIndex, "taxo", "taxonomy.directory"); |
| |
| // inputs |
| resetInputs(); |
| @@ -139,6 +150,21 @@ |
| // Re-init clock |
| setStartTimeMillis(); |
| } |
| + |
| + private Directory createDirectory(boolean eraseIndex, String dirName, |
| + String dirParam) throws IOException { |
| + if ("FSDirectory".equals(config.get(dirParam,"RAMDirectory"))) { |
| + File workDir = new File(config.get("work.dir","work")); |
| + File indexDir = new File(workDir,dirName); |
| + if (eraseIndex && indexDir.exists()) { |
| + FileUtils.fullyDelete(indexDir); |
| + } |
| + indexDir.mkdirs(); |
| + return FSDirectory.open(indexDir); |
| + } |
| + |
| + return new RAMDirectory(); |
| + } |
| |
| public long setStartTimeMillis() { |
| startTimeMillis = System.currentTimeMillis(); |
| @@ -174,6 +200,57 @@ |
| } |
| |
| /** |
| + * @return Returns the taxonomy directory |
| + */ |
| + public Directory getTaxonomyDir() { |
| + return taxonomyDir; |
| + } |
| + |
| + /** |
| + * Set the taxonomy reader. Takes ownership of that taxonomy reader, that is, |
| + * internally performs taxoReader.incRef() (If caller no longer needs that |
| + * reader it should decRef()/close() it after calling this method, otherwise, |
| + * the reader will remain open). |
| + * @param taxoReader The taxonomy reader to set. |
| + */ |
| + public synchronized void setTaxonomyReader(TaxonomyReader taxoReader) throws IOException { |
| + if (taxoReader == this.taxonomyReader) { |
| + return; |
| + } |
| + if (taxonomyReader != null) { |
| + taxonomyReader.decRef(); |
| + } |
| + |
| + if (taxoReader != null) { |
| + taxoReader.incRef(); |
| + } |
| + this.taxonomyReader = taxoReader; |
| + } |
| + |
| + /** |
| + * @return Returns the taxonomyReader. NOTE: this returns a |
| + * reference. You must call TaxonomyReader.decRef() when |
| + * you're done. |
| + */ |
| + public synchronized TaxonomyReader getTaxonomyReader() { |
| + if (taxonomyReader != null) { |
| + taxonomyReader.incRef(); |
| + } |
| + return taxonomyReader; |
| + } |
| + |
| + /** |
| + * @param taxoWriter The taxonomy writer to set. |
| + */ |
| + public void setTaxonomyWriter(TaxonomyWriter taxoWriter) { |
| + this.taxonomyWriter = taxoWriter; |
| + } |
| + |
| + public TaxonomyWriter getTaxonomyWriter() { |
| + return taxonomyWriter; |
| + } |
| + |
| + /** |
| * @return Returns the indexReader. NOTE: this returns a |
| * reference. You must call IndexReader.decRef() when |
| * you're done. |
| @@ -198,13 +275,22 @@ |
| } |
| |
| /** |
| + * Set the index reader. Takes ownership of that index reader, that is, |
| + * internally performs indexReader.incRef() (If caller no longer needs that |
| + * reader it should decRef()/close() it after calling this method, otherwise, |
| + * the reader will remain open). |
| * @param indexReader The indexReader to set. |
| */ |
| public synchronized void setIndexReader(IndexReader indexReader) throws IOException { |
| + if (indexReader == this.indexReader) { |
| + return; |
| + } |
| + |
| if (this.indexReader != null) { |
| // Release current IR |
| this.indexReader.decRef(); |
| } |
| + |
| this.indexReader = indexReader; |
| if (indexReader != null) { |
| // Hold reference to new IR |
| @@ -246,6 +332,11 @@ |
| return docMaker; |
| } |
| |
| + /** Returns the facet source. */ |
| + public FacetSource getFacetSource() { |
| + return facetSource; |
| + } |
| + |
| /** |
| * @return the locale |
| */ |
| @@ -269,6 +360,7 @@ |
| |
| public void resetInputs() throws IOException { |
| docMaker.resetInputs(); |
| + facetSource.resetInputs(); |
| for (final QueryMaker queryMaker : readTaskQueryMaker.values()) { |
| queryMaker.resetInputs(); |
| } |
| Index: lucene/contrib/benchmark/build.xml |
| =================================================================== |
| --- lucene/contrib/benchmark/build.xml (revision 1180173) |
| +++ lucene/contrib/benchmark/build.xml (working copy) |
| @@ -130,6 +130,7 @@ |
| <pathelement path="${memory.jar}"/> |
| <pathelement path="${highlighter.jar}"/> |
| <pathelement path="${analyzers-common.jar}"/> |
| + <pathelement path="${facet.jar}"/> |
| <path refid="base.classpath"/> |
| <fileset dir="lib"> |
| <include name="**/*.jar"/> |
| @@ -218,7 +219,7 @@ |
| <echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo> |
| </target> |
| |
| - <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common"/> |
| + <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-facet"/> |
| |
| <target name="clean-javacc"> |
| <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java"> |
| Index: lucene/contrib/contrib-build.xml |
| =================================================================== |
| --- lucene/contrib/contrib-build.xml (revision 1180173) |
| +++ lucene/contrib/contrib-build.xml (working copy) |
| @@ -129,6 +129,17 @@ |
| <property name="analyzers-common.uptodate" value="true"/> |
| </target> |
| |
| + <property name="facet.jar" value="${common.dir}/build/contrib/facet/lucene-facet-${version}.jar"/> |
| + <target name="check-facet-uptodate" unless="facet.uptodate"> |
| + <contrib-uptodate name="facet" jarfile="${facet.jar}" property="facet.uptodate"/> |
| + </target> |
| + <target name="jar-facet" unless="facet.uptodate" depends="check-facet-uptodate"> |
| + <ant dir="${common.dir}/contrib/facet" target="jar-core" inheritall="false"> |
| + <propertyset refid="uptodate.and.compiled.properties"/> |
| + </ant> |
| + <property name="facet.uptodate" value="true"/> |
| + </target> |
| + |
| <property name="analyzers-smartcn.jar" value="${common.dir}/build/contrib/analyzers/smartcn/lucene-smartcn-${version}.jar"/> |
| <target name="check-analyzers-smartcn-uptodate" unless="analyzers-smartcn.uptodate"> |
| <contrib-uptodate name="analyzers/smartcn" jarfile="${analyzers-smartcn.jar}" property="analyzers-smartcn.uptodate"/> |