| Index: lucene/contrib/benchmark/conf/facets.alg |
| =================================================================== |
| --- lucene/contrib/benchmark/conf/facets.alg (revision 0) |
| +++ lucene/contrib/benchmark/conf/facets.alg (revision 0) |
| @@ -0,0 +1,70 @@ |
| +#/** |
| +# * Licensed to the Apache Software Foundation (ASF) under one or more |
| +# * contributor license agreements. See the NOTICE file distributed with |
| +# * this work for additional information regarding copyright ownership. |
| +# * The ASF licenses this file to You under the Apache License, Version 2.0 |
| +# * (the "License"); you may not use this file except in compliance with |
| +# * the License. You may obtain a copy of the License at |
| +# * |
| +# * http://www.apache.org/licenses/LICENSE-2.0 |
| +# * |
| +# * Unless required by applicable law or agreed to in writing, software |
| +# * distributed under the License is distributed on an "AS IS" BASIS, |
| +# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| +# * See the License for the specific language governing permissions and |
| +# * limitations under the License. |
| +# */ |
| +# ------------------------------------------------------------------------------------- |
| + |
| +with.facets=facets:true:false |
| + |
| +compound=true |
| +analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer |
| +directory=FSDirectory |
| +taxonomy.directory=FSDirectory |
| + |
| +doc.stored=true |
| +doc.tokenized=true |
| +doc.term.vector=false |
| +log.step=300 |
| + |
| +docs.dir=reuters-out |
| + |
| +content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource |
| + |
| +facet.source=org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource |
| +rand.seed=10 |
| +max.doc.facets=120 |
| +max.facet.length=8 |
| + |
| +query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker |
| + |
| +task.max.depth.log=2 |
| + |
| +#log.queries=true |
| +# ------------------------------------------------------------------------------------- |
| + |
| +{ "Rounds" |
| + ResetSystemErase |
| + { "Populate" |
| + -CreateIndex |
| + -CreateTaxonomyIndex |
| + { "MAddDocs" AddFacetedDoc > : 400 |
| + -Optimize |
| + -CloseIndex |
| + -CloseTaxonomyIndex |
| + } |
| + |
| + OpenReader |
| + { "SearchSameRdr" Search > : 400 |
| + CloseReader |
| + |
| + RepSumByNameRound |
| + ResetSystemErase |
| + NewRound |
| +} : 4 |
| + |
| +RepSumByPrefRound Search |
| +RepSumByPrefRound Populate |
| +RepSumByPrefRound MAddDocs |
| + |
| Index: lucene/contrib/benchmark/CHANGES.txt |
| =================================================================== |
| --- lucene/contrib/benchmark/CHANGES.txt (revision 1180173) |
| +++ lucene/contrib/benchmark/CHANGES.txt (working copy) |
| @@ -5,6 +5,10 @@ |
| For more information on past and future Lucene versions, please see: |
| http://s.apache.org/luceneversions |
| |
| +10/07/2011 |
| + LUCENE-3262: Facet benchmarking - Benchmark tasks and sources were added for indexing |
| + with facets, demonstrated in facets.alg. (Doron Cohen) |
| + |
| 09/25/2011 |
| LUCENE-3457: Upgrade commons-compress to 1.2 (and undo LUCENE-2980's workaround). |
| (Doron Cohen) |
| Index: lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy) |
| @@ -38,6 +38,7 @@ |
| import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask; |
| import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask; |
| import org.apache.lucene.collation.CollationKeyAnalyzer; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyReader; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| @@ -773,6 +774,42 @@ |
| } |
| |
| /** |
| + * Test indexing with facets tasks. |
| + */ |
| + public void testIndexingWithFacets() throws Exception { |
| + // 1. alg definition (required in every "logic" test) |
| + String algLines[] = { |
| + "# ----- properties ", |
| + "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| + "docs.file=" + getReuters20LinesFile(), |
| + "content.source.log.step=100", |
| + "content.source.forever=false", |
| + "directory=RAMDirectory", |
| + "doc.stored=false", |
| + "merge.factor=3", |
| + "doc.tokenized=false", |
| + "debug.level=1", |
| + "# ----- alg ", |
| + "ResetSystemErase", |
| + "CreateIndex", |
| + "CreateTaxonomyIndex", |
| + "{ \"AddDocs\" AddFacetedDoc > : * ", |
| + "CloseIndex", |
| + "CloseTaxonomyIndex", |
| + "OpenTaxonomyReader", |
| + }; |
| + |
| + // 2. execute the algorithm (required in every "logic" test) |
| + Benchmark benchmark = execBenchmark(algLines); |
| + PerfRunData runData = benchmark.getRunData(); |
| + assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter()); |
| + TaxonomyReader taxoReader = runData.getTaxonomyReader(); |
| + assertNotNull("taxo reader was not opened", taxoReader); |
| + assertTrue("nothing was added to the taxonomy (expecting root and at least one additional category)",taxoReader.getSize()>1); |
| + taxoReader.close(); |
| + } |
| + |
| + /** |
| * Test that we can call optimize(maxNumSegments). |
| */ |
| public void testOptimizeMaxNumSegments() throws Exception { |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java (revision 0) |
| @@ -0,0 +1,41 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; |
| + |
| +/** |
| + * Commits the Taxonomy Index. |
| + */ |
| +public class CommitTaxonomyIndexTask extends PerfTask { |
| + public CommitTaxonomyIndexTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws Exception { |
| + TaxonomyWriter taxonomyWriter = getRunData().getTaxonomyWriter(); |
| + if (taxonomyWriter != null) { |
| + taxonomyWriter.commit(); |
| + } else { |
| + throw new IllegalStateException("TaxonomyWriter is not currently open"); |
| + } |
| + |
| + return 1; |
| + } |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java (revision 0) |
| @@ -0,0 +1,45 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader; |
| + |
| +/** |
| + * Open a taxonomy index reader. |
| + * <br>Other side effects: taxonomy reader object in perfRunData is set. |
| + */ |
| +public class OpenTaxonomyReaderTask extends PerfTask { |
| + |
| + public OpenTaxonomyReaderTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + PerfRunData runData = getRunData(); |
| + LuceneTaxonomyReader taxoReader = new LuceneTaxonomyReader(runData.getTaxonomyDir()); |
| + runData.setTaxonomyReader(taxoReader); |
| + // We transfer reference to the run data |
| + taxoReader.decRef(); |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java (revision 0) |
| @@ -0,0 +1,46 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyReader; |
| + |
| +/** |
| + * Close taxonomy reader. |
| + * <br>Other side effects: taxonomy reader in perfRunData is nullified. |
| + */ |
| +public class CloseTaxonomyReaderTask extends PerfTask { |
| + |
| + public CloseTaxonomyReaderTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + TaxonomyReader taxoReader = getRunData().getTaxonomyReader(); |
| + getRunData().setTaxonomyReader(null); |
| + if (taxoReader.getRefCount() != 1) { |
| + System.out.println("WARNING: CloseTaxonomyReader: reference count is currently " + taxoReader.getRefCount()); |
| + } |
| + taxoReader.close(); |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java (revision 0) |
| @@ -0,0 +1,42 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter; |
| +import java.io.IOException; |
| + |
| + |
| +/** |
| + * Open a taxonomy index. |
| + * <br>Other side effects: taxonomy writer object in perfRunData is set. |
| + */ |
| +public class OpenTaxonomyIndexTask extends PerfTask { |
| + |
| + public OpenTaxonomyIndexTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + PerfRunData runData = getRunData(); |
| + runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir())); |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java (revision 0) |
| @@ -0,0 +1,44 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter; |
| +import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| + |
| +import java.io.IOException; |
| + |
| + |
| +/** |
| + * Create a taxonomy index. |
| + * <br>Other side effects: taxonomy writer object in perfRunData is set. |
| + */ |
| +public class CreateTaxonomyIndexTask extends PerfTask { |
| + |
| + public CreateTaxonomyIndexTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + PerfRunData runData = getRunData(); |
| + runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir(), OpenMode.CREATE)); |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java (revision 0) |
| @@ -0,0 +1,76 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.facet.index.CategoryContainer; |
| +import org.apache.lucene.facet.index.CategoryDocumentBuilder; |
| + |
| +/** |
| + * Add a faceted document. |
| + * <p> |
| + * Config properties: |
| + * <ul> |
| + * <li><b>with.facets</b>=<optional, tells whether to actually add any facets to the document| Default: true> |
| + * <b>This config property makes it easy to compare the performance of adding docs with and without facets. |
| + * Note that facets are created even when this is false; they are just not added to the document (nor to the taxonomy). |
| + * </ul> |
| + * <p> |
| + * See {@link AddDocTask} for general document parameters and configuration. |
| + */ |
| +public class AddFacetedDocTask extends AddDocTask { |
| + |
| + public AddFacetedDocTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + private CategoryContainer facets = null; |
| + private CategoryDocumentBuilder categoryDocBuilder = null; |
| + private boolean withFacets = true; |
| + |
| + @Override |
| + public void setup() throws Exception { |
| + super.setup(); |
| + // create the facets even if they should not be added - allows to measure the effect of just adding facets |
| + facets = getRunData().getFacetSource().getNextFacets(facets); |
| + withFacets = getRunData().getConfig().get("with.facets", true); |
| + if (withFacets) { |
| + categoryDocBuilder = new CategoryDocumentBuilder(getRunData().getTaxonomyWriter()); |
| + categoryDocBuilder.setCategories(facets); |
| + } |
| + } |
| + |
| + @Override |
| + public void tearDown() throws Exception { |
| + super.tearDown(); |
| + } |
| + |
| + @Override |
| + protected String getLogMessage(int recsCount) { |
| + return "added " + recsCount + " docs with "+(withFacets ? "facets" : "no facets"); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws Exception { |
| + if (withFacets) { |
| + categoryDocBuilder.build(doc); |
| + } |
| + return super.doLogic(); |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java (revision 0) |
| @@ -0,0 +1,43 @@ |
| +package org.apache.lucene.benchmark.byTask.tasks; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.benchmark.byTask.PerfRunData; |
| +import org.apache.lucene.util.IOUtils; |
| + |
| +/** |
| + * Close taxonomy index. |
| + * <br>Other side effects: taxonomy writer object in perfRunData is nullified. |
| + */ |
| +public class CloseTaxonomyIndexTask extends PerfTask { |
| + |
| + public CloseTaxonomyIndexTask(PerfRunData runData) { |
| + super(runData); |
| + } |
| + |
| + @Override |
| + public int doLogic() throws IOException { |
| + IOUtils.close(getRunData().getTaxonomyWriter()); |
| + getRunData().setTaxonomyWriter(null); |
| + |
| + return 1; |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (working copy) |
| @@ -22,7 +22,7 @@ |
| import org.apache.lucene.document.Document; |
| |
| /** |
| - * Add a document, optionally with of a certain size. |
| + * Add a document, optionally of a certain size. |
| * <br>Other side effects: none. |
| * <br>Takes optional param: document size. |
| */ |
| @@ -34,9 +34,12 @@ |
| |
| private int docSize = 0; |
| |
| - // volatile data passed between setup(), doLogic(), tearDown(). |
| - private Document doc = null; |
| - |
| + /** |
| + * volatile data passed between setup(), doLogic(), tearDown(). |
| + * the doc is created at setup() and added at doLogic(). |
| + */ |
| + protected Document doc = null; |
| + |
| @Override |
| public void setup() throws Exception { |
| super.setup(); |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java (revision 0) |
| @@ -0,0 +1,45 @@ |
| +package org.apache.lucene.benchmark.byTask.feeds; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| + |
| +import org.apache.lucene.facet.index.CategoryContainer; |
| + |
| +/** |
| + * Source items for facets. |
| + * <p> |
| + * For supported configuration parameters see {@link ContentItemsSource}. |
| + */ |
| +public abstract class FacetSource extends ContentItemsSource { |
| + |
| + /** Returns the next {@link CategoryContainer facets content item}. |
| + * Implementations must account for multi-threading, as multiple threads |
| + * can call this method simultaneously. |
| + */ |
| + public abstract CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException; |
| + |
| + @Override |
| + public void resetInputs() throws IOException { |
| + printStatistics("facets"); |
| + // re-initiate since properties by round may have changed. |
| + setConfig(getConfig()); |
| + super.resetInputs(); |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (revision 0) |
| @@ -0,0 +1,81 @@ |
| +package org.apache.lucene.benchmark.byTask.feeds; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.IOException; |
| +import java.util.Random; |
| + |
| +import org.apache.lucene.benchmark.byTask.utils.Config; |
| +import org.apache.lucene.facet.index.CategoryContainer; |
| +import org.apache.lucene.facet.taxonomy.CategoryPath; |
| + |
| +/** |
| + * Simple implementation of a random facet source |
| + * <p> |
| + * Supports the following parameters: |
| + * <ul> |
| + * <li><b>rand.seed</b> - defines the seed to initialize Random with (default: <b>13</b>). |
| + * <li><b>max.doc.facets</b> - maximal #facets per doc (default: <b>200</b>). |
| + * Actual number of facets in a certain doc would be anything between 1 and that number. |
| + * <li><b>max.facet.length</b> - maximal #components in a facet (default: <b>10</b>). |
| + * Actual number of components in a certain facet would be anything between 1 and that number. |
| + * </ul> |
| + */ |
| +public class RandomFacetSource extends FacetSource { |
| + |
| + Random random; |
| + |
| + private int maxDocFacets = 200; |
| + private int maxFacetDepth = 10; |
| + private int maxValue = maxDocFacets * maxFacetDepth; |
| + |
| + @Override |
| + public CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException { |
| + if (facets == null) { |
| + facets = new CategoryContainer(); |
| + } else { |
| + facets.clear(); |
| + } |
| + int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc |
| + for (int i=0; i<numFacets; i++) { |
| + CategoryPath cp = new CategoryPath(); |
| + int length = 1 + random.nextInt(maxFacetDepth-1); // length 0 is not useful |
| + for (int k=0; k<length; k++) { |
| + cp.add(Integer.toString(random.nextInt(maxValue))); |
| + addItem(); |
| + } |
| + facets.addCategory(cp); |
| + addBytes(cp.toString().length()); // very rough approximation |
| + } |
| + return facets; |
| + } |
| + |
| + @Override |
| + public void close() throws IOException { |
| + // nothing to do here |
| + } |
| + |
| + @Override |
| + public void setConfig(Config config) { |
| + super.setConfig(config); |
| + random = new Random(config.get("rand.seed", 13)); |
| + maxDocFacets = config.get("max.doc.facets", 200); |
| + maxFacetDepth = config.get("max.facet.length", 10); |
| + maxValue = maxDocFacets * maxFacetDepth; |
| + } |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (working copy) |
| @@ -289,7 +289,7 @@ |
| // here, everything else is already private to that thread, so we're safe. |
| try { |
| docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType); |
| - addDoc(); |
| + addItem(); |
| } catch (InterruptedException ie) { |
| throw new ThreadInterruptedException(ie); |
| } |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (working copy) |
| @@ -17,13 +17,8 @@ |
| * limitations under the License. |
| */ |
| |
| -import java.io.File; |
| import java.io.IOException; |
| -import java.util.ArrayList; |
| -import java.util.Arrays; |
| |
| -import org.apache.lucene.benchmark.byTask.utils.Config; |
| - |
| /** |
| * Represents content from a specified source, such as TREC, Reuters etc. A |
| * {@link ContentSource} is responsible for creating {@link DocData} objects for |
| @@ -31,119 +26,13 @@ |
| * of various statistics, such as how many documents were generated, size in |
| * bytes etc. |
| * <p> |
| - * Supports the following configuration parameters: |
| - * <ul> |
| - * <li><b>content.source.forever</b> - specifies whether to generate documents |
| - * forever (<b>default=true</b>). |
| - * <li><b>content.source.verbose</b> - specifies whether messages should be |
| - * output by the content source (<b>default=false</b>). |
| - * <li><b>content.source.encoding</b> - specifies which encoding to use when |
| - * reading the files of that content source. Certain implementations may define |
| - * a default value if this parameter is not specified. (<b>default=null</b>). |
| - * <li><b>content.source.log.step</b> - specifies for how many documents a |
| - * message should be logged. If set to 0 it means no logging should occur. |
| - * <b>NOTE:</b> if verbose is set to false, logging should not occur even if |
| - * logStep is not 0 (<b>default=0</b>). |
| - * </ul> |
| + * For supported configuration parameters see {@link ContentItemsSource}. |
| */ |
| -public abstract class ContentSource { |
| +public abstract class ContentSource extends ContentItemsSource { |
| |
| - private long bytesCount; |
| - private long totalBytesCount; |
| - private int docsCount; |
| - private int totalDocsCount; |
| - private Config config; |
| - |
| - protected boolean forever; |
| - protected int logStep; |
| - protected boolean verbose; |
| - protected String encoding; |
| - |
| - /** update count of bytes generated by this source */ |
| - protected final synchronized void addBytes(long numBytes) { |
| - bytesCount += numBytes; |
| - totalBytesCount += numBytes; |
| - } |
| - |
| - /** update count of documents generated by this source */ |
| - protected final synchronized void addDoc() { |
| - ++docsCount; |
| - ++totalDocsCount; |
| - } |
| - |
| - /** |
| - * A convenience method for collecting all the files of a content source from |
| - * a given directory. The collected {@link File} instances are stored in the |
| - * given <code>files</code>. |
| - */ |
| - protected final void collectFiles(File dir, ArrayList<File> files) { |
| - if (!dir.canRead()) { |
| - return; |
| - } |
| - |
| - File[] dirFiles = dir.listFiles(); |
| - Arrays.sort(dirFiles); |
| - for (int i = 0; i < dirFiles.length; i++) { |
| - File file = dirFiles[i]; |
| - if (file.isDirectory()) { |
| - collectFiles(file, files); |
| - } else if (file.canRead()) { |
| - files.add(file); |
| - } |
| - } |
| - } |
| - |
| - /** |
| - * Returns true whether it's time to log a message (depending on verbose and |
| - * the number of documents generated). |
| - */ |
| - protected final boolean shouldLog() { |
| - return verbose && logStep > 0 && docsCount % logStep == 0; |
| - } |
| - |
| - /** Called when reading from this content source is no longer required. */ |
| - public abstract void close() throws IOException; |
| - |
| - /** Returns the number of bytes generated since last reset. */ |
| - public final long getBytesCount() { return bytesCount; } |
| - |
| - /** Returns the number of generated documents since last reset. */ |
| - public final int getDocsCount() { return docsCount; } |
| - |
| - public final Config getConfig() { return config; } |
| - |
| - /** Returns the next {@link DocData} from the content source. */ |
| + /** Returns the next {@link DocData} from the content source. |
| + * Implementations must account for multi-threading, as multiple threads |
| + * can call this method simultaneously. */ |
| public abstract DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException; |
| |
| - /** Returns the total number of bytes that were generated by this source. */ |
| - public final long getTotalBytesCount() { return totalBytesCount; } |
| - |
| - /** Returns the total number of generated documents. */ |
| - public final int getTotalDocsCount() { return totalDocsCount; } |
| - |
| - /** |
| - * Resets the input for this content source, so that the test would behave as |
| - * if it was just started, input-wise. |
| - * <p> |
| - * <b>NOTE:</b> the default implementation resets the number of bytes and |
| - * documents generated since the last reset, so it's important to call |
| - * super.resetInputs in case you override this method. |
| - */ |
| - public void resetInputs() throws IOException { |
| - bytesCount = 0; |
| - docsCount = 0; |
| - } |
| - |
| - /** |
| - * Sets the {@link Config} for this content source. If you override this |
| - * method, you must call super.setConfig. |
| - */ |
| - public void setConfig(Config config) { |
| - this.config = config; |
| - forever = config.get("content.source.forever", true); |
| - logStep = config.get("content.source.log.step", 0); |
| - verbose = config.get("content.source.verbose", false); |
| - encoding = config.get("content.source.encoding", null); |
| - } |
| - |
| } |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java (revision 0) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java (revision 0) |
| @@ -0,0 +1,180 @@ |
| +package org.apache.lucene.benchmark.byTask.feeds; |
| + |
| +/** |
| + * Licensed to the Apache Software Foundation (ASF) under one or more |
| + * contributor license agreements. See the NOTICE file distributed with |
| + * this work for additional information regarding copyright ownership. |
| + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| + * (the "License"); you may not use this file except in compliance with |
| + * the License. You may obtain a copy of the License at |
| + * |
| + * http://www.apache.org/licenses/LICENSE-2.0 |
| + * |
| + * Unless required by applicable law or agreed to in writing, software |
| + * distributed under the License is distributed on an "AS IS" BASIS, |
| + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| + * See the License for the specific language governing permissions and |
| + * limitations under the License. |
| + */ |
| + |
| +import java.io.File; |
| +import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.Arrays; |
| + |
| +import org.apache.lucene.benchmark.byTask.utils.Config; |
| +import org.apache.lucene.benchmark.byTask.utils.Format; |
| + |
| +/** |
| + * Base class for a source of data for benchmarking
| + * <p> |
| + * Keeps track of various statistics, such as how many data items were generated, |
| + * size in bytes etc. |
| + * <p> |
| + * Supports the following configuration parameters: |
| + * <ul> |
| + * <li><b>content.source.forever</b> - specifies whether to generate items |
| + * forever (<b>default=true</b>). |
| + * <li><b>content.source.verbose</b> - specifies whether messages should be |
| + * output by the content source (<b>default=false</b>). |
| + * <li><b>content.source.encoding</b> - specifies which encoding to use when |
| + * reading the files of that content source. Certain implementations may define |
| + * a default value if this parameter is not specified. (<b>default=null</b>). |
| + * <li><b>content.source.log.step</b> - specifies for how many items a |
| + * message should be logged. If set to 0 it means no logging should occur. |
| + * <b>NOTE:</b> if verbose is set to false, logging should not occur even if |
| + * logStep is not 0 (<b>default=0</b>). |
| + * </ul> |
| + */ |
| +public abstract class ContentItemsSource { |
| + |
| + private long bytesCount; |
| + private long totalBytesCount; |
| + private int itemCount; |
| + private int totalItemCount; |
| + private Config config; |
| + |
| + private int lastPrintedNumUniqueTexts = 0; |
| + private long lastPrintedNumUniqueBytes = 0; |
| + private int printNum = 0; |
| + |
| + protected boolean forever; |
| + protected int logStep; |
| + protected boolean verbose; |
| + protected String encoding; |
| + |
| + /** update count of bytes generated by this source */ |
| + protected final synchronized void addBytes(long numBytes) { |
| + bytesCount += numBytes; |
| + totalBytesCount += numBytes; |
| + } |
| + |
| + /** update count of items generated by this source */ |
| + protected final synchronized void addItem() { |
| + ++itemCount; |
| + ++totalItemCount; |
| + } |
| + |
| + /** |
| + * A convenience method for collecting all the files of a content source from |
| + * a given directory. The collected {@link File} instances are stored in the |
| + * given <code>files</code>. |
| + */ |
| + protected final void collectFiles(File dir, ArrayList<File> files) { |
| + if (!dir.canRead()) { |
| + return; |
| + } |
| + |
| + File[] dirFiles = dir.listFiles(); |
| + Arrays.sort(dirFiles); |
| + for (int i = 0; i < dirFiles.length; i++) { |
| + File file = dirFiles[i]; |
| + if (file.isDirectory()) { |
| + collectFiles(file, files); |
| + } else if (file.canRead()) { |
| + files.add(file); |
| + } |
| + } |
| + } |
| + |
| + /** |
| + * Returns true if it's time to log a message (depending on verbose and
| + * the number of items generated). |
| + */ |
| + protected final boolean shouldLog() { |
| + return verbose && logStep > 0 && itemCount % logStep == 0; |
| + } |
| + |
| + /** Called when reading from this content source is no longer required. */ |
| + public abstract void close() throws IOException; |
| + |
| + /** Returns the number of bytes generated since last reset. */ |
| + public final long getBytesCount() { return bytesCount; } |
| + |
| + /** Returns the number of generated items since last reset. */ |
| + public final int getItemsCount() { return itemCount; } |
| + |
| + public final Config getConfig() { return config; } |
| + |
| + /** Returns the total number of bytes that were generated by this source. */ |
| + public final long getTotalBytesCount() { return totalBytesCount; } |
| + |
| + /** Returns the total number of generated items. */ |
| + public final int getTotalItemsCount() { return totalItemCount; } |
| + |
| + /** |
| + * Resets the input for this content source, so that the test would behave as |
| + * if it was just started, input-wise. |
| + * <p> |
| + * <b>NOTE:</b> the default implementation resets the number of bytes and |
| + * items generated since the last reset, so it's important to call |
| + * super.resetInputs in case you override this method. |
| + */ |
| + @SuppressWarnings("unused") |
| + public void resetInputs() throws IOException { |
| + bytesCount = 0; |
| + itemCount = 0; |
| + } |
| + |
| + /** |
| + * Sets the {@link Config} for this content source. If you override this |
| + * method, you must call super.setConfig. |
| + */ |
| + public void setConfig(Config config) { |
| + this.config = config; |
| + forever = config.get("content.source.forever", true); |
| + logStep = config.get("content.source.log.step", 0); |
| + verbose = config.get("content.source.verbose", false); |
| + encoding = config.get("content.source.encoding", null); |
| + } |
| + |
| + public void printStatistics(String itemsName) { |
| + boolean print = false; |
| + String col = " "; |
| + StringBuilder sb = new StringBuilder(); |
| + String newline = System.getProperty("line.separator"); |
| + sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); |
| + int nut = getTotalItemsCount(); |
| + if (nut > lastPrintedNumUniqueTexts) { |
| + print = true; |
| + sb.append("total count of "+itemsName+": ").append(Format.format(0,nut,col)).append(newline); |
| + lastPrintedNumUniqueTexts = nut; |
| + } |
| + long nub = getTotalBytesCount(); |
| + if (nub > lastPrintedNumUniqueBytes) { |
| + print = true; |
| + sb.append("total bytes of "+itemsName+": ").append(Format.format(0,nub,col)).append(newline); |
| + lastPrintedNumUniqueBytes = nub; |
| + } |
| + if (getItemsCount() > 0) { |
| + print = true; |
| + sb.append("num "+itemsName+" added since last inputs reset: ").append(Format.format(0,getItemsCount(),col)).append(newline); |
| + sb.append("total bytes added for "+itemsName+" since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline); |
| + } |
| + if (print) { |
| + System.out.println(sb.append(newline).toString()); |
| + printNum++; |
| + } |
| + } |
| + |
| +} |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (working copy) |
| @@ -31,7 +31,6 @@ |
| import java.text.ParsePosition; |
| |
| import org.apache.lucene.benchmark.byTask.utils.Config; |
| -import org.apache.lucene.benchmark.byTask.utils.Format; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.NumericField; |
| @@ -189,13 +188,8 @@ |
| protected boolean reuseFields; |
| protected boolean indexProperties; |
| |
| - private int lastPrintedNumUniqueTexts = 0; |
| - |
| - private long lastPrintedNumUniqueBytes = 0; |
| private final AtomicInteger numDocsCreated = new AtomicInteger(); |
| |
| - private int printNum = 0; |
| - |
| // create a doc |
| // use only part of the body, modify it to keep the rest (or use all if size==0). |
| // reset the docdata properties so they are not added more than once. |
| @@ -397,38 +391,9 @@ |
| return doc; |
| } |
| |
| - public void printDocStatistics() { |
| - boolean print = false; |
| - String col = " "; |
| - StringBuilder sb = new StringBuilder(); |
| - String newline = System.getProperty("line.separator"); |
| - sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); |
| - int nut = source.getTotalDocsCount(); |
| - if (nut > lastPrintedNumUniqueTexts) { |
| - print = true; |
| - sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline); |
| - lastPrintedNumUniqueTexts = nut; |
| - } |
| - long nub = getTotalBytesCount(); |
| - if (nub > lastPrintedNumUniqueBytes) { |
| - print = true; |
| - sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline); |
| - lastPrintedNumUniqueBytes = nub; |
| - } |
| - if (source.getDocsCount() > 0) { |
| - print = true; |
| - sb.append("num docs added since last inputs reset: ").append(Format.format(0,source.getDocsCount(),col)).append(newline); |
| - sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline); |
| - } |
| - if (print) { |
| - System.out.println(sb.append(newline).toString()); |
| - printNum++; |
| - } |
| - } |
| - |
| /** Reset inputs so that the test run would behave, input wise, as if it just started. */ |
| public synchronized void resetInputs() throws IOException { |
| - printDocStatistics(); |
| + source.printStatistics("docs"); |
| // re-initiate since properties by round may have changed. |
| setConfig(config); |
| source.resetInputs(); |
| Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java |
| =================================================================== |
| --- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (revision 1180173) |
| +++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (working copy) |
| @@ -24,6 +24,7 @@ |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.benchmark.byTask.feeds.DocMaker; |
| +import org.apache.lucene.benchmark.byTask.feeds.FacetSource; |
| import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; |
| import org.apache.lucene.benchmark.byTask.stats.Points; |
| import org.apache.lucene.benchmark.byTask.tasks.ReadTask; |
| @@ -31,12 +32,15 @@ |
| import org.apache.lucene.benchmark.byTask.utils.Config; |
| import org.apache.lucene.benchmark.byTask.utils.FileUtils; |
| import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyReader; |
| +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.FSDirectory; |
| import org.apache.lucene.store.RAMDirectory; |
| +import org.apache.lucene.util.IOUtils; |
| |
| /** |
| * Data maintained by a performance test run. |
| @@ -45,11 +49,21 @@ |
| * <ul> |
| * <li>Configuration. |
| * <li>Directory, Writer, Reader. |
| - * <li>Docmaker and a few instances of QueryMaker. |
| + * <li>Taxonomy Directory, Writer, Reader. |
| + * <li>DocMaker, FacetSource and a few instances of QueryMaker. |
| * <li>Analyzer. |
| * <li>Statistics data which updated during the run. |
| * </ul> |
| - * Config properties: work.dir=<path to root of docs and index dirs| Default: work> |
| + * Config properties: |
| + * <ul> |
| + * <li><b>work.dir</b>=<path to root of docs and index dirs| Default: work> |
| + * <li><b>analyzer</b>=<class name for analyzer| Default: StandardAnalyzer> |
| + * <li><b>doc.maker</b>=<class name for doc-maker| Default: DocMaker> |
| + * <li><b>facet.source</b>=<class name for facet-source| Default: RandomFacetSource> |
| + * <li><b>query.maker</b>=<class name for query-maker| Default: SimpleQueryMaker> |
| + * <li><b>log.queries</b>=<whether queries should be printed| Default: false> |
| + * <li><b>directory</b>=<type of directory to use for the index| Default: RAMDirectory> |
| + * <li><b>taxonomy.directory</b>=<type of directory for taxonomy index| Default: RAMDirectory> |
| * </ul> |
| */ |
| public class PerfRunData { |
| @@ -62,7 +76,12 @@ |
| private Directory directory; |
| private Analyzer analyzer; |
| private DocMaker docMaker; |
| + private FacetSource facetSource; |
| private Locale locale; |
| + |
| + private Directory taxonomyDir; |
| + private TaxonomyWriter taxonomyWriter; |
| + private TaxonomyReader taxonomyReader; |
| |
| // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately. |
| private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker; |
| @@ -73,6 +92,7 @@ |
| private IndexWriter indexWriter; |
| private Config config; |
| private long startTimeMillis; |
| + |
| |
| // constructor |
| public PerfRunData (Config config) throws Exception { |
| @@ -84,6 +104,10 @@ |
| docMaker = Class.forName(config.get("doc.maker", |
| "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance(); |
| docMaker.setConfig(config); |
| + // facet source |
| + facetSource = Class.forName(config.get("facet.source", |
| + "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance(); |
| + facetSource.setConfig(config); |
| // query makers |
| readTaskQueryMaker = new HashMap<Class<? extends ReadTask>,QueryMaker>(); |
| qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class); |
| @@ -104,30 +128,17 @@ |
| public void reinit(boolean eraseIndex) throws Exception { |
| |
| // cleanup index |
| - if (indexWriter!=null) { |
| - indexWriter.close(); |
| - indexWriter = null; |
| - } |
| - if (indexReader!=null) { |
| - indexReader.close(); |
| - indexReader = null; |
| - } |
| - if (directory!=null) { |
| - directory.close(); |
| - } |
| + IOUtils.close(indexWriter, indexReader, directory); |
| + indexWriter = null; |
| + indexReader = null; |
| + |
| + IOUtils.close(taxonomyWriter, taxonomyReader, taxonomyDir); |
| + taxonomyWriter = null; |
| + taxonomyReader = null; |
| |
| // directory (default is ram-dir). |
| - if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) { |
| - File workDir = new File(config.get("work.dir","work")); |
| - File indexDir = new File(workDir,"index"); |
| - if (eraseIndex && indexDir.exists()) { |
| - FileUtils.fullyDelete(indexDir); |
| - } |
| - indexDir.mkdirs(); |
| - directory = FSDirectory.open(indexDir); |
| - } else { |
| - directory = new RAMDirectory(); |
| - } |
| + directory = createDirectory(eraseIndex, "index", "directory"); |
| + taxonomyDir = createDirectory(eraseIndex, "taxo", "taxonomy.directory"); |
| |
| // inputs |
| resetInputs(); |
| @@ -139,6 +150,21 @@ |
| // Re-init clock |
| setStartTimeMillis(); |
| } |
| + |
| + private Directory createDirectory(boolean eraseIndex, String dirName, |
| + String dirParam) throws IOException { |
| + if ("FSDirectory".equals(config.get(dirParam,"RAMDirectory"))) { |
| + File workDir = new File(config.get("work.dir","work")); |
| + File indexDir = new File(workDir,dirName); |
| + if (eraseIndex && indexDir.exists()) { |
| + FileUtils.fullyDelete(indexDir); |
| + } |
| + indexDir.mkdirs(); |
| + return FSDirectory.open(indexDir); |
| + } |
| + |
| + return new RAMDirectory(); |
| + } |
| |
| public long setStartTimeMillis() { |
| startTimeMillis = System.currentTimeMillis(); |
| @@ -174,6 +200,57 @@ |
| } |
| |
| /** |
| + * @return Returns the taxonomy directory |
| + */ |
| + public Directory getTaxonomyDir() { |
| + return taxonomyDir; |
| + } |
| + |
| + /** |
| + * Set the taxonomy reader. Takes ownership of that taxonomy reader, that is, |
| + * internally performs taxoReader.incRef() (If caller no longer needs that |
| + * reader it should decRef()/close() it after calling this method, otherwise, |
| + * the reader will remain open). |
| + * @param taxoReader The taxonomy reader to set. |
| + */ |
| + public synchronized void setTaxonomyReader(TaxonomyReader taxoReader) throws IOException { |
| + if (taxoReader == this.taxonomyReader) { |
| + return; |
| + } |
| + if (taxonomyReader != null) { |
| + taxonomyReader.decRef(); |
| + } |
| + |
| + if (taxoReader != null) { |
| + taxoReader.incRef(); |
| + } |
| + this.taxonomyReader = taxoReader; |
| + } |
| + |
| + /** |
| + * @return Returns the taxonomyReader. NOTE: this returns a |
| + * reference. You must call TaxonomyReader.decRef() when |
| + * you're done. |
| + */ |
| + public synchronized TaxonomyReader getTaxonomyReader() { |
| + if (taxonomyReader != null) { |
| + taxonomyReader.incRef(); |
| + } |
| + return taxonomyReader; |
| + } |
| + |
| + /** |
| + * @param taxoWriter The taxonomy writer to set. |
| + */ |
| + public void setTaxonomyWriter(TaxonomyWriter taxoWriter) { |
| + this.taxonomyWriter = taxoWriter; |
| + } |
| + |
| + public TaxonomyWriter getTaxonomyWriter() { |
| + return taxonomyWriter; |
| + } |
| + |
| + /** |
| * @return Returns the indexReader. NOTE: this returns a |
| * reference. You must call IndexReader.decRef() when |
| * you're done. |
| @@ -198,13 +275,22 @@ |
| } |
| |
| /** |
| + * Set the index reader. Takes ownership of that index reader, that is, |
| + * internally performs indexReader.incRef() (If caller no longer needs that |
| + * reader it should decRef()/close() it after calling this method, otherwise, |
| + * the reader will remain open). |
| * @param indexReader The indexReader to set. |
| */ |
| public synchronized void setIndexReader(IndexReader indexReader) throws IOException { |
| + if (indexReader == this.indexReader) { |
| + return; |
| + } |
| + |
| if (this.indexReader != null) { |
| // Release current IR |
| this.indexReader.decRef(); |
| } |
| + |
| this.indexReader = indexReader; |
| if (indexReader != null) { |
| // Hold reference to new IR |
| @@ -246,6 +332,11 @@ |
| return docMaker; |
| } |
| |
| + /** Returns the facet source. */ |
| + public FacetSource getFacetSource() { |
| + return facetSource; |
| + } |
| + |
| /** |
| * @return the locale |
| */ |
| @@ -269,6 +360,7 @@ |
| |
| public void resetInputs() throws IOException { |
| docMaker.resetInputs(); |
| + facetSource.resetInputs(); |
| for (final QueryMaker queryMaker : readTaskQueryMaker.values()) { |
| queryMaker.resetInputs(); |
| } |
| Index: lucene/contrib/benchmark/build.xml |
| =================================================================== |
| --- lucene/contrib/benchmark/build.xml (revision 1180173) |
| +++ lucene/contrib/benchmark/build.xml (working copy) |
| @@ -130,6 +130,7 @@ |
| <pathelement path="${memory.jar}"/> |
| <pathelement path="${highlighter.jar}"/> |
| <pathelement path="${analyzers-common.jar}"/> |
| + <pathelement path="${facet.jar}"/> |
| <path refid="base.classpath"/> |
| <fileset dir="lib"> |
| <include name="**/*.jar"/> |
| @@ -218,7 +219,7 @@ |
| <echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo> |
| </target> |
| |
| - <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common"/> |
| + <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-facet"/> |
| |
| <target name="clean-javacc"> |
| <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java"> |
| Index: lucene/contrib/contrib-build.xml |
| =================================================================== |
| --- lucene/contrib/contrib-build.xml (revision 1180173) |
| +++ lucene/contrib/contrib-build.xml (working copy) |
| @@ -129,6 +129,17 @@ |
| <property name="analyzers-common.uptodate" value="true"/> |
| </target> |
| |
| + <property name="facet.jar" value="${common.dir}/build/contrib/facet/lucene-facet-${version}.jar"/> |
| + <target name="check-facet-uptodate" unless="facet.uptodate"> |
| + <contrib-uptodate name="facet" jarfile="${facet.jar}" property="facet.uptodate"/> |
| + </target> |
| + <target name="jar-facet" unless="facet.uptodate" depends="check-facet-uptodate"> |
| + <ant dir="${common.dir}/contrib/facet" target="jar-core" inheritall="false"> |
| + <propertyset refid="uptodate.and.compiled.properties"/> |
| + </ant> |
| + <property name="facet.uptodate" value="true"/> |
| + </target> |
| + |
| <property name="analyzers-smartcn.jar" value="${common.dir}/build/contrib/analyzers/smartcn/lucene-smartcn-${version}.jar"/> |
| <target name="check-analyzers-smartcn-uptodate" unless="analyzers-smartcn.uptodate"> |
| <contrib-uptodate name="analyzers/smartcn" jarfile="${analyzers-smartcn.jar}" property="analyzers-smartcn.uptodate"/> |