Index: lucene/contrib/benchmark/conf/facets.alg
===================================================================
--- lucene/contrib/benchmark/conf/facets.alg (revision 0)
+++ lucene/contrib/benchmark/conf/facets.alg (revision 0)
@@ -0,0 +1,70 @@
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements. See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License. You may obtain a copy of the License at
+# *
+# * http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+
+with.facets=facets:true:false
+
+compound=true
+analyzer=org.apache.lucene.analysis.standard.StandardAnalyzer
+directory=FSDirectory
+taxonomy.directory=FSDirectory
+
+doc.stored=true
+doc.tokenized=true
+doc.term.vector=false
+log.step=300
+
+docs.dir=reuters-out
+
+content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
+
+facet.source=org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource
+rand.seed=10
+max.doc.facets=120
+max.facet.length=8
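+# each doc gets between 1 and max.doc.facets random facets, each between 1 and max.facet.length path components deep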
+
+query.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker
+
+task.max.depth.log=2
+
+#log.queries=true
+# -------------------------------------------------------------------------------------
+
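+# 4 rounds; with.facets alternates between true and false per round, comparing indexing with and without facets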
+{ "Rounds"
+ ResetSystemErase
+ { "Populate"
+ -CreateIndex
+ -CreateTaxonomyIndex
+ { "MAddDocs" AddFacetedDoc > : 400
+ -Optimize
+ -CloseIndex
+ -CloseTaxonomyIndex
+ }
+
+ OpenReader
+ { "SearchSameRdr" Search > : 400
+ CloseReader
+
+ RepSumByNameRound
+ ResetSystemErase
+ NewRound
+} : 4
+
+RepSumByPrefRound Search
+RepSumByPrefRound Populate
+RepSumByPrefRound MAddDocs
+
Index: lucene/contrib/benchmark/CHANGES.txt
===================================================================
--- lucene/contrib/benchmark/CHANGES.txt (revision 1180173)
+++ lucene/contrib/benchmark/CHANGES.txt (working copy)
@@ -5,6 +5,10 @@
For more information on past and future Lucene versions, please see:
http://s.apache.org/luceneversions
+10/07/2011
+ LUCENE-3262: Facet benchmarking: added benchmark tasks and content sources for indexing
+ with facets, demonstrated in facets.alg. (Doron Cohen)
+
09/25/2011
LUCENE-3457: Upgrade commons-compress to 1.2 (and undo LUCENE-2980's workaround).
(Doron Cohen)
Index: lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
===================================================================
--- lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (revision 1180173)
+++ lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (working copy)
@@ -38,6 +38,7 @@
import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
import org.apache.lucene.collation.CollationKeyAnalyzer;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@@ -773,6 +774,42 @@
}
/**
+ * Test indexing with facets tasks.
+ */
+ public void testIndexingWithFacets() throws Exception {
+ // 1. alg definition (required in every "logic" test)
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=100",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "doc.stored=false",
+ "merge.factor=3",
+ "doc.tokenized=false",
+ "debug.level=1",
+ "# ----- alg ",
+ "ResetSystemErase",
+ "CreateIndex",
+ "CreateTaxonomyIndex",
+ "{ \"AddDocs\" AddFacetedDoc > : * ",
+ "CloseIndex",
+ "CloseTaxonomyIndex",
+ "OpenTaxonomyReader",
+ };
+
+ // 2. execute the algorithm (required in every "logic" test)
+ Benchmark benchmark = execBenchmark(algLines);
+ PerfRunData runData = benchmark.getRunData();
+ assertNull("taxo writer was not properly closed",runData.getTaxonomyWriter());
+ TaxonomyReader taxoReader = runData.getTaxonomyReader();
+ assertNotNull("taxo reader was not opened", taxoReader);
+ assertTrue("nothing was added to the taxnomy (expecting root and at least one addtional category)",taxoReader.getSize()>1);
+ taxoReader.close();
+ }
+
+ /**
* Test that we can call optimize(maxNumSegments).
*/
public void testOptimizeMaxNumSegments() throws Exception {
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CommitTaxonomyIndexTask.java (revision 0)
@@ -0,0 +1,41 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
+
+/**
+ * Commits the Taxonomy Index.
+ */
+public class CommitTaxonomyIndexTask extends PerfTask {
+ public CommitTaxonomyIndexTask(PerfRunData runData) {
+ super(runData);
+ }
+
+ @Override
+ public int doLogic() throws Exception {
+ TaxonomyWriter taxonomyWriter = getRunData().getTaxonomyWriter();
+ if (taxonomyWriter != null) {
+ taxonomyWriter.commit();
+ } else {
+ throw new IllegalStateException("TaxonomyWriter is not currently open");
+ }
+
+ return 1;
+ }
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyReaderTask.java (revision 0)
@@ -0,0 +1,45 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyReader;
+
+/**
+ * Open a taxonomy index reader.
+ * <br>Other side effects: taxonomy reader object in perfRunData is set.
+ */
+public class OpenTaxonomyReaderTask extends PerfTask {
+
+ public OpenTaxonomyReaderTask(PerfRunData runData) {
+ super(runData);
+ }
+
+ @Override
+ public int doLogic() throws IOException {
+ PerfRunData runData = getRunData();
+ LuceneTaxonomyReader taxoReader = new LuceneTaxonomyReader(runData.getTaxonomyDir());
+ runData.setTaxonomyReader(taxoReader);
+ // We transfer reference to the run data
+ taxoReader.decRef();
+ return 1;
+ }
+
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyReaderTask.java (revision 0)
@@ -0,0 +1,46 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+
+/**
+ * Close taxonomy reader.
+ * <br>Other side effects: taxonomy reader in perfRunData is nullified.
+ */
+public class CloseTaxonomyReaderTask extends PerfTask {
+
+ public CloseTaxonomyReaderTask(PerfRunData runData) {
+ super(runData);
+ }
+
+ @Override
+ public int doLogic() throws IOException {
+ TaxonomyReader taxoReader = getRunData().getTaxonomyReader();
+ getRunData().setTaxonomyReader(null);
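+ // getTaxonomyReader() incRef'd the reader for this task and setTaxonomyReader(null) released the run data's
+ // own reference, so a reference count other than 1 here suggests some other task still holds the reader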
+ if (taxoReader.getRefCount() != 1) {
+ System.out.println("WARNING: CloseTaxonomyReader: reference count is currently " + taxoReader.getRefCount());
+ }
+ taxoReader.close();
+ return 1;
+ }
+
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/OpenTaxonomyIndexTask.java (revision 0)
@@ -0,0 +1,42 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import java.io.IOException;
+
+
+/**
+ * Open a taxonomy index.
+ * <br>Other side effects: taxonomy writer object in perfRunData is set.
+ */
+public class OpenTaxonomyIndexTask extends PerfTask {
+
+ public OpenTaxonomyIndexTask(PerfRunData runData) {
+ super(runData);
+ }
+
+ @Override
+ public int doLogic() throws IOException {
+ PerfRunData runData = getRunData();
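+ // unlike CreateTaxonomyIndexTask, no OpenMode.CREATE here - an existing taxonomy index is opened rather than overwritten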
+ runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir()));
+ return 1;
+ }
+
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateTaxonomyIndexTask.java (revision 0)
@@ -0,0 +1,44 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.taxonomy.lucene.LuceneTaxonomyWriter;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+
+import java.io.IOException;
+
+
+/**
+ * Create a taxonomy index.
+ * <br>Other side effects: taxonomy writer object in perfRunData is set.
+ */
+public class CreateTaxonomyIndexTask extends PerfTask {
+
+ public CreateTaxonomyIndexTask(PerfRunData runData) {
+ super(runData);
+ }
+
+ @Override
+ public int doLogic() throws IOException {
+ PerfRunData runData = getRunData();
+ runData.setTaxonomyWriter(new LuceneTaxonomyWriter(runData.getTaxonomyDir(), OpenMode.CREATE));
+ return 1;
+ }
+
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddFacetedDocTask.java (revision 0)
@@ -0,0 +1,76 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.facet.index.CategoryContainer;
+import org.apache.lucene.facet.index.CategoryDocumentBuilder;
+
+/**
+ * Add a faceted document.
+ * <p>
+ * Config properties:
+ * <ul>
+ * <li><b>with.facets</b>=&lt;optional: whether to actually add any facets to the document| Default: true&gt;
+ * <br>This config property makes it easy to compare the performance of adding docs with and without facets.
+ * Note that facets are created even when this is false; they are just not added to the document (nor to the taxonomy).
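+ * <br>For example, facets.alg sets <code>with.facets=facets:true:false</code> so that rounds alternate between indexing with and without facets.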
+ * </ul>
+ * <p>
+ * See {@link AddDocTask} for general document parameters and configuration.
+ */
+public class AddFacetedDocTask extends AddDocTask {
+
+ public AddFacetedDocTask(PerfRunData runData) {
+ super(runData);
+ }
+
+ private CategoryContainer facets = null;
+ private CategoryDocumentBuilder categoryDocBuilder = null;
+ private boolean withFacets = true;
+
+ @Override
+ public void setup() throws Exception {
+ super.setup();
+ // create the facets even when they will not be added - this isolates the cost of actually adding facets to the doc and taxonomy
+ facets = getRunData().getFacetSource().getNextFacets(facets);
+ withFacets = getRunData().getConfig().get("with.facets", true);
+ if (withFacets) {
+ categoryDocBuilder = new CategoryDocumentBuilder(getRunData().getTaxonomyWriter());
+ categoryDocBuilder.setCategories(facets);
+ }
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ super.tearDown();
+ }
+
+ @Override
+ protected String getLogMessage(int recsCount) {
+ return "added " + recsCount + " docs with "+(withFacets ? "facets" : "no facets");
+ }
+
+ @Override
+ public int doLogic() throws Exception {
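+ // the facets prepared in setup() are added as category fields to the doc here; AddDocTask.doLogic() then indexes it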
+ if (withFacets) {
+ categoryDocBuilder.build(doc);
+ }
+ return super.doLogic();
+ }
+
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CloseTaxonomyIndexTask.java (revision 0)
@@ -0,0 +1,43 @@
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Close taxonomy index.
+ * <br>Other side effects: taxonomy writer object in perfRunData is nullified.
+ */
+public class CloseTaxonomyIndexTask extends PerfTask {
+
+ public CloseTaxonomyIndexTask(PerfRunData runData) {
+ super(runData);
+ }
+
+ @Override
+ public int doLogic() throws IOException {
+ IOUtils.close(getRunData().getTaxonomyWriter());
+ getRunData().setTaxonomyWriter(null);
+
+ return 1;
+ }
+
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (revision 1180173)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java (working copy)
@@ -22,7 +22,7 @@
import org.apache.lucene.document.Document;
/**
- * Add a document, optionally with of a certain size.
+ * Add a document, optionally of a certain size.
* <br>Other side effects: none.
* <br>Takes optional param: document size.
*/
@@ -34,9 +34,12 @@
private int docSize = 0;
- // volatile data passed between setup(), doLogic(), tearDown().
- private Document doc = null;
-
+ /**
+ * Volatile data passed between setup(), doLogic(), tearDown().
+ * The doc is created in setup() and added to the index in doLogic().
+ */
+ protected Document doc = null;
+
@Override
public void setup() throws Exception {
super.setup();
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FacetSource.java (revision 0)
@@ -0,0 +1,45 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.facet.index.CategoryContainer;
+
+/**
+ * Source items for facets.
+ * <p>
+ * For supported configuration parameters see {@link ContentItemsSource}.
+ */
+public abstract class FacetSource extends ContentItemsSource {
+
+ /** Returns the next {@link CategoryContainer facets content item}.
+ * Implementations must account for multi-threading, as multiple threads
+ * can call this method simultaneously.
+ */
+ public abstract CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException;
+
+ @Override
+ public void resetInputs() throws IOException {
+ printStatistics("facets");
+ // re-initiate since properties by round may have changed.
+ setConfig(getConfig());
+ super.resetInputs();
+ }
+
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/RandomFacetSource.java (revision 0)
@@ -0,0 +1,81 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.facet.index.CategoryContainer;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+
+/**
+ * Simple implementation of a random facet source.
+ * <p>
+ * Supports the following parameters:
+ * <ul>
+ * <li><b>rand.seed</b> - defines the seed to initialize Random with (default: <b>13</b>).
+ * <li><b>max.doc.facets</b> - maximal #facets per doc (default: <b>200</b>).
+ * The actual number of facets for a given doc is anything between 1 and that number.
+ * <li><b>max.facet.length</b> - maximal #components in a facet (default: <b>10</b>).
+ * The actual number of components in a given facet is anything between 1 and that number.
+ * </ul>
+ */
+public class RandomFacetSource extends FacetSource {
+
+ Random random;
+
+ private int maxDocFacets = 200;
+ private int maxFacetDepth = 10;
+ private int maxValue = maxDocFacets * maxFacetDepth;
+
+ @Override
+ public CategoryContainer getNextFacets(CategoryContainer facets) throws NoMoreDataException, IOException {
+ if (facets == null) {
+ facets = new CategoryContainer();
+ } else {
+ facets.clear();
+ }
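+ // note: assumes max.doc.facets and max.facet.length are at least 2; otherwise Random.nextInt(0) below would throw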
+ int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet per doc
+ for (int i=0; i<numFacets; i++) {
+ CategoryPath cp = new CategoryPath();
+ int length = 1 + random.nextInt(maxFacetDepth-1); // length 0 is not useful
+ for (int k=0; k<length; k++) {
+ cp.add(Integer.toString(random.nextInt(maxValue)));
+ addItem();
+ }
+ facets.addCategory(cp);
+ addBytes(cp.toString().length()); // very rough approximation
+ }
+ return facets;
+ }
+
+ @Override
+ public void close() throws IOException {
+ // nothing to do here
+ }
+
+ @Override
+ public void setConfig(Config config) {
+ super.setConfig(config);
+ random = new Random(config.get("rand.seed", 13));
+ maxDocFacets = config.get("max.doc.facets", 200);
+ maxFacetDepth = config.get("max.facet.length", 10);
+ maxValue = maxDocFacets * maxFacetDepth;
+ }
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (revision 1180173)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java (working copy)
@@ -289,7 +289,7 @@
// here, everything else is already private to that thread, so we're safe.
try {
docData = trecDocParser.parse(docData, name, this, docBuf, parsedPathType);
- addDoc();
+ addItem();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (revision 1180173)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (working copy)
@@ -17,13 +17,8 @@
* limitations under the License.
*/
-import java.io.File;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
/**
* Represents content from a specified source, such as TREC, Reuters etc. A
* {@link ContentSource} is responsible for creating {@link DocData} objects for
@@ -31,119 +26,13 @@
* of various statistics, such as how many documents were generated, size in
* bytes etc.
* <p>
- * Supports the following configuration parameters:
- * <ul>
- * <li><b>content.source.forever</b> - specifies whether to generate documents
- * forever (<b>default=true</b>).
- * <li><b>content.source.verbose</b> - specifies whether messages should be
- * output by the content source (<b>default=false</b>).
- * <li><b>content.source.encoding</b> - specifies which encoding to use when
- * reading the files of that content source. Certain implementations may define
- * a default value if this parameter is not specified. (<b>default=null</b>).
- * <li><b>content.source.log.step</b> - specifies for how many documents a
- * message should be logged. If set to 0 it means no logging should occur.
- * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
- * logStep is not 0 (<b>default=0</b>).
- * </ul>
+ * For supported configuration parameters see {@link ContentItemsSource}.
*/
-public abstract class ContentSource {
+public abstract class ContentSource extends ContentItemsSource {
- private long bytesCount;
- private long totalBytesCount;
- private int docsCount;
- private int totalDocsCount;
- private Config config;
-
- protected boolean forever;
- protected int logStep;
- protected boolean verbose;
- protected String encoding;
-
- /** update count of bytes generated by this source */
- protected final synchronized void addBytes(long numBytes) {
- bytesCount += numBytes;
- totalBytesCount += numBytes;
- }
-
- /** update count of documents generated by this source */
- protected final synchronized void addDoc() {
- ++docsCount;
- ++totalDocsCount;
- }
-
- /**
- * A convenience method for collecting all the files of a content source from
- * a given directory. The collected {@link File} instances are stored in the
- * given <code>files</code>.
- */
- protected final void collectFiles(File dir, ArrayList<File> files) {
- if (!dir.canRead()) {
- return;
- }
-
- File[] dirFiles = dir.listFiles();
- Arrays.sort(dirFiles);
- for (int i = 0; i < dirFiles.length; i++) {
- File file = dirFiles[i];
- if (file.isDirectory()) {
- collectFiles(file, files);
- } else if (file.canRead()) {
- files.add(file);
- }
- }
- }
-
- /**
- * Returns true whether it's time to log a message (depending on verbose and
- * the number of documents generated).
- */
- protected final boolean shouldLog() {
- return verbose && logStep > 0 && docsCount % logStep == 0;
- }
-
- /** Called when reading from this content source is no longer required. */
- public abstract void close() throws IOException;
-
- /** Returns the number of bytes generated since last reset. */
- public final long getBytesCount() { return bytesCount; }
-
- /** Returns the number of generated documents since last reset. */
- public final int getDocsCount() { return docsCount; }
-
- public final Config getConfig() { return config; }
-
- /** Returns the next {@link DocData} from the content source. */
+ /** Returns the next {@link DocData} from the content source.
+ * Implementations must account for multi-threading, as multiple threads
+ * can call this method simultaneously. */
public abstract DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException;
- /** Returns the total number of bytes that were generated by this source. */
- public final long getTotalBytesCount() { return totalBytesCount; }
-
- /** Returns the total number of generated documents. */
- public final int getTotalDocsCount() { return totalDocsCount; }
-
- /**
- * Resets the input for this content source, so that the test would behave as
- * if it was just started, input-wise.
- * <p>
- * <b>NOTE:</b> the default implementation resets the number of bytes and
- * documents generated since the last reset, so it's important to call
- * super.resetInputs in case you override this method.
- */
- public void resetInputs() throws IOException {
- bytesCount = 0;
- docsCount = 0;
- }
-
- /**
- * Sets the {@link Config} for this content source. If you override this
- * method, you must call super.setConfig.
- */
- public void setConfig(Config config) {
- this.config = config;
- forever = config.get("content.source.forever", true);
- logStep = config.get("content.source.log.step", 0);
- verbose = config.get("content.source.verbose", false);
- encoding = config.get("content.source.encoding", null);
- }
-
}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java (revision 0)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentItemsSource.java (revision 0)
@@ -0,0 +1,180 @@
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.utils.Format;
+
+/**
+ * Base class for a source of data for benchmarking.
+ * <p>
+ * Keeps track of various statistics, such as how many data items were generated,
+ * size in bytes etc.
+ * <p>
+ * Supports the following configuration parameters:
+ * <ul>
+ * <li><b>content.source.forever</b> - specifies whether to generate items
+ * forever (<b>default=true</b>).
+ * <li><b>content.source.verbose</b> - specifies whether messages should be
+ * output by the content source (<b>default=false</b>).
+ * <li><b>content.source.encoding</b> - specifies which encoding to use when
+ * reading the files of that content source. Certain implementations may define
+ * a default value if this parameter is not specified. (<b>default=null</b>).
+ * <li><b>content.source.log.step</b> - specifies for how many items a
+ * message should be logged. If set to 0 it means no logging should occur.
+ * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
+ * logStep is not 0 (<b>default=0</b>).
+ * </ul>
+ */
+public abstract class ContentItemsSource {
+
+ private long bytesCount;
+ private long totalBytesCount;
+ private int itemCount;
+ private int totalItemCount;
+ private Config config;
+
+ private int lastPrintedNumUniqueTexts = 0;
+ private long lastPrintedNumUniqueBytes = 0;
+ private int printNum = 0;
+
+ protected boolean forever;
+ protected int logStep;
+ protected boolean verbose;
+ protected String encoding;
+
+ /** update count of bytes generated by this source */
+ protected final synchronized void addBytes(long numBytes) {
+ bytesCount += numBytes;
+ totalBytesCount += numBytes;
+ }
+
+ /** update count of items generated by this source */
+ protected final synchronized void addItem() {
+ ++itemCount;
+ ++totalItemCount;
+ }
+
+ /**
+ * A convenience method for collecting all the files of a content source from
+ * a given directory. The collected {@link File} instances are stored in the
+ * given <code>files</code>.
+ */
+ protected final void collectFiles(File dir, ArrayList<File> files) {
+ if (!dir.canRead()) {
+ return;
+ }
+
+ File[] dirFiles = dir.listFiles();
+ Arrays.sort(dirFiles);
+ for (int i = 0; i < dirFiles.length; i++) {
+ File file = dirFiles[i];
+ if (file.isDirectory()) {
+ collectFiles(file, files);
+ } else if (file.canRead()) {
+ files.add(file);
+ }
+ }
+ }
+
+ /**
+ * Returns true if it's time to log a message (depending on verbose and
+ * the number of items generated).
+ */
+ protected final boolean shouldLog() {
+ return verbose && logStep > 0 && itemCount % logStep == 0;
+ }
+
+ /** Called when reading from this content source is no longer required. */
+ public abstract void close() throws IOException;
+
+ /** Returns the number of bytes generated since last reset. */
+ public final long getBytesCount() { return bytesCount; }
+
+ /** Returns the number of generated items since last reset. */
+ public final int getItemsCount() { return itemCount; }
+
+ public final Config getConfig() { return config; }
+
+ /** Returns the total number of bytes that were generated by this source. */
+ public final long getTotalBytesCount() { return totalBytesCount; }
+
+ /** Returns the total number of generated items. */
+ public final int getTotalItemsCount() { return totalItemCount; }
+
+ /**
+ * Resets the input for this content source, so that the test would behave as
+ * if it was just started, input-wise.
+ * <p>
+ * <b>NOTE:</b> the default implementation resets the number of bytes and
+ * items generated since the last reset, so it's important to call
+ * super.resetInputs in case you override this method.
+ */
+ @SuppressWarnings("unused")
+ public void resetInputs() throws IOException {
+ bytesCount = 0;
+ itemCount = 0;
+ }
+
+ /**
+ * Sets the {@link Config} for this content source. If you override this
+ * method, you must call super.setConfig.
+ */
+ public void setConfig(Config config) {
+ this.config = config;
+ forever = config.get("content.source.forever", true);
+ logStep = config.get("content.source.log.step", 0);
+ verbose = config.get("content.source.verbose", false);
+ encoding = config.get("content.source.encoding", null);
+ }
+
+ public void printStatistics(String itemsName) {
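+ // prints cumulative totals for the given item name, plus the counts accumulated since the last resetInputs()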
+ boolean print = false;
+ String col = " ";
+ StringBuilder sb = new StringBuilder();
+ String newline = System.getProperty("line.separator");
+ sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
+ int nut = getTotalItemsCount();
+ if (nut > lastPrintedNumUniqueTexts) {
+ print = true;
+ sb.append("total count of "+itemsName+": ").append(Format.format(0,nut,col)).append(newline);
+ lastPrintedNumUniqueTexts = nut;
+ }
+ long nub = getTotalBytesCount();
+ if (nub > lastPrintedNumUniqueBytes) {
+ print = true;
+ sb.append("total bytes of "+itemsName+": ").append(Format.format(0,nub,col)).append(newline);
+ lastPrintedNumUniqueBytes = nub;
+ }
+ if (getItemsCount() > 0) {
+ print = true;
+ sb.append("num "+itemsName+" added since last inputs reset: ").append(Format.format(0,getItemsCount(),col)).append(newline);
+ sb.append("total bytes added for "+itemsName+" since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline);
+ }
+ if (print) {
+ System.out.println(sb.append(newline).toString());
+ printNum++;
+ }
+ }
+
+}
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (revision 1180173)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (working copy)
@@ -31,7 +31,6 @@
import java.text.ParsePosition;
import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.benchmark.byTask.utils.Format;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
@@ -189,13 +188,8 @@
protected boolean reuseFields;
protected boolean indexProperties;
- private int lastPrintedNumUniqueTexts = 0;
-
- private long lastPrintedNumUniqueBytes = 0;
private final AtomicInteger numDocsCreated = new AtomicInteger();
- private int printNum = 0;
-
// create a doc
// use only part of the body, modify it to keep the rest (or use all if size==0).
// reset the docdata properties so they are not added more than once.
@@ -397,38 +391,9 @@
return doc;
}
- public void printDocStatistics() {
- boolean print = false;
- String col = " ";
- StringBuilder sb = new StringBuilder();
- String newline = System.getProperty("line.separator");
- sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline);
- int nut = source.getTotalDocsCount();
- if (nut > lastPrintedNumUniqueTexts) {
- print = true;
- sb.append("total count of unique texts: ").append(Format.format(0,nut,col)).append(newline);
- lastPrintedNumUniqueTexts = nut;
- }
- long nub = getTotalBytesCount();
- if (nub > lastPrintedNumUniqueBytes) {
- print = true;
- sb.append("total bytes of unique texts: ").append(Format.format(0,nub,col)).append(newline);
- lastPrintedNumUniqueBytes = nub;
- }
- if (source.getDocsCount() > 0) {
- print = true;
- sb.append("num docs added since last inputs reset: ").append(Format.format(0,source.getDocsCount(),col)).append(newline);
- sb.append("total bytes added since last inputs reset: ").append(Format.format(0,getBytesCount(),col)).append(newline);
- }
- if (print) {
- System.out.println(sb.append(newline).toString());
- printNum++;
- }
- }
-
/** Reset inputs so that the test run would behave, input wise, as if it just started. */
public synchronized void resetInputs() throws IOException {
- printDocStatistics();
+ source.printStatistics("docs");
// re-initiate since properties by round may have changed.
setConfig(config);
source.resetInputs();
Index: lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
===================================================================
--- lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (revision 1180173)
+++ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (working copy)
@@ -24,6 +24,7 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
@@ -31,12 +32,15 @@
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.IOUtils;
/**
* Data maintained by a performance test run.
@@ -45,11 +49,21 @@
* <ul>
* <li>Configuration.
* <li>Directory, Writer, Reader.
- * <li>Docmaker and a few instances of QueryMaker.
+ * <li>Taxonomy Directory, Writer, Reader.
+ * <li>DocMaker, FacetSource and a few instances of QueryMaker.
* <li>Analyzer.
* <li>Statistics data which updated during the run.
* </ul>
- * Config properties: work.dir=&lt;path to root of docs and index dirs| Default: work&gt;
+ * Config properties:
+ * <ul>
+ * <li><b>work.dir</b>=&lt;path to root of docs and index dirs| Default: work&gt;
+ * <li><b>analyzer</b>=&lt;class name for analyzer| Default: StandardAnalyzer&gt;
+ * <li><b>doc.maker</b>=&lt;class name for doc-maker| Default: DocMaker&gt;
+ * <li><b>facet.source</b>=&lt;class name for facet-source| Default: RandomFacetSource&gt;
+ * <li><b>query.maker</b>=&lt;class name for query-maker| Default: SimpleQueryMaker&gt;
+ * <li><b>log.queries</b>=&lt;whether queries should be printed| Default: false&gt;
+ * <li><b>directory</b>=&lt;type of directory to use for the index| Default: RAMDirectory&gt;
+ * <li><b>taxonomy.directory</b>=&lt;type of directory for taxonomy index| Default: RAMDirectory&gt;
* </ul>
*/
public class PerfRunData {
@@ -62,7 +76,12 @@
private Directory directory;
private Analyzer analyzer;
private DocMaker docMaker;
+ private FacetSource facetSource;
private Locale locale;
+
+ private Directory taxonomyDir;
+ private TaxonomyWriter taxonomyWriter;
+ private TaxonomyReader taxonomyReader;
// we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker;
@@ -73,6 +92,7 @@
private IndexWriter indexWriter;
private Config config;
private long startTimeMillis;
+
// constructor
public PerfRunData (Config config) throws Exception {
@@ -84,6 +104,10 @@
docMaker = Class.forName(config.get("doc.maker",
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
docMaker.setConfig(config);
+ // facet source
+ facetSource = Class.forName(config.get("facet.source",
+ "org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
+ facetSource.setConfig(config);
// query makers
readTaskQueryMaker = new HashMap<Class<? extends ReadTask>,QueryMaker>();
qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class);
@@ -104,30 +128,17 @@
public void reinit(boolean eraseIndex) throws Exception {
// cleanup index
- if (indexWriter!=null) {
- indexWriter.close();
- indexWriter = null;
- }
- if (indexReader!=null) {
- indexReader.close();
- indexReader = null;
- }
- if (directory!=null) {
- directory.close();
- }
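+ // IOUtils.close accepts null arguments, so this is safe even when the writer, reader or directory were never opened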
+ IOUtils.close(indexWriter, indexReader, directory);
+ indexWriter = null;
+ indexReader = null;
+
+ IOUtils.close(taxonomyWriter, taxonomyReader, taxonomyDir);
+ taxonomyWriter = null;
+ taxonomyReader = null;
// directory (default is ram-dir).
- if ("FSDirectory".equals(config.get("directory","RAMDirectory"))) {
- File workDir = new File(config.get("work.dir","work"));
- File indexDir = new File(workDir,"index");
- if (eraseIndex && indexDir.exists()) {
- FileUtils.fullyDelete(indexDir);
- }
- indexDir.mkdirs();
- directory = FSDirectory.open(indexDir);
- } else {
- directory = new RAMDirectory();
- }
+ directory = createDirectory(eraseIndex, "index", "directory");
+ taxonomyDir = createDirectory(eraseIndex, "taxo", "taxonomy.directory");
// inputs
resetInputs();
@@ -139,6 +150,21 @@
// Re-init clock
setStartTimeMillis();
}
+
+ private Directory createDirectory(boolean eraseIndex, String dirName,
+ String dirParam) throws IOException {
+ if ("FSDirectory".equals(config.get(dirParam,"RAMDirectory"))) {
+ File workDir = new File(config.get("work.dir","work"));
+ File indexDir = new File(workDir,dirName);
+ if (eraseIndex && indexDir.exists()) {
+ FileUtils.fullyDelete(indexDir);
+ }
+ indexDir.mkdirs();
+ return FSDirectory.open(indexDir);
+ }
+
+ return new RAMDirectory();
+ }
public long setStartTimeMillis() {
startTimeMillis = System.currentTimeMillis();
@@ -174,6 +200,57 @@
}
/**
+ * @return Returns the taxonomy directory
+ */
+ public Directory getTaxonomyDir() {
+ return taxonomyDir;
+ }
+
+ /**
+ * Set the taxonomy reader. Takes ownership of that taxonomy reader, that is,
+ * internally performs taxoReader.incRef(). If the caller no longer needs the
+ * reader it should decRef()/close() it after calling this method; otherwise
+ * the reader will remain open.
+ * @param taxoReader The taxonomy reader to set.
+ */
+ public synchronized void setTaxonomyReader(TaxonomyReader taxoReader) throws IOException {
+ if (taxoReader == this.taxonomyReader) {
+ return;
+ }
+ if (taxonomyReader != null) {
+ taxonomyReader.decRef();
+ }
+
+ if (taxoReader != null) {
+ taxoReader.incRef();
+ }
+ this.taxonomyReader = taxoReader;
+ }
+
+ /**
+ * @return Returns the taxonomyReader. NOTE: this returns a
+ * reference. You must call TaxonomyReader.decRef() when
+ * you're done.
+ */
+ public synchronized TaxonomyReader getTaxonomyReader() {
+ if (taxonomyReader != null) {
+ taxonomyReader.incRef();
+ }
+ return taxonomyReader;
+ }
+
+ /**
+ * @param taxoWriter The taxonomy writer to set.
+ */
+ public void setTaxonomyWriter(TaxonomyWriter taxoWriter) {
+ this.taxonomyWriter = taxoWriter;
+ }
+
+ public TaxonomyWriter getTaxonomyWriter() {
+ return taxonomyWriter;
+ }
+
+ /**
* @return Returns the indexReader. NOTE: this returns a
* reference. You must call IndexReader.decRef() when
* you're done.
@@ -198,13 +275,22 @@
}
/**
+ * Set the index reader. Takes ownership of that index reader, that is,
+ * internally performs indexReader.incRef(). If the caller no longer needs the
+ * reader it should decRef()/close() it after calling this method; otherwise
+ * the reader will remain open.
* @param indexReader The indexReader to set.
*/
public synchronized void setIndexReader(IndexReader indexReader) throws IOException {
+ if (indexReader == this.indexReader) {
+ return;
+ }
+
if (this.indexReader != null) {
// Release current IR
this.indexReader.decRef();
}
+
this.indexReader = indexReader;
if (indexReader != null) {
// Hold reference to new IR
@@ -246,6 +332,11 @@
return docMaker;
}
+ /** Returns the facet source. */
+ public FacetSource getFacetSource() {
+ return facetSource;
+ }
+
/**
* @return the locale
*/
@@ -269,6 +360,7 @@
public void resetInputs() throws IOException {
docMaker.resetInputs();
+ facetSource.resetInputs();
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
queryMaker.resetInputs();
}
Index: lucene/contrib/benchmark/build.xml
===================================================================
--- lucene/contrib/benchmark/build.xml (revision 1180173)
+++ lucene/contrib/benchmark/build.xml (working copy)
@@ -130,6 +130,7 @@
<pathelement path="${memory.jar}"/>
<pathelement path="${highlighter.jar}"/>
<pathelement path="${analyzers-common.jar}"/>
+ <pathelement path="${facet.jar}"/>
<path refid="base.classpath"/>
<fileset dir="lib">
<include name="**/*.jar"/>
@@ -218,7 +219,7 @@
<echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
</target>
- <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common"/>
+ <target name="init" depends="contrib-build.init,jar-memory,jar-highlighter,jar-analyzers-common,jar-facet"/>
<target name="clean-javacc">
<fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
Index: lucene/contrib/contrib-build.xml
===================================================================
--- lucene/contrib/contrib-build.xml (revision 1180173)
+++ lucene/contrib/contrib-build.xml (working copy)
@@ -129,6 +129,17 @@
<property name="analyzers-common.uptodate" value="true"/>
</target>
+ <property name="facet.jar" value="${common.dir}/build/contrib/facet/lucene-facet-${version}.jar"/>
+ <target name="check-facet-uptodate" unless="facet.uptodate">
+ <contrib-uptodate name="facet" jarfile="${facet.jar}" property="facet.uptodate"/>
+ </target>
+ <target name="jar-facet" unless="facet.uptodate" depends="check-facet-uptodate">
+ <ant dir="${common.dir}/contrib/facet" target="jar-core" inheritall="false">
+ <propertyset refid="uptodate.and.compiled.properties"/>
+ </ant>
+ <property name="facet.uptodate" value="true"/>
+ </target>
+
<property name="analyzers-smartcn.jar" value="${common.dir}/build/contrib/analyzers/smartcn/lucene-smartcn-${version}.jar"/>
<target name="check-analyzers-smartcn-uptodate" unless="analyzers-smartcn.uptodate">
<contrib-uptodate name="analyzers/smartcn" jarfile="${analyzers-smartcn.jar}" property="analyzers-smartcn.uptodate"/>