| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.lucene.benchmark.byTask; |
| |
| import java.io.StringReader; |
| import java.io.File; |
| import java.io.FileReader; |
| import java.io.BufferedReader; |
| import java.text.Collator; |
| import java.util.List; |
| import java.util.Locale; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.WhitespaceAnalyzer; |
| import org.apache.lucene.analysis.tokenattributes.TermAttribute; |
| import org.apache.lucene.benchmark.byTask.feeds.DocMaker; |
| import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker; |
| import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask; |
| import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask; |
| import org.apache.lucene.benchmark.byTask.stats.TaskStats; |
| import org.apache.lucene.collation.CollationKeyAnalyzer; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.index.LogMergePolicy; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.TermEnum; |
| import org.apache.lucene.index.TermDocs; |
| import org.apache.lucene.index.SerialMergeScheduler; |
| import org.apache.lucene.index.LogDocMergePolicy; |
| import org.apache.lucene.index.TermFreqVector; |
| import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.search.FieldCache.StringIndex; |
| import org.apache.lucene.search.FieldCache; |
| import org.apache.lucene.util.LuceneTestCase; |
| |
| /** |
| * Test very simply that perf tasks - simple algorithms - are doing what they should. |
| */ |
| public class TestPerfTasksLogic extends LuceneTestCase { |
| |
| static final String NEW_LINE = System.getProperty("line.separator"); |
| |
| // properties in effect in all tests here |
| static final String propLines [] = { |
| "directory=RAMDirectory", |
| "print.props=false", |
| }; |
| |
| /** |
| * @param name test name |
| */ |
| public TestPerfTasksLogic(String name) { |
| super(name); |
| } |
| |
| /** |
| * Test index creation logic |
| */ |
| public void testIndexAndSearchTasks() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "ResetSystemErase", |
| "CreateIndex", |
| "{ AddDoc } : 1000", |
| "Optimize", |
| "CloseIndex", |
| "OpenReader", |
| "{ CountingSearchTest } : 200", |
| "CloseReader", |
| "[ CountingSearchTest > : 70", |
| "[ CountingSearchTest > : 9", |
| }; |
| |
| // 2. we test this value later |
| CountingSearchTestTask.numSearches = 0; |
| |
| // 3. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 4. test specific checks after the benchmark run completed. |
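    // 200 searches from the "{ ... } : 200" sequence plus 70 + 9 from the two
    // uncounted "[ ... >" sequences = 279.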
| assertEquals("TestSearchTask was supposed to be called!",279,CountingSearchTestTask.numSearches); |
| assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); |
| // now we should be able to open the index for write. |
| IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), |
| new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) |
| .setOpenMode(OpenMode.APPEND)); |
| iw.close(); |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs()); |
| ir.close(); |
| } |
| |
| /** |
| * Test timed sequence task. |
| */ |
| public void testTimedSearchTask() throws Exception { |
| String algLines[] = { |
| "log.step=100000", |
| "ResetSystemErase", |
| "CreateIndex", |
| "{ AddDoc } : 100", |
| "Optimize", |
| "CloseIndex", |
| "OpenReader", |
| "{ CountingSearchTest } : .5s", |
| "CloseReader", |
| }; |
| |
| CountingSearchTestTask.numSearches = 0; |
| execBenchmark(algLines); |
| assertTrue(CountingSearchTestTask.numSearches > 0); |
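    // the search sequence ran for 0.5 sec; 1500 msec leaves generous slack for slow machines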
| long elapsed = CountingSearchTestTask.prevLastMillis - CountingSearchTestTask.startMillis; |
| assertTrue("elapsed time was " + elapsed + " msec", elapsed <= 1500); |
| } |
| |
| // disabled until we fix BG thread prio -- this test |
| // causes build to hang |
| public void testBGSearchTaskThreads() throws Exception { |
| String algLines[] = { |
| "log.time.step.msec = 100", |
| "log.step=100000", |
| "ResetSystemErase", |
| "CreateIndex", |
| "{ AddDoc } : 1000", |
| "Optimize", |
| "CloseIndex", |
| "OpenReader", |
| "{", |
| " [ \"XSearch\" { CountingSearchTest > : * ] : 2 &-1", |
| " Wait(0.5)", |
| "}", |
| "CloseReader", |
| "RepSumByPref X" |
| }; |
| |
| CountingSearchTestTask.numSearches = 0; |
| execBenchmark(algLines); |
| assertTrue(CountingSearchTestTask.numSearches > 0); |
| } |
| |
| public void testHighlighting() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "doc.stored=true", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "query.maker=" + ReutersQueryMaker.class.getName(), |
| "ResetSystemErase", |
| "CreateIndex", |
| "{ AddDoc } : 100", |
| "Optimize", |
| "CloseIndex", |
| "OpenReader(true)", |
| "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200", |
| "CloseReader", |
| }; |
| |
| // 2. we test this value later |
| CountingHighlighterTestTask.numHighlightedResults = 0; |
| CountingHighlighterTestTask.numDocsRetrieved = 0; |
| // 3. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 4. test specific checks after the benchmark run completed. |
| assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved); |
| //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs |
| //we probably should use a different doc/query maker, but... |
| assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0); |
| |
| assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); |
| // now we should be able to open the index for write. |
| IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); |
| iw.close(); |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| assertEquals("100 docs were added to the index, this is what we expect to find!",100,ir.numDocs()); |
| ir.close(); |
| } |
| |
| public void testHighlightingTV() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "doc.stored=true",//doc storage is required in order to have text to highlight |
| "doc.term.vector.offsets=true", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "query.maker=" + ReutersQueryMaker.class.getName(), |
| "ResetSystemErase", |
| "CreateIndex", |
| "{ AddDoc } : 1000", |
| "Optimize", |
| "CloseIndex", |
| "OpenReader(false)", |
| "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200", |
| "CloseReader", |
| }; |
| |
| // 2. we test this value later |
| CountingHighlighterTestTask.numHighlightedResults = 0; |
| CountingHighlighterTestTask.numDocsRetrieved = 0; |
| // 3. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 4. test specific checks after the benchmark run completed. |
| assertEquals("TestSearchTask was supposed to be called!",92,CountingHighlighterTestTask.numDocsRetrieved); |
| //pretty hard to figure out a priori how many docs are going to have highlighted fragments returned, but we can never have more than the number of docs |
| //we probably should use a different doc/query maker, but... |
| assertTrue("TestSearchTask was supposed to be called!", CountingHighlighterTestTask.numDocsRetrieved >= CountingHighlighterTestTask.numHighlightedResults && CountingHighlighterTestTask.numHighlightedResults > 0); |
| |
| assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); |
| // now we should be able to open the index for write. |
| IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); |
| iw.close(); |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs()); |
| ir.close(); |
| } |
| |
| public void testHighlightingNoTvNoStore() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "doc.stored=false", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "query.maker=" + ReutersQueryMaker.class.getName(), |
| "ResetSystemErase", |
| "CreateIndex", |
| "{ AddDoc } : 1000", |
| "Optimize", |
| "CloseIndex", |
| "OpenReader", |
| "{ CountingHighlighterTest(size[1],highlight[1],mergeContiguous[true],maxFrags[1],fields[body]) } : 200", |
| "CloseReader", |
| }; |
| |
| // 2. we test this value later |
| CountingHighlighterTestTask.numHighlightedResults = 0; |
| CountingHighlighterTestTask.numDocsRetrieved = 0; |
| // 3. execute the algorithm (required in every "logic" test) |
| try { |
      Benchmark benchmark = execBenchmark(algLines);
      assertNotNull(benchmark); // (avoid compile warning on unused variable)
      fail("CountingHighlighterTest should have thrown an exception");
    } catch (Exception e) {
      // expected: with doc.stored=false there is nothing to highlight
| } |
| } |
| |
| /** |
   * Test exhausting Doc Maker logic
| */ |
| public void testExhaustContentSource() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource", |
| "content.source.log.step=1", |
| "doc.term.vector=false", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "# ----- alg ", |
| "CreateIndex", |
| "{ AddDoc } : * ", |
| "Optimize", |
| "CloseIndex", |
| "OpenReader", |
| "{ CountingSearchTest } : 100", |
| "CloseReader", |
| "[ CountingSearchTest > : 30", |
| "[ CountingSearchTest > : 9", |
| }; |
| |
| // 2. we test this value later |
| CountingSearchTestTask.numSearches = 0; |
| |
| // 3. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 4. test specific checks after the benchmark run completed. |
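    // 100 foreground searches plus 30 + 9 from the two uncounted sequences = 139.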
| assertEquals("TestSearchTask was supposed to be called!",139,CountingSearchTestTask.numSearches); |
| assertTrue("Index does not exist?...!", IndexReader.indexExists(benchmark.getRunData().getDirectory())); |
| // now we should be able to open the index for write. |
| IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)).setOpenMode(OpenMode.APPEND)); |
| iw.close(); |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs()); |
| ir.close(); |
| } |
| |
| // LUCENE-1994: test thread safety of SortableSingleDocMaker |
| public void testDocMakerThreadSafety() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource", |
| "doc.term.vector=false", |
| "log.step.AddDoc=10000", |
| "content.source.forever=true", |
| "directory=RAMDirectory", |
| "doc.reuse.fields=false", |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "doc.index.props=true", |
| "# ----- alg ", |
| "CreateIndex", |
| "[ { AddDoc > : 250 ] : 4", |
| "CloseIndex", |
| }; |
| |
| // 2. we test this value later |
| CountingSearchTestTask.numSearches = 0; |
| |
| // 3. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| IndexReader r = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| StringIndex idx = FieldCache.DEFAULT.getStringIndex(r, "country"); |
| final int maxDoc = r.maxDoc(); |
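    // 4 threads x 250 AddDoc each = 1000 docs, each of which must have a country field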
| assertEquals(1000, maxDoc); |
| for(int i=0;i<1000;i++) { |
| assertNotNull("doc " + i + " has null country", idx.lookup[idx.order[i]]); |
| } |
| r.close(); |
| } |
| |
| /** |
| * Test Parallel Doc Maker logic (for LUCENE-940) |
| */ |
| public void testParallelDocMaker() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=3", |
| "doc.term.vector=false", |
| "content.source.forever=false", |
| "directory=FSDirectory", |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "# ----- alg ", |
| "CreateIndex", |
| "[ { AddDoc } : * ] : 4 ", |
| "CloseIndex", |
| }; |
| |
| // 2. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 3. test number of docs in the index |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| int ndocsExpected = 20; // first 20 reuters docs. |
| assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs()); |
| ir.close(); |
| } |
| |
| /** |
| * Test WriteLineDoc and LineDocSource. |
| */ |
| public void testLineDocFile() throws Exception { |
| File lineFile = new File(TEMP_DIR, "test.reuters.lines.txt"); |
| |
    // We will call WriteLineDoc this many times
| final int NUM_TRY_DOCS = 50; |
| |
    // Create a line file with the first 50 docs from SingleDocSource
| String algLines1[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.SingleDocSource", |
| "content.source.forever=true", |
| "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'), |
| "# ----- alg ", |
| "{WriteLineDoc()}:" + NUM_TRY_DOCS, |
| }; |
| |
| // Run algo |
| Benchmark benchmark = execBenchmark(algLines1); |
| |
| BufferedReader r = new BufferedReader(new FileReader(lineFile)); |
| int numLines = 0; |
| while(r.readLine() != null) |
| numLines++; |
| r.close(); |
| assertEquals("did not see the right number of docs; should be " + NUM_TRY_DOCS + " but was " + numLines, NUM_TRY_DOCS, numLines); |
| |
| // Index the line docs |
| String algLines2[] = { |
| "# ----- properties ", |
| "analyzer=org.apache.lucene.analysis.SimpleAnalyzer", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'), |
| "content.source.forever=false", |
| "doc.reuse.fields=false", |
| "ram.flush.mb=4", |
| "# ----- alg ", |
| "ResetSystemErase", |
| "CreateIndex", |
| "{AddDoc}: *", |
| "CloseIndex", |
| }; |
| |
| // Run algo |
| benchmark = execBenchmark(algLines2); |
| |
| // now we should be able to open the index for write. |
| IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(), |
| new IndexWriterConfig(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)) |
| .setOpenMode(OpenMode.APPEND)); |
| iw.close(); |
| |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs()); |
| ir.close(); |
| |
| lineFile.delete(); |
| } |
| |
| /** |
| * Test ReadTokensTask |
| */ |
| public void testReadTokens() throws Exception { |
| |
| // We will call ReadTokens on this many docs |
| final int NUM_DOCS = 20; |
| |
| // Read tokens from first NUM_DOCS docs from Reuters and |
| // then build index from the same docs |
| String algLines1[] = { |
| "# ----- properties ", |
| "analyzer=org.apache.lucene.analysis.WhitespaceAnalyzer", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "# ----- alg ", |
| "{ReadTokens}: " + NUM_DOCS, |
| "ResetSystemErase", |
| "CreateIndex", |
| "{AddDoc}: " + NUM_DOCS, |
| "CloseIndex", |
| }; |
| |
| // Run algo |
| Benchmark benchmark = execBenchmark(algLines1); |
| |
| List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats(); |
| |
| // Count how many tokens all ReadTokens saw |
| int totalTokenCount1 = 0; |
| for (final TaskStats stat : stats) { |
| if (stat.getTask().getName().equals("ReadTokens")) { |
| totalTokenCount1 += stat.getCount(); |
| } |
| } |
| |
| // Separately count how many tokens are actually in the index: |
| IndexReader reader = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| assertEquals(NUM_DOCS, reader.numDocs()); |
| |
| TermEnum terms = reader.terms(); |
| TermDocs termDocs = reader.termDocs(); |
| int totalTokenCount2 = 0; |
| while(terms.next()) { |
| Term term = terms.term(); |
      // skip the id field: it is indexed but not tokenized, so its terms were not
      // produced by an analyzer (Term field names are interned, so the identity
      // comparison is safe)
      if (term != null && term.field() != DocMaker.ID_FIELD) {
| termDocs.seek(terms.term()); |
| while (termDocs.next()) |
| totalTokenCount2 += termDocs.freq(); |
| } |
| } |
| reader.close(); |
| |
| // Make sure they are the same |
| assertEquals(totalTokenCount1, totalTokenCount2); |
| } |
| |
| /** |
| * Test that " {[AddDoc(4000)]: 4} : * " works corrcetly (for LUCENE-941) |
| */ |
| public void testParallelExhausted() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=3", |
| "doc.term.vector=false", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "task.max.depth.log=1", |
| "# ----- alg ", |
| "CreateIndex", |
| "{ [ AddDoc]: 4} : * ", |
| "ResetInputs ", |
| "{ [ AddDoc]: 4} : * ", |
| "CloseIndex", |
| }; |
| |
| // 2. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 3. test number of docs in the index |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| int ndocsExpected = 2 * 20; // first 20 reuters docs. |
| assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs()); |
| ir.close(); |
| } |
| |
| // create the benchmark and execute it. |
| public static Benchmark execBenchmark(String[] algLines) throws Exception { |
| String algText = algLinesToText(algLines); |
| logTstLogic(algText); |
| Benchmark benchmark = new Benchmark(new StringReader(algText)); |
| benchmark.execute(); |
| return benchmark; |
| } |
| |
  // concatenate the common property lines and the alg lines to make the alg text
| private static String algLinesToText(String[] algLines) { |
| String indent = " "; |
| StringBuffer sb = new StringBuffer(); |
| for (int i = 0; i < propLines.length; i++) { |
| sb.append(indent).append(propLines[i]).append(NEW_LINE); |
| } |
| for (int i = 0; i < algLines.length; i++) { |
| sb.append(indent).append(algLines[i]).append(NEW_LINE); |
| } |
| return sb.toString(); |
| } |
| |
| private static void logTstLogic (String txt) { |
| if (!VERBOSE) |
| return; |
| System.out.println("Test logic of:"); |
| System.out.println(txt); |
| } |
| |
| /** |
   * Test that exhausting the content source inside a loop works as expected (LUCENE-1115).
| */ |
| public void testExhaustedLooped() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=3", |
| "doc.term.vector=false", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "task.max.depth.log=1", |
| "# ----- alg ", |
| "{ \"Rounds\"", |
| " ResetSystemErase", |
| " CreateIndex", |
| " { \"AddDocs\" AddDoc > : * ", |
| " CloseIndex", |
| "} : 2", |
| }; |
| |
| // 2. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 3. test number of docs in the index |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| int ndocsExpected = 20; // first 20 reuters docs. |
| assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs()); |
| ir.close(); |
| } |
| |
| /** |
| * Test that we can close IndexWriter with argument "false". |
| */ |
| public void testCloseIndexFalse() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "ram.flush.mb=-1", |
| "max.buffered=2", |
| "content.source.log.step=3", |
| "doc.term.vector=false", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "debug.level=1", |
| "# ----- alg ", |
| "{ \"Rounds\"", |
| " ResetSystemErase", |
| " CreateIndex", |
| " { \"AddDocs\" AddDoc > : * ", |
| " CloseIndex(false)", |
| "} : 2", |
| }; |
| |
| // 2. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 3. test number of docs in the index |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| int ndocsExpected = 20; // first 20 reuters docs. |
| assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs()); |
| ir.close(); |
| } |
| |
| public static class MyMergeScheduler extends SerialMergeScheduler { |
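    // set in the constructor, proving the benchmark instantiated this class via merge.scheduler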
| boolean called; |
| public MyMergeScheduler() { |
| super(); |
| called = true; |
| } |
| } |
| |
| /** |
   * Test that we can set the merge scheduler.
| */ |
| public void testMergeScheduler() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=3", |
| "doc.term.vector=false", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "merge.scheduler=" + MyMergeScheduler.class.getName(), |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "debug.level=1", |
| "# ----- alg ", |
| "{ \"Rounds\"", |
| " ResetSystemErase", |
| " CreateIndex", |
| " { \"AddDocs\" AddDoc > : * ", |
| "} : 2", |
| }; |
| // 2. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| assertTrue("did not use the specified MergeScheduler", |
| ((MyMergeScheduler) benchmark.getRunData().getIndexWriter().getConfig() |
| .getMergeScheduler()).called); |
| benchmark.getRunData().getIndexWriter().close(); |
| |
| // 3. test number of docs in the index |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| int ndocsExpected = 20; // first 20 reuters docs. |
| assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs()); |
| ir.close(); |
| } |
| |
| public static class MyMergePolicy extends LogDocMergePolicy { |
| boolean called; |
| public MyMergePolicy() { |
| called = true; |
| } |
| } |
| |
| /** |
   * Test that we can set the merge policy.
| */ |
| public void testMergePolicy() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=3", |
| "ram.flush.mb=-1", |
| "max.buffered=2", |
| "doc.term.vector=false", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "merge.policy=" + MyMergePolicy.class.getName(), |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "debug.level=1", |
| "# ----- alg ", |
| "{ \"Rounds\"", |
| " ResetSystemErase", |
| " CreateIndex", |
| " { \"AddDocs\" AddDoc > : * ", |
| "} : 2", |
| }; |
| |
| // 2. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| assertTrue("did not use the specified MergePolicy", ((MyMergePolicy) benchmark.getRunData().getIndexWriter().getConfig().getMergePolicy()).called); |
| benchmark.getRunData().getIndexWriter().close(); |
| |
| // 3. test number of docs in the index |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| int ndocsExpected = 20; // first 20 reuters docs. |
| assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs()); |
| ir.close(); |
| } |
| |
| /** |
| * Test that IndexWriter settings stick. |
| */ |
| public void testIndexWriterSettings() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=3", |
| "ram.flush.mb=-1", |
| "max.buffered=2", |
| "compound=cmpnd:true:false", |
| "doc.term.vector=vector:false:true", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "doc.stored=false", |
| "merge.factor=3", |
| "doc.tokenized=false", |
| "debug.level=1", |
| "# ----- alg ", |
| "{ \"Rounds\"", |
| " ResetSystemErase", |
| " CreateIndex", |
| " { \"AddDocs\" AddDoc > : * ", |
| " NewRound", |
| "} : 2", |
| }; |
| |
| // 2. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| final IndexWriter writer = benchmark.getRunData().getIndexWriter(); |
| assertEquals(2, writer.getConfig().getMaxBufferedDocs()); |
| assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB()); |
| assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor()); |
| assertFalse(((LogMergePolicy) writer.getConfig().getMergePolicy()).getUseCompoundFile()); |
| writer.close(); |
| Directory dir = benchmark.getRunData().getDirectory(); |
| IndexReader reader = IndexReader.open(dir, true); |
| TermFreqVector [] tfv = reader.getTermFreqVectors(0); |
| assertNotNull(tfv); |
| assertTrue(tfv.length > 0); |
| reader.close(); |
| } |
| |
| /** |
| * Test that we can call optimize(maxNumSegments). |
| */ |
| public void testOptimizeMaxNumSegments() throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=3", |
| "ram.flush.mb=-1", |
| "max.buffered=3", |
| "doc.term.vector=false", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "merge.policy=org.apache.lucene.index.LogDocMergePolicy", |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "debug.level=1", |
| "# ----- alg ", |
| "{ \"Rounds\"", |
| " ResetSystemErase", |
| " CreateIndex", |
| " { \"AddDocs\" AddDoc > : * ", |
| " Optimize(3)", |
| " CloseIndex()", |
| "} : 2", |
| }; |
| |
| // 2. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 3. test number of docs in the index |
| IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory(), true); |
| int ndocsExpected = 20; // first 20 reuters docs. |
| assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs()); |
| ir.close(); |
| |
    // Make sure we ended up with 3 segments (compound files are on by default, so each segment is one .cfs):
| final String[] files = benchmark.getRunData().getDirectory().listAll(); |
| int cfsCount = 0; |
| for(int i=0;i<files.length;i++) |
| if (files[i].endsWith(".cfs")) |
| cfsCount++; |
| assertEquals(3, cfsCount); |
| } |
| |
| /** |
| * Test disabling task count (LUCENE-1136). |
| */ |
| public void testDisableCounting() throws Exception { |
| doTestDisableCounting(true); |
| doTestDisableCounting(false); |
| } |
| |
| private void doTestDisableCounting(boolean disable) throws Exception { |
| // 1. alg definition (required in every "logic" test) |
| String algLines[] = disableCountingLines(disable); |
| |
| // 2. execute the algorithm (required in every "logic" test) |
| Benchmark benchmark = execBenchmark(algLines); |
| |
| // 3. test counters |
| int n = disable ? 0 : 1; |
| int nChecked = 0; |
| for (final TaskStats stats : benchmark.getRunData().getPoints().taskStats()) { |
| String taskName = stats.getTask().getName(); |
| if (taskName.equals("Rounds")) { |
| assertEquals("Wrong total count!",20+2*n,stats.getCount()); |
| nChecked++; |
| } else if (taskName.equals("CreateIndex")) { |
| assertEquals("Wrong count for CreateIndex!",n,stats.getCount()); |
| nChecked++; |
| } else if (taskName.equals("CloseIndex")) { |
| assertEquals("Wrong count for CloseIndex!",n,stats.getCount()); |
| nChecked++; |
| } |
| } |
| assertEquals("Missing some tasks to check!",3,nChecked); |
| } |
| |
| private static String[] disableCountingLines (boolean disable) { |
| String dis = disable ? "-" : ""; |
| return new String[] { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=30", |
| "doc.term.vector=false", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "doc.stored=false", |
| "doc.tokenized=false", |
| "task.max.depth.log=1", |
| "# ----- alg ", |
| "{ \"Rounds\"", |
| " ResetSystemErase", |
| " "+dis+"CreateIndex", // optionally disable counting here |
| " { \"AddDocs\" AddDoc > : * ", |
| " "+dis+" CloseIndex", // optionally disable counting here (with extra blanks) |
| "}", |
| "RepSumByName", |
| }; |
| } |
| |
| /** |
   * Test that we can change the Locale in the runData and
   * that it is parsed as we expect.
| */ |
| public void testLocale() throws Exception { |
| // empty Locale: clear it (null) |
| Benchmark benchmark = execBenchmark(getLocaleConfig("")); |
| assertNull(benchmark.getRunData().getLocale()); |
| |
| // ROOT locale |
| benchmark = execBenchmark(getLocaleConfig("ROOT")); |
| assertEquals(new Locale(""), benchmark.getRunData().getLocale()); |
| |
| // specify just a language |
| benchmark = execBenchmark(getLocaleConfig("de")); |
| assertEquals(new Locale("de"), benchmark.getRunData().getLocale()); |
| |
| // specify language + country |
| benchmark = execBenchmark(getLocaleConfig("en,US")); |
| assertEquals(new Locale("en", "US"), benchmark.getRunData().getLocale()); |
| |
| // specify language + country + variant |
| benchmark = execBenchmark(getLocaleConfig("no,NO,NY")); |
| assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale()); |
| } |
| |
| private static String[] getLocaleConfig(String localeParam) { |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=3", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "# ----- alg ", |
| "{ \"Rounds\"", |
| " ResetSystemErase", |
| " NewLocale(" + localeParam + ")", |
| " CreateIndex", |
| " { \"AddDocs\" AddDoc > : * ", |
| " NewRound", |
| "} : 1", |
| }; |
| return algLines; |
| } |
| |
| /** |
| * Test that we can create CollationAnalyzers. |
| */ |
| public void testCollator() throws Exception { |
| // ROOT locale |
| Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk")); |
| CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator |
| .getInstance(new Locale(""))); |
| assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); |
| |
| // specify just a language |
| benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk")); |
| expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de"))); |
| assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); |
| |
| // specify language + country |
| benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk")); |
| expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en", |
| "US"))); |
| assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); |
| |
| // specify language + country + variant |
| benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk")); |
| expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("no", |
| "NO", "NY"))); |
| assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar"); |
| } |
| |
| private void assertEqualCollation(Analyzer a1, Analyzer a2, String text) |
| throws Exception { |
| TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text)); |
| TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text)); |
| ts1.reset(); |
| ts2.reset(); |
| TermAttribute termAtt1 = ts1.addAttribute(TermAttribute.class); |
| TermAttribute termAtt2 = ts2.addAttribute(TermAttribute.class); |
| assertTrue(ts1.incrementToken()); |
| assertTrue(ts2.incrementToken()); |
| assertEquals(termAtt1.term(), termAtt2.term()); |
| assertFalse(ts1.incrementToken()); |
| assertFalse(ts2.incrementToken()); |
| ts1.close(); |
| ts2.close(); |
| } |
| |
| private static String[] getCollatorConfig(String localeParam, |
| String collationParam) { |
| String algLines[] = { |
| "# ----- properties ", |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.log.step=3", |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "# ----- alg ", |
| "{ \"Rounds\"", |
| " ResetSystemErase", |
| " NewLocale(" + localeParam + ")", |
| " NewCollationAnalyzer(" + collationParam + ")", |
| " CreateIndex", |
| " { \"AddDocs\" AddDoc > : * ", |
| " NewRound", |
| "} : 1", |
| }; |
| return algLines; |
| } |
| |
| /** |
| * Test that we can create ShingleAnalyzerWrappers. |
| */ |
| public void testShingleAnalyzer() throws Exception { |
| String text = "one,two,three, four five six"; |
| |
| // Default analyzer, maxShingleSize, and outputUnigrams |
| Benchmark benchmark = execBenchmark(getShingleConfig("")); |
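    // smoke-test that the analyzer can open (and close) a TokenStream at all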
| benchmark.getRunData().getAnalyzer().tokenStream |
| ("bogus", new StringReader(text)).close(); |
| assertEqualShingle(benchmark.getRunData().getAnalyzer(), text, |
| new String[] {"one", "one two", "two", "two three", |
| "three", "three four", "four", "four five", |
| "five", "five six", "six"}); |
| // Default analyzer, maxShingleSize = 3, and outputUnigrams = false |
| benchmark = execBenchmark |
| (getShingleConfig("maxShingleSize:3,outputUnigrams:false")); |
| assertEqualShingle(benchmark.getRunData().getAnalyzer(), text, |
| new String[] { "one two", "one two three", "two three", |
| "two three four", "three four", |
| "three four five", "four five", |
| "four five six", "five six" }); |
| // WhitespaceAnalyzer, default maxShingleSize and outputUnigrams |
| benchmark = execBenchmark |
| (getShingleConfig("analyzer:WhitespaceAnalyzer")); |
| assertEqualShingle(benchmark.getRunData().getAnalyzer(), text, |
| new String[] { "one,two,three,", "one,two,three, four", |
| "four", "four five", "five", "five six", |
| "six" }); |
| |
| // WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false |
| benchmark = execBenchmark |
| (getShingleConfig |
| ("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer")); |
| assertEqualShingle(benchmark.getRunData().getAnalyzer(), text, |
| new String[] { "one,two,three, four", |
| "one,two,three, four five", |
| "four five", "four five six", |
| "five six" }); |
| } |
| |
| private void assertEqualShingle |
| (Analyzer analyzer, String text, String[] expected) throws Exception { |
| TokenStream stream = analyzer.tokenStream("bogus", new StringReader(text)); |
| stream.reset(); |
| TermAttribute termAtt = stream.addAttribute(TermAttribute.class); |
| int termNum = 0; |
| while (stream.incrementToken()) { |
| assertTrue("Extra output term(s), starting with '" |
| + new String(termAtt.termBuffer(), 0, termAtt.termLength()) + "'", |
| termNum < expected.length); |
| assertEquals("Mismatch in output term # " + termNum + " - ", |
| expected[termNum], |
| new String(termAtt.termBuffer(), 0, termAtt.termLength())); |
| ++termNum; |
| } |
| assertEquals("Too few output terms", expected.length, termNum); |
| stream.close(); |
| } |
| |
| private static String[] getShingleConfig(String params) { |
| String algLines[] = { |
| "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource", |
| "docs.file=" + getReuters20LinesFile(), |
| "content.source.forever=false", |
| "directory=RAMDirectory", |
| "NewShingleAnalyzer(" + params + ")", |
| "CreateIndex", |
| "{ \"AddDocs\" AddDoc > : * " |
| }; |
| return algLines; |
| } |
| |
| private static String getReuters20LinesFile() { |
| return System.getProperty("lucene.common.dir").replace('\\','/') + |
| "/contrib/benchmark/src/test/org/apache/lucene/benchmark/reuters.first20.lines.txt"; |
| } |
| } |