blob: d1eed4270610e21f9b9b834c8b24eac937f46c40 [file] [log] [blame]
package org.apache.lucene.codecs.perfield;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene42.Lucene42Codec;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.mocksep.MockSepPostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.Test;
/**
*
*
*/
//TODO: would be better in this test to pull termsenums and instanceof or something?
// this way we can verify PFPF is doing the right thing.
// for now we do termqueries.
public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
private IndexWriter newWriter(Directory dir, IndexWriterConfig conf)
throws IOException {
LogDocMergePolicy logByteSizeMergePolicy = new LogDocMergePolicy();
logByteSizeMergePolicy.setNoCFSRatio(0.0); // make sure we use plain
// files
conf.setMergePolicy(logByteSizeMergePolicy);
final IndexWriter writer = new IndexWriter(dir, conf);
return writer;
}
private void addDocs(IndexWriter writer, int numDocs) throws IOException {
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(newTextField("content", "aaa", Field.Store.NO));
writer.addDocument(doc);
}
}
private void addDocs2(IndexWriter writer, int numDocs) throws IOException {
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(newTextField("content", "bbb", Field.Store.NO));
writer.addDocument(doc);
}
}
private void addDocs3(IndexWriter writer, int numDocs) throws IOException {
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(newTextField("content", "ccc", Field.Store.NO));
doc.add(newStringField("id", "" + i, Field.Store.YES));
writer.addDocument(doc);
}
}
/*
* Test that heterogeneous index segments are merge successfully
*/
@Test
public void testMergeUnusedPerFieldCodec() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE).setCodec(new MockCodec());
IndexWriter writer = newWriter(dir, iwconf);
addDocs(writer, 10);
writer.commit();
addDocs3(writer, 10);
writer.commit();
addDocs2(writer, 10);
writer.commit();
assertEquals(30, writer.maxDoc());
_TestUtil.checkIndex(dir);
writer.forceMerge(1);
assertEquals(30, writer.maxDoc());
writer.close();
dir.close();
}
/*
* Test that heterogeneous index segments are merged sucessfully
*/
// TODO: not sure this test is that great, we should probably peek inside PerFieldPostingsFormat or something?!
@Test
public void testChangeCodecAndMerge() throws IOException {
Directory dir = newDirectory();
if (VERBOSE) {
System.out.println("TEST: make new index");
}
IndexWriterConfig iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE).setCodec(new MockCodec());
iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
//((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
IndexWriter writer = newWriter(dir, iwconf);
addDocs(writer, 10);
writer.commit();
assertQuery(new Term("content", "aaa"), dir, 10);
if (VERBOSE) {
System.out.println("TEST: addDocs3");
}
addDocs3(writer, 10);
writer.commit();
writer.close();
assertQuery(new Term("content", "ccc"), dir, 10);
assertQuery(new Term("content", "aaa"), dir, 10);
Codec codec = iwconf.getCodec();
iwconf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setOpenMode(OpenMode.APPEND).setCodec(codec);
//((LogMergePolicy) iwconf.getMergePolicy()).setNoCFSRatio(0.0);
//((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
iwconf.setCodec(new MockCodec2()); // uses standard for field content
writer = newWriter(dir, iwconf);
// swap in new codec for currently written segments
if (VERBOSE) {
System.out.println("TEST: add docs w/ Standard codec for content field");
}
addDocs2(writer, 10);
writer.commit();
codec = iwconf.getCodec();
assertEquals(30, writer.maxDoc());
assertQuery(new Term("content", "bbb"), dir, 10);
assertQuery(new Term("content", "ccc"), dir, 10); ////
assertQuery(new Term("content", "aaa"), dir, 10);
if (VERBOSE) {
System.out.println("TEST: add more docs w/ new codec");
}
addDocs2(writer, 10);
writer.commit();
assertQuery(new Term("content", "ccc"), dir, 10);
assertQuery(new Term("content", "bbb"), dir, 20);
assertQuery(new Term("content", "aaa"), dir, 10);
assertEquals(40, writer.maxDoc());
if (VERBOSE) {
System.out.println("TEST: now optimize");
}
writer.forceMerge(1);
assertEquals(40, writer.maxDoc());
writer.close();
assertQuery(new Term("content", "ccc"), dir, 10);
assertQuery(new Term("content", "bbb"), dir, 20);
assertQuery(new Term("content", "aaa"), dir, 10);
dir.close();
}
public void assertQuery(Term t, Directory dir, int num)
throws IOException {
if (VERBOSE) {
System.out.println("\nTEST: assertQuery " + t);
}
IndexReader reader = DirectoryReader.open(dir, 1);
IndexSearcher searcher = newSearcher(reader);
TopDocs search = searcher.search(new TermQuery(t), num + 10);
assertEquals(num, search.totalHits);
reader.close();
}
public static class MockCodec extends Lucene42Codec {
final PostingsFormat lucene40 = new Lucene41PostingsFormat();
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
final PostingsFormat mockSep = new MockSepPostingsFormat();
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("id")) {
return simpleText;
} else if (field.equals("content")) {
return mockSep;
} else {
return lucene40;
}
}
}
public static class MockCodec2 extends Lucene42Codec {
final PostingsFormat lucene40 = new Lucene41PostingsFormat();
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("id")) {
return simpleText;
} else {
return lucene40;
}
}
}
/*
* Test per field codec support - adding fields with random codecs
*/
@Test
public void testStressPerFieldCodec() throws IOException {
Directory dir = newDirectory(random());
final int docsPerRound = 97;
int numRounds = atLeast(1);
for (int i = 0; i < numRounds; i++) {
int num = _TestUtil.nextInt(random(), 30, 60);
IndexWriterConfig config = newIndexWriterConfig(random(),
TEST_VERSION_CURRENT, new MockAnalyzer(random()));
config.setOpenMode(OpenMode.CREATE_OR_APPEND);
IndexWriter writer = newWriter(dir, config);
for (int j = 0; j < docsPerRound; j++) {
final Document doc = new Document();
for (int k = 0; k < num; k++) {
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
customType.setTokenized(random().nextBoolean());
customType.setOmitNorms(random().nextBoolean());
Field field = newField("" + k, _TestUtil
.randomRealisticUnicodeString(random(), 128), customType);
doc.add(field);
}
writer.addDocument(doc);
}
if (random().nextBoolean()) {
writer.forceMerge(1);
}
writer.commit();
assertEquals((i + 1) * docsPerRound, writer.maxDoc());
writer.close();
}
dir.close();
}
public void testSameCodecDifferentInstance() throws Exception {
Codec codec = new Lucene42Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if ("id".equals(field)) {
return new Pulsing41PostingsFormat(1);
} else if ("date".equals(field)) {
return new Pulsing41PostingsFormat(1);
} else {
return super.getPostingsFormatForField(field);
}
}
};
doTestMixedPostings(codec);
}
public void testSameCodecDifferentParams() throws Exception {
Codec codec = new Lucene42Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if ("id".equals(field)) {
return new Pulsing41PostingsFormat(1);
} else if ("date".equals(field)) {
return new Pulsing41PostingsFormat(2);
} else {
return super.getPostingsFormatForField(field);
}
}
};
doTestMixedPostings(codec);
}
private void doTestMixedPostings(Codec codec) throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setCodec(codec);
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
// turn on vectors for the checkindex cross-check
ft.setStoreTermVectors(true);
ft.setStoreTermVectorOffsets(true);
ft.setStoreTermVectorPositions(true);
Field idField = new Field("id", "", ft);
Field dateField = new Field("date", "", ft);
doc.add(idField);
doc.add(dateField);
for (int i = 0; i < 100; i++) {
idField.setStringValue(Integer.toString(random().nextInt(50)));
dateField.setStringValue(Integer.toString(random().nextInt(100)));
iw.addDocument(doc);
}
iw.close();
dir.close(); // checkindex
}
}