blob: 5f39f3dd4bfadcdd535e5c298c4d0261f3a7cdc6 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
/**
 * Test that creates way, way, way too many fields.
 *
 * <p>Every test opens its {@link Directory}, {@link IndexWriter} and {@link IndexReader} in
 * try-with-resources blocks so that a failed assertion or an exception in the middle of a test
 * still closes them instead of leaking them.
 */
@LuceneTestCase.SuppressCodecs("SimpleText")
public class TestManyFields extends LuceneTestCase {

  /** Private copy of {@link TextField#TYPE_NOT_STORED}; shared by the fields these tests add. */
  private static final FieldType UNSTORED_TEXT_TYPE = new FieldType(TextField.TYPE_NOT_STORED);

  /**
   * Indexes 100 documents that each carry six fields whose names are unique to that document, then
   * verifies the doc counts and that every (field, term) pair has a document frequency of 1.
   *
   * @throws IOException if indexing, reading or closing the index fails
   */
  public void testManyFields() throws IOException {
    final int numDocs = 100;
    try (Directory dir = newDirectory()) {
      IndexWriterConfig iwc =
          newIndexWriterConfig(new MockAnalyzer(random())).setMaxBufferedDocs(10);
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        for (int j = 0; j < numDocs; j++) {
          Document doc = new Document();
          // Fields a/b/c get a per-document term; d/e/f share the constant term "aaa".
          doc.add(newField("a" + j, "aaa" + j, UNSTORED_TEXT_TYPE));
          doc.add(newField("b" + j, "aaa" + j, UNSTORED_TEXT_TYPE));
          doc.add(newField("c" + j, "aaa" + j, UNSTORED_TEXT_TYPE));
          doc.add(newField("d" + j, "aaa", UNSTORED_TEXT_TYPE));
          doc.add(newField("e" + j, "aaa", UNSTORED_TEXT_TYPE));
          doc.add(newField("f" + j, "aaa", UNSTORED_TEXT_TYPE));
          writer.addDocument(doc);
        }
      }

      try (IndexReader reader = DirectoryReader.open(dir)) {
        assertEquals(numDocs, reader.maxDoc());
        assertEquals(numDocs, reader.numDocs());
        for (int j = 0; j < numDocs; j++) {
          // Each field name occurs in exactly one document, so every docFreq must be 1.
          assertEquals(1, reader.docFreq(new Term("a" + j, "aaa" + j)));
          assertEquals(1, reader.docFreq(new Term("b" + j, "aaa" + j)));
          assertEquals(1, reader.docFreq(new Term("c" + j, "aaa" + j)));
          assertEquals(1, reader.docFreq(new Term("d" + j, "aaa")));
          assertEquals(1, reader.docFreq(new Term("e" + j, "aaa")));
          assertEquals(1, reader.docFreq(new Term("f" + j, "aaa")));
        }
      }
    }
  }

  /**
   * Indexes three differently shaped batches of documents into a single field with a 0.5 MB RAM
   * buffer, then checks that the term {@code "aaa"} matches exactly the documents of the second
   * batch.
   *
   * @throws IOException if indexing, searching or closing the index fails
   */
  public void testDiverseDocs() throws IOException {
    final int docsPerBatch = 100;
    try (Directory dir = newDirectory()) {
      IndexWriterConfig iwc =
          newIndexWriterConfig(new MockAnalyzer(random())).setRAMBufferSizeMB(0.5);
      // Assigned inside the writer block (same point as before) but needed by the assertion below.
      final int n;
      try (IndexWriter writer = new IndexWriter(dir, iwc)) {
        n = atLeast(1);
        for (int i = 0; i < n; i++) {
          // First, docs where every term is unique (heavy on
          // Posting instances)
          for (int j = 0; j < docsPerBatch; j++) {
            Document doc = new Document();
            for (int k = 0; k < 100; k++) {
              doc.add(newField("field", Integer.toString(random().nextInt()), UNSTORED_TEXT_TYPE));
            }
            writer.addDocument(doc);
          }

          // Next, many single term docs where only one term
          // occurs (heavy on byte blocks)
          for (int j = 0; j < docsPerBatch; j++) {
            Document doc = new Document();
            doc.add(
                newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", UNSTORED_TEXT_TYPE));
            writer.addDocument(doc);
          }

          // Next, many single term docs where only one term
          // occurs but the terms are very long (heavy on
          // char[] arrays)
          for (int j = 0; j < docsPerBatch; j++) {
            StringBuilder b = new StringBuilder();
            String x = Integer.toString(j) + ".";
            for (int k = 0; k < 1000; k++) {
              b.append(x);
            }
            String longTerm = b.toString();

            Document doc = new Document();
            doc.add(newField("field", longTerm, UNSTORED_TEXT_TYPE));
            writer.addDocument(doc);
          }
        }
      }

      try (IndexReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = newSearcher(reader);
        long totalHits = searcher.count(new TermQuery(new Term("field", "aaa")));
        // Only the "aaa aaa ..." docs of the second batch contain the term "aaa".
        assertEquals(n * docsPerBatch, totalHits);
      }
    }
  }

  /**
   * LUCENE-4398: keeps adding documents whose field names are always new (until the names start
   * being recycled) and asserts that each later flush happens after at least 90% as many documents
   * as the first flush did.
   *
   * @throws Exception if indexing or closing the index fails
   */
  public void testRotatingFieldNames() throws Exception {
    try (Directory dir = newFSDirectory(createTempDir("TestIndexWriter.testChangingFields"))) {
      IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
      iwc.setRAMBufferSizeMB(0.2);
      // Turn off the doc-count flush trigger; only the RAM buffer limit above is configured.
      iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);

      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
      ft.setOmitNorms(true);

      try (IndexWriter w = new IndexWriter(dir, iwc)) {
        int upto = 0;
        int firstDocCount = -1;
        for (int iter = 0; iter < 10; iter++) {
          final int startFlushCount = w.getFlushCount();

          // Add docs (10 fresh field names each) until the writer reports another flush.
          int docCount = 0;
          while (w.getFlushCount() == startFlushCount) {
            Document doc = new Document();
            for (int i = 0; i < 10; i++) {
              doc.add(new Field("field" + (upto++), "content", ft));
            }
            w.addDocument(doc);
            docCount++;
          }

          if (VERBOSE) {
            System.out.println("TEST: iter=" + iter + " flushed after docCount=" + docCount);
          }

          if (iter == 0) {
            // The first segment's size is the baseline for all later iterations.
            firstDocCount = docCount;
          }

          assertTrue(
              "flushed after too few docs: first segment flushed at docCount="
                  + firstDocCount
                  + ", but current segment flushed after docCount="
                  + docCount
                  + "; iter="
                  + iter,
              ((float) docCount) / firstDocCount > 0.9);

          if (upto > 5000) {
            // Start re-using field names after a while
            // ... important because otherwise we can OOME due
            // to too many FieldInfo instances.
            upto = 0;
          }
        }
      }
    }
  }
}