// lucene/core/src/test/org/apache/lucene/index/TestManyFields.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.index;


 import java.io.IOException;

 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

 /** Test that creates way, way, way too many fields */
 @LuceneTestCase.SuppressCodecs("SimpleText")
 public class TestManyFields extends LuceneTestCase {
   private static final FieldType storedTextType = new FieldType(TextField.TYPE_NOT_STORED);

   public void testManyFields() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                                  .setMaxBufferedDocs(10));
     for(int j=0;j<100;j++) {
       Document doc = new Document();
       doc.add(newField("a"+j, "aaa" + j, storedTextType));
       doc.add(newField("b"+j, "aaa" + j, storedTextType));
       doc.add(newField("c"+j, "aaa" + j, storedTextType));
       doc.add(newField("d"+j, "aaa", storedTextType));
       doc.add(newField("e"+j, "aaa", storedTextType));
       doc.add(newField("f"+j, "aaa", storedTextType));
       writer.addDocument(doc);
     }
     writer.close();

     IndexReader reader = DirectoryReader.open(dir);
     assertEquals(100, reader.maxDoc());
     assertEquals(100, reader.numDocs());
     for(int j=0;j<100;j++) {
       assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
       assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
       assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
       assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
       assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
       assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
     }
     reader.close();
     dir.close();
   }

   public void testDiverseDocs() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer  = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                                  .setRAMBufferSizeMB(0.5));
     int n = atLeast(1);
     for(int i=0;i<n;i++) {
       // First, docs where every term is unique (heavy on
       // Posting instances)
       for(int j=0;j<100;j++) {
         Document doc = new Document();
         for(int k=0;k<100;k++) {
           doc.add(newField("field", Integer.toString(random().nextInt()), storedTextType));
         }
         writer.addDocument(doc);
       }

       // Next, many single term docs where only one term
       // occurs (heavy on byte blocks)
       for(int j=0;j<100;j++) {
         Document doc = new Document();
         doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", storedTextType));
         writer.addDocument(doc);
       }

       // Next, many single term docs where only one term
       // occurs but the terms are very long (heavy on
       // char[] arrays)
       for(int j=0;j<100;j++) {
         StringBuilder b = new StringBuilder();
         String x = Integer.toString(j) + ".";
         for(int k=0;k<1000;k++)
           b.append(x);
         String longTerm = b.toString();

         Document doc = new Document();
         doc.add(newField("field", longTerm, storedTextType));
         writer.addDocument(doc);
       }
     }
     writer.close();

     IndexReader reader = DirectoryReader.open(dir);
     IndexSearcher searcher = newSearcher(reader);
     long totalHits = searcher.count(new TermQuery(new Term("field", "aaa")));
     assertEquals(n*100, totalHits);
     reader.close();

     dir.close();
   }

   // LUCENE-4398
   public void testRotatingFieldNames() throws Exception {
     Directory dir = newFSDirectory(createTempDir("TestIndexWriter.testChangingFields"));
     IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
     iwc.setRAMBufferSizeMB(0.2);
     iwc.setMaxBufferedDocs(-1);
     IndexWriter w = new IndexWriter(dir, iwc);
     int upto = 0;

     FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
     ft.setOmitNorms(true);

     int firstDocCount = -1;
     for(int iter=0;iter<10;iter++) {
       final int startFlushCount = w.getFlushCount();
       int docCount = 0;
       while(w.getFlushCount() == startFlushCount) {
         Document doc = new Document();
         for(int i=0;i<10;i++) {
           doc.add(new Field("field" + (upto++), "content", ft));
         }
         w.addDocument(doc);
         docCount++;
       }

       if (VERBOSE) {
         System.out.println("TEST: iter=" + iter + " flushed after docCount=" + docCount);
       }

       if (iter == 0) {
         firstDocCount = docCount;
       }

       assertTrue("flushed after too few docs: first segment flushed at docCount=" + firstDocCount + ", but current segment flushed after docCount=" + docCount + "; iter=" + iter, ((float) docCount) / firstDocCount > 0.9);

       if (upto > 5000) {
         // Start re-using field names after a while
         // ... important because otherwise we can OOME due
         // to too many FieldInfo instances.
         upto = 0;
       }
     }
     w.close();
     dir.close();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.index;


	import java.io.IOException;

	import org.apache.lucene.analysis.MockAnalyzer;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.document.FieldType;
	import org.apache.lucene.document.TextField;
	import org.apache.lucene.search.IndexSearcher;
	import org.apache.lucene.search.TermQuery;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.util.LuceneTestCase;

	/** Test that creates way, way, way too many fields */
	@LuceneTestCase.SuppressCodecs("SimpleText")
	public class TestManyFields extends LuceneTestCase {
	private static final FieldType storedTextType = new FieldType(TextField.TYPE_NOT_STORED);

	public void testManyFields() throws IOException {
	Directory dir = newDirectory();
	IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
	.setMaxBufferedDocs(10));
	for(int j=0;j<100;j++) {
	Document doc = new Document();
	doc.add(newField("a"+j, "aaa" + j, storedTextType));
	doc.add(newField("b"+j, "aaa" + j, storedTextType));
	doc.add(newField("c"+j, "aaa" + j, storedTextType));
	doc.add(newField("d"+j, "aaa", storedTextType));
	doc.add(newField("e"+j, "aaa", storedTextType));
	doc.add(newField("f"+j, "aaa", storedTextType));
	writer.addDocument(doc);
	}
	writer.close();

	IndexReader reader = DirectoryReader.open(dir);
	assertEquals(100, reader.maxDoc());
	assertEquals(100, reader.numDocs());
	for(int j=0;j<100;j++) {
	assertEquals(1, reader.docFreq(new Term("a"+j, "aaa"+j)));
	assertEquals(1, reader.docFreq(new Term("b"+j, "aaa"+j)));
	assertEquals(1, reader.docFreq(new Term("c"+j, "aaa"+j)));
	assertEquals(1, reader.docFreq(new Term("d"+j, "aaa")));
	assertEquals(1, reader.docFreq(new Term("e"+j, "aaa")));
	assertEquals(1, reader.docFreq(new Term("f"+j, "aaa")));
	}
	reader.close();
	dir.close();
	}

	public void testDiverseDocs() throws IOException {
	Directory dir = newDirectory();
	IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
	.setRAMBufferSizeMB(0.5));
	int n = atLeast(1);
	for(int i=0;i<n;i++) {
	// First, docs where every term is unique (heavy on
	// Posting instances)
	for(int j=0;j<100;j++) {
	Document doc = new Document();
	for(int k=0;k<100;k++) {
	doc.add(newField("field", Integer.toString(random().nextInt()), storedTextType));
	}
	writer.addDocument(doc);
	}

	// Next, many single term docs where only one term
	// occurs (heavy on byte blocks)
	for(int j=0;j<100;j++) {
	Document doc = new Document();
	doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", storedTextType));
	writer.addDocument(doc);
	}

	// Next, many single term docs where only one term
	// occurs but the terms are very long (heavy on
	// char[] arrays)
	for(int j=0;j<100;j++) {
	StringBuilder b = new StringBuilder();
	String x = Integer.toString(j) + ".";
	for(int k=0;k<1000;k++)
	b.append(x);
	String longTerm = b.toString();

	Document doc = new Document();
	doc.add(newField("field", longTerm, storedTextType));
	writer.addDocument(doc);
	}
	}
	writer.close();

	IndexReader reader = DirectoryReader.open(dir);
	IndexSearcher searcher = newSearcher(reader);
	long totalHits = searcher.count(new TermQuery(new Term("field", "aaa")));
	assertEquals(n*100, totalHits);
	reader.close();

	dir.close();
	}

	// LUCENE-4398
	public void testRotatingFieldNames() throws Exception {
	Directory dir = newFSDirectory(createTempDir("TestIndexWriter.testChangingFields"));
	IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
	iwc.setRAMBufferSizeMB(0.2);
	iwc.setMaxBufferedDocs(-1);
	IndexWriter w = new IndexWriter(dir, iwc);
	int upto = 0;

	FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
	ft.setOmitNorms(true);

	int firstDocCount = -1;
	for(int iter=0;iter<10;iter++) {
	final int startFlushCount = w.getFlushCount();
	int docCount = 0;
	while(w.getFlushCount() == startFlushCount) {
	Document doc = new Document();
	for(int i=0;i<10;i++) {
	doc.add(new Field("field" + (upto++), "content", ft));
	}
	w.addDocument(doc);
	docCount++;
	}

	if (VERBOSE) {
	System.out.println("TEST: iter=" + iter + " flushed after docCount=" + docCount);
	}

	if (iter == 0) {
	firstDocCount = docCount;
	}

	assertTrue("flushed after too few docs: first segment flushed at docCount=" + firstDocCount + ", but current segment flushed after docCount=" + docCount + "; iter=" + iter, ((float) docCount) / firstDocCount > 0.9);

	if (upto > 5000) {
	// Start re-using field names after a while
	// ... important because otherwise we can OOME due
	// to too many FieldInfo instances.
	upto = 0;
	}
	}
	w.close();
	dir.close();
	}
	}