lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java - lucene-solr - Git at Google

 package org.apache.lucene.index;

 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.util.BytesRef;

 import java.io.IOException;

 public class TestSegmentTermDocs extends LuceneTestCase {
   private Document testDoc = new Document();
   private Directory dir;
   private SegmentInfo info;

   @Override
   public void setUp() throws Exception {
     super.setUp();
     dir = newDirectory();
     DocHelper.setupDoc(testDoc);
     info = DocHelper.writeDoc(random, dir, testDoc);
   }

   @Override
   public void tearDown() throws Exception {
     dir.close();
     super.tearDown();
   }

   public void test() {
     assertTrue(dir != null);
   }

   public void testTermDocs() throws IOException {
     testTermDocs(1);
   }

   public void testTermDocs(int indexDivisor) throws IOException {
     //After adding the document, we should be able to read it back in
     SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
     assertTrue(reader != null);
     assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());

     TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
     terms.seek(new BytesRef("field"));
     DocsEnum termDocs = terms.docs(reader.getDeletedDocs(), null);
     if (termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS)    {
       int docId = termDocs.docID();
       assertTrue(docId == 0);
       int freq = termDocs.freq();
       assertTrue(freq == 3);
     }
     reader.close();
   }

   public void testBadSeek() throws IOException {
     testBadSeek(1);
   }

   public void testBadSeek(int indexDivisor) throws IOException {
     {
       //After adding the document, we should be able to read it back in
       SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
       assertTrue(reader != null);
       DocsEnum termDocs = reader.termDocsEnum(reader.getDeletedDocs(),
                                               "textField2",
                                               new BytesRef("bad"));

       assertNull(termDocs);
       reader.close();
     }
     {
       //After adding the document, we should be able to read it back in
       SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
       assertTrue(reader != null);
       DocsEnum termDocs = reader.termDocsEnum(reader.getDeletedDocs(),
                                               "junk",
                                               new BytesRef("bad"));
       assertNull(termDocs);
       reader.close();
     }
   }

   public void testSkipTo() throws IOException {
     testSkipTo(1);
   }

   public void testSkipTo(int indexDivisor) throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));

     Term ta = new Term("content","aaa");
     for(int i = 0; i < 10; i++)
       addDoc(writer, "aaa aaa aaa aaa");

     Term tb = new Term("content","bbb");
     for(int i = 0; i < 16; i++)
       addDoc(writer, "bbb bbb bbb bbb");

     Term tc = new Term("content","ccc");
     for(int i = 0; i < 50; i++)
       addDoc(writer, "ccc ccc ccc ccc");

     // assure that we deal with a single segment
     writer.optimize();
     writer.close();

     IndexReader reader = IndexReader.open(dir, null, true, indexDivisor);

     DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
                                                  MultiFields.getDeletedDocs(reader),
                                                  ta.field(),
                                                  new BytesRef(ta.text()));

     // without optimization (assumption skipInterval == 16)

     // with next
     assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
     assertEquals(0, tdocs.docID());
     assertEquals(4, tdocs.freq());
     assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
     assertEquals(1, tdocs.docID());
     assertEquals(4, tdocs.freq());
     assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS);
     assertEquals(2, tdocs.docID());
     assertTrue(tdocs.advance(4) != DocsEnum.NO_MORE_DOCS);
     assertEquals(4, tdocs.docID());
     assertTrue(tdocs.advance(9) != DocsEnum.NO_MORE_DOCS);
     assertEquals(9, tdocs.docID());
     assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS);

     // without next
     tdocs = MultiFields.getTermDocsEnum(reader,
                                         MultiFields.getDeletedDocs(reader),
                                         ta.field(),
                                         new BytesRef(ta.text()));

     assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS);
     assertEquals(0, tdocs.docID());
     assertTrue(tdocs.advance(4) != DocsEnum.NO_MORE_DOCS);
     assertEquals(4, tdocs.docID());
     assertTrue(tdocs.advance(9) != DocsEnum.NO_MORE_DOCS);
     assertEquals(9, tdocs.docID());
     assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS);

     // exactly skipInterval documents and therefore with optimization

     // with next
     tdocs = MultiFields.getTermDocsEnum(reader,
                                         MultiFields.getDeletedDocs(reader),
                                         tb.field(),
                                         new BytesRef(tb.text()));

     assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
     assertEquals(10, tdocs.docID());
     assertEquals(4, tdocs.freq());
     assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
     assertEquals(11, tdocs.docID());
     assertEquals(4, tdocs.freq());
     assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
     assertEquals(12, tdocs.docID());
     assertTrue(tdocs.advance(15) != DocsEnum.NO_MORE_DOCS);
     assertEquals(15, tdocs.docID());
     assertTrue(tdocs.advance(24) != DocsEnum.NO_MORE_DOCS);
     assertEquals(24, tdocs.docID());
     assertTrue(tdocs.advance(25) != DocsEnum.NO_MORE_DOCS);
     assertEquals(25, tdocs.docID());
     assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS);

     // without next
     tdocs = MultiFields.getTermDocsEnum(reader,
                                         MultiFields.getDeletedDocs(reader),
                                         tb.field(),
                                         new BytesRef(tb.text()));

     assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
     assertEquals(10, tdocs.docID());
     assertTrue(tdocs.advance(15) != DocsEnum.NO_MORE_DOCS);
     assertEquals(15, tdocs.docID());
     assertTrue(tdocs.advance(24) != DocsEnum.NO_MORE_DOCS);
     assertEquals(24, tdocs.docID());
     assertTrue(tdocs.advance(25) != DocsEnum.NO_MORE_DOCS);
     assertEquals(25, tdocs.docID());
     assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS);

     // much more than skipInterval documents and therefore with optimization

     // with next
     tdocs = MultiFields.getTermDocsEnum(reader,
                                         MultiFields.getDeletedDocs(reader),
                                         tc.field(),
                                         new BytesRef(tc.text()));

     assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
     assertEquals(26, tdocs.docID());
     assertEquals(4, tdocs.freq());
     assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
     assertEquals(27, tdocs.docID());
     assertEquals(4, tdocs.freq());
     assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
     assertEquals(28, tdocs.docID());
     assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS);
     assertEquals(40, tdocs.docID());
     assertTrue(tdocs.advance(57) != DocsEnum.NO_MORE_DOCS);
     assertEquals(57, tdocs.docID());
     assertTrue(tdocs.advance(74) != DocsEnum.NO_MORE_DOCS);
     assertEquals(74, tdocs.docID());
     assertTrue(tdocs.advance(75) != DocsEnum.NO_MORE_DOCS);
     assertEquals(75, tdocs.docID());
     assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS);

     //without next
     tdocs = MultiFields.getTermDocsEnum(reader,
                                         MultiFields.getDeletedDocs(reader),
                                         tc.field(),
                                         new BytesRef(tc.text()));
     assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
     assertEquals(26, tdocs.docID());
     assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS);
     assertEquals(40, tdocs.docID());
     assertTrue(tdocs.advance(57) != DocsEnum.NO_MORE_DOCS);
     assertEquals(57, tdocs.docID());
     assertTrue(tdocs.advance(74) != DocsEnum.NO_MORE_DOCS);
     assertEquals(74, tdocs.docID());
     assertTrue(tdocs.advance(75) != DocsEnum.NO_MORE_DOCS);
     assertEquals(75, tdocs.docID());
     assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS);

     reader.close();
     dir.close();
   }

   public void testIndexDivisor() throws IOException {
     testDoc = new Document();
     DocHelper.setupDoc(testDoc);
     DocHelper.writeDoc(random, dir, testDoc);
     testTermDocs(2);
     testBadSeek(2);
     testSkipTo(2);
   }

   private void addDoc(IndexWriter writer, String value) throws IOException
   {
       Document doc = new Document();
       doc.add(newField("content", value, Field.Store.NO, Field.Index.ANALYZED));
       writer.addDocument(doc);
   }
 }
	package org.apache.lucene.index;

	/**
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import org.apache.lucene.util.LuceneTestCase;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.analysis.MockAnalyzer;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.util.BytesRef;

	import java.io.IOException;

	public class TestSegmentTermDocs extends LuceneTestCase {
	private Document testDoc = new Document();
	private Directory dir;
	private SegmentInfo info;

	@Override
	public void setUp() throws Exception {
	super.setUp();
	dir = newDirectory();
	DocHelper.setupDoc(testDoc);
	info = DocHelper.writeDoc(random, dir, testDoc);
	}

	@Override
	public void tearDown() throws Exception {
	dir.close();
	super.tearDown();
	}

	public void test() {
	assertTrue(dir != null);
	}

	public void testTermDocs() throws IOException {
	testTermDocs(1);
	}

	public void testTermDocs(int indexDivisor) throws IOException {
	//After adding the document, we should be able to read it back in
	SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
	assertTrue(reader != null);
	assertEquals(indexDivisor, reader.getTermInfosIndexDivisor());

	TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator();
	terms.seek(new BytesRef("field"));
	DocsEnum termDocs = terms.docs(reader.getDeletedDocs(), null);
	if (termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
	int docId = termDocs.docID();
	assertTrue(docId == 0);
	int freq = termDocs.freq();
	assertTrue(freq == 3);
	}
	reader.close();
	}

	public void testBadSeek() throws IOException {
	testBadSeek(1);
	}

	public void testBadSeek(int indexDivisor) throws IOException {
	{
	//After adding the document, we should be able to read it back in
	SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
	assertTrue(reader != null);
	DocsEnum termDocs = reader.termDocsEnum(reader.getDeletedDocs(),
	"textField2",
	new BytesRef("bad"));

	assertNull(termDocs);
	reader.close();
	}
	{
	//After adding the document, we should be able to read it back in
	SegmentReader reader = SegmentReader.get(true, info, indexDivisor);
	assertTrue(reader != null);
	DocsEnum termDocs = reader.termDocsEnum(reader.getDeletedDocs(),
	"junk",
	new BytesRef("bad"));
	assertNull(termDocs);
	reader.close();
	}
	}

	public void testSkipTo() throws IOException {
	testSkipTo(1);
	}

	public void testSkipTo(int indexDivisor) throws IOException {
	Directory dir = newDirectory();
	IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));

	Term ta = new Term("content","aaa");
	for(int i = 0; i < 10; i++)
	addDoc(writer, "aaa aaa aaa aaa");

	Term tb = new Term("content","bbb");
	for(int i = 0; i < 16; i++)
	addDoc(writer, "bbb bbb bbb bbb");

	Term tc = new Term("content","ccc");
	for(int i = 0; i < 50; i++)
	addDoc(writer, "ccc ccc ccc ccc");

	// assure that we deal with a single segment
	writer.optimize();
	writer.close();

	IndexReader reader = IndexReader.open(dir, null, true, indexDivisor);

	DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
	MultiFields.getDeletedDocs(reader),
	ta.field(),
	new BytesRef(ta.text()));

	// without optimization (assumption skipInterval == 16)

	// with next
	assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
	assertEquals(0, tdocs.docID());
	assertEquals(4, tdocs.freq());
	assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
	assertEquals(1, tdocs.docID());
	assertEquals(4, tdocs.freq());
	assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS);
	assertEquals(2, tdocs.docID());
	assertTrue(tdocs.advance(4) != DocsEnum.NO_MORE_DOCS);
	assertEquals(4, tdocs.docID());
	assertTrue(tdocs.advance(9) != DocsEnum.NO_MORE_DOCS);
	assertEquals(9, tdocs.docID());
	assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS);

	// without next
	tdocs = MultiFields.getTermDocsEnum(reader,
	MultiFields.getDeletedDocs(reader),
	ta.field(),
	new BytesRef(ta.text()));

	assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS);
	assertEquals(0, tdocs.docID());
	assertTrue(tdocs.advance(4) != DocsEnum.NO_MORE_DOCS);
	assertEquals(4, tdocs.docID());
	assertTrue(tdocs.advance(9) != DocsEnum.NO_MORE_DOCS);
	assertEquals(9, tdocs.docID());
	assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS);

	// exactly skipInterval documents and therefore with optimization

	// with next
	tdocs = MultiFields.getTermDocsEnum(reader,
	MultiFields.getDeletedDocs(reader),
	tb.field(),
	new BytesRef(tb.text()));

	assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
	assertEquals(10, tdocs.docID());
	assertEquals(4, tdocs.freq());
	assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
	assertEquals(11, tdocs.docID());
	assertEquals(4, tdocs.freq());
	assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
	assertEquals(12, tdocs.docID());
	assertTrue(tdocs.advance(15) != DocsEnum.NO_MORE_DOCS);
	assertEquals(15, tdocs.docID());
	assertTrue(tdocs.advance(24) != DocsEnum.NO_MORE_DOCS);
	assertEquals(24, tdocs.docID());
	assertTrue(tdocs.advance(25) != DocsEnum.NO_MORE_DOCS);
	assertEquals(25, tdocs.docID());
	assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS);

	// without next
	tdocs = MultiFields.getTermDocsEnum(reader,
	MultiFields.getDeletedDocs(reader),
	tb.field(),
	new BytesRef(tb.text()));

	assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
	assertEquals(10, tdocs.docID());
	assertTrue(tdocs.advance(15) != DocsEnum.NO_MORE_DOCS);
	assertEquals(15, tdocs.docID());
	assertTrue(tdocs.advance(24) != DocsEnum.NO_MORE_DOCS);
	assertEquals(24, tdocs.docID());
	assertTrue(tdocs.advance(25) != DocsEnum.NO_MORE_DOCS);
	assertEquals(25, tdocs.docID());
	assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS);

	// much more than skipInterval documents and therefore with optimization

	// with next
	tdocs = MultiFields.getTermDocsEnum(reader,
	MultiFields.getDeletedDocs(reader),
	tc.field(),
	new BytesRef(tc.text()));

	assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
	assertEquals(26, tdocs.docID());
	assertEquals(4, tdocs.freq());
	assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
	assertEquals(27, tdocs.docID());
	assertEquals(4, tdocs.freq());
	assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
	assertEquals(28, tdocs.docID());
	assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS);
	assertEquals(40, tdocs.docID());
	assertTrue(tdocs.advance(57) != DocsEnum.NO_MORE_DOCS);
	assertEquals(57, tdocs.docID());
	assertTrue(tdocs.advance(74) != DocsEnum.NO_MORE_DOCS);
	assertEquals(74, tdocs.docID());
	assertTrue(tdocs.advance(75) != DocsEnum.NO_MORE_DOCS);
	assertEquals(75, tdocs.docID());
	assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS);

	//without next
	tdocs = MultiFields.getTermDocsEnum(reader,
	MultiFields.getDeletedDocs(reader),
	tc.field(),
	new BytesRef(tc.text()));
	assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
	assertEquals(26, tdocs.docID());
	assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS);
	assertEquals(40, tdocs.docID());
	assertTrue(tdocs.advance(57) != DocsEnum.NO_MORE_DOCS);
	assertEquals(57, tdocs.docID());
	assertTrue(tdocs.advance(74) != DocsEnum.NO_MORE_DOCS);
	assertEquals(74, tdocs.docID());
	assertTrue(tdocs.advance(75) != DocsEnum.NO_MORE_DOCS);
	assertEquals(75, tdocs.docID());
	assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS);

	reader.close();
	dir.close();
	}

	public void testIndexDivisor() throws IOException {
	testDoc = new Document();
	DocHelper.setupDoc(testDoc);
	DocHelper.writeDoc(random, dir, testDoc);
	testTermDocs(2);
	testBadSeek(2);
	testSkipTo(2);
	}

	private void addDoc(IndexWriter writer, String value) throws IOException
	{
	Document doc = new Document();
	doc.add(newField("content", value, Field.Store.NO, Field.Index.ANALYZED));
	writer.addDocument(doc);
	}
	}