lucene/core/src/test/org/apache/lucene/index/TestMultiFields.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.index;


 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;

 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOSupplier;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.UnicodeUtil;

 /**
  * Tests postings and live docs as seen through a top-level {@link IndexReader}, using
  * {@link MultiBits}, {@link MultiTerms} and {@code TestUtil.docs}.
  */
 public class TestMultiFields extends LuceneTestCase {

   /**
    * Indexes random single-term documents (with random commits and deletes-by-id), then checks
    * that every deleted id is cleared in the reader's live docs and that, for randomly chosen
    * terms, the postings enumerate exactly the document ids recorded while indexing.
    */
   public void testRandom() throws Exception {

     int num = atLeast(2);
     for (int iter = 0; iter < num; iter++) {
       if (VERBOSE) {
         System.out.println("TEST: iter=" + iter);
       }

       // term -> ids of the documents indexed with that term, in indexing order
       Map<BytesRef,List<Integer>> docs = new HashMap<>();
       // ids handed to deleteDocuments while indexing
       Set<Integer> deleted = new HashSet<>();
       // every freshly generated term, in generation order
       List<BytesRef> terms = new ArrayList<>();

       // try-with-resources: the reader and then the directory are closed even if an assertion fails
       try (Directory dir = newDirectory();
            IndexReader reader = indexRandomDocs(dir, docs, deleted, terms)) {
         if (VERBOSE) {
           System.out.println("TEST: reader=" + reader);
         }

         Bits liveDocs = MultiBits.getLiveDocs(reader);
         for (int delDoc : deleted) {
           assertFalse("doc " + delDoc + " was deleted but is still live", liveDocs.get(delDoc));
         }

         for (int i = 0; i < 100; i++) {
           BytesRef term = terms.get(random().nextInt(terms.size()));
           if (VERBOSE) {
             System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
           }

           PostingsEnum postingsEnum = TestUtil.docs(random(), reader, "field", term, null, PostingsEnum.NONE);
           assertNotNull(postingsEnum);

           // the expected list is never pruned, so it still contains ids that were deleted later
           for (int docID : docs.get(term)) {
             assertEquals(docID, postingsEnum.nextDoc());
           }
           assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
         }
       }
     }
   }

   /**
    * Indexes a random number of documents into {@code dir}, each carrying one "field" term and
    * an "id" term holding the loop counter, randomly committing and deleting earlier ids along
    * the way, and records what was indexed in the supplied collections.
    *
    * @param dir directory to index into
    * @param docs receives term -> ids of the documents indexed with that term, in indexing order
    * @param deleted receives every id passed to {@code deleteDocuments}
    * @param terms receives every freshly generated term (re-used terms are not appended again)
    * @return a reader obtained from the writer immediately before the writer is closed
    */
   private IndexReader indexRandomDocs(Directory dir, Map<BytesRef,List<Integer>> docs,
                                       Set<Integer> deleted, List<BytesRef> terms) throws Exception {
     MergePolicy neverMerge = new FilterMergePolicy(NoMergePolicy.INSTANCE) {
       @Override
       public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) {
         // we can do this because we use NoMergePolicy (and dont merge to "nothing")
         return true;
       }
     };

     try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                                 .setMergePolicy(neverMerge))) {
       int numDocs = TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);

       // a single Document is re-used; only the two field values change per iteration
       Document doc = new Document();
       Field f = newStringField("field", "", Field.Store.NO);
       doc.add(f);
       Field id = newStringField("id", "", Field.Store.NO);
       doc.add(id);

       boolean onlyUniqueTerms = random().nextBoolean();
       if (VERBOSE) {
         System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
       }
       Set<BytesRef> uniqueTerms = new HashSet<>();
       for (int i = 0; i < numDocs; i++) {

         // NOTE: the operand order matters, random() must be consulted before the emptiness check
         // so the sequence of random draws is the same as it always was for a given seed
         if (!onlyUniqueTerms && random().nextBoolean() && !terms.isEmpty()) {
           // re-use existing term
           BytesRef term = terms.get(random().nextInt(terms.size()));
           docs.get(term).add(i);
           f.setStringValue(term.utf8ToString());
         } else {
           String s = TestUtil.randomUnicodeString(random(), 10);
           BytesRef term = new BytesRef(s);
           docs.computeIfAbsent(term, k -> new ArrayList<>()).add(i);
           terms.add(term);
           uniqueTerms.add(term);
           f.setStringValue(s);
         }
         id.setStringValue(Integer.toString(i));
         w.addDocument(doc);
         if (random().nextInt(4) == 1) {
           w.commit();
         }
         if (i > 0 && random().nextInt(20) == 1) {
           // only ids strictly below the current one can be picked
           int delID = random().nextInt(i);
           deleted.add(delID);
           w.deleteDocuments(new Term("id", Integer.toString(delID)));
           if (VERBOSE) {
             System.out.println("TEST: delete " + delID);
           }
         }
       }

       if (VERBOSE) {
         printTerms(uniqueTerms, docs, deleted);
       }

       // the return value is computed first, then try-with-resources closes the writer
       return w.getReader();
     }
   }

   /** Prints every distinct term in sorted order together with its document ids, flagging deleted ones. */
   private static void printTerms(Set<BytesRef> uniqueTerms, Map<BytesRef,List<Integer>> docs,
                                  Set<Integer> deleted) {
     List<BytesRef> termsList = new ArrayList<>(uniqueTerms);
     Collections.sort(termsList);
     System.out.println("TEST: terms in UTF-8 order:");
     for (BytesRef b : termsList) {
       System.out.println("  " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
       for (int docID : docs.get(b)) {
         if (deleted.contains(docID)) {
           System.out.println("    " + docID + " (deleted)");
         } else {
           System.out.println("    " + docID);
         }
       }
     }
   }

   // NOTE(review): dead code. The helper below is commented out and refers to names
   // (DocsEnum, _TestUtil, MultiLeafReader) that nothing else in this file uses.
   /*
   private void verify(IndexReader r, String term, List<Integer> expected) throws Exception {
     DocsEnum docs = _TestUtil.docs(random, r,
                                    "field",
                                    new BytesRef(term),
                                    MultiLeafReader.getLiveDocs(r),
                                    null,
                                    false);
     for(int docID : expected) {
       assertEquals(docID, docs.nextDoc());
     }
     assertEquals(docs.NO_MORE_DOCS, docs.nextDoc());
   }
   */

   /**
    * Adds the same one-field document ("f" = "j") to {@code dir} twice, with a commit in
    * between, and returns a reader obtained from the writer immediately before it is closed.
    */
   private IndexReader indexSameDocTwice(Directory dir) throws Exception {
     try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
       Document d = new Document();
       d.add(newStringField("f", "j", Field.Store.NO));
       w.addDocument(d);
       w.commit();
       w.addDocument(d);
       return w.getReader();
     }
   }

   /**
    * Requests postings for the same term twice and checks that each returned enum independently
    * starts at document 0.
    */
   public void testSeparateEnums() throws Exception {
     try (Directory dir = newDirectory();
          IndexReader r = indexSameDocTwice(dir)) {
       PostingsEnum d1 = TestUtil.docs(random(), r, "f", new BytesRef("j"), null, PostingsEnum.NONE);
       PostingsEnum d2 = TestUtil.docs(random(), r, "f", new BytesRef("j"), null, PostingsEnum.NONE);
       assertNotNull(d1);
       assertNotNull(d2);
       assertEquals(0, d1.nextDoc());
       assertEquals(0, d2.nextDoc());
     }
   }

   /**
    * Checks that {@link MultiTerms#getTermPostingsEnum} visits both documents containing the
    * term, as ids 0 and 1, and then reports exhaustion.
    */
   public void testTermDocsEnum() throws Exception {
     try (Directory dir = newDirectory();
          IndexReader r = indexSameDocTwice(dir)) {
       PostingsEnum de = MultiTerms.getTermPostingsEnum(r, "f", new BytesRef("j"), PostingsEnum.FREQS);
       assertNotNull(de);
       assertEquals(0, de.nextDoc());
       assertEquals(1, de.nextDoc());
       assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
     }
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.index;


	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.List;
	import java.util.Map;
	import java.util.Set;

	import org.apache.lucene.analysis.MockAnalyzer;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.search.DocIdSetIterator;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.IOSupplier;
	import org.apache.lucene.util.LuceneTestCase;
	import org.apache.lucene.util.TestUtil;
	import org.apache.lucene.util.UnicodeUtil;

	/**
	 * Tests postings and live docs as seen through a top-level {@link IndexReader}, using
	 * {@link MultiBits}, {@link MultiTerms} and {@code TestUtil.docs}.
	 */
	public class TestMultiFields extends LuceneTestCase {

	  /**
	   * Indexes random single-term documents (with random commits and deletes-by-id), then checks
	   * that every deleted id is cleared in the reader's live docs and that, for randomly chosen
	   * terms, the postings enumerate exactly the document ids recorded while indexing.
	   */
	  public void testRandom() throws Exception {

	    int num = atLeast(2);
	    for (int iter = 0; iter < num; iter++) {
	      if (VERBOSE) {
	        System.out.println("TEST: iter=" + iter);
	      }

	      // term -> ids of the documents indexed with that term, in indexing order
	      Map<BytesRef,List<Integer>> docs = new HashMap<>();
	      // ids handed to deleteDocuments while indexing
	      Set<Integer> deleted = new HashSet<>();
	      // every freshly generated term, in generation order
	      List<BytesRef> terms = new ArrayList<>();

	      // try-with-resources: the reader and then the directory are closed even if an assertion fails
	      try (Directory dir = newDirectory();
	           IndexReader reader = indexRandomDocs(dir, docs, deleted, terms)) {
	        if (VERBOSE) {
	          System.out.println("TEST: reader=" + reader);
	        }

	        Bits liveDocs = MultiBits.getLiveDocs(reader);
	        for (int delDoc : deleted) {
	          assertFalse("doc " + delDoc + " was deleted but is still live", liveDocs.get(delDoc));
	        }

	        for (int i = 0; i < 100; i++) {
	          BytesRef term = terms.get(random().nextInt(terms.size()));
	          if (VERBOSE) {
	            System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
	          }

	          PostingsEnum postingsEnum = TestUtil.docs(random(), reader, "field", term, null, PostingsEnum.NONE);
	          assertNotNull(postingsEnum);

	          // the expected list is never pruned, so it still contains ids that were deleted later
	          for (int docID : docs.get(term)) {
	            assertEquals(docID, postingsEnum.nextDoc());
	          }
	          assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc());
	        }
	      }
	    }
	  }

	  /**
	   * Indexes a random number of documents into {@code dir}, each carrying one "field" term and
	   * an "id" term holding the loop counter, randomly committing and deleting earlier ids along
	   * the way, and records what was indexed in the supplied collections.
	   *
	   * @param dir directory to index into
	   * @param docs receives term -> ids of the documents indexed with that term, in indexing order
	   * @param deleted receives every id passed to {@code deleteDocuments}
	   * @param terms receives every freshly generated term (re-used terms are not appended again)
	   * @return a reader obtained from the writer immediately before the writer is closed
	   */
	  private IndexReader indexRandomDocs(Directory dir, Map<BytesRef,List<Integer>> docs,
	                                      Set<Integer> deleted, List<BytesRef> terms) throws Exception {
	    MergePolicy neverMerge = new FilterMergePolicy(NoMergePolicy.INSTANCE) {
	      @Override
	      public boolean keepFullyDeletedSegment(IOSupplier<CodecReader> readerIOSupplier) {
	        // we can do this because we use NoMergePolicy (and dont merge to "nothing")
	        return true;
	      }
	    };

	    try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
	                                                .setMergePolicy(neverMerge))) {
	      int numDocs = TestUtil.nextInt(random(), 1, 100 * RANDOM_MULTIPLIER);

	      // a single Document is re-used; only the two field values change per iteration
	      Document doc = new Document();
	      Field f = newStringField("field", "", Field.Store.NO);
	      doc.add(f);
	      Field id = newStringField("id", "", Field.Store.NO);
	      doc.add(id);

	      boolean onlyUniqueTerms = random().nextBoolean();
	      if (VERBOSE) {
	        System.out.println("TEST: onlyUniqueTerms=" + onlyUniqueTerms + " numDocs=" + numDocs);
	      }
	      Set<BytesRef> uniqueTerms = new HashSet<>();
	      for (int i = 0; i < numDocs; i++) {

	        // NOTE: the operand order matters, random() must be consulted before the emptiness check
	        // so the sequence of random draws is the same as it always was for a given seed
	        if (!onlyUniqueTerms && random().nextBoolean() && !terms.isEmpty()) {
	          // re-use existing term
	          BytesRef term = terms.get(random().nextInt(terms.size()));
	          docs.get(term).add(i);
	          f.setStringValue(term.utf8ToString());
	        } else {
	          String s = TestUtil.randomUnicodeString(random(), 10);
	          BytesRef term = new BytesRef(s);
	          docs.computeIfAbsent(term, k -> new ArrayList<>()).add(i);
	          terms.add(term);
	          uniqueTerms.add(term);
	          f.setStringValue(s);
	        }
	        id.setStringValue(Integer.toString(i));
	        w.addDocument(doc);
	        if (random().nextInt(4) == 1) {
	          w.commit();
	        }
	        if (i > 0 && random().nextInt(20) == 1) {
	          // only ids strictly below the current one can be picked
	          int delID = random().nextInt(i);
	          deleted.add(delID);
	          w.deleteDocuments(new Term("id", Integer.toString(delID)));
	          if (VERBOSE) {
	            System.out.println("TEST: delete " + delID);
	          }
	        }
	      }

	      if (VERBOSE) {
	        printTerms(uniqueTerms, docs, deleted);
	      }

	      // the return value is computed first, then try-with-resources closes the writer
	      return w.getReader();
	    }
	  }

	  /** Prints every distinct term in sorted order together with its document ids, flagging deleted ones. */
	  private static void printTerms(Set<BytesRef> uniqueTerms, Map<BytesRef,List<Integer>> docs,
	                                 Set<Integer> deleted) {
	    List<BytesRef> termsList = new ArrayList<>(uniqueTerms);
	    Collections.sort(termsList);
	    System.out.println("TEST: terms in UTF-8 order:");
	    for (BytesRef b : termsList) {
	      System.out.println(" " + UnicodeUtil.toHexString(b.utf8ToString()) + " " + b);
	      for (int docID : docs.get(b)) {
	        if (deleted.contains(docID)) {
	          System.out.println(" " + docID + " (deleted)");
	        } else {
	          System.out.println(" " + docID);
	        }
	      }
	    }
	  }

	  // NOTE(review): dead code. The helper below is commented out and refers to names
	  // (DocsEnum, _TestUtil, MultiLeafReader) that nothing else in this file uses.
	  /*
	  private void verify(IndexReader r, String term, List<Integer> expected) throws Exception {
	    DocsEnum docs = _TestUtil.docs(random, r,
	                                   "field",
	                                   new BytesRef(term),
	                                   MultiLeafReader.getLiveDocs(r),
	                                   null,
	                                   false);
	    for(int docID : expected) {
	      assertEquals(docID, docs.nextDoc());
	    }
	    assertEquals(docs.NO_MORE_DOCS, docs.nextDoc());
	  }
	  */

	  /**
	   * Adds the same one-field document ("f" = "j") to {@code dir} twice, with a commit in
	   * between, and returns a reader obtained from the writer immediately before it is closed.
	   */
	  private IndexReader indexSameDocTwice(Directory dir) throws Exception {
	    try (IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
	      Document d = new Document();
	      d.add(newStringField("f", "j", Field.Store.NO));
	      w.addDocument(d);
	      w.commit();
	      w.addDocument(d);
	      return w.getReader();
	    }
	  }

	  /**
	   * Requests postings for the same term twice and checks that each returned enum independently
	   * starts at document 0.
	   */
	  public void testSeparateEnums() throws Exception {
	    try (Directory dir = newDirectory();
	         IndexReader r = indexSameDocTwice(dir)) {
	      PostingsEnum d1 = TestUtil.docs(random(), r, "f", new BytesRef("j"), null, PostingsEnum.NONE);
	      PostingsEnum d2 = TestUtil.docs(random(), r, "f", new BytesRef("j"), null, PostingsEnum.NONE);
	      assertNotNull(d1);
	      assertNotNull(d2);
	      assertEquals(0, d1.nextDoc());
	      assertEquals(0, d2.nextDoc());
	    }
	  }

	  /**
	   * Checks that {@link MultiTerms#getTermPostingsEnum} visits both documents containing the
	   * term, as ids 0 and 1, and then reports exhaustion.
	   */
	  public void testTermDocsEnum() throws Exception {
	    try (Directory dir = newDirectory();
	         IndexReader r = indexSameDocTwice(dir)) {
	      PostingsEnum de = MultiTerms.getTermPostingsEnum(r, "f", new BytesRef("j"), PostingsEnum.FREQS);
	      assertNotNull(de);
	      assertEquals(0, de.nextDoc());
	      assertEquals(1, de.nextDoc());
	      assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
	    }
	  }
	}