| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.util.Collections; |
| import java.util.Iterator; |
| import java.util.Map; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.document.StoredField; |
| import org.apache.lucene.search.BooleanClause; |
| import org.apache.lucene.search.BooleanQuery; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.TestUtil; |
| |
| public class TestIndexableField extends LuceneTestCase { |
| |
/**
 * Minimal hand-rolled {@link IndexableField} (bypassing Document/Field) whose
 * indexing options, stored-ness, and value representation are all pure
 * functions of {@code counter}.  The verification loop in
 * {@code testArbitraryFields} re-derives the exact same expectations from the
 * same counter value, so any change here must be mirrored there.
 *
 * Per {@code counter % 10} (the "fieldID"):
 *   3          -&gt; not indexed; stored binary value (10 bytes starting at counter)
 *   7          -&gt; indexed; value supplied via readerValue()
 *   all others -&gt; indexed; value supplied via stringValue() ("text " + counter)
 * Even counters (and fieldID 3) are stored; odd-counter indexed fields other
 * than fieldID 9 additionally request term vectors.
 */
private static class MyField implements IndexableField {

  private final int counter;
  // Anonymous IndexableFieldType: every answer is derived from counter.
  private final IndexableFieldType fieldType = new IndexableFieldType() {
    @Override
    public boolean stored() {
      // stored for every even counter, plus the binary fieldID==3 case
      return (counter & 1) == 0 || (counter % 10) == 3;
    }

    @Override
    public boolean tokenized() {
      return true;
    }

    @Override
    public boolean storeTermVectors() {
      // only indexed, odd-counter fields, excluding fieldID 9
      return indexOptions() != IndexOptions.NONE && counter % 2 == 1 && counter % 10 != 9;
    }

    @Override
    public boolean storeTermVectorOffsets() {
      return storeTermVectors() && counter % 10 != 9;
    }

    @Override
    public boolean storeTermVectorPositions() {
      return storeTermVectors() && counter % 10 != 9;
    }

    @Override
    public boolean storeTermVectorPayloads() {
      return storeTermVectors() && counter % 10 != 9;
    }

    @Override
    public boolean omitNorms() {
      return false;
    }

    @Override
    public IndexOptions indexOptions() {
      // fieldID 3 is the stored-only (binary) field; everything else is
      // indexed with docs+freqs+positions
      return counter%10 == 3 ? IndexOptions.NONE : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    }

    @Override
    public DocValuesType docValuesType() {
      return DocValuesType.NONE;
    }

    @Override
    public int pointDimensionCount() {
      return 0;
    }

    @Override
    public int pointIndexDimensionCount() {
      return 0;
    }

    @Override
    public int pointNumBytes() {
      return 0;
    }

    @Override
    public Map<String, String> getAttributes() {
      return null;
    }
  };

  public MyField(int counter) {
    this.counter = counter;
  }

  @Override
  public String name() {
    // field names are unique across the entire test: f0, f1, f2, ...
    return "f" + counter;
  }

  @Override
  public BytesRef binaryValue() {
    // fieldID 3 is the binary field: 10 bytes counter, counter+1, ...
    if ((counter%10) == 3) {
      final byte[] bytes = new byte[10];
      for(int idx=0;idx<bytes.length;idx++) {
        bytes[idx] = (byte) (counter+idx);
      }
      return newBytesRef(bytes, 0, bytes.length);
    } else {
      return null;
    }
  }

  @Override
  public String stringValue() {
    // fieldID 3 has only a binary value; fieldID 7 exposes its text through
    // readerValue() instead of here
    final int fieldID = counter%10;
    if (fieldID != 3 && fieldID != 7) {
      return "text " + counter;
    } else {
      return null;
    }
  }

  @Override
  public Reader readerValue() {
    // fieldID 7 supplies the same "text <counter>" content, but as a Reader
    if (counter%10 == 7) {
      return new StringReader("text " + counter);
    } else {
      return null;
    }
  }

  @Override
  public Number numericValue() {
    return null;
  }

  @Override
  public IndexableFieldType fieldType() {
    return fieldType;
  }

  @Override
  public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
    // prefer the Reader form when present (fieldID 7); otherwise tokenize the
    // string value
    return readerValue() != null ? analyzer.tokenStream(name(), readerValue()) :
      analyzer.tokenStream(name(), new StringReader(stringValue()));
  }
}
| |
// Silly test showing how to index documents w/o using Lucene's core
// Document nor Field class.  Each doc gets a stored "id" field plus a random
// number of MyField instances with globally increasing counters; the second
// half of the test re-derives MyField's counter-based contract and verifies
// stored values, term vectors and searchability field by field.
public void testArbitraryFields() throws Exception {

  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);

  final int NUM_DOCS = atLeast(27);
  if (VERBOSE) {
    System.out.println("TEST: " + NUM_DOCS + " docs");
  }
  // fieldsPerDoc[i] counts only the MyField instances in doc i (the leading
  // "id" field is excluded, hence the -1 below)
  final int[] fieldsPerDoc = new int[NUM_DOCS];
  // running global counter: MyField counters are unique across all docs
  int baseCount = 0;

  for(int docCount=0;docCount<NUM_DOCS;docCount++) {
    // fieldCount includes the leading "id" field
    final int fieldCount = TestUtil.nextInt(random(), 1, 17);
    fieldsPerDoc[docCount] = fieldCount-1;

    final int finalDocCount = docCount;
    if (VERBOSE) {
      System.out.println("TEST: " + fieldCount + " fields in doc " + docCount);
    }

    final int finalBaseCount = baseCount;
    baseCount += fieldCount-1;

    // addDocument only needs an Iterable<IndexableField>; build one lazily:
    // first an "id" StringField, then MyField(finalBaseCount), MyField(+1), ...
    Iterable<IndexableField> d = new Iterable<IndexableField>() {
      @Override
      public Iterator<IndexableField> iterator() {
        return new Iterator<IndexableField>() {
          int fieldUpto;

          @Override
          public boolean hasNext() {
            return fieldUpto < fieldCount;
          }

          @Override
          public IndexableField next() {
            assert fieldUpto < fieldCount;
            if (fieldUpto == 0) {
              fieldUpto = 1;
              return newStringField("id", ""+finalDocCount, Field.Store.YES);
            } else {
              return new MyField(finalBaseCount + (fieldUpto++-1));
            }
          }

          @Override
          public void remove() {
            throw new UnsupportedOperationException();
          }
        };
      }
    };
    w.addDocument(d);
  }

  final IndexReader r = w.getReader();
  w.close();

  final IndexSearcher s = newSearcher(r);
  // counter walks the same global sequence the MyFields were built from
  int counter = 0;
  for(int id=0;id<NUM_DOCS;id++) {
    if (VERBOSE) {
      System.out.println("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter);
    }

    // locate the doc via its stored "id" field
    final TopDocs hits = s.search(new TermQuery(new Term("id", ""+id)), 1);
    assertEquals(1, hits.totalHits.value);
    final int docID = hits.scoreDocs[0].doc;
    final Document doc = s.doc(docID);
    final int endCounter = counter + fieldsPerDoc[id];
    while(counter < endCounter) {
      final String name = "f" + counter;
      final int fieldID = counter % 10;

      // re-derive MyField's contract for this counter (must stay in sync
      // with MyField's anonymous IndexableFieldType above)
      final boolean stored = (counter&1) == 0 || fieldID == 3;
      final boolean binary = fieldID == 3;
      final boolean indexed = fieldID != 3;

      // expected stored string; only consulted below for stored non-binary
      // fields (even counters), where MyField returns "text " + counter
      final String stringValue;
      if (fieldID != 3 && fieldID != 9) {
        stringValue = "text " + counter;
      } else {
        stringValue = null;
      }

      // stored:
      if (stored) {
        IndexableField f = doc.getField(name);
        assertNotNull("doc " + id + " doesn't have field f" + counter, f);
        if (binary) {
          assertNotNull("doc " + id + " doesn't have field f" + counter, f);
          // binary payload is 10 bytes: counter, counter+1, ...
          final BytesRef b = f.binaryValue();
          assertNotNull(b);
          assertEquals(10, b.length);
          for(int idx=0;idx<10;idx++) {
            assertEquals((byte) (idx+counter), b.bytes[b.offset+idx]);
          }
        } else {
          assert stringValue != null;
          assertEquals(stringValue, f.stringValue());
        }
      }

      if (indexed) {
        // term vectors expected for odd counters except fieldID 9
        final boolean tv = counter % 2 == 1 && fieldID != 9;
        if (tv) {
          final Terms tfv = r.getTermVectors(docID).terms(name);
          assertNotNull(tfv);
          TermsEnum termsEnum = tfv.iterator();
          // the tokenized value "text <counter>" yields two terms; the
          // numeric term sorts before "text"
          assertEquals(newBytesRef("" + counter), termsEnum.next());
          assertEquals(1, termsEnum.totalTermFreq());
          PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
          assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
          assertEquals(1, dpEnum.freq());
          assertEquals(1, dpEnum.nextPosition());

          assertEquals(newBytesRef("text"), termsEnum.next());
          assertEquals(1, termsEnum.totalTermFreq());
          dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
          assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
          assertEquals(1, dpEnum.freq());
          assertEquals(0, dpEnum.nextPosition());

          assertNull(termsEnum.next());

          // TODO: offsets

        } else {
          // no term vectors requested: either no vectors at all for this
          // doc, or none for this particular field
          Fields vectors = r.getTermVectors(docID);
          assertTrue(vectors == null || vectors.terms(name) == null);
        }

        // both terms of the indexed value must be searchable, anchored to
        // this doc via the "id" field
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        bq.add(new TermQuery(new Term("id", ""+id)), BooleanClause.Occur.MUST);
        bq.add(new TermQuery(new Term(name, "text")), BooleanClause.Occur.MUST);
        final TopDocs hits2 = s.search(bq.build(), 1);
        assertEquals(1, hits2.totalHits.value);
        assertEquals(docID, hits2.scoreDocs[0].doc);

        bq = new BooleanQuery.Builder();
        bq.add(new TermQuery(new Term("id", ""+id)), BooleanClause.Occur.MUST);
        bq.add(new TermQuery(new Term(name, ""+counter)), BooleanClause.Occur.MUST);
        final TopDocs hits3 = s.search(bq.build(), 1);
        assertEquals(1, hits3.totalHits.value);
        assertEquals(docID, hits3.scoreDocs[0].doc);
      }

      counter++;
    }
  }

  r.close();
  dir.close();
}
| |
| private static class CustomField implements IndexableField { |
| @Override |
| public BytesRef binaryValue() { |
| return null; |
| } |
| |
| @Override |
| public String stringValue() { |
| return "foobar"; |
| } |
| |
| @Override |
| public Reader readerValue() { |
| return null; |
| } |
| |
| @Override |
| public Number numericValue() { |
| return null; |
| } |
| |
| @Override |
| public String name() { |
| return "field"; |
| } |
| |
| @Override |
| public TokenStream tokenStream(Analyzer a, TokenStream reuse) { |
| return null; |
| } |
| |
| @Override |
| public IndexableFieldType fieldType() { |
| FieldType ft = new FieldType(StoredField.TYPE); |
| ft.setStoreTermVectors(true); |
| ft.freeze(); |
| return ft; |
| } |
| } |
| |
| // LUCENE-5611 |
| public void testNotIndexedTermVectors() throws Exception { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| expectThrows(IllegalArgumentException.class, () -> w.addDocument(Collections.singletonList(new CustomField()))); |
| w.close(); |
| dir.close(); |
| } |
| } |