blob: 0c839d8a9a2fb96b064914f8c76c6007a4d5829b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.Reader;
import java.io.StringReader;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestIndexableField extends LuceneTestCase {
/**
 * Hand-rolled {@link IndexableField} whose name, value and field type are all derived from a
 * single integer {@code counter}. The "slot" ({@code counter % 10}) selects the variant:
 *
 * <ul>
 *   <li>slot 3: a 10-byte binary value, stored, with {@link IndexOptions#NONE};
 *   <li>slot 7: the text is exposed through {@link #readerValue()} only;
 *   <li>every other slot: the text is exposed through {@link #stringValue()}.
 * </ul>
 *
 * Fields with an even counter (and slot 3) report themselves as stored; fields with an odd
 * counter, other than slot 9 and the non-indexed slot 3, request term vectors.
 */
private static class MyField implements IndexableField {
  private final int counter;

  // Every answer is computed lazily from the enclosing field's counter.
  private final IndexableFieldType fieldType = new IndexableFieldType() {
    @Override
    public boolean stored() {
      return (counter % 10) == 3 || (counter & 1) == 0;
    }

    @Override
    public boolean tokenized() {
      return true;
    }

    @Override
    public boolean storeTermVectors() {
      if (indexOptions() == IndexOptions.NONE) {
        return false;
      }
      return counter % 2 == 1 && counter % 10 != 9;
    }

    // storeTermVectors() is already false for slot 9, so the three
    // sub-options below simply follow it.
    @Override
    public boolean storeTermVectorOffsets() {
      return storeTermVectors();
    }

    @Override
    public boolean storeTermVectorPositions() {
      return storeTermVectors();
    }

    @Override
    public boolean storeTermVectorPayloads() {
      return storeTermVectors();
    }

    @Override
    public boolean omitNorms() {
      return false;
    }

    @Override
    public IndexOptions indexOptions() {
      if (counter % 10 == 3) {
        return IndexOptions.NONE;
      }
      return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    }

    @Override
    public DocValuesType docValuesType() {
      return DocValuesType.NONE;
    }

    @Override
    public int pointDimensionCount() {
      return 0;
    }

    @Override
    public int pointIndexDimensionCount() {
      return 0;
    }

    @Override
    public int pointNumBytes() {
      return 0;
    }

    @Override
    public Map<String, String> getAttributes() {
      return null;
    }
  };

  /**
   * @param counter number from which the field name and all field properties are derived
   */
  public MyField(int counter) {
    this.counter = counter;
  }

  /** Returns {@code "f"} followed by the counter. */
  @Override
  public String name() {
    return "f" + counter;
  }

  /**
   * Returns ten bytes {@code counter, counter+1, ...} (each truncated to a byte) for slot 3,
   * and {@code null} for every other slot.
   */
  @Override
  public BytesRef binaryValue() {
    if ((counter % 10) != 3) {
      return null;
    }
    final byte[] payload = new byte[10];
    for (int i = 0; i < payload.length; i++) {
      payload[i] = (byte) (counter + i);
    }
    return newBytesRef(payload, 0, payload.length);
  }

  /** Returns {@code "text <counter>"}, or {@code null} for slots 3 and 7. */
  @Override
  public String stringValue() {
    switch (counter % 10) {
      case 3:
      case 7:
        return null;
      default:
        return "text " + counter;
    }
  }

  /** Returns a reader over {@code "text <counter>"} for slot 7, otherwise {@code null}. */
  @Override
  public Reader readerValue() {
    return counter % 10 == 7 ? new StringReader("text " + counter) : null;
  }

  @Override
  public Number numericValue() {
    return null;
  }

  @Override
  public IndexableFieldType fieldType() {
    return fieldType;
  }

  /**
   * Analyzes the reader value when there is one, otherwise the string value wrapped in a
   * {@link StringReader}. The {@code previous} stream is not used.
   */
  @Override
  public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) {
    final Reader reader = readerValue();
    if (reader != null) {
      return analyzer.tokenStream(name(), reader);
    }
    return analyzer.tokenStream(name(), new StringReader(stringValue()));
  }
}
/**
 * Silly test showing how to index documents whose payload fields are supplied through a
 * hand-written {@code Iterable<IndexableField>} of {@link MyField} instances rather than
 * through Lucene's core {@link Document} class.
 *
 * <p>Each document starts with an {@code id} field holding the document number, followed by a
 * randomly chosen number of {@link MyField}s. The {@code MyField} counters run consecutively
 * across the whole index, so the verification pass can recompute the expected properties of
 * every field from its counter alone.
 *
 * @throws Exception if indexing, searching or closing a resource fails
 */
public void testArbitraryFields() throws Exception {
  final Directory dir = newDirectory();
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir);

  final int numDocs = atLeast(27);
  if (VERBOSE) {
    System.out.println("TEST: " + numDocs + " docs");
  }

  // Number of MyField instances (the id field excluded) added to each document.
  final int[] fieldsPerDoc = new int[numDocs];
  int baseCount = 0;

  for (int docCount = 0; docCount < numDocs; docCount++) {
    // fieldCount includes the leading id field, hence the "- 1" adjustments below.
    final int fieldCount = TestUtil.nextInt(random(), 1, 17);
    fieldsPerDoc[docCount] = fieldCount - 1;
    if (VERBOSE) {
      System.out.println("TEST: " + fieldCount + " fields in doc " + docCount);
    }

    // Effectively-final copies for use inside the iterator.
    final int finalDocCount = docCount;
    final int finalBaseCount = baseCount;
    baseCount += fieldCount - 1;

    final Iterable<IndexableField> d = () -> new Iterator<IndexableField>() {
      int fieldUpto;

      @Override
      public boolean hasNext() {
        return fieldUpto < fieldCount;
      }

      @Override
      public IndexableField next() {
        assert fieldUpto < fieldCount;
        final int slot = fieldUpto++;
        if (slot == 0) {
          // The first field of every document is its id.
          return newStringField("id", "" + finalDocCount, Field.Store.YES);
        }
        return new MyField(finalBaseCount + slot - 1);
      }
    };

    w.addDocument(d);
  }

  final IndexReader r = w.getReader();
  w.close();
  final IndexSearcher s = newSearcher(r);

  int counter = 0;
  for (int id = 0; id < numDocs; id++) {
    if (VERBOSE) {
      System.out.println("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter);
    }

    final TopDocs hits = s.search(new TermQuery(new Term("id", "" + id)), 1);
    assertEquals(1, hits.totalHits.value);
    final int docID = hits.scoreDocs[0].doc;
    final Document doc = s.doc(docID);

    final int endCounter = counter + fieldsPerDoc[id];
    while (counter < endCounter) {
      final String name = "f" + counter;
      final int fieldID = counter % 10;

      // These mirror the rules hard-wired into MyField and its field type.
      final boolean binary = fieldID == 3;
      final boolean stored = (counter & 1) == 0 || binary;
      final boolean indexed = !binary;
      // MyField.stringValue() is null for the binary slot (3) and the Reader slot (7).
      final String stringValue = (fieldID != 3 && fieldID != 7) ? "text " + counter : null;

      // stored:
      if (stored) {
        final IndexableField f = doc.getField(name);
        assertNotNull("doc " + id + " doesn't have field f" + counter, f);
        if (binary) {
          final BytesRef b = f.binaryValue();
          assertNotNull(b);
          assertEquals(10, b.length);
          for (int idx = 0; idx < 10; idx++) {
            assertEquals((byte) (idx + counter), b.bytes[b.offset + idx]);
          }
        } else {
          assertNotNull("no expected string value for stored field " + name, stringValue);
          assertEquals(stringValue, f.stringValue());
        }
      }

      if (indexed) {
        final boolean tv = counter % 2 == 1 && fieldID != 9;
        if (tv) {
          final Terms tfv = r.getTermVectors(docID).terms(name);
          assertNotNull(tfv);
          final TermsEnum termsEnum = tfv.iterator();

          // First term returned is the counter, expected at position 1 of "text <counter>".
          assertEquals(newBytesRef("" + counter), termsEnum.next());
          assertEquals(1, termsEnum.totalTermFreq());
          PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
          assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
          assertEquals(1, dpEnum.freq());
          assertEquals(1, dpEnum.nextPosition());

          // Second term is "text", expected at position 0.
          assertEquals(newBytesRef("text"), termsEnum.next());
          assertEquals(1, termsEnum.totalTermFreq());
          dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
          assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
          assertEquals(1, dpEnum.freq());
          assertEquals(0, dpEnum.nextPosition());

          assertNull(termsEnum.next());

          // TODO: offsets
        } else {
          final Fields vectors = r.getTermVectors(docID);
          assertTrue(vectors == null || vectors.terms(name) == null);
        }

        // Both tokens of the field must be searchable and lead back to this document.
        for (String text : new String[] {"text", "" + counter}) {
          final BooleanQuery.Builder bq = new BooleanQuery.Builder();
          bq.add(new TermQuery(new Term("id", "" + id)), BooleanClause.Occur.MUST);
          bq.add(new TermQuery(new Term(name, text)), BooleanClause.Occur.MUST);
          final TopDocs fieldHits = s.search(bq.build(), 1);
          assertEquals(1, fieldHits.totalHits.value);
          assertEquals(docID, fieldHits.scoreDocs[0].doc);
        }
      }

      counter++;
    }
  }

  r.close();
  dir.close();
}
/**
 * Minimal {@link IndexableField} named {@code "field"} with the constant string value
 * {@code "foobar"}. Its field type is {@link StoredField#TYPE} with term vectors switched on;
 * it is the input for {@code testNotIndexedTermVectors}.
 */
private static class CustomField implements IndexableField {
  @Override
  public String name() {
    return "field";
  }

  /** Returns a new, frozen copy of {@link StoredField#TYPE} with term vectors enabled. */
  @Override
  public IndexableFieldType fieldType() {
    final FieldType storedWithVectors = new FieldType(StoredField.TYPE);
    storedWithVectors.setStoreTermVectors(true);
    storedWithVectors.freeze();
    return storedWithVectors;
  }

  /** The only value this field carries. */
  @Override
  public String stringValue() {
    return "foobar";
  }

  @Override
  public BytesRef binaryValue() {
    return null;
  }

  @Override
  public Reader readerValue() {
    return null;
  }

  @Override
  public Number numericValue() {
    return null;
  }

  /** No token stream is supplied by this field. */
  @Override
  public TokenStream tokenStream(Analyzer a, TokenStream reuse) {
    return null;
  }
}
/**
 * LUCENE-5611: adding a document that contains a {@link CustomField} (a
 * {@link StoredField#TYPE}-based field type with term vectors enabled) must fail with an
 * {@link IllegalArgumentException}.
 *
 * @throws Exception if creating or closing the writer or directory fails
 */
public void testNotIndexedTermVectors() throws Exception {
  final Directory directory = newDirectory();
  final RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  expectThrows(
      IllegalArgumentException.class,
      () -> {
        writer.addDocument(Collections.singletonList(new CustomField()));
      });

  writer.close();
  directory.close();
}
}