| package org.apache.lucene.index; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.util.Collections; |
| import java.util.Iterator; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.document.StoredField; |
| import org.apache.lucene.search.BooleanClause; |
| import org.apache.lucene.search.BooleanQuery; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.search.TopDocs; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.TestUtil; |
| |
| public class TestIndexableField extends LuceneTestCase { |
| |
| private class MyField implements IndexableField, StorableField { |
| |
| private final int counter; |
| private final IndexableFieldType fieldType = new IndexableFieldType() { |
| @Override |
| public boolean stored() { |
| return (counter & 1) == 0 || (counter % 10) == 3; |
| } |
| |
| @Override |
| public boolean tokenized() { |
| return true; |
| } |
| |
| @Override |
| public boolean storeTermVectors() { |
| return indexOptions() != IndexOptions.NONE && counter % 2 == 1 && counter % 10 != 9; |
| } |
| |
| @Override |
| public boolean storeTermVectorOffsets() { |
| return storeTermVectors() && counter % 10 != 9; |
| } |
| |
| @Override |
| public boolean storeTermVectorPositions() { |
| return storeTermVectors() && counter % 10 != 9; |
| } |
| |
| @Override |
| public boolean storeTermVectorPayloads() { |
| return storeTermVectors() && counter % 10 != 9; |
| } |
| |
| @Override |
| public boolean omitNorms() { |
| return false; |
| } |
| |
| @Override |
| public IndexOptions indexOptions() { |
| return counter%10 == 3 ? IndexOptions.NONE : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; |
| } |
| |
| @Override |
| public DocValuesType docValuesType() { |
| return DocValuesType.NONE; |
| } |
| }; |
| |
| public MyField(int counter) { |
| this.counter = counter; |
| } |
| |
| @Override |
| public String name() { |
| return "f" + counter; |
| } |
| |
| @Override |
| public float boost() { |
| return 1.0f + random().nextFloat(); |
| } |
| |
| @Override |
| public BytesRef binaryValue() { |
| if ((counter%10) == 3) { |
| final byte[] bytes = new byte[10]; |
| for(int idx=0;idx<bytes.length;idx++) { |
| bytes[idx] = (byte) (counter+idx); |
| } |
| return new BytesRef(bytes, 0, bytes.length); |
| } else { |
| return null; |
| } |
| } |
| |
| @Override |
| public String stringValue() { |
| final int fieldID = counter%10; |
| if (fieldID != 3 && fieldID != 7) { |
| return "text " + counter; |
| } else { |
| return null; |
| } |
| } |
| |
| @Override |
| public Reader readerValue() { |
| if (counter%10 == 7) { |
| return new StringReader("text " + counter); |
| } else { |
| return null; |
| } |
| } |
| |
| @Override |
| public Number numericValue() { |
| return null; |
| } |
| |
| @Override |
| public IndexableFieldType fieldType() { |
| return fieldType; |
| } |
| |
| @Override |
| public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws IOException { |
| return readerValue() != null ? analyzer.tokenStream(name(), readerValue()) : |
| analyzer.tokenStream(name(), new StringReader(stringValue())); |
| } |
| } |
| |
  // Silly test showing how to index documents w/o using Lucene's core
  // Document nor Field class: docs are supplied through hand-rolled
  // IndexDocument/Iterable implementations over MyField instances, then
  // every field is verified against the deterministic rules encoded in
  // MyField (stored/binary/indexed/term-vectors are all functions of the
  // global field counter).
  public void testArbitraryFields() throws Exception {

    final Directory dir = newDirectory();
    final RandomIndexWriter w = new RandomIndexWriter(random(), dir);

    final int NUM_DOCS = atLeast(27);
    if (VERBOSE) {
      System.out.println("TEST: " + NUM_DOCS + " docs");
    }
    // fieldsPerDoc[i] counts only the MyField instances in doc i (the "id"
    // field is excluded), hence the fieldCount-1 recorded below.
    final int[] fieldsPerDoc = new int[NUM_DOCS];
    int baseCount = 0;

    for(int docCount=0;docCount<NUM_DOCS;docCount++) {
      // fieldCount includes the "id" field, so each doc gets 0..16 MyFields.
      final int fieldCount = TestUtil.nextInt(random(), 1, 17);
      fieldsPerDoc[docCount] = fieldCount-1;

      final int finalDocCount = docCount;
      if (VERBOSE) {
        System.out.println("TEST: " + fieldCount + " fields in doc " + docCount);
      }

      // finalBaseCount is the global counter of this doc's first MyField;
      // names ("f" + counter) and values are derived from that counter.
      final int finalBaseCount = baseCount;
      baseCount += fieldCount-1;

      IndexDocument d = new IndexDocument() {
        @Override
        public Iterable<IndexableField> indexableFields() {
          return new Iterable<IndexableField>() {
            @Override
            public Iterator<IndexableField> iterator() {
              return new Iterator<IndexableField>() {
                int fieldUpto = 0;
                private IndexableField next;

                // NOTE: hasNext() both advances and filters: it materializes
                // the next field, then recurses past any field whose type
                // says it is not indexed.
                @Override
                public boolean hasNext() {
                  if (fieldUpto >= fieldCount) return false;

                  next = null;
                  if (fieldUpto == 0) {
                    // First field of every doc is the "id" used for lookup.
                    fieldUpto = 1;
                    next = newStringField("id", ""+finalDocCount, Field.Store.YES);
                  } else {
                    next = new MyField(finalBaseCount + (fieldUpto++-1));
                  }

                  // Only yield indexed fields; skip stored-only ones.
                  if (next != null && next.fieldType().indexOptions() != IndexOptions.NONE) return true;
                  else return this.hasNext();
                }

                @Override
                public IndexableField next() {
                  assert fieldUpto <= fieldCount;
                  if (next == null && !hasNext()) {
                    return null;
                  }
                  else {
                    // next was already computed by hasNext().
                    return next;
                  }
                }

                @Override
                public void remove() {
                  throw new UnsupportedOperationException();
                }
              };
            }
          };
        }

        @Override
        public Iterable<StorableField> storableFields() {
          // Mirrors indexableFields() exactly, but filters on stored()
          // instead of indexOptions().
          return new Iterable<StorableField>() {
            @Override
            public Iterator<StorableField> iterator() {
              return new Iterator<StorableField>() {
                int fieldUpto = 0;
                private StorableField next = null;

                @Override
                public boolean hasNext() {

                  if (fieldUpto == fieldCount) return false;

                  next = null;
                  if (fieldUpto == 0) {
                    fieldUpto = 1;
                    next = newStringField("id", ""+finalDocCount, Field.Store.YES);
                  } else {
                    next = new MyField(finalBaseCount + (fieldUpto++-1));
                  }

                  // Only yield stored fields; skip indexed-only ones.
                  if (next != null && next.fieldType().stored()) return true;
                  else return this.hasNext();
                }

                @Override
                public StorableField next() {
                  assert fieldUpto <= fieldCount;
                  if (next == null && !hasNext()) {
                    return null;
                  }
                  else {
                    return next;
                  }
                }

                @Override
                public void remove() {
                  throw new UnsupportedOperationException();
                }
              };
            }
          };
        }
      };

      w.addDocument(d);
    }

    final IndexReader r = w.getReader();
    w.close();

    final IndexSearcher s = newSearcher(r);
    // counter walks the same global field numbering used at index time.
    int counter = 0;
    for(int id=0;id<NUM_DOCS;id++) {
      if (VERBOSE) {
        System.out.println("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter);
      }

      final TopDocs hits = s.search(new TermQuery(new Term("id", ""+id)), 1);
      assertEquals(1, hits.totalHits);
      final int docID = hits.scoreDocs[0].doc;
      final StoredDocument doc = s.doc(docID);
      final int endCounter = counter + fieldsPerDoc[id];
      while(counter < endCounter) {
        final String name = "f" + counter;
        final int fieldID = counter % 10;

        // Recompute MyField's counter-derived rules independently.
        final boolean stored = (counter&1) == 0 || fieldID == 3;
        final boolean binary = fieldID == 3;
        final boolean indexed = fieldID != 3;

        // fieldID 7 also indexes "text <counter>" (via a Reader), but such
        // fields are odd-counter and never stored, so null is fine here;
        // same for fieldID 9.
        final String stringValue;
        if (fieldID != 3 && fieldID != 9) {
          stringValue = "text " + counter;
        } else {
          stringValue = null;
        }

        // stored:
        if (stored) {
          StorableField f = doc.getField(name);
          assertNotNull("doc " + id + " doesn't have field f" + counter, f);
          if (binary) {
            assertNotNull("doc " + id + " doesn't have field f" + counter, f);
            // Binary payload is 10 bytes valued counter, counter+1, ...
            final BytesRef b = f.binaryValue();
            assertNotNull(b);
            assertEquals(10, b.length);
            for(int idx=0;idx<10;idx++) {
              assertEquals((byte) (idx+counter), b.bytes[b.offset+idx]);
            }
          } else {
            assert stringValue != null;
            assertEquals(stringValue, f.stringValue());
          }
        }

        if (indexed) {
          // Term vectors are enabled on odd counters except fieldID 9.
          final boolean tv = counter % 2 == 1 && fieldID != 9;
          if (tv) {
            final Terms tfv = r.getTermVectors(docID).terms(name);
            assertNotNull(tfv);
            TermsEnum termsEnum = tfv.iterator();
            // Value is "text <counter>"; terms come back in sorted order,
            // so the numeric token precedes "text".
            assertEquals(new BytesRef(""+counter), termsEnum.next());
            assertEquals(1, termsEnum.totalTermFreq());
            PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
            assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(1, dpEnum.freq());
            assertEquals(1, dpEnum.nextPosition());

            assertEquals(new BytesRef("text"), termsEnum.next());
            assertEquals(1, termsEnum.totalTermFreq());
            dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
            assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(1, dpEnum.freq());
            assertEquals(0, dpEnum.nextPosition());

            assertNull(termsEnum.next());

            // TODO: offsets

          } else {
            // No vectors expected: either none for the doc, or none for
            // this particular field.
            Fields vectors = r.getTermVectors(docID);
            assertTrue(vectors == null || vectors.terms(name) == null);
          }

          // Both tokens of "text <counter>" must be searchable, scoped to
          // this doc via the id term.
          BooleanQuery.Builder bq = new BooleanQuery.Builder();
          bq.add(new TermQuery(new Term("id", ""+id)), BooleanClause.Occur.MUST);
          bq.add(new TermQuery(new Term(name, "text")), BooleanClause.Occur.MUST);
          final TopDocs hits2 = s.search(bq.build(), 1);
          assertEquals(1, hits2.totalHits);
          assertEquals(docID, hits2.scoreDocs[0].doc);

          bq = new BooleanQuery.Builder();
          bq.add(new TermQuery(new Term("id", ""+id)), BooleanClause.Occur.MUST);
          bq.add(new TermQuery(new Term(name, ""+counter)), BooleanClause.Occur.MUST);
          final TopDocs hits3 = s.search(bq.build(), 1);
          assertEquals(1, hits3.totalHits);
          assertEquals(docID, hits3.scoreDocs[0].doc);
        }

        counter++;
      }
    }

    r.close();
    dir.close();
  }
| |
| private static class CustomField implements StorableField { |
| @Override |
| public BytesRef binaryValue() { |
| return null; |
| } |
| |
| @Override |
| public String stringValue() { |
| return "foobar"; |
| } |
| |
| @Override |
| public Reader readerValue() { |
| return null; |
| } |
| |
| @Override |
| public Number numericValue() { |
| return null; |
| } |
| |
| @Override |
| public String name() { |
| return "field"; |
| } |
| |
| @Override |
| public IndexableFieldType fieldType() { |
| FieldType ft = new FieldType(StoredField.TYPE); |
| ft.setStoreTermVectors(true); |
| ft.freeze(); |
| return ft; |
| } |
| } |
| |
| // LUCENE-5611 |
| public void testNotIndexedTermVectors() throws Exception { |
| Directory dir = newDirectory(); |
| RandomIndexWriter w = new RandomIndexWriter(random(), dir); |
| try { |
| w.addDocument( |
| new IndexDocument() { |
| @Override |
| public Iterable<IndexableField> indexableFields() { |
| return Collections.emptyList(); |
| } |
| @Override |
| public Iterable<StorableField> storableFields() { |
| return Collections.<StorableField>singletonList(new CustomField()); |
| } |
| }); |
| fail("didn't hit exception"); |
| } catch (IllegalArgumentException iae) { |
| // expected |
| } |
| w.close(); |
| dir.close(); |
| } |
| } |