| package org.apache.lucene.index; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| /** |
| * Access to the Field Info file that describes document fields and whether or |
| * not they are indexed. Each segment has a separate Field Info file. Objects |
| * of this class are thread-safe for multiple readers, but only one thread can |
| * be adding documents at a time, with no other reader or writer threads |
| * accessing this object. |
| **/ |
| |
| public final class FieldInfo { |
| /** Field's name */ |
| public final String name; |
| /** Internal field number */ |
| public final int number; |
| |
| private boolean indexed; |
| private DocValuesType docValueType; |
| |
| // True if any document indexed term vectors |
| private boolean storeTermVector; |
| |
| private DocValuesType normType; |
| private boolean omitNorms; // omit norms associated with indexed fields |
| private IndexOptions indexOptions; |
| private boolean storePayloads; // whether this field stores payloads together with term positions |
| |
| private Map<String,String> attributes; |
| |
| private long dvGen = -1; // the DocValues generation of this field |
| |
| /** |
| * Controls how much information is stored in the postings lists. |
| * @lucene.experimental |
| */ |
| public static enum IndexOptions { |
| // NOTE: order is important here; FieldInfo uses this |
| // order to merge two conflicting IndexOptions (always |
| // "downgrades" by picking the lowest). |
| /** |
| * Only documents are indexed: term frequencies and positions are omitted. |
| * Phrase and other positional queries on the field will throw an exception, and scoring |
| * will behave as if any term in the document appears only once. |
| */ |
| // TODO: maybe rename to just DOCS? |
| DOCS_ONLY, |
| /** |
| * Only documents and term frequencies are indexed: positions are omitted. |
| * This enables normal scoring, except Phrase and other positional queries |
| * will throw an exception. |
| */ |
| DOCS_AND_FREQS, |
| /** |
| * Indexes documents, frequencies and positions. |
| * This is a typical default for full-text search: full scoring is enabled |
| * and positional queries are supported. |
| */ |
| DOCS_AND_FREQS_AND_POSITIONS, |
| /** |
| * Indexes documents, frequencies, positions and offsets. |
| * Character offsets are encoded alongside the positions. |
| */ |
| DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, |
| } |
| |
| /** |
| * DocValues types. |
| * Note that DocValues is strongly typed, so a field cannot have different types |
| * across different documents. |
| */ |
| public static enum DocValuesType { |
| /** |
| * A per-document Number |
| */ |
| NUMERIC, |
| /** |
| * A per-document byte[]. Values may be larger than |
| * 32766 bytes, but different codecs may enforce their own limits. |
| */ |
| BINARY, |
| /** |
| * A pre-sorted byte[]. Fields with this type only store distinct byte values |
| * and store an additional offset pointer per document to dereference the shared |
| * byte[]. The stored byte[] is presorted and allows access via document id, |
| * ordinal and by-value. Values must be <= 32766 bytes. |
| */ |
| SORTED, |
| /** |
| * A pre-sorted Set<byte[]>. Fields with this type only store distinct byte values |
| * and store additional offset pointers per document to dereference the shared |
| * byte[]s. The stored byte[] is presorted and allows access via document id, |
| * ordinal and by-value. Values must be <= 32766 bytes. |
| */ |
| SORTED_SET |
| } |
| |
| /** |
| * Sole Constructor. |
| * |
| * @lucene.experimental |
| */ |
| public FieldInfo(String name, boolean indexed, int number, boolean storeTermVector, boolean omitNorms, |
| boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normsType, |
| Map<String,String> attributes) { |
| this.name = name; |
| this.indexed = indexed; |
| this.number = number; |
| this.docValueType = docValues; |
| if (indexed) { |
| this.storeTermVector = storeTermVector; |
| this.storePayloads = storePayloads; |
| this.omitNorms = omitNorms; |
| this.indexOptions = indexOptions; |
| this.normType = !omitNorms ? normsType : null; |
| } else { // for non-indexed fields, leave defaults |
| this.storeTermVector = false; |
| this.storePayloads = false; |
| this.omitNorms = false; |
| this.indexOptions = null; |
| this.normType = null; |
| } |
| this.attributes = attributes; |
| assert checkConsistency(); |
| } |
| |
| private boolean checkConsistency() { |
| if (!indexed) { |
| assert !storeTermVector; |
| assert !storePayloads; |
| assert !omitNorms; |
| assert normType == null; |
| assert indexOptions == null; |
| } else { |
| assert indexOptions != null; |
| if (omitNorms) { |
| assert normType == null; |
| } |
| // Cannot store payloads unless positions are indexed: |
| assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !this.storePayloads; |
| } |
| |
| return true; |
| } |
| |
| void update(IndexableFieldType ft) { |
| update(ft.indexed(), false, ft.omitNorms(), false, ft.indexOptions()); |
| } |
| |
| // should only be called by FieldInfos#addOrUpdate |
| void update(boolean indexed, boolean storeTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) { |
| //System.out.println("FI.update field=" + name + " indexed=" + indexed + " omitNorms=" + omitNorms + " this.omitNorms=" + this.omitNorms); |
| if (this.indexed != indexed) { |
| this.indexed = true; // once indexed, always index |
| } |
| if (indexed) { // if updated field data is not for indexing, leave the updates out |
| if (this.storeTermVector != storeTermVector) { |
| this.storeTermVector = true; // once vector, always vector |
| } |
| if (this.storePayloads != storePayloads) { |
| this.storePayloads = true; |
| } |
| if (this.omitNorms != omitNorms) { |
| this.omitNorms = true; // if one require omitNorms at least once, it remains off for life |
| this.normType = null; |
| } |
| if (this.indexOptions != indexOptions) { |
| if (this.indexOptions == null) { |
| this.indexOptions = indexOptions; |
| } else { |
| // downgrade |
| this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions; |
| } |
| if (this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { |
| // cannot store payloads if we don't store positions: |
| this.storePayloads = false; |
| } |
| } |
| } |
| assert checkConsistency(); |
| } |
| |
| void setDocValuesType(DocValuesType type) { |
| if (docValueType != null && docValueType != type) { |
| throw new IllegalArgumentException("cannot change DocValues type from " + docValueType + " to " + type + " for field \"" + name + "\""); |
| } |
| docValueType = type; |
| assert checkConsistency(); |
| } |
| |
| /** Returns IndexOptions for the field, or null if the field is not indexed */ |
| public IndexOptions getIndexOptions() { |
| return indexOptions; |
| } |
| |
| /** |
| * Returns true if this field has any docValues. |
| */ |
| public boolean hasDocValues() { |
| return docValueType != null; |
| } |
| |
| /** |
| * Returns {@link DocValuesType} of the docValues. this may be null if the field has no docvalues. |
| */ |
| public DocValuesType getDocValuesType() { |
| return docValueType; |
| } |
| |
| /** Sets the docValues generation of this field. */ |
| public void setDocValuesGen(long dvGen) { |
| this.dvGen = dvGen; |
| } |
| |
| /** |
| * Returns the docValues generation of this field, or -1 if no docValues |
| * updates exist for it. |
| */ |
| public long getDocValuesGen() { |
| return dvGen; |
| } |
| |
| /** |
| * Returns {@link DocValuesType} of the norm. this may be null if the field has no norms. |
| */ |
| public DocValuesType getNormType() { |
| return normType; |
| } |
| |
| void setStoreTermVectors() { |
| storeTermVector = true; |
| assert checkConsistency(); |
| } |
| |
| void setStorePayloads() { |
| if (indexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) { |
| storePayloads = true; |
| } |
| assert checkConsistency(); |
| } |
| |
| void setNormValueType(DocValuesType type) { |
| if (normType != null && normType != type) { |
| throw new IllegalArgumentException("cannot change Norm type from " + normType + " to " + type + " for field \"" + name + "\""); |
| } |
| normType = type; |
| assert checkConsistency(); |
| } |
| |
| /** |
| * Returns true if norms are explicitly omitted for this field |
| */ |
| public boolean omitsNorms() { |
| return omitNorms; |
| } |
| |
| /** |
| * Returns true if this field actually has any norms. |
| */ |
| public boolean hasNorms() { |
| return normType != null; |
| } |
| |
| /** |
| * Returns true if this field is indexed. |
| */ |
| public boolean isIndexed() { |
| return indexed; |
| } |
| |
| /** |
| * Returns true if any payloads exist for this field. |
| */ |
| public boolean hasPayloads() { |
| return storePayloads; |
| } |
| |
| /** |
| * Returns true if any term vectors exist for this field. |
| */ |
| public boolean hasVectors() { |
| return storeTermVector; |
| } |
| |
| /** |
| * Get a codec attribute value, or null if it does not exist |
| */ |
| public String getAttribute(String key) { |
| if (attributes == null) { |
| return null; |
| } else { |
| return attributes.get(key); |
| } |
| } |
| |
| /** |
| * Puts a codec attribute value. |
| * <p> |
| * This is a key-value mapping for the field that the codec can use |
| * to store additional metadata, and will be available to the codec |
| * when reading the segment via {@link #getAttribute(String)} |
| * <p> |
| * If a value already exists for the field, it will be replaced with |
| * the new value. |
| */ |
| public String putAttribute(String key, String value) { |
| if (attributes == null) { |
| attributes = new HashMap<String,String>(); |
| } |
| return attributes.put(key, value); |
| } |
| |
| /** |
| * Returns internal codec attributes map. May be null if no mappings exist. |
| */ |
| public Map<String,String> attributes() { |
| return attributes; |
| } |
| } |