| package org.apache.lucene.index; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.util.HashMap; |
| import java.util.Map; |
| |
| /** |
| * Access to the Field Info file that describes document fields and whether or |
| * not they are indexed. Each segment has a separate Field Info file. Objects |
| * of this class are thread-safe for multiple readers, but only one thread can |
| * be adding documents at a time, with no other reader or writer threads |
| * accessing this object. |
| **/ |
| |
| public final class FieldInfo { |
| /** Field's name */ |
| public final String name; |
| /** Internal field number */ |
| public final int number; |
| |
| private DocValuesType docValueType; |
| |
| // True if any document indexed term vectors |
| private boolean storeTermVector; |
| |
| private boolean omitNorms; // omit norms associated with indexed fields |
| |
| private IndexOptions indexOptions; |
| private boolean storePayloads; // whether this field stores payloads together with term positions |
| |
| private Map<String,String> attributes; |
| |
| private long dvGen; |
| |
| /** |
| * Controls how much information is stored in the postings lists. |
| * @lucene.experimental |
| */ |
| public static enum IndexOptions { |
| // NOTE: order is important here; FieldInfo uses this |
| // order to merge two conflicting IndexOptions (always |
| // "downgrades" by picking the lowest). |
| /** |
| * Only documents are indexed: term frequencies and positions are omitted. |
| * Phrase and other positional queries on the field will throw an exception, and scoring |
| * will behave as if any term in the document appears only once. |
| */ |
| // TODO: maybe rename to just DOCS? |
| DOCS_ONLY, |
| /** |
| * Only documents and term frequencies are indexed: positions are omitted. |
| * This enables normal scoring, except Phrase and other positional queries |
| * will throw an exception. |
| */ |
| DOCS_AND_FREQS, |
| /** |
| * Indexes documents, frequencies and positions. |
| * This is a typical default for full-text search: full scoring is enabled |
| * and positional queries are supported. |
| */ |
| DOCS_AND_FREQS_AND_POSITIONS, |
| /** |
| * Indexes documents, frequencies, positions and offsets. |
| * Character offsets are encoded alongside the positions. |
| */ |
| DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, |
| } |
| |
| /** |
| * DocValues types. |
| * Note that DocValues is strongly typed, so a field cannot have different types |
| * across different documents. |
| */ |
| public static enum DocValuesType { |
| /** |
| * A per-document Number |
| */ |
| NUMERIC, |
| /** |
| * A per-document byte[]. Values may be larger than |
| * 32766 bytes, but different codecs may enforce their own limits. |
| */ |
| BINARY, |
| /** |
| * A pre-sorted byte[]. Fields with this type only store distinct byte values |
| * and store an additional offset pointer per document to dereference the shared |
| * byte[]. The stored byte[] is presorted and allows access via document id, |
| * ordinal and by-value. Values must be <= 32766 bytes. |
| */ |
| SORTED, |
| /** |
| * A pre-sorted Number[]. Fields with this type store numeric values in sorted |
| * order according to {@link Long#compare(long, long)}. |
| */ |
| SORTED_NUMERIC, |
| /** |
| * A pre-sorted Set<byte[]>. Fields with this type only store distinct byte values |
| * and store additional offset pointers per document to dereference the shared |
| * byte[]s. The stored byte[] is presorted and allows access via document id, |
| * ordinal and by-value. Values must be <= 32766 bytes. |
| */ |
| SORTED_SET |
| } |
| |
| /** |
| * Sole constructor. |
| * |
| * @lucene.experimental |
| */ |
| public FieldInfo(String name, int number, boolean storeTermVector, boolean omitNorms, |
| boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, |
| long dvGen, Map<String,String> attributes) { |
| this.name = name; |
| this.number = number; |
| this.docValueType = docValues; |
| if (indexOptions != null) { |
| this.storeTermVector = storeTermVector; |
| this.storePayloads = storePayloads; |
| this.omitNorms = omitNorms; |
| this.indexOptions = indexOptions; |
| } else { // for non-indexed fields, leave defaults |
| this.storeTermVector = false; |
| this.storePayloads = false; |
| this.omitNorms = false; |
| this.indexOptions = null; |
| } |
| this.dvGen = dvGen; |
| this.attributes = attributes; |
| assert checkConsistency(); |
| } |
| |
| /** |
| * Performs internal consistency checks. |
| * Always returns true (or throws IllegalStateException) |
| */ |
| public boolean checkConsistency() { |
| if (indexOptions != null) { |
| // Cannot store payloads unless positions are indexed: |
| if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0 && storePayloads) { |
| throw new IllegalStateException("indexed field '" + name + "' cannot have payloads without positions"); |
| } |
| } else { |
| if (storeTermVector) { |
| throw new IllegalStateException("non-indexed field '" + name + "' cannot store term vectors"); |
| } |
| if (storePayloads) { |
| throw new IllegalStateException("non-indexed field '" + name + "' cannot store payloads"); |
| } |
| if (omitNorms) { |
| throw new IllegalStateException("non-indexed field '" + name + "' cannot omit norms"); |
| } |
| if (indexOptions != null) { |
| throw new IllegalStateException("non-indexed field '" + name + "' cannot have index options"); |
| } |
| } |
| |
| if (dvGen != -1 && docValueType == null) { |
| throw new IllegalStateException("field '" + name + "' cannot have a docvalues update generation without having docvalues"); |
| } |
| |
| return true; |
| } |
| |
| void update(IndexableFieldType ft) { |
| update(false, ft.omitNorms(), false, ft.indexOptions()); |
| } |
| |
| // should only be called by FieldInfos#addOrUpdate |
| void update(boolean storeTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) { |
| //System.out.println("FI.update field=" + name + " indexed=" + indexed + " omitNorms=" + omitNorms + " this.omitNorms=" + this.omitNorms); |
| if (this.indexOptions != indexOptions) { |
| if (this.indexOptions == null) { |
| this.indexOptions = indexOptions; |
| } else if (indexOptions != null) { |
| // downgrade |
| this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions; |
| } |
| } |
| |
| if (this.indexOptions != null) { // if updated field data is not for indexing, leave the updates out |
| this.storeTermVector |= storeTermVector; // once vector, always vector |
| this.storePayloads |= storePayloads; |
| |
| // Awkward: only drop norms if incoming update is indexed: |
| if (indexOptions != null && this.omitNorms != omitNorms) { |
| this.omitNorms = true; // if one require omitNorms at least once, it remains off for life |
| } |
| } |
| if (this.indexOptions == null || this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { |
| // cannot store payloads if we don't store positions: |
| this.storePayloads = false; |
| } |
| assert checkConsistency(); |
| } |
| |
| void setDocValuesType(DocValuesType type) { |
| if (docValueType != null && docValueType != type) { |
| throw new IllegalArgumentException("cannot change DocValues type from " + docValueType + " to " + type + " for field \"" + name + "\""); |
| } |
| docValueType = type; |
| assert checkConsistency(); |
| } |
| |
| /** Returns IndexOptions for the field, or null if the field is not indexed */ |
| public IndexOptions getIndexOptions() { |
| return indexOptions; |
| } |
| |
| /** |
| * Returns true if this field has any docValues. |
| */ |
| public boolean hasDocValues() { |
| return docValueType != null; |
| } |
| |
| /** |
| * Returns {@link DocValuesType} of the docValues. this may be null if the field has no docvalues. |
| */ |
| public DocValuesType getDocValuesType() { |
| return docValueType; |
| } |
| |
| /** Sets the docValues generation of this field. */ |
| void setDocValuesGen(long dvGen) { |
| this.dvGen = dvGen; |
| assert checkConsistency(); |
| } |
| |
| /** |
| * Returns the docValues generation of this field, or -1 if no docValues |
| * updates exist for it. |
| */ |
| public long getDocValuesGen() { |
| return dvGen; |
| } |
| |
| void setStoreTermVectors() { |
| storeTermVector = true; |
| assert checkConsistency(); |
| } |
| |
| void setStorePayloads() { |
| if (indexOptions != null && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) { |
| storePayloads = true; |
| } |
| assert checkConsistency(); |
| } |
| |
| /** |
| * Returns true if norms are explicitly omitted for this field |
| */ |
| public boolean omitsNorms() { |
| return omitNorms; |
| } |
| |
| /** |
| * Returns true if this field actually has any norms. |
| */ |
| public boolean hasNorms() { |
| return isIndexed() && omitNorms == false; |
| } |
| |
| /** |
| * Returns true if this field is indexed (has non-null {@link #getIndexOptions}). |
| */ |
| public boolean isIndexed() { |
| return indexOptions != null; |
| } |
| |
| /** |
| * Returns true if any payloads exist for this field. |
| */ |
| public boolean hasPayloads() { |
| return storePayloads; |
| } |
| |
| /** |
| * Returns true if any term vectors exist for this field. |
| */ |
| public boolean hasVectors() { |
| return storeTermVector; |
| } |
| |
| /** |
| * Get a codec attribute value, or null if it does not exist |
| */ |
| public String getAttribute(String key) { |
| if (attributes == null) { |
| return null; |
| } else { |
| return attributes.get(key); |
| } |
| } |
| |
| /** |
| * Puts a codec attribute value. |
| * <p> |
| * This is a key-value mapping for the field that the codec can use |
| * to store additional metadata, and will be available to the codec |
| * when reading the segment via {@link #getAttribute(String)} |
| * <p> |
| * If a value already exists for the field, it will be replaced with |
| * the new value. |
| */ |
| public String putAttribute(String key, String value) { |
| if (attributes == null) { |
| attributes = new HashMap<>(); |
| } |
| return attributes.put(key, value); |
| } |
| |
| /** |
| * Returns internal codec attributes map. May be null if no mappings exist. |
| */ |
| public Map<String,String> attributes() { |
| return attributes; |
| } |
| } |