| package org.apache.lucene.index; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.Collection; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.SortedMap; |
| import java.util.TreeMap; |
| import java.util.Map.Entry; |
| |
| import org.apache.lucene.index.FieldInfo.IndexOptions; |
| import org.apache.lucene.index.values.ValueType; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.CodecUtil; |
| |
| /** Access to the Field Info file that describes document fields and whether or |
| * not they are indexed. Each segment has a separate Field Info file. Objects |
| * of this class are thread-safe for multiple readers, but only one thread can |
| * be adding documents at a time, with no other reader or writer threads |
| * accessing this object. |
| * @lucene.experimental |
| */ |
| public final class FieldInfos implements Iterable<FieldInfo> { |
| static final class FieldNumberBiMap { |
| |
| final static String CODEC_NAME = "GLOBAL_FIELD_MAP"; |
| |
| // Initial format |
| private static final int VERSION_START = 0; |
| |
| private static final int VERSION_CURRENT = VERSION_START; |
| |
| private final Map<Integer,String> numberToName; |
| private final Map<String,Integer> nameToNumber; |
| private int lowestUnassignedFieldNumber = -1; |
| private long lastVersion = 0; |
| private long version = 0; |
| |
| FieldNumberBiMap() { |
| this.nameToNumber = new HashMap<String, Integer>(); |
| this.numberToName = new HashMap<Integer, String>(); |
| } |
| |
| /** |
| * Returns the global field number for the given field name. If the name |
| * does not exist yet it tries to add it with the given preferred field |
| * number assigned if possible otherwise the first unassigned field number |
| * is used as the field number. |
| */ |
| synchronized int addOrGet(String fieldName, int preferredFieldNumber) { |
| Integer fieldNumber = nameToNumber.get(fieldName); |
| if (fieldNumber == null) { |
| final Integer preferredBoxed = Integer.valueOf(preferredFieldNumber); |
| |
| if (preferredFieldNumber != -1 && !numberToName.containsKey(preferredBoxed)) { |
| // cool - we can use this number globally |
| fieldNumber = preferredBoxed; |
| } else { |
| // find a new FieldNumber |
| while (numberToName.containsKey(++lowestUnassignedFieldNumber)) { |
| // might not be up to date - lets do the work once needed |
| } |
| fieldNumber = lowestUnassignedFieldNumber; |
| } |
| |
| version++; |
| numberToName.put(fieldNumber, fieldName); |
| nameToNumber.put(fieldName, fieldNumber); |
| |
| } |
| |
| return fieldNumber.intValue(); |
| } |
| |
| /** |
| * Sets the given field number and name if not yet set. |
| */ |
| synchronized void setIfNotSet(int fieldNumber, String fieldName) { |
| final Integer boxedFieldNumber = Integer.valueOf(fieldNumber); |
| if (!numberToName.containsKey(boxedFieldNumber) |
| && !nameToNumber.containsKey(fieldName)) { |
| version++; |
| numberToName.put(boxedFieldNumber, fieldName); |
| nameToNumber.put(fieldName, boxedFieldNumber); |
| } else { |
| assert containsConsistent(boxedFieldNumber, fieldName); |
| } |
| } |
| |
| /** |
| * Writes this {@link FieldNumberBiMap} to the given output and returns its |
| * version. |
| */ |
| public synchronized long write(IndexOutput output) throws IOException{ |
| Set<Entry<String, Integer>> entrySet = nameToNumber.entrySet(); |
| CodecUtil.writeHeader(output, CODEC_NAME, VERSION_CURRENT); |
| output.writeVInt(entrySet.size()); |
| for (Entry<String, Integer> entry : entrySet) { |
| output.writeVInt(entry.getValue().intValue()); |
| output.writeString(entry.getKey()); |
| } |
| return version; |
| } |
| |
| /** |
| * Reads the {@link FieldNumberBiMap} from the given input and resets the |
| * version to 0. |
| */ |
| public synchronized void read(IndexInput input) throws IOException{ |
| CodecUtil.checkHeader(input, CODEC_NAME, |
| VERSION_START, |
| VERSION_CURRENT); |
| final int size = input.readVInt(); |
| for (int i = 0; i < size; i++) { |
| final int num = input.readVInt(); |
| final String name = input.readString(); |
| setIfNotSet(num, name); |
| } |
| version = lastVersion = 0; |
| } |
| |
| /** |
| * Returns <code>true</code> iff the last committed version differs from the |
| * current version, otherwise <code>false</code> |
| * |
| * @return <code>true</code> iff the last committed version differs from the |
| * current version, otherwise <code>false</code> |
| */ |
| public synchronized boolean isDirty() { |
| return lastVersion != version; |
| } |
| |
| /** |
| * commits the given version if the given version is greater than the previous committed version |
| * |
| * @param version |
| * the version to commit |
| * @return <code>true</code> iff the version was successfully committed otherwise <code>false</code> |
| * @see #write(IndexOutput) |
| */ |
| public synchronized boolean commitLastVersion(long version) { |
| if (version > lastVersion) { |
| lastVersion = version; |
| return true; |
| } |
| return false; |
| } |
| |
| // just for testing |
| Set<Entry<String, Integer>> entries() { |
| return new HashSet<Entry<String, Integer>>(nameToNumber.entrySet()); |
| } |
| |
| // used by assert |
| synchronized boolean containsConsistent(Integer number, String name) { |
| return name.equals(numberToName.get(number)) |
| && number.equals(nameToNumber.get(name)); |
| } |
| } |
| |
| private final SortedMap<Integer,FieldInfo> byNumber = new TreeMap<Integer,FieldInfo>(); |
| private final HashMap<String,FieldInfo> byName = new HashMap<String,FieldInfo>(); |
| private final FieldNumberBiMap globalFieldNumbers; |
| |
| private boolean hasFreq; // only set if readonly |
| private boolean hasProx; // only set if readonly |
| private boolean hasVectors; // only set if readonly |
| private long version; // internal use to track changes |
| |
/**
 * Builds a read-only FieldInfos (one without a global field number map)
 * holding the given infos together with the supplied flags. This constructor
 * is public only so that the codecs package can reach it.
 *
 * @param infos the field infos to register
 * @param hasFreq value to report from {@link #hasFreq()}
 * @param hasProx value to report from {@link #hasProx()}
 * @param hasVectors value to report from {@link #hasVectors()}
 *
 * @lucene.internal
 */
public FieldInfos(FieldInfo[] infos, boolean hasFreq, boolean hasProx, boolean hasVectors) {
  this((FieldNumberBiMap) null);
  this.hasVectors = hasVectors;
  this.hasProx = hasProx;
  this.hasFreq = hasFreq;
  for (int i = 0; i < infos.length; i++) {
    putInternal(infos[i]);
  }
}
| |
/**
 * Creates an empty FieldInfos bound to the given {@link FieldNumberBiMap}.
 * Passing <code>null</code> yields a read-only instance.
 *
 * @param globalFieldNumbers the global name/number mapping, or
 *        <code>null</code> for a read-only instance
 * @see #isReadOnly()
 */
FieldInfos(FieldNumberBiMap globalFieldNumbers) {
  this.globalFieldNumbers = globalFieldNumbers;
}
| |
| /** |
| * adds the given field to this FieldInfos name / number mapping. The given FI |
| * must be present in the global field number mapping before this method it |
| * called |
| */ |
| private void putInternal(FieldInfo fi) { |
| assert !byNumber.containsKey(fi.number); |
| assert !byName.containsKey(fi.name); |
| assert globalFieldNumbers == null || globalFieldNumbers.containsConsistent(Integer.valueOf(fi.number), fi.name); |
| byNumber.put(fi.number, fi); |
| byName.put(fi.name, fi); |
| } |
| |
| private int nextFieldNumber(String name, int preferredFieldNumber) { |
| // get a global number for this field |
| final int fieldNumber = globalFieldNumbers.addOrGet(name, |
| preferredFieldNumber); |
| assert byNumber.get(fieldNumber) == null : "field number " + fieldNumber |
| + " already taken"; |
| return fieldNumber; |
| } |
| |
| /** |
| * Returns a deep clone of this FieldInfos instance. |
| */ |
| @Override |
| synchronized public Object clone() { |
| FieldInfos fis = new FieldInfos(globalFieldNumbers); |
| fis.hasFreq = hasFreq; |
| fis.hasProx = hasProx; |
| fis.hasVectors = hasVectors; |
| for (FieldInfo fi : this) { |
| FieldInfo clone = (FieldInfo) (fi).clone(); |
| fis.putInternal(clone); |
| } |
| return fis; |
| } |
| |
| /** Returns true if any fields do not positions */ |
| public boolean hasProx() { |
| if (isReadOnly()) { |
| return hasProx; |
| } |
| // mutable FIs must check! |
| for (FieldInfo fi : this) { |
| if (fi.isIndexed && fi.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** Returns true if any fields have freqs */ |
| public boolean hasFreq() { |
| if (isReadOnly()) { |
| return hasFreq; |
| } |
| // mutable FIs must check! |
| for (FieldInfo fi : this) { |
| if (fi.isIndexed && fi.indexOptions != IndexOptions.DOCS_ONLY) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Adds or updates fields that are indexed. Whether they have termvectors has to be specified. |
| * |
| * @param names The names of the fields |
| * @param storeTermVectors Whether the fields store term vectors or not |
| * @param storePositionWithTermVector true if positions should be stored. |
| * @param storeOffsetWithTermVector true if offsets should be stored |
| */ |
| synchronized public void addOrUpdateIndexed(Collection<String> names, boolean storeTermVectors, boolean storePositionWithTermVector, |
| boolean storeOffsetWithTermVector) { |
| for (String name : names) { |
| addOrUpdate(name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector); |
| } |
| } |
| |
| /** |
| * Assumes the fields are not storing term vectors. |
| * |
| * @param names The names of the fields |
| * @param isIndexed Whether the fields are indexed or not |
| * |
| * @see #addOrUpdate(String, boolean) |
| */ |
| synchronized public void addOrUpdate(Collection<String> names, boolean isIndexed) { |
| for (String name : names) { |
| addOrUpdate(name, isIndexed); |
| } |
| } |
| |
| /** |
| * Calls 5 parameter add with false for all TermVector parameters. |
| * |
| * @param name The name of the IndexableField |
| * @param isIndexed true if the field is indexed |
| * @see #addOrUpdate(String, boolean, boolean, boolean, boolean) |
| */ |
| synchronized public void addOrUpdate(String name, boolean isIndexed) { |
| addOrUpdate(name, isIndexed, false, false, false, false); |
| } |
| |
| /** |
| * Calls 5 parameter add with false for term vector positions and offsets. |
| * |
| * @param name The name of the field |
| * @param isIndexed true if the field is indexed |
| * @param storeTermVector true if the term vector should be stored |
| */ |
| synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector){ |
| addOrUpdate(name, isIndexed, storeTermVector, false, false, false); |
| } |
| |
| /** If the field is not yet known, adds it. If it is known, checks to make |
| * sure that the isIndexed flag is the same as was given previously for this |
| * field. If not - marks it as being indexed. Same goes for the TermVector |
| * parameters. |
| * |
| * @param name The name of the field |
| * @param isIndexed true if the field is indexed |
| * @param storeTermVector true if the term vector should be stored |
| * @param storePositionWithTermVector true if the term vector with positions should be stored |
| * @param storeOffsetWithTermVector true if the term vector with offsets should be stored |
| */ |
| synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector, |
| boolean storePositionWithTermVector, boolean storeOffsetWithTermVector) { |
| |
| addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false); |
| } |
| |
| /** If the field is not yet known, adds it. If it is known, checks to make |
| * sure that the isIndexed flag is the same as was given previously for this |
| * field. If not - marks it as being indexed. Same goes for the TermVector |
| * parameters. |
| * |
| * @param name The name of the field |
| * @param isIndexed true if the field is indexed |
| * @param storeTermVector true if the term vector should be stored |
| * @param storePositionWithTermVector true if the term vector with positions should be stored |
| * @param storeOffsetWithTermVector true if the term vector with offsets should be stored |
| * @param omitNorms true if the norms for the indexed field should be omitted |
| */ |
| synchronized public void addOrUpdate(String name, boolean isIndexed, boolean storeTermVector, |
| boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, boolean omitNorms) { |
| addOrUpdate(name, isIndexed, storeTermVector, storePositionWithTermVector, |
| storeOffsetWithTermVector, omitNorms, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null); |
| } |
| |
| /** If the field is not yet known, adds it. If it is known, checks to make |
| * sure that the isIndexed flag is the same as was given previously for this |
| * field. If not - marks it as being indexed. Same goes for the TermVector |
| * parameters. |
| * |
| * @param name The name of the field |
| * @param isIndexed true if the field is indexed |
| * @param storeTermVector true if the term vector should be stored |
| * @param storePositionWithTermVector true if the term vector with positions should be stored |
| * @param storeOffsetWithTermVector true if the term vector with offsets should be stored |
| * @param omitNorms true if the norms for the indexed field should be omitted |
| * @param storePayloads true if payloads should be stored for this field |
| * @param indexOptions if term freqs should be omitted for this field |
| */ |
| synchronized public FieldInfo addOrUpdate(String name, boolean isIndexed, boolean storeTermVector, |
| boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, |
| boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) { |
| return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, storePositionWithTermVector, |
| storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues); |
| } |
| |
| synchronized public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType, boolean scorePayloads, ValueType docValues) { |
| return addOrUpdateInternal(name, -1, fieldType.indexed(), fieldType.storeTermVectors(), |
| fieldType.storeTermVectorPositions(), fieldType.storeTermVectorOffsets(), fieldType.omitNorms(), scorePayloads, |
| fieldType.indexOptions(), docValues); |
| } |
| |
| synchronized private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed, |
| boolean storeTermVector, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, |
| boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValues) { |
| if (globalFieldNumbers == null) { |
| throw new IllegalStateException("FieldInfos are read-only, create a new instance with a global field map to make modifications to FieldInfos"); |
| } |
| FieldInfo fi = fieldInfo(name); |
| if (fi == null) { |
| final int fieldNumber = nextFieldNumber(name, preferredFieldNumber); |
| fi = addInternal(name, fieldNumber, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValues); |
| } else { |
| fi.update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions); |
| fi.setDocValues(docValues); |
| } |
| version++; |
| return fi; |
| } |
| |
| synchronized public FieldInfo add(FieldInfo fi) { |
| // IMPORTANT - reuse the field number if possible for consistent field numbers across segments |
| return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed, fi.storeTermVector, |
| fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, |
| fi.omitNorms, fi.storePayloads, |
| fi.indexOptions, fi.docValues); |
| } |
| |
| /* |
| * NOTE: if you call this method from a public method make sure you check if we are modifiable and throw an exception otherwise |
| */ |
| private FieldInfo addInternal(String name, int fieldNumber, boolean isIndexed, |
| boolean storeTermVector, boolean storePositionWithTermVector, |
| boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, ValueType docValuesType) { |
| // don't check modifiable here since we use that to initially build up FIs |
| if (globalFieldNumbers != null) { |
| globalFieldNumbers.setIfNotSet(fieldNumber, name); |
| } |
| final FieldInfo fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, storePositionWithTermVector, |
| storeOffsetWithTermVector, omitNorms, storePayloads, indexOptions, docValuesType); |
| putInternal(fi); |
| return fi; |
| } |
| |
| public int fieldNumber(String fieldName) { |
| FieldInfo fi = fieldInfo(fieldName); |
| return (fi != null) ? fi.number : -1; |
| } |
| |
| public FieldInfo fieldInfo(String fieldName) { |
| return byName.get(fieldName); |
| } |
| |
| /** |
| * Return the fieldName identified by its number. |
| * |
| * @param fieldNumber |
| * @return the fieldName or an empty string when the field |
| * with the given number doesn't exist. |
| */ |
| public String fieldName(int fieldNumber) { |
| FieldInfo fi = fieldInfo(fieldNumber); |
| return (fi != null) ? fi.name : ""; |
| } |
| |
| /** |
| * Return the fieldinfo object referenced by the fieldNumber. |
| * @param fieldNumber |
| * @return the FieldInfo object or null when the given fieldNumber |
| * doesn't exist. |
| */ |
| public FieldInfo fieldInfo(int fieldNumber) { |
| return (fieldNumber >= 0) ? byNumber.get(fieldNumber) : null; |
| } |
| |
| public Iterator<FieldInfo> iterator() { |
| return byNumber.values().iterator(); |
| } |
| |
| public int size() { |
| assert byNumber.size() == byName.size(); |
| return byNumber.size(); |
| } |
| |
| public boolean hasVectors() { |
| if (isReadOnly()) { |
| return hasVectors; |
| } |
| // mutable FIs must check |
| for (FieldInfo fi : this) { |
| if (fi.storeTermVector) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| public boolean hasNorms() { |
| for (FieldInfo fi : this) { |
| if (!fi.omitNorms) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Returns <code>true</code> iff this instance is not backed by a |
| * {@link org.apache.lucene.index.FieldInfos.FieldNumberBiMap}. Instances read from a directory via |
| * {@link FieldInfos#FieldInfos(FieldInfo[], boolean, boolean, boolean)} will always be read-only |
| * since no {@link org.apache.lucene.index.FieldInfos.FieldNumberBiMap} is supplied, otherwise |
| * <code>false</code>. |
| */ |
| public final boolean isReadOnly() { |
| return globalFieldNumbers == null; |
| } |
| |
| synchronized final long getVersion() { |
| return version; |
| } |
| |
| /** |
| * Reverts all uncommitted changes |
| * @see FieldInfo#revertUncommitted() |
| */ |
| void revertUncommitted() { |
| for (FieldInfo fieldInfo : this) { |
| fieldInfo.revertUncommitted(); |
| } |
| } |
| |
| final FieldInfos asReadOnly() { |
| if (isReadOnly()) { |
| return this; |
| } |
| final FieldInfos roFis = new FieldInfos((FieldNumberBiMap)null); |
| for (FieldInfo fieldInfo : this) { |
| FieldInfo clone = (FieldInfo) (fieldInfo).clone(); |
| roFis.putInternal(clone); |
| roFis.hasVectors |= clone.storeTermVector; |
| roFis.hasProx |= clone.isIndexed && clone.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; |
| roFis.hasFreq |= clone.isIndexed && clone.indexOptions != IndexOptions.DOCS_ONLY; |
| } |
| return roFis; |
| } |
| |
| public boolean anyDocValuesFields() { |
| for (FieldInfo fi : this) { |
| if (fi.hasDocValues()) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /** |
| * Creates a new {@link FieldInfo} instance from the given instance. If the given instance is |
| * read-only this instance will be read-only too. |
| * |
| * @see #isReadOnly() |
| */ |
| static FieldInfos from(FieldInfos other) { |
| return new FieldInfos(other.globalFieldNumbers); |
| } |
| } |