// blob: 4c64e409dcd1ea54a970e2c15c7f8a61fd20861c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.Matcher;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.search.Sort;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.TrackingDirectoryWrapper;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
/**
 * Information about a segment such as its name, directory, and files related
 * to the segment.
 *
 * <p>Two instances are considered equal when they refer to the same
 * {@link Directory} instance and carry the same segment name; see
 * {@link #equals(Object)}.
 *
 * @lucene.experimental
 */
public final class SegmentInfo {

  // TODO: remove these from this class, for now this is the representation
  /** Used by some member fields to mean not present (e.g.,
   *  norms, deletions). */
  public static final int NO = -1; // e.g. no norms; no deletes;

  /** Used by some member fields to mean present (e.g.,
   *  norms, deletions). */
  public static final int YES = 1; // e.g. have norms; have deletes;

  /** Unique segment name in the directory. */
  public final String name;

  // Number of docs in the segment. The value -1 is the "not set yet" sentinel
  // checked by maxDoc() and setMaxDoc(int).
  private int maxDoc;

  /** Where this segment resides. */
  public final Directory dir;

  private boolean isCompoundFile;

  /** Id that uniquely identifies this segment. Private copy; never handed out directly. */
  private final byte[] id;

  private Codec codec;

  // Always an unmodifiable snapshot; replaced wholesale by setDiagnostics.
  private Map<String,String> diagnostics;

  // Always an unmodifiable snapshot; replaced wholesale by putAttribute.
  private Map<String,String> attributes;

  private final Sort indexSort;

  // Tracks the Lucene version this segment was created with. Never null: the
  // constructor rejects a null version.
  // see o.a.l.util.Version.
  private final Version version;

  // Tracks the minimum version that contributed documents to a segment. For
  // flush segments, that is the version that wrote it. For merged segments,
  // this is the minimum minVersion of all the segments that have been merged
  // into this segment. May be null (see getMinVersion()).
  Version minVersion;

  /**
   * Replaces the diagnostics with an unmodifiable copy of the given map.
   *
   * @param diagnostics the new diagnostics; must not be null
   * @throws NullPointerException if {@code diagnostics} is null
   */
  void setDiagnostics(Map<String, String> diagnostics) {
    this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics)));
  }

  /** Returns diagnostics saved into the segment when it was
   *  written. The map is immutable. */
  public Map<String, String> getDiagnostics() {
    return diagnostics;
  }

  /**
   * Construct a new complete SegmentInfo instance from input.
   * <p>Note: this is public only to allow access from
   * the codecs package.</p>
   *
   * <p>{@code diagnostics}, {@code attributes} and {@code id} are copied, so
   * later changes made by the caller to those objects do not affect this
   * instance.</p>
   *
   * @param dir            directory holding the segment; must not be null
   * @param version        version that wrote the segment; must not be null
   * @param minVersion     minimum version that contributed documents, may be null
   * @param name           segment name; must not be null
   * @param maxDoc         number of documents, or -1 if it will be set later
   *                       through {@link #setMaxDoc(int)}
   * @param isCompoundFile whether the segment is stored as a compound file
   * @param codec          codec of the segment, or null if it will be set later
   *                       through {@link #setCodec(Codec)}
   * @param diagnostics    diagnostics map; must not be null
   * @param id             segment id; must be {@code StringHelper.ID_LENGTH} bytes long
   * @param attributes     codec attributes; must not be null
   * @param indexSort      sort order of the segment, or null if unsorted
   * @throws NullPointerException     if a required argument is null
   * @throws IllegalArgumentException if {@code id} has the wrong length
   */
  public SegmentInfo(Directory dir, Version version, Version minVersion, String name, int maxDoc,
                     boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
                     byte[] id, Map<String,String> attributes, Sort indexSort) {
    assert !(dir instanceof TrackingDirectoryWrapper);
    this.dir = Objects.requireNonNull(dir);
    this.version = Objects.requireNonNull(version);
    this.minVersion = minVersion;
    this.name = Objects.requireNonNull(name);
    this.maxDoc = maxDoc;
    this.isCompoundFile = isCompoundFile;
    this.codec = codec;
    this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics)));
    // Validate first, then keep a private copy: getId() already hands out
    // clones, so the array stored here must not stay shared with the caller.
    if (id.length != StringHelper.ID_LENGTH) {
      throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
    }
    this.id = id.clone();
    this.attributes = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(attributes)));
    this.indexSort = indexSort;
  }

  /**
   * Mark whether this segment is stored as a compound file.
   *
   * @param isCompoundFile true if this is a compound file;
   * else, false
   */
  void setUseCompoundFile(boolean isCompoundFile) {
    this.isCompoundFile = isCompoundFile;
  }

  /**
   * Returns true if this segment is stored as a compound
   * file; else, false.
   */
  public boolean getUseCompoundFile() {
    return isCompoundFile;
  }

  /**
   * Sets the codec of this segment. Can only be called once (enforced by an
   * assertion only).
   *
   * @throws IllegalArgumentException if {@code codec} is null
   */
  public void setCodec(Codec codec) {
    assert this.codec == null;
    if (codec == null) {
      throw new IllegalArgumentException("codec must be non-null");
    }
    this.codec = codec;
  }

  /** Return {@link Codec} that wrote this segment. */
  public Codec getCodec() {
    return codec;
  }

  /**
   * Returns number of documents in this segment (deletions
   * are not taken into account).
   *
   * @throws IllegalStateException if maxDoc has not been set yet (it is still -1)
   */
  public int maxDoc() {
    if (this.maxDoc == -1) {
      throw new IllegalStateException("maxDoc isn't set yet");
    }
    return maxDoc;
  }

  /**
   * Sets the number of documents; allowed only while the current value is
   * still the -1 sentinel.
   *
   * @throws IllegalStateException if maxDoc was already set
   */
  // NOTE: leave package private
  void setMaxDoc(int maxDoc) {
    if (this.maxDoc != -1) {
      throw new IllegalStateException("maxDoc was already set: this.maxDoc=" + this.maxDoc + " vs maxDoc=" + maxDoc);
    }
    this.maxDoc = maxDoc;
  }

  /**
   * Return all files referenced by this SegmentInfo, as an unmodifiable view.
   *
   * @throws IllegalStateException if {@link #setFiles(Collection)} has not been called yet
   */
  public Set<String> files() {
    if (setFiles == null) {
      throw new IllegalStateException("files were not computed yet; segment=" + name + " maxDoc=" + maxDoc);
    }
    return Collections.unmodifiableSet(setFiles);
  }

  @Override
  public String toString() {
    return toString(0);
  }

  /** Used for debugging. Format may suddenly change.
   *
   *  <p>Current format looks like
   *  <code>_a(VERSION):c45/4:[indexSort=SORT]:[diagnostics={...}]:[attributes={...}]</code>,
   *  which means the segment's name is <code>_a</code>; it was created with
   *  Lucene <code>VERSION</code>; it's using compound file
   *  format (would be <code>C</code> if not compound); it
   *  has 45 documents; it has 4 deletions (this part is
   *  left off when <code>delCount</code> is 0); it is sorted by <code>SORT</code>
   *  (this part is omitted for unsorted segments). The diagnostics and
   *  attributes parts are each omitted when the corresponding map is empty.</p>
   *
   *  <p>The document count is printed from the raw field, so it shows
   *  <code>-1</code> while maxDoc has not been set yet.</p>
   *
   *  @param delCount number of deleted documents to report
   */
  public String toString(int delCount) {
    StringBuilder s = new StringBuilder();
    // version is never null (enforced by the constructor), so no placeholder is needed.
    s.append(name).append('(').append(version).append(')').append(':');
    char cfs = getUseCompoundFile() ? 'c' : 'C';
    s.append(cfs);

    s.append(maxDoc);

    if (delCount != 0) {
      s.append('/').append(delCount);
    }

    if (indexSort != null) {
      s.append(":[indexSort=");
      s.append(indexSort);
      s.append(']');
    }

    if (!diagnostics.isEmpty()) {
      s.append(":[diagnostics=");
      s.append(diagnostics.toString());
      s.append(']');
    }

    if (!attributes.isEmpty()) {
      s.append(":[attributes=");
      s.append(attributes.toString());
      s.append(']');
    }

    return s.toString();
  }

  /** We consider another SegmentInfo instance equal if it
   *  has the same dir and same name. The directory is compared
   *  by reference, the name by value. */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) return true;
    if (obj instanceof SegmentInfo) {
      final SegmentInfo other = (SegmentInfo) obj;
      return other.dir == dir && other.name.equals(name);
    } else {
      return false;
    }
  }

  @Override
  public int hashCode() {
    return dir.hashCode() + name.hashCode();
  }

  /** Returns the version of the code which wrote the segment.
   */
  public Version getVersion() {
    return version;
  }

  /**
   * Return the minimum Lucene version that contributed documents to this
   * segment, or {@code null} if it is unknown.
   */
  public Version getMinVersion() {
    return minVersion;
  }

  /** Return the id that uniquely identifies this segment. A fresh copy is returned on every call. */
  public byte[] getId() {
    return id.clone();
  }

  // Files of this segment, already renamed for this segment; null until
  // setFiles(Collection) has been called.
  private Set<String> setFiles;

  /**
   * Sets the files written for this segment, discarding any previously
   * recorded files.
   *
   * @throws IllegalArgumentException if a file name is rejected by the
   *         checks applied in {@link #addFiles(Collection)}
   */
  public void setFiles(Collection<String> files) {
    setFiles = new HashSet<>();
    addFiles(files);
  }

  /**
   * Add these files to the set of files written for this
   * segment. {@link #setFiles(Collection)} must have been called before.
   * All names are validated before any of them is added.
   *
   * @throws IllegalArgumentException if a name does not match the codec file
   *         pattern or ends with {@code .tmp} (case-insensitive)
   */
  public void addFiles(Collection<String> files) {
    checkFileNames(files);
    for (String f : files) {
      setFiles.add(namedForThisSegment(f));
    }
  }

  /**
   * Add this file to the set of files written for this
   * segment. {@link #setFiles(Collection)} must have been called before.
   *
   * @throws IllegalArgumentException if the name does not match the codec file
   *         pattern or ends with {@code .tmp} (case-insensitive)
   */
  public void addFile(String file) {
    checkFileNames(Collections.singleton(file));
    setFiles.add(namedForThisSegment(file));
  }

  /**
   * Rejects names that do not match {@code IndexFileNames.CODEC_FILE_PATTERN}
   * or that end with a {@code .tmp} extension.
   */
  private void checkFileNames(Collection<String> files) {
    // One Matcher is reused for all names instead of creating one per file.
    Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
    for (String file : files) {
      m.reset(file);
      if (!m.matches()) {
        throw new IllegalArgumentException("invalid codec filename '" + file + "', must match: " + IndexFileNames.CODEC_FILE_PATTERN.pattern());
      }
      if (file.toLowerCase(Locale.ROOT).endsWith(".tmp")) {
        throw new IllegalArgumentException("invalid codec filename '" + file + "', cannot end with .tmp extension");
      }
    }
  }

  /**
   * strips any segment name from the file, naming it with this segment
   * this is because "segment names" can change, e.g. by addIndexes(Dir)
   */
  String namedForThisSegment(String file) {
    return name + IndexFileNames.stripSegmentName(file);
  }

  /**
   * Get a codec attribute value, or null if it does not exist
   */
  public String getAttribute(String key) {
    return attributes.get(key);
  }

  /**
   * Puts a codec attribute value.
   * <p>
   * This is a key-value mapping for the field that the codec can use to store
   * additional metadata, and will be available to the codec when reading the
   * segment via {@link #getAttribute(String)}
   * <p>
   * If a value already exists for the field, it will be replaced with the new
   * value.
   * This method makes a full copy of the attributes map on every change.
   *
   * @return the value previously stored under {@code key}, or null if there was none
   */
  public String putAttribute(String key, String value) {
    HashMap<String, String> newMap = new HashMap<>(attributes);
    String oldValue = newMap.put(key, value);
    // we make a full copy of this to prevent concurrent modifications to this in the toString method
    // this method is only called when a segment is written but the SegmentInfo might be exposed
    // in running merges which can cause ConcurrentModificationExceptions if we modify / share
    // the same instance. Technically that's an unsafe publication but IW design would require
    // significant changes to prevent this. On the other hand, since we expose the map in getAttributes()
    // it's a good design to make it unmodifiable anyway.
    attributes = Collections.unmodifiableMap(newMap);
    return oldValue;
  }

  /**
   * Returns the internal codec attributes map.
   * @return internal codec attributes map; it is unmodifiable.
   */
  public Map<String,String> getAttributes() {
    return attributes;
  }

  /** Return the sort order of this segment, or null if the index has no sort. */
  public Sort getIndexSort() {
    return indexSort;
  }
}