/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.lucene.index; |
| |
| |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Locale; |
| import java.util.Map; |
| import java.util.Objects; |
| import java.util.Set; |
| import java.util.regex.Matcher; |
| |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.search.Sort; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.TrackingDirectoryWrapper; |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.Version; |
| |
| /** |
| * Information about a segment such as its name, directory, and files related |
| * to the segment. |
| * |
| * @lucene.experimental |
| */ |
| public final class SegmentInfo { |
| |
| // TODO: remove these from this class, for now this is the representation |
| /** Used by some member fields to mean not present (e.g., |
| * norms, deletions). */ |
| public static final int NO = -1; // e.g. no norms; no deletes; |
| |
| /** Used by some member fields to mean present (e.g., |
| * norms, deletions). */ |
| public static final int YES = 1; // e.g. have norms; have deletes; |
| |
| /** Unique segment name in the directory. */ |
| public final String name; |
| |
| private int maxDoc; // number of docs in seg |
| |
| /** Where this segment resides. */ |
| public final Directory dir; |
| |
| private boolean isCompoundFile; |
| |
| /** Id that uniquely identifies this segment. */ |
| private final byte[] id; |
| |
| private Codec codec; |
| |
| private Map<String,String> diagnostics; |
| |
| private Map<String,String> attributes; |
| |
| private final Sort indexSort; |
| |
| // Tracks the Lucene version this segment was created with, since 3.1. Null |
| // indicates an older than 3.0 index, and it's used to detect a too old index. |
| // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and |
| // specific versions afterwards ("3.0.0", "3.1.0" etc.). |
| // see o.a.l.util.Version. |
| private final Version version; |
| |
| // Tracks the minimum version that contributed documents to a segment. For |
| // flush segments, that is the version that wrote it. For merged segments, |
| // this is the minimum minVersion of all the segments that have been merged |
| // into this segment |
| Version minVersion; |
| |
| void setDiagnostics(Map<String, String> diagnostics) { |
| this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics))); |
| } |
| |
| /** Returns diagnostics saved into the segment when it was |
| * written. The map is immutable. */ |
| public Map<String, String> getDiagnostics() { |
| return diagnostics; |
| } |
| |
| /** |
| * Construct a new complete SegmentInfo instance from input. |
| * <p>Note: this is public only to allow access from |
| * the codecs package.</p> |
| */ |
| public SegmentInfo(Directory dir, Version version, Version minVersion, String name, int maxDoc, |
| boolean isCompoundFile, Codec codec, Map<String,String> diagnostics, |
| byte[] id, Map<String,String> attributes, Sort indexSort) { |
| assert !(dir instanceof TrackingDirectoryWrapper); |
| this.dir = Objects.requireNonNull(dir); |
| this.version = Objects.requireNonNull(version); |
| this.minVersion = minVersion; |
| this.name = Objects.requireNonNull(name); |
| this.maxDoc = maxDoc; |
| this.isCompoundFile = isCompoundFile; |
| this.codec = codec; |
| this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics))); |
| this.id = id; |
| if (id.length != StringHelper.ID_LENGTH) { |
| throw new IllegalArgumentException("invalid id: " + Arrays.toString(id)); |
| } |
| this.attributes = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(attributes))); |
| this.indexSort = indexSort; |
| } |
| |
| /** |
| * Mark whether this segment is stored as a compound file. |
| * |
| * @param isCompoundFile true if this is a compound file; |
| * else, false |
| */ |
| void setUseCompoundFile(boolean isCompoundFile) { |
| this.isCompoundFile = isCompoundFile; |
| } |
| |
| /** |
| * Returns true if this segment is stored as a compound |
| * file; else, false. |
| */ |
| public boolean getUseCompoundFile() { |
| return isCompoundFile; |
| } |
| |
| /** Can only be called once. */ |
| public void setCodec(Codec codec) { |
| assert this.codec == null; |
| if (codec == null) { |
| throw new IllegalArgumentException("codec must be non-null"); |
| } |
| this.codec = codec; |
| } |
| |
| /** Return {@link Codec} that wrote this segment. */ |
| public Codec getCodec() { |
| return codec; |
| } |
| |
| /** Returns number of documents in this segment (deletions |
| * are not taken into account). */ |
| public int maxDoc() { |
| if (this.maxDoc == -1) { |
| throw new IllegalStateException("maxDoc isn't set yet"); |
| } |
| return maxDoc; |
| } |
| |
| // NOTE: leave package private |
| void setMaxDoc(int maxDoc) { |
| if (this.maxDoc != -1) { |
| throw new IllegalStateException("maxDoc was already set: this.maxDoc=" + this.maxDoc + " vs maxDoc=" + maxDoc); |
| } |
| this.maxDoc = maxDoc; |
| } |
| |
| /** Return all files referenced by this SegmentInfo. */ |
| public Set<String> files() { |
| if (setFiles == null) { |
| throw new IllegalStateException("files were not computed yet; segment=" + name + " maxDoc=" + maxDoc); |
| } |
| return Collections.unmodifiableSet(setFiles); |
| } |
| |
| @Override |
| public String toString() { |
| return toString(0); |
| } |
| |
| /** Used for debugging. Format may suddenly change. |
| * |
| * <p>Current format looks like |
| * <code>_a(3.1):c45/4:[sorter=<long: "timestamp">!]</code>, which means |
| * the segment's name is <code>_a</code>; it was created with Lucene 3.1 (or |
| * '?' if it's unknown); it's using compound file |
| * format (would be <code>C</code> if not compound); it |
| * has 45 documents; it has 4 deletions (this part is |
| * left off when there are no deletions); it is sorted by the timestamp field |
| * in descending order (this part is omitted for unsorted segments).</p> |
| */ |
| public String toString(int delCount) { |
| StringBuilder s = new StringBuilder(); |
| s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':'); |
| char cfs = getUseCompoundFile() ? 'c' : 'C'; |
| s.append(cfs); |
| |
| s.append(maxDoc); |
| |
| if (delCount != 0) { |
| s.append('/').append(delCount); |
| } |
| |
| if (indexSort != null) { |
| s.append(":[indexSort="); |
| s.append(indexSort); |
| s.append(']'); |
| } |
| |
| if (!diagnostics.isEmpty()) { |
| s.append(":[diagnostics="); |
| s.append(diagnostics.toString()); |
| s.append(']'); |
| } |
| |
| if (!attributes.isEmpty()) { |
| s.append(":[attributes="); |
| s.append(attributes.toString()); |
| s.append(']'); |
| } |
| |
| return s.toString(); |
| } |
| |
| /** We consider another SegmentInfo instance equal if it |
| * has the same dir and same name. */ |
| @Override |
| public boolean equals(Object obj) { |
| if (this == obj) return true; |
| if (obj instanceof SegmentInfo) { |
| final SegmentInfo other = (SegmentInfo) obj; |
| return other.dir == dir && other.name.equals(name); |
| } else { |
| return false; |
| } |
| } |
| |
| @Override |
| public int hashCode() { |
| return dir.hashCode() + name.hashCode(); |
| } |
| |
| /** Returns the version of the code which wrote the segment. |
| */ |
| public Version getVersion() { |
| return version; |
| } |
| |
| /** |
| * Return the minimum Lucene version that contributed documents to this |
| * segment, or {@code null} if it is unknown. |
| */ |
| public Version getMinVersion() { |
| return minVersion; |
| } |
| |
| /** Return the id that uniquely identifies this segment. */ |
| public byte[] getId() { |
| return id.clone(); |
| } |
| |
| private Set<String> setFiles; |
| |
| /** Sets the files written for this segment. */ |
| public void setFiles(Collection<String> files) { |
| setFiles = new HashSet<>(); |
| addFiles(files); |
| } |
| |
| /** Add these files to the set of files written for this |
| * segment. */ |
| public void addFiles(Collection<String> files) { |
| checkFileNames(files); |
| for (String f : files) { |
| setFiles.add(namedForThisSegment(f)); |
| } |
| } |
| |
| /** Add this file to the set of files written for this |
| * segment. */ |
| public void addFile(String file) { |
| checkFileNames(Collections.singleton(file)); |
| setFiles.add(namedForThisSegment(file)); |
| } |
| |
| private void checkFileNames(Collection<String> files) { |
| Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher(""); |
| for (String file : files) { |
| m.reset(file); |
| if (!m.matches()) { |
| throw new IllegalArgumentException("invalid codec filename '" + file + "', must match: " + IndexFileNames.CODEC_FILE_PATTERN.pattern()); |
| } |
| if (file.toLowerCase(Locale.ROOT).endsWith(".tmp")) { |
| throw new IllegalArgumentException("invalid codec filename '" + file + "', cannot end with .tmp extension"); |
| } |
| } |
| } |
| |
| /** |
| * strips any segment name from the file, naming it with this segment |
| * this is because "segment names" can change, e.g. by addIndexes(Dir) |
| */ |
| String namedForThisSegment(String file) { |
| return name + IndexFileNames.stripSegmentName(file); |
| } |
| |
| /** |
| * Get a codec attribute value, or null if it does not exist |
| */ |
| public String getAttribute(String key) { |
| return attributes.get(key); |
| } |
| |
| /** |
| * Puts a codec attribute value. |
| * <p> |
| * This is a key-value mapping for the field that the codec can use to store |
| * additional metadata, and will be available to the codec when reading the |
| * segment via {@link #getAttribute(String)} |
| * <p> |
| * If a value already exists for the field, it will be replaced with the new |
| * value. |
| * This method make a copy on write for every attribute change. |
| */ |
| public String putAttribute(String key, String value) { |
| HashMap<String, String> newMap = new HashMap<>(attributes); |
| String oldValue = newMap.put(key, value); |
| // we make a full copy of this to prevent concurrent modifications to this in the toString method |
| // this method is only called when a segment is written but the SegmentInfo might be exposed |
| // in running merges which can cause ConcurrentModificationExceptions if we modify / share |
| // the same instance. Technically that's an unsafe publication but IW design would require |
| // significant changes to prevent this. On the other hand, since we expose the map in getAttributes() |
| // it's a good design to make it unmodifiable anyway. |
| attributes = Collections.unmodifiableMap(newMap); |
| return oldValue; |
| } |
| |
| /** |
| * Returns the internal codec attributes map. |
| * @return internal codec attributes map. |
| */ |
| public Map<String,String> getAttributes() { |
| return attributes; |
| } |
| |
| /** Return the sort order of this segment, or null if the index has no sort. */ |
| public Sort getIndexSort() { |
| return indexSort; |
| } |
| } |
| |