lucene/core/src/java/org/apache/lucene/index/SegmentInfo.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.index;


 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
 import java.util.regex.Matcher;

 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.TrackingDirectoryWrapper;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.Version;

 /**
  * Information about a segment such as its name, directory, and files related
  * to the segment.
  *
  * @lucene.experimental
  */
 public final class SegmentInfo {

   // TODO: remove these from this class, for now this is the representation
   /** Used by some member fields to mean not present (e.g.,
    *  norms, deletions). */
   public static final int NO = -1;          // e.g. no norms; no deletes;

   /** Used by some member fields to mean present (e.g.,
    *  norms, deletions). */
   public static final int YES = 1;          // e.g. have norms; have deletes;

   /** Unique segment name in the directory. */
   public final String name;

   // Number of docs in the segment; -1 means "not set yet" (see maxDoc() and setMaxDoc(int)).
   private int maxDoc;

   /** Where this segment resides. */
   public final Directory dir;

   private boolean isCompoundFile;

   /** Id that uniquely identifies this segment. */
   private final byte[] id;

   private Codec codec;

   // Always an unmodifiable private copy; replaced as a whole by setDiagnostics.
   private Map<String,String> diagnostics;

   // Always an unmodifiable private copy; replaced as a whole by putAttribute.
   private Map<String,String> attributes;

   private final Sort indexSort;

   // The Lucene version this segment was created with. Never null: the
   // constructor rejects a null version.
   // see o.a.l.util.Version.
   private final Version version;

   // Tracks the minimum version that contributed documents to a segment. For
   // flush segments, that is the version that wrote it. For merged segments,
   // this is the minimum minVersion of all the segments that have been merged
   // into this segment
   Version minVersion;

   /**
    * Replaces the diagnostics with an unmodifiable copy of the given map.
    *
    * @param diagnostics new diagnostics; must not be null
    * @throws NullPointerException if {@code diagnostics} is null
    */
   void setDiagnostics(Map<String, String> diagnostics) {
     this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics)));
   }

   /** Returns diagnostics saved into the segment when it was
    *  written. The map is immutable. */
   public Map<String, String> getDiagnostics() {
     return diagnostics;
   }

   /**
    * Construct a new complete SegmentInfo instance from input.
    * <p>Note: this is public only to allow access from
    * the codecs package.</p>
    *
    * <p>{@code diagnostics} and {@code attributes} are copied into unmodifiable maps;
    * {@code codec}, {@code minVersion} and {@code indexSort} are stored as given and
    * are not null-checked here.</p>
    *
    * @throws NullPointerException if {@code dir}, {@code version}, {@code name},
    *         {@code diagnostics}, {@code id} or {@code attributes} is null
    * @throws IllegalArgumentException if {@code id} does not have
    *         {@code StringHelper.ID_LENGTH} bytes
    */
   public SegmentInfo(Directory dir, Version version, Version minVersion, String name, int maxDoc,
                      boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
                      byte[] id, Map<String,String> attributes, Sort indexSort) {
     assert !(dir instanceof TrackingDirectoryWrapper);
     this.dir = Objects.requireNonNull(dir);
     this.version = Objects.requireNonNull(version);
     this.minVersion = minVersion;
     this.name = Objects.requireNonNull(name);
     this.maxDoc = maxDoc;
     this.isCompoundFile = isCompoundFile;
     this.codec = codec;
     this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics)));
     this.id = id;
     if (id.length != StringHelper.ID_LENGTH) {
       throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
     }
     this.attributes = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(attributes)));
     this.indexSort = indexSort;
   }

   /**
    * Mark whether this segment is stored as a compound file.
    *
    * @param isCompoundFile true if this is a compound file;
    * else, false
    */
   void setUseCompoundFile(boolean isCompoundFile) {
     this.isCompoundFile = isCompoundFile;
   }

   /**
    * Returns true if this segment is stored as a compound
    * file; else, false.
    */
   public boolean getUseCompoundFile() {
     return isCompoundFile;
   }

   /**
    * Sets the codec. Can only be called once: an assertion checks that no
    * codec has been set yet.
    *
    * @throws IllegalArgumentException if {@code codec} is null
    */
   public void setCodec(Codec codec) {
     assert this.codec == null;
     if (codec == null) {
       throw new IllegalArgumentException("codec must be non-null");
     }
     this.codec = codec;
   }

   /** Return {@link Codec} that wrote this segment. */
   public Codec getCodec() {
     return codec;
   }

   /**
    * Returns number of documents in this segment (deletions
    * are not taken into account).
    *
    * @throws IllegalStateException if maxDoc is still -1, i.e. not set yet
    */
   public int maxDoc() {
     if (this.maxDoc == -1) {
       throw new IllegalStateException("maxDoc isn't set yet");
     }
     return maxDoc;
   }

   // NOTE: leave package private
   /**
    * Sets maxDoc; only allowed while the current value is still -1.
    *
    * @throws IllegalStateException if maxDoc was already set
    */
   void setMaxDoc(int maxDoc) {
     if (this.maxDoc != -1) {
       throw new IllegalStateException("maxDoc was already set: this.maxDoc=" + this.maxDoc + " vs maxDoc=" + maxDoc);
     }
     this.maxDoc = maxDoc;
   }

   /**
    * Return all files referenced by this SegmentInfo, as an unmodifiable view.
    *
    * @throws IllegalStateException if {@link #setFiles(Collection)} has not been called yet
    */
   public Set<String> files() {
     if (setFiles == null) {
       throw new IllegalStateException("files were not computed yet; segment=" + name + " maxDoc=" + maxDoc);
     }
     return Collections.unmodifiableSet(setFiles);
   }

   @Override
   public String toString() {
     return toString(0);
   }

   /** Used for debugging.  Format may suddenly change.
    *
    *  <p>Current format looks like
    *  <code>_a(3.1):c45/4:[indexSort=&lt;long: "timestamp"&gt;!]</code>, which means
    *  the segment's name is <code>_a</code>; it was created with Lucene 3.1;
    *  it's using compound file
    *  format (would be <code>C</code> if not compound); it
    *  has 45 documents; it has 4 deletions (this part is
    *  left off when {@code delCount} is 0); it is sorted by the timestamp field
    *  in descending order (this part is omitted for unsorted segments).
    *  Non-empty diagnostics and attributes maps are appended as
    *  <code>:[diagnostics=...]</code> and <code>:[attributes=...]</code>.</p>
    *
    *  @param delCount number of deletions to report
    */
   public String toString(int delCount) {
     StringBuilder s = new StringBuilder();
     // version is final and null-checked in the constructor, so no "unknown" fallback is needed.
     s.append(name).append('(').append(version).append(')').append(':');
     char cfs = getUseCompoundFile() ? 'c' : 'C';
     s.append(cfs);

     // Reads the field directly (not maxDoc()) so an unset value prints as -1 instead of throwing.
     s.append(maxDoc);

     if (delCount != 0) {
       s.append('/').append(delCount);
     }

     if (indexSort != null) {
       s.append(":[indexSort=");
       s.append(indexSort);
       s.append(']');
     }

     if (!diagnostics.isEmpty()) {
       s.append(":[diagnostics=");
       s.append(diagnostics.toString());
       s.append(']');
     }

     if (!attributes.isEmpty()) {
       s.append(":[attributes=");
       s.append(attributes.toString());
       s.append(']');
     }

     return s.toString();
   }

   /** We consider another SegmentInfo instance equal if it
    *  has the same dir (compared by identity) and same name. */
   @Override
   public boolean equals(Object obj) {
     if (this == obj) return true;
     if (obj instanceof SegmentInfo) {
       final SegmentInfo other = (SegmentInfo) obj;
       return other.dir == dir && other.name.equals(name);
     } else {
       return false;
     }
   }

   @Override
   public int hashCode() {
     return dir.hashCode() + name.hashCode();
   }

   /** Returns the version of the code which wrote the segment.
    */
   public Version getVersion() {
     return version;
   }

   /**
    * Return the minimum Lucene version that contributed documents to this
    * segment, or {@code null} if it is unknown.
    */
   public Version getMinVersion() {
     return minVersion;
   }

   /** Return the id that uniquely identifies this segment (a fresh copy on every call). */
   public byte[] getId() {
     return id.clone();
   }

   // Files written for this segment; null until setFiles(Collection) is called.
   private Set<String> setFiles;

   /**
    * Sets the files written for this segment, replacing any previous set.
    * Every name is validated and renamed for this segment before the
    * previous set is replaced, so a rejected name leaves the earlier
    * files untouched.
    *
    * @throws IllegalArgumentException if a name is not a valid codec file name
    */
   public void setFiles(Collection<String> files) {
     checkFileNames(files);
     Set<String> renamed = new HashSet<>();
     for (String f : files) {
       renamed.add(namedForThisSegment(f));
     }
     setFiles = renamed;
   }

   /**
    * Add these files to the set of files written for this segment.
    * {@link #setFiles(Collection)} must have been called before; otherwise
    * adding a file fails with a NullPointerException.
    *
    * @throws IllegalArgumentException if a name is not a valid codec file name
    */
   public void addFiles(Collection<String> files) {
     checkFileNames(files);
     for (String f : files) {
       setFiles.add(namedForThisSegment(f));
     }
   }

   /**
    * Add this file to the set of files written for this segment.
    * {@link #setFiles(Collection)} must have been called before; otherwise
    * this fails with a NullPointerException.
    *
    * @throws IllegalArgumentException if the name is not a valid codec file name
    */
   public void addFile(String file) {
     checkFileNames(Collections.singleton(file));
     setFiles.add(namedForThisSegment(file));
   }

   // Rejects any name that does not match IndexFileNames.CODEC_FILE_PATTERN or
   // that ends (case-insensitively) with ".tmp".
   private void checkFileNames(Collection<String> files) {
     // One matcher is reused for all names.
     Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
     for (String file : files) {
       m.reset(file);
       if (!m.matches()) {
         throw new IllegalArgumentException("invalid codec filename '" + file + "', must match: " + IndexFileNames.CODEC_FILE_PATTERN.pattern());
       }
       if (file.toLowerCase(Locale.ROOT).endsWith(".tmp")) {
         throw new IllegalArgumentException("invalid codec filename '" + file + "', cannot end with .tmp extension");
       }
     }
   }

   /**
    * strips any segment name from the file, naming it with this segment
    * this is because "segment names" can change, e.g. by addIndexes(Dir)
    */
   String namedForThisSegment(String file) {
     return name + IndexFileNames.stripSegmentName(file);
   }

   /**
    * Get a codec attribute value, or null if it does not exist
    */
   public String getAttribute(String key) {
     return attributes.get(key);
   }

   /**
    * Puts a codec attribute value.
    * <p>
    * This is a key-value mapping for the field that the codec can use to store
    * additional metadata, and will be available to the codec when reading the
    * segment via {@link #getAttribute(String)}
    * <p>
    * If a value already exists for the field, it will be replaced with the new
    * value.
    * This method makes a copy on write for every attribute change.
    *
    * @return the previous value for {@code key}, or null if there was none
    */
   public String putAttribute(String key, String value) {
     Map<String, String> newMap = new HashMap<>(attributes);
     String oldValue = newMap.put(key, value);
     // we make a full copy of this to prevent concurrent modifications to this in the toString method
     // this method is only called when a segment is written but the SegmentInfo might be exposed
     // in running merges which can cause ConcurrentModificationExceptions if we modify / share
     // the same instance. Technically that's an unsafe publication but IW design would require
     // significant changes to prevent this. On the other hand, since we expose the map in getAttributes()
     // it's a good design to make it unmodifiable anyway.
     attributes = Collections.unmodifiableMap(newMap);
     return oldValue;
   }

   /**
    * Returns the internal codec attributes map.
    * @return internal codec attributes map (unmodifiable).
    */
   public Map<String,String> getAttributes() {
     return attributes;
   }

   /** Return the sort order of this segment, or null if the index has no sort. */
   public Sort getIndexSort() {
     return indexSort;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.index;


	import java.util.Arrays;
	import java.util.Collection;
	import java.util.Collections;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.Locale;
	import java.util.Map;
	import java.util.Objects;
	import java.util.Set;
	import java.util.regex.Matcher;

	import org.apache.lucene.codecs.Codec;
	import org.apache.lucene.search.Sort;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.store.TrackingDirectoryWrapper;
	import org.apache.lucene.util.StringHelper;
	import org.apache.lucene.util.Version;

	/**
	* Information about a segment such as its name, directory, and files related
	* to the segment.
	*
	* @lucene.experimental
	*/
	public final class SegmentInfo {

	  // TODO: remove these from this class, for now this is the representation
	  /** Used by some member fields to mean not present (e.g.,
	   *  norms, deletions). */
	  public static final int NO = -1; // e.g. no norms; no deletes;

	  /** Used by some member fields to mean present (e.g.,
	   *  norms, deletions). */
	  public static final int YES = 1; // e.g. have norms; have deletes;

	  /** Unique segment name in the directory. */
	  public final String name;

	  // Number of docs in the segment; -1 means "not set yet" (see maxDoc() and setMaxDoc(int)).
	  private int maxDoc;

	  /** Where this segment resides. */
	  public final Directory dir;

	  private boolean isCompoundFile;

	  /** Id that uniquely identifies this segment. */
	  private final byte[] id;

	  private Codec codec;

	  // Always an unmodifiable private copy; replaced as a whole by setDiagnostics.
	  private Map<String,String> diagnostics;

	  // Always an unmodifiable private copy; replaced as a whole by putAttribute.
	  private Map<String,String> attributes;

	  private final Sort indexSort;

	  // The Lucene version this segment was created with. Never null: the
	  // constructor rejects a null version.
	  // see o.a.l.util.Version.
	  private final Version version;

	  // Tracks the minimum version that contributed documents to a segment. For
	  // flush segments, that is the version that wrote it. For merged segments,
	  // this is the minimum minVersion of all the segments that have been merged
	  // into this segment
	  Version minVersion;

	  /**
	   * Replaces the diagnostics with an unmodifiable copy of the given map.
	   *
	   * @param diagnostics new diagnostics; must not be null
	   * @throws NullPointerException if {@code diagnostics} is null
	   */
	  void setDiagnostics(Map<String, String> diagnostics) {
	    this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics)));
	  }

	  /** Returns diagnostics saved into the segment when it was
	   *  written. The map is immutable. */
	  public Map<String, String> getDiagnostics() {
	    return diagnostics;
	  }

	  /**
	   * Construct a new complete SegmentInfo instance from input.
	   * <p>Note: this is public only to allow access from
	   * the codecs package.</p>
	   *
	   * <p>{@code diagnostics} and {@code attributes} are copied into unmodifiable maps;
	   * {@code codec}, {@code minVersion} and {@code indexSort} are stored as given and
	   * are not null-checked here.</p>
	   *
	   * @throws NullPointerException if {@code dir}, {@code version}, {@code name},
	   *         {@code diagnostics}, {@code id} or {@code attributes} is null
	   * @throws IllegalArgumentException if {@code id} does not have
	   *         {@code StringHelper.ID_LENGTH} bytes
	   */
	  public SegmentInfo(Directory dir, Version version, Version minVersion, String name, int maxDoc,
	                     boolean isCompoundFile, Codec codec, Map<String,String> diagnostics,
	                     byte[] id, Map<String,String> attributes, Sort indexSort) {
	    assert !(dir instanceof TrackingDirectoryWrapper);
	    this.dir = Objects.requireNonNull(dir);
	    this.version = Objects.requireNonNull(version);
	    this.minVersion = minVersion;
	    this.name = Objects.requireNonNull(name);
	    this.maxDoc = maxDoc;
	    this.isCompoundFile = isCompoundFile;
	    this.codec = codec;
	    this.diagnostics = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(diagnostics)));
	    this.id = id;
	    if (id.length != StringHelper.ID_LENGTH) {
	      throw new IllegalArgumentException("invalid id: " + Arrays.toString(id));
	    }
	    this.attributes = Collections.unmodifiableMap(new HashMap<>(Objects.requireNonNull(attributes)));
	    this.indexSort = indexSort;
	  }

	  /**
	   * Mark whether this segment is stored as a compound file.
	   *
	   * @param isCompoundFile true if this is a compound file;
	   * else, false
	   */
	  void setUseCompoundFile(boolean isCompoundFile) {
	    this.isCompoundFile = isCompoundFile;
	  }

	  /**
	   * Returns true if this segment is stored as a compound
	   * file; else, false.
	   */
	  public boolean getUseCompoundFile() {
	    return isCompoundFile;
	  }

	  /**
	   * Sets the codec. Can only be called once: an assertion checks that no
	   * codec has been set yet.
	   *
	   * @throws IllegalArgumentException if {@code codec} is null
	   */
	  public void setCodec(Codec codec) {
	    assert this.codec == null;
	    if (codec == null) {
	      throw new IllegalArgumentException("codec must be non-null");
	    }
	    this.codec = codec;
	  }

	  /** Return {@link Codec} that wrote this segment. */
	  public Codec getCodec() {
	    return codec;
	  }

	  /**
	   * Returns number of documents in this segment (deletions
	   * are not taken into account).
	   *
	   * @throws IllegalStateException if maxDoc is still -1, i.e. not set yet
	   */
	  public int maxDoc() {
	    if (this.maxDoc == -1) {
	      throw new IllegalStateException("maxDoc isn't set yet");
	    }
	    return maxDoc;
	  }

	  // NOTE: leave package private
	  /**
	   * Sets maxDoc; only allowed while the current value is still -1.
	   *
	   * @throws IllegalStateException if maxDoc was already set
	   */
	  void setMaxDoc(int maxDoc) {
	    if (this.maxDoc != -1) {
	      throw new IllegalStateException("maxDoc was already set: this.maxDoc=" + this.maxDoc + " vs maxDoc=" + maxDoc);
	    }
	    this.maxDoc = maxDoc;
	  }

	  /**
	   * Return all files referenced by this SegmentInfo, as an unmodifiable view.
	   *
	   * @throws IllegalStateException if {@link #setFiles(Collection)} has not been called yet
	   */
	  public Set<String> files() {
	    if (setFiles == null) {
	      throw new IllegalStateException("files were not computed yet; segment=" + name + " maxDoc=" + maxDoc);
	    }
	    return Collections.unmodifiableSet(setFiles);
	  }

	  @Override
	  public String toString() {
	    return toString(0);
	  }

	  /** Used for debugging. Format may suddenly change.
	   *
	   *  <p>Current format looks like
	   *  <code>_a(3.1):c45/4:[indexSort=&lt;long: "timestamp"&gt;!]</code>, which means
	   *  the segment's name is <code>_a</code>; it was created with Lucene 3.1;
	   *  it's using compound file
	   *  format (would be <code>C</code> if not compound); it
	   *  has 45 documents; it has 4 deletions (this part is
	   *  left off when {@code delCount} is 0); it is sorted by the timestamp field
	   *  in descending order (this part is omitted for unsorted segments).
	   *  Non-empty diagnostics and attributes maps are appended as
	   *  <code>:[diagnostics=...]</code> and <code>:[attributes=...]</code>.</p>
	   *
	   *  @param delCount number of deletions to report
	   */
	  public String toString(int delCount) {
	    StringBuilder s = new StringBuilder();
	    // version is final and null-checked in the constructor, so no "unknown" fallback is needed.
	    s.append(name).append('(').append(version).append(')').append(':');
	    char cfs = getUseCompoundFile() ? 'c' : 'C';
	    s.append(cfs);

	    // Reads the field directly (not maxDoc()) so an unset value prints as -1 instead of throwing.
	    s.append(maxDoc);

	    if (delCount != 0) {
	      s.append('/').append(delCount);
	    }

	    if (indexSort != null) {
	      s.append(":[indexSort=");
	      s.append(indexSort);
	      s.append(']');
	    }

	    if (!diagnostics.isEmpty()) {
	      s.append(":[diagnostics=");
	      s.append(diagnostics.toString());
	      s.append(']');
	    }

	    if (!attributes.isEmpty()) {
	      s.append(":[attributes=");
	      s.append(attributes.toString());
	      s.append(']');
	    }

	    return s.toString();
	  }

	  /** We consider another SegmentInfo instance equal if it
	   *  has the same dir (compared by identity) and same name. */
	  @Override
	  public boolean equals(Object obj) {
	    if (this == obj) return true;
	    if (obj instanceof SegmentInfo) {
	      final SegmentInfo other = (SegmentInfo) obj;
	      return other.dir == dir && other.name.equals(name);
	    } else {
	      return false;
	    }
	  }

	  @Override
	  public int hashCode() {
	    return dir.hashCode() + name.hashCode();
	  }

	  /** Returns the version of the code which wrote the segment.
	   */
	  public Version getVersion() {
	    return version;
	  }

	  /**
	   * Return the minimum Lucene version that contributed documents to this
	   * segment, or {@code null} if it is unknown.
	   */
	  public Version getMinVersion() {
	    return minVersion;
	  }

	  /** Return the id that uniquely identifies this segment (a fresh copy on every call). */
	  public byte[] getId() {
	    return id.clone();
	  }

	  // Files written for this segment; null until setFiles(Collection) is called.
	  private Set<String> setFiles;

	  /**
	   * Sets the files written for this segment, replacing any previous set.
	   * Every name is validated and renamed for this segment before the
	   * previous set is replaced, so a rejected name leaves the earlier
	   * files untouched.
	   *
	   * @throws IllegalArgumentException if a name is not a valid codec file name
	   */
	  public void setFiles(Collection<String> files) {
	    checkFileNames(files);
	    Set<String> renamed = new HashSet<>();
	    for (String f : files) {
	      renamed.add(namedForThisSegment(f));
	    }
	    setFiles = renamed;
	  }

	  /**
	   * Add these files to the set of files written for this segment.
	   * {@link #setFiles(Collection)} must have been called before; otherwise
	   * adding a file fails with a NullPointerException.
	   *
	   * @throws IllegalArgumentException if a name is not a valid codec file name
	   */
	  public void addFiles(Collection<String> files) {
	    checkFileNames(files);
	    for (String f : files) {
	      setFiles.add(namedForThisSegment(f));
	    }
	  }

	  /**
	   * Add this file to the set of files written for this segment.
	   * {@link #setFiles(Collection)} must have been called before; otherwise
	   * this fails with a NullPointerException.
	   *
	   * @throws IllegalArgumentException if the name is not a valid codec file name
	   */
	  public void addFile(String file) {
	    checkFileNames(Collections.singleton(file));
	    setFiles.add(namedForThisSegment(file));
	  }

	  // Rejects any name that does not match IndexFileNames.CODEC_FILE_PATTERN or
	  // that ends (case-insensitively) with ".tmp".
	  private void checkFileNames(Collection<String> files) {
	    // One matcher is reused for all names.
	    Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher("");
	    for (String file : files) {
	      m.reset(file);
	      if (!m.matches()) {
	        throw new IllegalArgumentException("invalid codec filename '" + file + "', must match: " + IndexFileNames.CODEC_FILE_PATTERN.pattern());
	      }
	      if (file.toLowerCase(Locale.ROOT).endsWith(".tmp")) {
	        throw new IllegalArgumentException("invalid codec filename '" + file + "', cannot end with .tmp extension");
	      }
	    }
	  }

	  /**
	   * strips any segment name from the file, naming it with this segment
	   * this is because "segment names" can change, e.g. by addIndexes(Dir)
	   */
	  String namedForThisSegment(String file) {
	    return name + IndexFileNames.stripSegmentName(file);
	  }

	  /**
	   * Get a codec attribute value, or null if it does not exist
	   */
	  public String getAttribute(String key) {
	    return attributes.get(key);
	  }

	  /**
	   * Puts a codec attribute value.
	   * <p>
	   * This is a key-value mapping for the field that the codec can use to store
	   * additional metadata, and will be available to the codec when reading the
	   * segment via {@link #getAttribute(String)}
	   * <p>
	   * If a value already exists for the field, it will be replaced with the new
	   * value.
	   * This method makes a copy on write for every attribute change.
	   *
	   * @return the previous value for {@code key}, or null if there was none
	   */
	  public String putAttribute(String key, String value) {
	    Map<String, String> newMap = new HashMap<>(attributes);
	    String oldValue = newMap.put(key, value);
	    // we make a full copy of this to prevent concurrent modifications to this in the toString method
	    // this method is only called when a segment is written but the SegmentInfo might be exposed
	    // in running merges which can cause ConcurrentModificationExceptions if we modify / share
	    // the same instance. Technically that's an unsafe publication but IW design would require
	    // significant changes to prevent this. On the other hand, since we expose the map in getAttributes()
	    // it's a good design to make it unmodifiable anyway.
	    attributes = Collections.unmodifiableMap(newMap);
	    return oldValue;
	  }

	  /**
	   * Returns the internal codec attributes map.
	   * @return internal codec attributes map (unmodifiable).
	   */
	  public Map<String,String> getAttributes() {
	    return attributes;
	  }

	  /** Return the sort order of this segment, or null if the index has no sort. */
	  public Sort getIndexSort() {
	    return indexSort;
	  }
	}