/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using Directory = Lucene.Net.Store.Directory;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using IndexInput = Lucene.Net.Store.IndexInput;
namespace Lucene.Net.Index
{
public sealed class SegmentInfo : System.ICloneable
{
internal const int NO = - 1; // e.g. no norms; no deletes;
internal const int YES = 1; // e.g. have norms; have deletes;
internal const int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions
internal const int WITHOUT_GEN = 0; // a file name that has no GEN in it.
public System.String name; // unique name in dir
public int docCount; // number of docs in seg
public Directory dir; // where segment resides
private bool preLockless; // true if this is a segments file written before
// lock-less commits (2.1)
private long delGen; // current generation of del file; NO if there
// are no deletes; CHECK_DIR if it's a pre-2.1 segment
// (and we must check filesystem); YES or higher if
// there are deletes at generation N
private long[] normGen; // current generation of each field's norm file.
// If this array is null, for lockLess this means no
// separate norms. For preLockLess this means we must
// check filesystem. If this array is not null, its
// values mean: NO says this field has no separate
// norms; CHECK_DIR says it is a preLockLess segment and
// filesystem must be checked; >= YES says this field
// has separate norms with the specified generation
private sbyte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's
// pre-2.1 (ie, must check file system to see
// if <name>.cfs and <name>.nrm exist)
private bool hasSingleNormFile; // true if this segment maintains norms in a single file;
// false otherwise
// this is currently false for segments populated by DocumentWriter
// and true for newly created merged segments (both
// compound and non compound).
private System.Collections.IList files; // cached list of files that this segment uses
// in the Directory
internal long sizeInBytes = - 1; // total byte size of all of our files (computed on demand)
private int docStoreOffset; // if this segment shares stored fields & vectors, this
// offset is where in that file this segment's docs begin
private System.String docStoreSegment; // name used to derive fields/vectors file we share with
// other segments
private bool docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx)
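/// <summary> Construct a new SegmentInfo with the given name, document
/// count and directory; compound-file status defaults to CHECK_DIR and
/// no shared doc store is used.
///
/// </summary>
/// <param name="name">unique segment name in the directory
/// </param>
/// <param name="docCount">number of documents in the segment
/// </param>
/// <param name="dir">directory where the segment resides
/// </param>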
public SegmentInfo(System.String name, int docCount, Directory dir)
{
this.name = name;
this.docCount = docCount;
this.dir = dir;
delGen = NO;
isCompoundFile = (sbyte) (CHECK_DIR);
preLockless = true;
hasSingleNormFile = false;
docStoreOffset = - 1;
docStoreSegment = name;
docStoreIsCompoundFile = false;
}
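/// <summary> Construct a SegmentInfo written with the lock-less format
/// that does not share a doc store with other segments.
/// </summary>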
public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile) : this(name, docCount, dir, isCompoundFile, hasSingleNormFile, - 1, null, false)
{
}
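/// <summary> Construct a SegmentInfo written with the lock-less format,
/// optionally sharing its doc store (stored fields and term vectors)
/// with other segments starting at docStoreOffset.
/// </summary>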
public SegmentInfo(System.String name, int docCount, Directory dir, bool isCompoundFile, bool hasSingleNormFile, int docStoreOffset, System.String docStoreSegment, bool docStoreIsCompoundFile) : this(name, docCount, dir)
{
this.isCompoundFile = (sbyte) (isCompoundFile ? YES : NO);
this.hasSingleNormFile = hasSingleNormFile;
preLockless = false;
this.docStoreOffset = docStoreOffset;
this.docStoreSegment = docStoreSegment;
this.docStoreIsCompoundFile = docStoreIsCompoundFile;
System.Diagnostics.Debug.Assert(docStoreOffset == - 1 || docStoreSegment != null);
}
/// <summary> Copy everything from src SegmentInfo into our instance.</summary>
internal void Reset(SegmentInfo src)
{
ClearFiles();
name = src.name;
docCount = src.docCount;
dir = src.dir;
preLockless = src.preLockless;
delGen = src.delGen;
docStoreOffset = src.docStoreOffset;
docStoreIsCompoundFile = src.docStoreIsCompoundFile;
if (src.normGen == null)
{
normGen = null;
}
else
{
normGen = new long[src.normGen.Length];
Array.Copy(src.normGen, 0, normGen, 0, src.normGen.Length);
}
isCompoundFile = src.isCompoundFile;
hasSingleNormFile = src.hasSingleNormFile;
}
/// <summary> Construct a new SegmentInfo instance by reading a
/// previously saved SegmentInfo from input.
///
/// </summary>
/// <param name="dir">directory to load from
/// </param>
/// <param name="format">format of the segments info file
/// </param>
/// <param name="input">input handle to read segment info from
/// </param>
internal SegmentInfo(Directory dir, int format, IndexInput input)
{
this.dir = dir;
name = input.ReadString();
docCount = input.ReadInt();
if (format <= SegmentInfos.FORMAT_LOCKLESS)
{
delGen = input.ReadLong();
if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE)
{
docStoreOffset = input.ReadInt();
if (docStoreOffset != - 1)
{
docStoreSegment = input.ReadString();
docStoreIsCompoundFile = (1 == input.ReadByte());
}
else
{
docStoreSegment = name;
docStoreIsCompoundFile = false;
}
}
else
{
docStoreOffset = - 1;
docStoreSegment = name;
docStoreIsCompoundFile = false;
}
if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE)
{
hasSingleNormFile = (1 == input.ReadByte());
}
else
{
hasSingleNormFile = false;
}
int numNormGen = input.ReadInt();
if (numNormGen == NO)
{
normGen = null;
}
else
{
normGen = new long[numNormGen];
for (int j = 0; j < numNormGen; j++)
{
normGen[j] = input.ReadLong();
}
}
isCompoundFile = (sbyte) input.ReadByte();
preLockless = (isCompoundFile == CHECK_DIR);
}
else
{
delGen = CHECK_DIR;
normGen = null;
isCompoundFile = (sbyte) (CHECK_DIR);
preLockless = true;
hasSingleNormFile = false;
docStoreOffset = - 1;
docStoreIsCompoundFile = false;
docStoreSegment = null;
}
}
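/// <summary> Record the number of fields in this segment, allocating the
/// per-field norm generation array if it has not been created yet.
/// </summary>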
internal void SetNumFields(int numFields)
{
if (normGen == null)
{
// normGen is null if we loaded a pre-2.1 segment
// file, or if this segment hasn't had any
// norms set against it yet:
normGen = new long[numFields];
if (preLockless)
{
// Do nothing: leave normGen[k]==CHECK_DIR (==0), so that later we know
// we have to check the filesystem for norm files, because this segment is pre-lockless.
}
else
{
// This is a FORMAT_LOCKLESS segment, which means
// there are no separate norms:
for (int i = 0; i < numFields; i++)
{
normGen[i] = NO;
}
}
}
}
/// <summary>Returns total size in bytes of all files used by
/// this segment.
/// </summary>
internal long SizeInBytes()
{
if (sizeInBytes == - 1)
{
System.Collections.IList files = Files();
int size = files.Count;
sizeInBytes = 0;
for (int i = 0; i < size; i++)
{
System.String fileName = (System.String) files[i];
// We don't count bytes used by a shared doc store
// against this segment:
if (docStoreOffset == - 1 || !IndexFileNames.IsDocStoreFile(fileName))
sizeInBytes += dir.FileLength(fileName);
}
}
return sizeInBytes;
}
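/// <summary> Returns true if this segment has deletions; for pre-lockless
/// segments (delGen == CHECK_DIR) the directory is checked for a .del file.
/// </summary>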
internal bool HasDeletions()
{
// Cases:
//
// delGen == NO: this means this segment was written
// by the LOCKLESS code and for certain does not have
// deletions yet
//
// delGen == CHECK_DIR: this means this segment was written by
// pre-LOCKLESS code which means we must check
// directory to see if .del file exists
//
// delGen >= YES: this means this segment was written by
// the LOCKLESS code and for certain has
// deletions
//
if (delGen == NO)
{
return false;
}
else if (delGen >= YES)
{
return true;
}
else
{
return dir.FileExists(GetDelFileName());
}
}
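/// <summary> Increment the generation of the deletions file, marking this
/// segment as having deletions, and invalidate the cached file list.
/// </summary>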
internal void AdvanceDelGen()
{
// delGen 0 is reserved for pre-LOCKLESS format
if (delGen == NO)
{
delGen = YES;
}
else
{
delGen++;
}
ClearFiles();
}
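/// <summary> Mark this segment as having no deletions and invalidate the
/// cached file list.</summary>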
internal void ClearDelGen()
{
delGen = NO;
ClearFiles();
}
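/// <summary> Returns a copy of this SegmentInfo; the normGen array is
/// copied, while the cached file list reference is shared.</summary>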
public System.Object Clone()
{
SegmentInfo si = new SegmentInfo(name, docCount, dir);
si.isCompoundFile = isCompoundFile;
si.delGen = delGen;
si.preLockless = preLockless;
si.hasSingleNormFile = hasSingleNormFile;
if (normGen != null)
{
si.normGen = new long[normGen.Length];
normGen.CopyTo(si.normGen, 0);
}
si.docStoreOffset = docStoreOffset;
si.docStoreSegment = docStoreSegment;
si.docStoreIsCompoundFile = docStoreIsCompoundFile;
si.docCount = docCount;
si.dir = dir;
si.files = files;
si.name = name;
si.sizeInBytes = sizeInBytes;
return si;
}
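/// <summary> Get the file name of the deletions (.del) file for this
/// segment, or null if this segment has no deletions file.</summary>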
internal System.String GetDelFileName()
{
if (delGen == NO)
{
// In this case we know there is no deletion filename
// against this segment
return null;
}
else
{
// If delGen is CHECK_DIR, it's the pre-lockless-commit file format
return IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
}
}
/// <summary> Returns true if this field for this segment has saved a separate norms file (_&lt;segment&gt;_N.sX).
///
/// </summary>
/// <param name="fieldNumber">the field index to check
/// </param>
internal bool HasSeparateNorms(int fieldNumber)
{
if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR))
{
// Must fall back to a directory file-exists check:
System.String fileName = name + ".s" + fieldNumber;
return dir.FileExists(fileName);
}
else if (normGen == null || normGen[fieldNumber] == NO)
{
return false;
}
else
{
return true;
}
}
/// <summary> Returns true if any fields in this segment have separate norms.</summary>
internal bool HasSeparateNorms()
{
if (normGen == null)
{
if (!preLockless)
{
// This means we were created w/ LOCKLESS code and no
// norms are written yet:
return false;
}
else
{
// This means this segment was saved with pre-LOCKLESS
// code, so we must fall back to the original
// directory list check:
System.String[] result = dir.List();
if (result == null)
{
throw new System.IO.IOException("cannot read directory " + dir + ": list() returned null");
}
System.String pattern;
pattern = name + ".s";
int patternLength = pattern.Length;
for (int i = 0; i < result.Length; i++)
{
if (result[i].StartsWith(pattern) && System.Char.IsDigit(result[i][patternLength]))
return true;
}
return false;
}
}
else
{
// This means this segment was saved with LOCKLESS
// code so we first check whether any normGen's are >= 1
// (meaning they definitely have separate norms):
for (int i = 0; i < normGen.Length; i++)
{
if (normGen[i] >= YES)
{
return true;
}
}
// Next we look for any == 0. These cases were
// pre-LOCKLESS and must be checked in directory:
for (int i = 0; i < normGen.Length; i++)
{
if (normGen[i] == CHECK_DIR)
{
if (HasSeparateNorms(i))
{
return true;
}
}
}
}
return false;
}
/// <summary> Increment the generation count for the norms file for
/// this field.
///
/// </summary>
/// <param name="fieldIndex">field whose norm file will be rewritten
/// </param>
internal void AdvanceNormGen(int fieldIndex)
{
if (normGen[fieldIndex] == NO)
{
normGen[fieldIndex] = YES;
}
else
{
normGen[fieldIndex]++;
}
ClearFiles();
}
/// <summary> Get the file name for the norms file for this field.
///
/// </summary>
/// <param name="number">field index
/// </param>
internal System.String GetNormFileName(int number)
{
System.String prefix;
long gen;
if (normGen == null)
{
gen = CHECK_DIR;
}
else
{
gen = normGen[number];
}
if (HasSeparateNorms(number))
{
// case 1: separate norm
prefix = ".s";
return IndexFileNames.FileNameFromGeneration(name, prefix + number, gen);
}
if (hasSingleNormFile)
{
// case 2: lockless (or nrm file exists) - single file for all norms
prefix = "." + IndexFileNames.NORMS_EXTENSION;
return IndexFileNames.FileNameFromGeneration(name, prefix, WITHOUT_GEN);
}
// case 3: norm file for each field
prefix = ".f";
return IndexFileNames.FileNameFromGeneration(name, prefix + number, WITHOUT_GEN);
}
/// <summary> Mark whether this segment is stored as a compound file.
///
/// </summary>
/// <param name="isCompoundFile">true if this is a compound file;
/// else, false
/// </param>
internal void SetUseCompoundFile(bool isCompoundFile)
{
if (isCompoundFile)
{
this.isCompoundFile = (sbyte) (YES);
}
else
{
this.isCompoundFile = (sbyte) (NO);
}
ClearFiles();
}
/// <summary> Returns true if this segment is stored as a compound
/// file; else, false.
/// </summary>
internal bool GetUseCompoundFile()
{
if (isCompoundFile == NO)
{
return false;
}
else if (isCompoundFile == YES)
{
return true;
}
else
{
return dir.FileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
}
}
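/// <summary> Returns the offset of this segment's documents in the shared
/// doc store, or -1 if this segment does not share a doc store.</summary>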
internal int GetDocStoreOffset()
{
return docStoreOffset;
}
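/// <summary> Returns true if the shared doc store files are stored in a
/// compound file (*.cfx).</summary>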
internal bool GetDocStoreIsCompoundFile()
{
return docStoreIsCompoundFile;
}
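/// <summary> Set whether the shared doc store files are stored in a
/// compound file (*.cfx); invalidates the cached file list.</summary>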
internal void SetDocStoreIsCompoundFile(bool v)
{
docStoreIsCompoundFile = v;
ClearFiles();
}
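/// <summary> Returns the name of the segment whose doc store (stored
/// fields / term vectors) files this segment uses.</summary>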
internal System.String GetDocStoreSegment()
{
return docStoreSegment;
}
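/// <summary> Set the offset of this segment's documents in the shared doc
/// store; invalidates the cached file list.</summary>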
internal void SetDocStoreOffset(int offset)
{
docStoreOffset = offset;
ClearFiles();
}
/// <summary> Save this segment's info.</summary>
internal void Write(IndexOutput output)
{
output.WriteString(name);
output.WriteInt(docCount);
output.WriteLong(delGen);
output.WriteInt(docStoreOffset);
if (docStoreOffset != - 1)
{
output.WriteString(docStoreSegment);
output.WriteByte((byte) (docStoreIsCompoundFile ? 1 : 0));
}
output.WriteByte((byte) (hasSingleNormFile ? 1 : 0));
if (normGen == null)
{
output.WriteInt(NO);
}
else
{
output.WriteInt(normGen.Length);
for (int j = 0; j < normGen.Length; j++)
{
output.WriteLong(normGen[j]);
}
}
output.WriteByte((byte) isCompoundFile);
}
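/// <summary> Add fileName to files only if it exists in this segment's
/// directory.</summary>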
private void AddIfExists(System.Collections.IList files, System.String fileName)
{
if (dir.FileExists(fileName))
files.Add(fileName);
}
/*
* Return all files referenced by this SegmentInfo. The
* returned List is a locally cached List, so you should not
* modify it.
*/
public System.Collections.IList Files()
{
if (files != null)
{
// Already cached:
return files;
}
files = new System.Collections.ArrayList();
bool useCompoundFile = GetUseCompoundFile();
if (useCompoundFile)
{
files.Add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION);
}
else
{
System.String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS;
for (int i = 0; i < exts.Length; i++)
AddIfExists(files, name + "." + exts[i]);
}
if (docStoreOffset != - 1)
{
// We are sharing doc stores (stored fields, term
// vectors) with other segments
System.Diagnostics.Debug.Assert(docStoreSegment != null);
if (docStoreIsCompoundFile)
{
files.Add(docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION);
}
else
{
System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
for (int i = 0; i < exts.Length; i++)
AddIfExists(files, docStoreSegment + "." + exts[i]);
}
}
else if (!useCompoundFile)
{
// We are not sharing, and, these files were not
// included in the compound file
System.String[] exts = IndexFileNames.STORE_INDEX_EXTENSIONS;
for (int i = 0; i < exts.Length; i++)
AddIfExists(files, name + "." + exts[i]);
}
System.String delFileName = IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen);
if (delFileName != null && (delGen >= YES || dir.FileExists(delFileName)))
{
files.Add(delFileName);
}
// Careful logic for norms files
if (normGen != null)
{
for (int i = 0; i < normGen.Length; i++)
{
long gen = normGen[i];
if (gen >= YES)
{
// Definitely a separate norm file, with generation:
files.Add(IndexFileNames.FileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen));
}
else if (NO == gen)
{
// No separate norms but maybe plain norms
// in the non compound file case:
if (!hasSingleNormFile && !useCompoundFile)
{
System.String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
if (dir.FileExists(fileName))
{
files.Add(fileName);
}
}
}
else if (CHECK_DIR == gen)
{
// Pre-2.1: we have to check file existence
System.String fileName = null;
if (useCompoundFile)
{
fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i;
}
else if (!hasSingleNormFile)
{
fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i;
}
if (fileName != null && dir.FileExists(fileName))
{
files.Add(fileName);
}
}
}
}
else if (preLockless || (!hasSingleNormFile && !useCompoundFile))
{
// Pre-2.1: we have to scan the dir to find all
// matching _X.sN/_X.fN files for our segment:
System.String prefix;
if (useCompoundFile)
prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION;
else
prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION;
int prefixLength = prefix.Length;
System.String[] allFiles = dir.List();
if (allFiles == null)
{
throw new System.IO.IOException("cannot read directory " + dir + ": list() returned null");
}
for (int i = 0; i < allFiles.Length; i++)
{
System.String fileName = allFiles[i];
if (fileName.Length > prefixLength && System.Char.IsDigit(fileName[prefixLength]) && fileName.StartsWith(prefix))
{
files.Add(fileName);
}
}
}
return files;
}
/* Called whenever any change is made that affects which
* files this segment has. */
private void ClearFiles()
{
files = null;
sizeInBytes = - 1;
}
/// <summary>Used for debugging </summary>
public System.String SegString(Directory dir)
{
System.String cfs;
try
{
if (GetUseCompoundFile())
cfs = "c";
else
cfs = "C";
}
catch (System.IO.IOException)
{
cfs = "?";
}
System.String docStore;
if (docStoreOffset != - 1)
docStore = "->" + docStoreSegment;
else
docStore = "";
return name + ":" + cfs + (this.dir == dir ? "" : "x") + docCount + docStore;
}
/// <summary>We consider another SegmentInfo instance equal if it
/// has the same dir and same name.
/// </summary>
public override bool Equals(System.Object obj)
{
// The 'as' cast yields null for null or non-SegmentInfo arguments,
// so Equals returns false instead of throwing:
SegmentInfo other = obj as SegmentInfo;
if (other == null)
{
return false;
}
return other.dir == dir && other.name.Equals(name);
}
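/// <summary> Hash code combines the directory and the segment name,
/// consistent with Equals.</summary>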
public override int GetHashCode()
{
return dir.GetHashCode() + name.GetHashCode();
}
}
}