blob: e4edd4c85912ffbd16ec0c2b69a18f722b2bfed8 [file] [log] [blame]
using System;
using System.Collections.Generic;
namespace Lucene.Net.Facet.Taxonomy
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using ITwoPhaseCommit = Lucene.Net.Index.ITwoPhaseCommit;
/// <summary>
/// <see cref="ITaxonomyWriter"/> is the interface which the faceted-search library uses
/// to dynamically build the taxonomy at indexing time.
/// <para>
/// Notes about concurrent access to the taxonomy:
/// </para>
/// <para>
/// An implementation must allow multiple readers and a single writer to be
/// active concurrently. Readers follow so-called "point in time" semantics,
/// i.e., a reader object will only see taxonomy entries which were available
/// at the time it was created. What the writer writes is only available to
/// (new) readers after the writer's <see cref="Index.IndexWriter.Commit"/> is called.
/// </para>
/// <para>
/// Faceted search keeps two indices - namely Lucene's main index, and this
/// taxonomy index. When one or more readers are active concurrently with the
/// writer, care must be taken to avoid an inconsistency between the state of
/// these two indices: When writing to the indices, the taxonomy must always
/// be committed to disk *before* the main index, because the main index
/// refers to categories listed in the taxonomy.
/// Such control can best be achieved by turning off the main index's
/// "autocommit" feature, and explicitly calling <see cref="Index.IndexWriter.Commit"/> for both indices
/// (first for the taxonomy, then for the main index).
/// In old versions of Lucene (2.2 or earlier), when autocommit could not be
/// turned off, a more complicated solution needs to be used. E.g., use
/// some sort of (possibly inter-process) locking to ensure that a reader
/// is being opened only right after both indices have been flushed (and
/// before anything else is written to them).
/// </para>
///
/// @lucene.experimental
/// </summary>
public interface ITaxonomyWriter : IDisposable, ITwoPhaseCommit
{
/// <summary>
/// <see cref="AddCategory"/> adds a category with a given path name to the taxonomy,
/// and returns its ordinal. If the category was already present in
/// the taxonomy, its existing ordinal is returned.
/// <para/>
/// Before adding a category, <see cref="AddCategory"/> makes sure that all its
/// ancestor categories exist in the taxonomy as well. As a result, the
/// ordinal of a category is guaranteed to be smaller than the ordinal of
/// any of its descendants.
/// </summary>
/// <param name="categoryPath">The path name of the category to add.</param>
/// <returns>
/// The ordinal of the category: the newly assigned ordinal, or the existing
/// one if the category was already present in the taxonomy.
/// </returns>
int AddCategory(FacetLabel categoryPath);
/// <summary>
/// <see cref="GetParent"/> returns the ordinal of the parent category of the category
/// with the given ordinal.
/// <para>
/// When a category is specified as a path name, finding the path of its
/// parent is as trivial as dropping the last component of the path.
/// <see cref="GetParent"/> is functionally equivalent to calling <see cref="TaxonomyReader.GetPath"/> on the
/// given ordinal, dropping the last component of the path, and then calling
/// <see cref="TaxonomyReader.GetOrdinal(FacetLabel)"/> to get an ordinal back.
/// </para>
/// <para>
/// If the given ordinal is the <see cref="TaxonomyReader.ROOT_ORDINAL"/>, an
/// <see cref="TaxonomyReader.INVALID_ORDINAL"/> is returned.
/// If the given ordinal is a top-level category, the
/// <see cref="TaxonomyReader.ROOT_ORDINAL"/> is returned.
/// If an invalid ordinal is given (negative or beyond the last available
/// ordinal), an <see cref="IndexOutOfRangeException"/> is thrown. However, it is
/// expected that <see cref="GetParent"/> will only be called for ordinals which are
/// already known to be in the taxonomy.
/// </para>
/// <para>
/// TODO (Facet): instead of a <see cref="GetParent(int)">GetParent(ordinal)</see> method, consider having a
/// GetCategory(categorypath, prefixlen) which is similar to <see cref="AddCategory"/>
/// except it doesn't add new categories. This method can be used to get
/// the ordinals of all prefixes of the given category, and it can use
/// exactly the same code and cache used by <see cref="AddCategory"/> so it means less code.
/// </para>
/// </summary>
/// <param name="ordinal">The ordinal of the category whose parent is requested.</param>
/// <returns>
/// The ordinal of the parent category; <see cref="TaxonomyReader.ROOT_ORDINAL"/> for a
/// top-level category, or <see cref="TaxonomyReader.INVALID_ORDINAL"/> when
/// <paramref name="ordinal"/> is the root itself.
/// </returns>
int GetParent(int ordinal);
/// <summary>
/// <see cref="Count"/> returns the number of categories in the taxonomy.
/// <para/>
/// Because categories are numbered consecutively starting with 0, it
/// means the taxonomy contains ordinals 0 through <see cref="Count"/>-1.
/// <para/>
/// Note that the number returned by <see cref="Count"/> is often slightly higher
/// than the number of categories inserted into the taxonomy; this is
/// because when a category is added to the taxonomy, its ancestors
/// are also added automatically (including the root, which always gets
/// ordinal 0).
/// </summary>
/// <value>The number of categories in the taxonomy, automatically added ancestors included.</value>
int Count { get; }
/// <summary>
/// Sets the commit user data map. This method is considered a transaction and
/// will be committed (<see cref="Index.IndexWriter.Commit"/>) even if no other changes were made to
/// the writer instance.
/// <para>
/// <b>NOTE:</b> the map is cloned internally, therefore altering the map's
/// contents after calling this method has no effect.
/// </para>
/// </summary>
/// <param name="commitUserData">The commit user data map to record with the next commit.</param>
void SetCommitData(IDictionary<string, string> commitUserData);
/// <summary>
/// Returns the commit user data map that was set by
/// <see cref="SetCommitData(IDictionary{string, string})"/>.
/// </summary>
/// <value>The commit user data map that was previously set.</value>
IDictionary<string, string> CommitData { get; }
}
}