lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyWriter.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.facet.taxonomy;

 import java.io.Closeable;
 import java.io.IOException;
 import java.util.Map;

 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.TwoPhaseCommit;

 /**
  * Contract through which the faceted-search library grows the taxonomy
  * dynamically while documents are being indexed.
  *
  * <p><b>Concurrent access.</b> An implementation must allow multiple readers
  * and a single writer to be active at the same time. Readers have so-called
  * "point in time" semantics: a reader object sees only the taxonomy entries
  * that were available when it was created. Whatever the writer writes becomes
  * visible to (new) readers only after the writer's {@code commit()} has been
  * called.
  *
  * <p><b>Commit ordering.</b> Faceted search keeps two indices: Lucene's main
  * index and this taxonomy index. When one or more readers are active
  * concurrently with the writer, the two indices must not be allowed to get
  * out of step. Because the main index refers to categories listed in the
  * taxonomy, the taxonomy must always be committed to disk <em>before</em> the
  * main index. The best way to get this control is to turn off the main
  * index's "autocommit" feature and to call {@code commit()} explicitly on
  * both indices (first the taxonomy, then the main index).
  *
  * <p>In old versions of Lucene (2.2 or earlier), where autocommit could not
  * be turned off, a more complicated solution is needed; for example, some
  * sort of (possibly inter-process) locking which ensures that a reader is
  * opened only right after both indices have been flushed (and before
  * anything else is written to them).
  *
  * @lucene.experimental
  */
 public interface TaxonomyWriter extends Closeable, TwoPhaseCommit {

   /**
    * Adds the category with the given path name to the taxonomy and returns
    * its ordinal. A category that is already present is not added again; its
    * existing ordinal is returned instead.
    *
    * <p>All ancestors of the category are made to exist in the taxonomy
    * before the category itself is added. Consequently the ordinal of a
    * category is guaranteed to be smaller than the ordinal of any of its
    * descendants.
    *
    * @param categoryPath path name of the category to add
    * @return the ordinal of the category, whether newly added or pre-existing
    * @throws IOException if the implementation encounters an I/O error
    */
   int addCategory(FacetLabel categoryPath) throws IOException;

   /**
    * Returns the ordinal of the parent of the category that has the given
    * ordinal.
    *
    * <p>For a category expressed as a path name, the parent's path is simply
    * the path without its last component. This method is therefore
    * functionally equivalent to calling getPath() on the given ordinal,
    * dropping the last component of the result, and calling getOrdinal() to
    * turn it back into an ordinal.
    *
    * <p>Special cases:
    * <ul>
    *   <li>for the {@code ROOT_ORDINAL}, an {@code INVALID_ORDINAL} is
    *       returned;
    *   <li>for a top-level category, the {@code ROOT_ORDINAL} is returned;
    *   <li>for an invalid ordinal (negative, or beyond the last available
    *       ordinal), an {@link IndexOutOfBoundsException} is thrown. Callers
    *       are nevertheless expected to pass only ordinals already known to
    *       be in the taxonomy.
    * </ul>
    *
    * <p>TODO (Facet): instead of a getParent(ordinal) method, consider having
    * a getCategory(categorypath, prefixlen) which is similar to addCategory
    * except it doesn't add new categories. Such a method can be used to get
    * the ordinals of all prefixes of the given category, and it can use
    * exactly the same code and cache used by addCategory(), so it means less
    * code.
    *
    * @param ordinal ordinal of the category whose parent is requested
    * @return the ordinal of the parent category
    * @throws IndexOutOfBoundsException if {@code ordinal} is negative or
    *         beyond the last available ordinal
    * @throws IOException if the implementation encounters an I/O error
    */
   int getParent(int ordinal) throws IOException;

   /**
    * Returns the number of categories in the taxonomy.
    *
    * <p>Categories are numbered consecutively starting with 0, so the
    * taxonomy contains the ordinals 0 through getSize()-1.
    *
    * <p>The returned number is often slightly higher than the number of
    * categories that were inserted into the taxonomy: whenever a category is
    * added, its ancestors are added automatically as well (including the
    * root, which always gets ordinal 0).
    *
    * @return the number of categories in the taxonomy
    */
   int getSize();

   /**
    * Sets the commit user data iterable. See
    * {@link IndexWriter#setLiveCommitData}.
    *
    * @param commitUserData the commit user data entries
    */
   void setLiveCommitData(Iterable<Map.Entry<String, String>> commitUserData);

   /**
    * Returns the commit user data iterable that was previously set through
    * {@link #setLiveCommitData(Iterable)}.
    *
    * @return the commit user data iterable
    */
   Iterable<Map.Entry<String, String>> getLiveCommitData();

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.facet.taxonomy;

	import java.io.Closeable;
	import java.io.IOException;
	import java.util.Map;

	import org.apache.lucene.index.IndexWriter;
	import org.apache.lucene.index.TwoPhaseCommit;

	/**
	 * The interface used by the faceted-search library to build the taxonomy
	 * on the fly at indexing time.
	 *
	 * <p>Notes about concurrent access to the taxonomy:
	 *
	 * <p>Implementations are required to support multiple readers and a single
	 * writer being active concurrently. Readers follow so-called
	 * "point in time" semantics, i.e. a reader object only ever sees the
	 * taxonomy entries which were available at the time it was created. Data
	 * written by the writer is exposed to (new) readers only once the writer's
	 * {@code commit()} has been called.
	 *
	 * <p>Faceted search maintains two indices, namely Lucene's main index and
	 * this taxonomy index. While one or more readers are active concurrently
	 * with the writer, care must be taken to avoid an inconsistency between
	 * the state of these two indices: when writing to the indices, the
	 * taxonomy must always be committed to disk before the main index, since
	 * the main index refers to categories listed in the taxonomy. Such control
	 * is best achieved by turning off the main index's "autocommit" feature
	 * and explicitly calling {@code commit()} for both indices (the taxonomy
	 * first, the main index second).
	 *
	 * <p>With old versions of Lucene (2.2 or earlier), when autocommit could
	 * not be turned off, a more complicated solution needs to be used, e.g.
	 * some sort of (possibly inter-process) locking to ensure that a reader is
	 * being opened only right after both indices have been flushed (and before
	 * anything else is written to them).
	 *
	 * @lucene.experimental
	 */
	public interface TaxonomyWriter extends Closeable, TwoPhaseCommit {

	  /**
	   * Adds a category, identified by its path name, to the taxonomy.
	   *
	   * <p>If the category was already present in the taxonomy, nothing is
	   * added and the existing ordinal is returned.
	   *
	   * <p>Before the category is added, this method makes sure that all of
	   * its ancestor categories exist in the taxonomy too. As a result, the
	   * ordinal of a category is guaranteed to be smaller than the ordinal of
	   * any of its descendants.
	   *
	   * @param categoryPath the path name of the category
	   * @return the ordinal assigned to the category
	   * @throws IOException if the implementation encounters an I/O error
	   */
	  int addCategory(FacetLabel categoryPath) throws IOException;

	  /**
	   * Looks up the parent of the category with the given ordinal and returns
	   * the parent's ordinal.
	   *
	   * <p>When a category is specified as a path name, finding the path of
	   * its parent is as trivial as dropping the last component of the path.
	   * Calling this method is functionally equivalent to calling getPath() on
	   * the given ordinal, dropping the last path component, and then calling
	   * getOrdinal() to get an ordinal back.
	   *
	   * <p>Given the {@code ROOT_ORDINAL}, an {@code INVALID_ORDINAL} is
	   * returned. Given a top-level category, the {@code ROOT_ORDINAL} is
	   * returned. Given an invalid ordinal (negative, or beyond the last
	   * available ordinal), an {@link IndexOutOfBoundsException} is thrown;
	   * it is expected, however, that this method is only called for ordinals
	   * which are already known to be in the taxonomy.
	   *
	   * <p>TODO (Facet): instead of a getParent(ordinal) method, consider
	   * having a getCategory(categorypath, prefixlen) which is similar to
	   * addCategory except it doesn't add new categories. This method can be
	   * used to get the ordinals of all prefixes of the given category, and it
	   * can use exactly the same code and cache used by addCategory(), so it
	   * means less code.
	   *
	   * @param ordinal the ordinal of the child category
	   * @return the ordinal of its parent category
	   * @throws IndexOutOfBoundsException if the given ordinal is negative or
	   *         beyond the last available ordinal
	   * @throws IOException if the implementation encounters an I/O error
	   */
	  int getParent(int ordinal) throws IOException;

	  /**
	   * Reports how many categories the taxonomy holds.
	   *
	   * <p>Since categories are numbered consecutively starting with 0, the
	   * taxonomy contains ordinals 0 through getSize()-1.
	   *
	   * <p>Note that this number is often slightly higher than the number of
	   * categories inserted into the taxonomy, because adding a category also
	   * adds its ancestors automatically (including the root, which always
	   * gets ordinal 0).
	   *
	   * @return the number of categories in the taxonomy
	   */
	  int getSize();

	  /**
	   * Sets the commit user data iterable.
	   *
	   * @param commitUserData the commit user data entries
	   * @see IndexWriter#setLiveCommitData
	   */
	  void setLiveCommitData(Iterable<Map.Entry<String, String>> commitUserData);

	  /**
	   * Returns the commit user data iterable that was set on
	   * {@link #setLiveCommitData(Iterable)}.
	   *
	   * @return the commit user data iterable
	   */
	  Iterable<Map.Entry<String, String>> getLiveCommitData();

	}