| package org.apache.lucene.facet.taxonomy; |
| |
| import java.io.Closeable; |
| import java.io.IOException; |
| import java.util.Map; |
| import java.util.concurrent.atomic.AtomicInteger; |
| |
| import org.apache.lucene.store.AlreadyClosedException; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /** |
| * TaxonomyReader is the read-only interface with which the faceted-search |
| * library uses the taxonomy during search time. |
| * <P> |
| * A TaxonomyReader holds a list of categories. Each category has a serial |
| * number which we call an "ordinal", and a hierarchical "path" name: |
| * <UL> |
| * <LI> |
| * The ordinal is an integer that starts at 0 for the first category (which is |
| * always the root category), and grows contiguously as more categories are |
| * added; Note that once a category is added, it can never be deleted. |
| * <LI> |
| * The path is a CategoryPath object specifying the category's position in the |
| * hierarchy. |
| * </UL> |
| * <B>Notes about concurrent access to the taxonomy:</B> |
| * <P> |
| * An implementation must allow multiple readers to be active concurrently |
| * with a single writer. Readers follow so-called "point in time" semantics, |
| * i.e., a TaxonomyReader object will only see taxonomy entries which were |
| * available at the time it was created. What the writer writes is only |
| * available to (new) readers after the writer's commit() is called. |
| * <P> |
| * In faceted search, two separate indices are used: the main Lucene index, |
| * and the taxonomy. Because the main index refers to the categories listed |
| * in the taxonomy, it is important to open the taxonomy *after* opening the |
| * main index, and it is also necessary to reopen() the taxonomy after |
| * reopen()ing the main index. |
| * <P> |
| * This order is important, otherwise it would be possible for the main index |
| * to refer to a category which is not yet visible in the old snapshot of |
| * the taxonomy. Note that it is indeed fine for the the taxonomy to be opened |
| * after the main index - even a long time after. The reason is that once |
| * a category is added to the taxonomy, it can never be changed or deleted, |
| * so there is no danger that a "too new" taxonomy not being consistent with |
| * an older index. |
| * |
| * @lucene.experimental |
| */ |
| public abstract class TaxonomyReader implements Closeable { |
| |
| /** An iterator over a category's children. */ |
| public static class ChildrenIterator { |
| |
| private final int[] siblings; |
| private int child; |
| |
| ChildrenIterator(int child, int[] siblings) { |
| this.siblings = siblings; |
| this.child = child; |
| } |
| |
| /** |
| * Return the next child ordinal, or {@link TaxonomyReader#INVALID_ORDINAL} |
| * if no more children. |
| */ |
| public int next() { |
| int res = child; |
| if (child != TaxonomyReader.INVALID_ORDINAL) { |
| child = siblings[child]; |
| } |
| return res; |
| } |
| |
| } |
| |
| /** Sole constructor. */ |
| public TaxonomyReader() { |
| } |
| |
| /** |
| * The root category (the category with the empty path) always has the ordinal |
| * 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(FacetLabel)} |
| * of an empty path will always return {@code ROOT_ORDINAL}, and |
| * {@link #getPath(int)} with {@code ROOT_ORDINAL} will return the empty path. |
| */ |
| public final static int ROOT_ORDINAL = 0; |
| |
| /** |
| * Ordinals are always non-negative, so a negative ordinal can be used to |
| * signify an error. Methods here return INVALID_ORDINAL (-1) in this case. |
| */ |
| public final static int INVALID_ORDINAL = -1; |
| |
| /** |
| * If the taxonomy has changed since the provided reader was opened, open and |
| * return a new {@link TaxonomyReader}; else, return {@code null}. The new |
| * reader, if not {@code null}, will be the same type of reader as the one |
| * given to this method. |
| * |
| * <p> |
| * This method is typically far less costly than opening a fully new |
| * {@link TaxonomyReader} as it shares resources with the provided |
| * {@link TaxonomyReader}, when possible. |
| */ |
| public static <T extends TaxonomyReader> T openIfChanged(T oldTaxoReader) throws IOException { |
| @SuppressWarnings("unchecked") |
| final T newTaxoReader = (T) oldTaxoReader.doOpenIfChanged(); |
| assert newTaxoReader != oldTaxoReader; |
| return newTaxoReader; |
| } |
| |
| private volatile boolean closed = false; |
| |
| // set refCount to 1 at start |
| private final AtomicInteger refCount = new AtomicInteger(1); |
| |
| /** |
| * performs the actual task of closing the resources that are used by the |
| * taxonomy reader. |
| */ |
| protected abstract void doClose() throws IOException; |
| |
| /** |
| * Implements the actual opening of a new {@link TaxonomyReader} instance if |
| * the taxonomy has changed. |
| * |
| * @see #openIfChanged(TaxonomyReader) |
| */ |
| protected abstract TaxonomyReader doOpenIfChanged() throws IOException; |
| |
| /** |
| * Throws {@link AlreadyClosedException} if this IndexReader is closed |
| */ |
| protected final void ensureOpen() throws AlreadyClosedException { |
| if (getRefCount() <= 0) { |
| throw new AlreadyClosedException("this TaxonomyReader is closed"); |
| } |
| } |
| |
| @Override |
| public final void close() throws IOException { |
| if (!closed) { |
| synchronized (this) { |
| if (!closed) { |
| decRef(); |
| closed = true; |
| } |
| } |
| } |
| } |
| |
| /** |
| * Expert: decreases the refCount of this TaxonomyReader instance. If the |
| * refCount drops to 0 this taxonomy reader is closed. |
| */ |
| public final void decRef() throws IOException { |
| ensureOpen(); |
| final int rc = refCount.decrementAndGet(); |
| if (rc == 0) { |
| boolean success = false; |
| try { |
| doClose(); |
| closed = true; |
| success = true; |
| } finally { |
| if (!success) { |
| // Put reference back on failure |
| refCount.incrementAndGet(); |
| } |
| } |
| } else if (rc < 0) { |
| throw new IllegalStateException("too many decRef calls: refCount is " + rc + " after decrement"); |
| } |
| } |
| |
| /** |
| * Returns a {@link ParallelTaxonomyArrays} object which can be used to |
| * efficiently traverse the taxonomy tree. |
| */ |
| public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException; |
| |
| /** Returns an iterator over the children of the given ordinal. */ |
| public ChildrenIterator getChildren(final int ordinal) throws IOException { |
| ParallelTaxonomyArrays arrays = getParallelTaxonomyArrays(); |
| int child = ordinal >= 0 ? arrays.children()[ordinal] : INVALID_ORDINAL; |
| return new ChildrenIterator(child, arrays.siblings()); |
| } |
| |
| /** |
| * Retrieve user committed data. |
| * |
| * @see TaxonomyWriter#setCommitData(Map) |
| */ |
| public abstract Map<String, String> getCommitUserData() throws IOException; |
| |
| /** |
| * Returns the ordinal of the category given as a path. The ordinal is the |
| * category's serial number, an integer which starts with 0 and grows as more |
| * categories are added (note that once a category is added, it can never be |
| * deleted). |
| * |
| * @return the category's ordinal or {@link #INVALID_ORDINAL} if the category |
| * wasn't foun. |
| */ |
| public abstract int getOrdinal(FacetLabel categoryPath) throws IOException; |
| |
| /** Returns ordinal for the dim + path. */ |
| public int getOrdinal(String dim, String[] path) throws IOException { |
| String[] fullPath = new String[path.length+1]; |
| fullPath[0] = dim; |
| System.arraycopy(path, 0, fullPath, 1, path.length); |
| return getOrdinal(new FacetLabel(fullPath)); |
| } |
| |
| /** Returns the path name of the category with the given ordinal. */ |
| public abstract FacetLabel getPath(int ordinal) throws IOException; |
| |
| /** Returns the current refCount for this taxonomy reader. */ |
| public final int getRefCount() { |
| return refCount.get(); |
| } |
| |
| /** |
| * Returns the number of categories in the taxonomy. Note that the number of |
| * categories returned is often slightly higher than the number of categories |
| * inserted into the taxonomy; This is because when a category is added to the |
| * taxonomy, its ancestors are also added automatically (including the root, |
| * which always get ordinal 0). |
| */ |
| public abstract int getSize(); |
| |
| /** |
| * Expert: increments the refCount of this TaxonomyReader instance. RefCounts |
| * can be used to determine when a taxonomy reader can be closed safely, i.e. |
| * as soon as there are no more references. Be sure to always call a |
| * corresponding decRef(), in a finally clause; otherwise the reader may never |
| * be closed. |
| */ |
| public final void incRef() { |
| ensureOpen(); |
| refCount.incrementAndGet(); |
| } |
| |
| /** Expert: increments the refCount of this TaxonomyReader |
| * instance only if it has not been closed yet. Returns |
| * true on success. */ |
| public final boolean tryIncRef() { |
| int count; |
| while ((count = refCount.get()) > 0) { |
| if (refCount.compareAndSet(count, count+1)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| } |