// blob: d368e74de2ec0b9e168a427e17cdc285ad8a26ce [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.facet.taxonomy;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.IntsRef;
import java.io.IOException;
import static org.apache.lucene.facet.taxonomy.TaxonomyReader.INVALID_ORDINAL;
import static org.apache.lucene.facet.taxonomy.TaxonomyReader.ROOT_ORDINAL;
/**
 * Utility class to easily retrieve previously indexed facet labels, allowing you to skip also
 * adding stored fields for these values, reducing your index size.
 *
 * @lucene.experimental
 */
public class TaxonomyFacetLabels {

  /** Index field name provided to the constructor */
  private final String indexFieldName;

  /** {@code TaxonomyReader} provided to the constructor */
  private final TaxonomyReader taxoReader;

  /**
   * {@code OrdinalsReader} to decode ordinals previously indexed into the {@code BinaryDocValues}
   * facet field
   */
  private final OrdinalsReader ordsReader;

  /**
   * Sole constructor. Do not close the provided {@link TaxonomyReader} while still using this
   * instance!
   *
   * @param taxoReader taxonomy reader used to resolve ordinals into {@link FacetLabel}s
   * @param indexFieldName name of the index field holding the facet ordinals; it is handed to a
   *     {@code DocValuesOrdinalsReader}
   * @throws IOException when a low-level IO issue occurs
   */
  public TaxonomyFacetLabels(TaxonomyReader taxoReader, String indexFieldName) throws IOException {
    this.taxoReader = taxoReader;
    this.indexFieldName = indexFieldName;
    this.ordsReader = new DocValuesOrdinalsReader(indexFieldName);
  }

  /**
   * Create and return an instance of {@link FacetLabelReader} to retrieve facet labels for
   * multiple documents and (optionally) for a specific dimension. You must create this
   * per-segment, and then step through all hits, in order, for that segment.
   *
   * <p><b>NOTE</b>: The returned {@link FacetLabelReader} keeps per-document iteration state and
   * is not thread-safe, so you must use a new instance for each thread.
   *
   * @param readerContext LeafReaderContext used to access the {@code BinaryDocValues} facet field
   * @return an instance of {@link FacetLabelReader}
   * @throws IOException when a low-level IO issue occurs
   */
  public FacetLabelReader getFacetLabelReader(LeafReaderContext readerContext) throws IOException {
    return new FacetLabelReader(ordsReader, readerContext);
  }

  /**
   * Utility class to retrieve facet labels for multiple documents.
   *
   * <p>Both {@code nextFacetLabel} overloads share the same cursor (current document and position
   * within its decoded ordinals), so calls for the same document continue where the previous call
   * stopped, regardless of which overload was used.
   *
   * @lucene.experimental
   */
  public class FacetLabelReader {
    private final OrdinalsReader.OrdinalsSegmentReader ordinalsSegmentReader;

    /** Ordinals decoded for {@link #currentDocId}; reused across documents. */
    private final IntsRef decodedOrds = new IntsRef();

    /** Document whose ordinals are currently held in {@link #decodedOrds}; -1 before first use. */
    private int currentDocId = -1;

    /** Index into {@code decodedOrds.ints} of the next ordinal to hand out. */
    private int currentPos = -1;

    // Lazily set when nextFacetLabel(int docId, String facetDimension) is first called
    private int[] parents;

    /**
     * Sole constructor.
     *
     * @param ordsReader reader from which the per-segment ordinals reader is obtained
     * @param readerContext segment whose documents will be passed to {@code nextFacetLabel}
     * @throws IOException when a low-level IO issue occurs
     */
    public FacetLabelReader(OrdinalsReader ordsReader, LeafReaderContext readerContext)
        throws IOException {
      ordinalsSegmentReader = ordsReader.getReader(readerContext);
    }

    /**
     * Retrieves the next {@link FacetLabel} for the specified {@code docId}, or {@code null} if
     * there are no more. This method has state: if the provided {@code docId} is the same as the
     * previous invocation, it returns the next {@link FacetLabel} for that document. Otherwise, it
     * advances to the new {@code docId} and provides the first {@link FacetLabel} for that
     * document, or {@code null} if that document has no indexed facets. The {@code docId}s must be
     * supplied in non-decreasing order.
     *
     * <p><b>NOTE</b>: The returned FacetLabels may not be in the same order in which they were
     * indexed
     *
     * @param docId input docId, non-negative and provided in non-decreasing order
     * @return the first or next {@link FacetLabel}, or {@code null} if there are no more
     * @throws IOException when a low-level IO issue occurs
     * @throws IllegalArgumentException if docId is negative, or is less than the docId supplied in
     *     an earlier invocation
     */
    public FacetLabel nextFacetLabel(int docId) throws IOException {
      advanceTo(docId);

      final int endPos = decodedOrds.offset + decodedOrds.length;
      assert currentPos <= endPos;

      if (currentPos == endPos) {
        // no more FacetLabels
        return null;
      }

      final int ord = decodedOrds.ints[currentPos++];
      return taxoReader.getPath(ord);
    }

    /**
     * Retrieves the next {@link FacetLabel} for the specified {@code docId} under the requested
     * {@code facetDimension}, or {@code null} if there are no more. This method has state: if the
     * provided {@code docId} is the same as the previous invocation, it returns the next matching
     * {@link FacetLabel} for that document. Otherwise, it advances to the new {@code docId} and
     * provides the first matching {@link FacetLabel} for that document, or {@code null} if that
     * document has no indexed facets under the dimension. The {@code docId}s must be supplied in
     * non-decreasing order.
     *
     * <p><b>NOTE</b>: On first use this method obtains the {@code int[] parents} array from the
     * taxonomy reader's parallel taxonomy arrays. The returned FacetLabels may not be in the same
     * order in which they were indexed.
     *
     * @param docId input docId, non-negative and provided in non-decreasing order
     * @param facetDimension dimension whose descendants should be returned; must not be null and
     *     must exist in the taxonomy
     * @return the first or next {@link FacetLabel}, or {@code null} if there are no more
     * @throws IOException if {@link TaxonomyReader} has problems getting path for an ordinal
     * @throws IllegalArgumentException if facetDimension is null or has no ordinal in the
     *     taxonomy, or if docId is negative or less than the docId supplied in an earlier
     *     invocation
     */
    public FacetLabel nextFacetLabel(int docId, String facetDimension) throws IOException {
      if (facetDimension == null) {
        throw new IllegalArgumentException("Input facet dimension cannot be null");
      }

      final int parentOrd = taxoReader.getOrdinal(new FacetLabel(facetDimension));
      if (parentOrd == INVALID_ORDINAL) {
        throw new IllegalArgumentException(
            "Category ordinal not found for facet dimension: " + facetDimension);
      }

      advanceTo(docId);

      if (parents == null) {
        parents = taxoReader.getParallelTaxonomyArrays().parents();
      }

      final int endPos = decodedOrds.offset + decodedOrds.length;
      assert currentPos <= endPos;

      // Skip ordinals that do not live under the requested dimension; the cursor is always
      // advanced so the next call resumes after the ordinal examined here.
      while (currentPos < endPos) {
        final int ord = decodedOrds.ints[currentPos++];
        if (isDescendant(ord, parentOrd)) {
          return taxoReader.getPath(ord);
        }
      }
      return null;
    }

    /**
     * Positions the shared cursor on {@code docId}. A repeated docId leaves the cursor untouched;
     * a larger docId decodes that document's ordinals and resets the cursor to their start.
     *
     * @throws IllegalArgumentException if docId is negative or smaller than the current document
     */
    private void advanceTo(int docId) throws IOException {
      if (docId < 0) {
        // Without this guard, -1 would match the initial currentDocId sentinel, nothing would be
        // decoded, and the callers would index decodedOrds.ints at position -1.
        throw new IllegalArgumentException("docId must be non-negative: docId=" + docId);
      }
      if (docId == currentDocId) {
        return;
      }
      if (docId < currentDocId) {
        throw new IllegalArgumentException(
            "docs out of order: previous docId=" + currentDocId + " current docId=" + docId);
      }
      ordinalsSegmentReader.get(docId, decodedOrds);
      currentDocId = docId;
      currentPos = decodedOrds.offset;
    }

    /**
     * Walks up the {@code parents} array from {@code ord} and reports whether {@code ancestorOrd}
     * is found among its ancestors. The walk stops once {@code ord} becomes {@code ROOT_ORDINAL}
     * or {@code INVALID_ORDINAL}. An ordinal is not considered a descendant of itself.
     */
    private boolean isDescendant(int ord, int ancestorOrd) {
      while (ord != INVALID_ORDINAL && ord != ROOT_ORDINAL) {
        final int parent = parents[ord];
        if (parent == ancestorOrd) {
          return true;
        }
        ord = parent;
      }
      return false;
    }
  }
}