| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.solr.uninverting; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Map; |
| import java.util.function.Function; |
| |
| import org.apache.lucene.document.BinaryDocValuesField; |
| import org.apache.lucene.document.NumericDocValuesField; |
| import org.apache.lucene.document.SortedDocValuesField; |
| import org.apache.lucene.document.SortedSetDocValuesField; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.index.BinaryDocValues; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.DocValuesType; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.FilterDirectoryReader; |
| import org.apache.lucene.index.FilterLeafReader; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.LeafReader; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.util.RamUsageEstimator; |
| import org.apache.solr.uninverting.FieldCache.CacheEntry; |
| |
| /** |
| * A FilterReader that exposes <i>indexed</i> values as if they also had |
| * docvalues. |
| * <p> |
| * This is accomplished by "inverting the inverted index" or "uninversion". |
| * <p> |
| * The uninversion process happens lazily: upon the first request for the |
| * field's docvalues (e.g. via {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)} |
| * or similar), it will create the docvalues on-the-fly if needed and cache it, |
| * based on the core cache key of the wrapped LeafReader. |
| */ |
| public class UninvertingReader extends FilterLeafReader { |
| |
/**
 * Specifies the type of uninversion to apply for the field.
 * <p>
 * The enum is nested and therefore implicitly static; no explicit modifier is needed.
 */
public enum Type {
  /**
   * Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.IntPoint})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link NumericDocValuesField}.
   */
  INTEGER_POINT,
  /**
   * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LongPoint})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link NumericDocValuesField}.
   */
  LONG_POINT,
  /**
   * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.FloatPoint})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link NumericDocValuesField}.
   */
  FLOAT_POINT,
  /**
   * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.DoublePoint})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link NumericDocValuesField}.
   */
  DOUBLE_POINT,
  /**
   * Single-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link NumericDocValuesField}.
   * @deprecated Index with points and use {@link #INTEGER_POINT} instead.
   */
  @Deprecated
  LEGACY_INTEGER,
  /**
   * Single-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link NumericDocValuesField}.
   * @deprecated Index with points and use {@link #LONG_POINT} instead.
   */
  @Deprecated
  LEGACY_LONG,
  /**
   * Single-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link NumericDocValuesField}.
   * @deprecated Index with points and use {@link #FLOAT_POINT} instead.
   */
  @Deprecated
  LEGACY_FLOAT,
  /**
   * Single-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link NumericDocValuesField}.
   * @deprecated Index with points and use {@link #DOUBLE_POINT} instead.
   */
  @Deprecated
  LEGACY_DOUBLE,
  /**
   * Single-valued Binary, (e.g. indexed with {@link StringField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link BinaryDocValuesField}.
   */
  BINARY,
  /**
   * Single-valued Binary, (e.g. indexed with {@link StringField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link SortedDocValuesField}.
   */
  SORTED,
  /**
   * Multi-valued Binary, (e.g. indexed with {@link StringField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link SortedSetDocValuesField}.
   */
  SORTED_SET_BINARY,
  /**
   * Multi-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link SortedSetDocValuesField}.
   */
  SORTED_SET_INTEGER,
  /**
   * Multi-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link SortedSetDocValuesField}.
   */
  SORTED_SET_FLOAT,
  /**
   * Multi-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link SortedSetDocValuesField}.
   */
  SORTED_SET_LONG,
  /**
   * Multi-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField})
   * <p>
   * Fields with this type act as if they were indexed with
   * {@link SortedSetDocValuesField}.
   */
  SORTED_SET_DOUBLE

}
| |
| /** @see #wrap(DirectoryReader, Function) */ |
| public static DirectoryReader wrap(DirectoryReader reader, Map<String, Type> mapping) throws IOException { |
| return wrap(reader, mapping::get); |
| } |
| |
| /** |
| * Wraps a provided {@link DirectoryReader}. Note that for convenience, the returned reader |
| * can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)}) |
| * and so on. |
| * |
| * @param in input directory reader |
| * @param mapper function to map a field name to an uninversion type. A Null result means to not uninvert. |
| * @return a wrapped directory reader |
| */ |
| public static DirectoryReader wrap(DirectoryReader in, Function<String, Type> mapper) throws IOException { |
| return new UninvertingDirectoryReader(in, mapper); |
| } |
| |
| static class UninvertingDirectoryReader extends FilterDirectoryReader { |
| final Function<String, Type> mapper; |
| |
| public UninvertingDirectoryReader(DirectoryReader in, final Function<String, Type> mapper) throws IOException { |
| super(in, new FilterDirectoryReader.SubReaderWrapper() { |
| @Override |
| public LeafReader wrap(LeafReader reader) { |
| return UninvertingReader.wrap(reader, mapper); |
| } |
| }); |
| this.mapper = mapper; |
| } |
| |
| @Override |
| protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException { |
| return new UninvertingDirectoryReader(in, mapper); |
| } |
| |
| // NOTE: delegating the cache helpers is wrong since this wrapper alters the |
| // content of the reader, it is only fine to do that because Solr ALWAYS |
| // consumes index readers through this wrapper |
| |
| @Override |
| public CacheHelper getReaderCacheHelper() { |
| return in.getReaderCacheHelper(); |
| } |
| } |
| |
| /** |
| * Create a new UninvertingReader with the specified mapping, wrapped around the input. It may be deemed that there |
| * is no mapping to do, in which case the input is returned. |
| * <p> |
| * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)} instead. |
| * |
| * @lucene.internal |
| */ |
| public static LeafReader wrap(LeafReader in, Function<String, Type> mapping) { |
| boolean wrap = false; |
| |
| // Calculate a new FieldInfos that has DocValuesType where we didn't before |
| ArrayList<FieldInfo> newFieldInfos = new ArrayList<>(in.getFieldInfos().size()); |
| for (FieldInfo fi : in.getFieldInfos()) { |
| DocValuesType type = fi.getDocValuesType(); |
| // fields which currently don't have docValues, but are uninvertable (indexed or points data present) |
| if (type == DocValuesType.NONE && |
| (fi.getIndexOptions() != IndexOptions.NONE || (fi.getPointNumBytes() > 0 && fi.getPointDataDimensionCount() == 1))) { |
| Type t = mapping.apply(fi.name); // could definitely return null, thus still can't uninvert it |
| if (t != null) { |
| if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) { |
| // type uses points |
| if (fi.getPointDataDimensionCount() == 0) { |
| continue; |
| } |
| } else { |
| // type uses inverted index |
| if (fi.getIndexOptions() == IndexOptions.NONE) { |
| continue; |
| } |
| } |
| switch(t) { |
| case INTEGER_POINT: |
| case LONG_POINT: |
| case FLOAT_POINT: |
| case DOUBLE_POINT: |
| case LEGACY_INTEGER: |
| case LEGACY_LONG: |
| case LEGACY_FLOAT: |
| case LEGACY_DOUBLE: |
| type = DocValuesType.NUMERIC; |
| break; |
| case BINARY: |
| type = DocValuesType.BINARY; |
| break; |
| case SORTED: |
| type = DocValuesType.SORTED; |
| break; |
| case SORTED_SET_BINARY: |
| case SORTED_SET_INTEGER: |
| case SORTED_SET_FLOAT: |
| case SORTED_SET_LONG: |
| case SORTED_SET_DOUBLE: |
| type = DocValuesType.SORTED_SET; |
| break; |
| default: |
| throw new AssertionError(); |
| } |
| } |
| } |
| if (type != fi.getDocValuesType()) { // we changed it |
| wrap = true; |
| newFieldInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(), |
| fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(), |
| fi.getPointDataDimensionCount(), fi.getPointIndexDimensionCount(), fi.getPointNumBytes(), fi.isSoftDeletesField())); |
| } else { |
| newFieldInfos.add(fi); |
| } |
| } |
| if (!wrap) { |
| return in; |
| } else { |
| FieldInfos fieldInfos = new FieldInfos(newFieldInfos.toArray(new FieldInfo[newFieldInfos.size()])); |
| return new UninvertingReader(in, mapping, fieldInfos); |
| } |
| } |
| |
/** Maps a field name to its uninversion type. */
final Function<String, Type> mapping;

/** Field infos this reader reports from {@code getFieldInfos()}. */
final FieldInfos fieldInfos;

/**
 * Creates the wrapper; instances are only built by {@code wrap(LeafReader, Function)}.
 *
 * @param in leaf reader being wrapped
 * @param mapping function mapping a field name to its uninversion type
 * @param fieldInfos field infos to report for this reader
 */
private UninvertingReader(LeafReader in, Function<String, Type> mapping, FieldInfos fieldInfos) {
  super(in);
  this.fieldInfos = fieldInfos;
  this.mapping = mapping;
}
| |
| @Override |
| public FieldInfos getFieldInfos() { |
| return fieldInfos; |
| } |
| |
| @Override |
| public NumericDocValues getNumericDocValues(String field) throws IOException { |
| NumericDocValues values = super.getNumericDocValues(field); |
| if (values != null) { |
| return values; |
| } |
| Type v = getType(field); |
| if (v != null) { |
| switch (v) { |
| case INTEGER_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.INT_POINT_PARSER); |
| case FLOAT_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.FLOAT_POINT_PARSER); |
| case LONG_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LONG_POINT_PARSER); |
| case DOUBLE_POINT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.DOUBLE_POINT_PARSER); |
| case LEGACY_INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_INT_PARSER); |
| case LEGACY_FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_FLOAT_PARSER); |
| case LEGACY_LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_LONG_PARSER); |
| case LEGACY_DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_DOUBLE_PARSER); |
| case BINARY: |
| case SORTED: |
| case SORTED_SET_BINARY: |
| case SORTED_SET_DOUBLE: |
| case SORTED_SET_FLOAT: |
| case SORTED_SET_INTEGER: |
| case SORTED_SET_LONG: |
| break; |
| } |
| } |
| return null; |
| } |
| |
| @Override |
| public BinaryDocValues getBinaryDocValues(String field) throws IOException { |
| BinaryDocValues values = in.getBinaryDocValues(field); |
| if (values != null) { |
| return values; |
| } |
| Type v = getType(field); |
| if (v == Type.BINARY) { |
| return FieldCache.DEFAULT.getTerms(in, field); |
| } else { |
| return null; |
| } |
| } |
| |
| @Override |
| public SortedDocValues getSortedDocValues(String field) throws IOException { |
| SortedDocValues values = in.getSortedDocValues(field); |
| if (values != null) { |
| return values; |
| } |
| Type v = getType(field); |
| if (v == Type.SORTED) { |
| return FieldCache.DEFAULT.getTermsIndex(in, field); |
| } else { |
| return null; |
| } |
| } |
| |
| @Override |
| public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { |
| SortedSetDocValues values = in.getSortedSetDocValues(field); |
| if (values != null) { |
| return values; |
| } |
| Type v = getType(field); |
| if (v != null) { |
| switch (v) { |
| case SORTED_SET_INTEGER: |
| case SORTED_SET_FLOAT: |
| return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX); |
| case SORTED_SET_LONG: |
| case SORTED_SET_DOUBLE: |
| return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX); |
| case SORTED_SET_BINARY: |
| return FieldCache.DEFAULT.getDocTermOrds(in, field, null); |
| case BINARY: |
| case LEGACY_DOUBLE: |
| case LEGACY_FLOAT: |
| case LEGACY_INTEGER: |
| case LEGACY_LONG: |
| case DOUBLE_POINT: |
| case FLOAT_POINT: |
| case INTEGER_POINT: |
| case LONG_POINT: |
| case SORTED: |
| break; |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Returns the field's uninversion type, or null |
| * if the field doesn't exist or doesn't have a mapping. |
| */ |
| private Type getType(String field) { |
| return mapping.apply(field); |
| } |
| |
| // NOTE: delegating the cache helpers is wrong since this wrapper alters the |
| // content of the reader, it is only fine to do that because Solr ALWAYS |
| // consumes index readers through this wrapper |
| |
| @Override |
| public CacheHelper getCoreCacheHelper() { |
| return in.getCoreCacheHelper(); |
| } |
| |
| @Override |
| public CacheHelper getReaderCacheHelper() { |
| return in.getReaderCacheHelper(); |
| } |
| |
| @Override |
| public String toString() { |
| return "Uninverting(" + in.toString() + ")"; |
| } |
| |
| /** |
| * Return information about the backing cache |
| * @lucene.internal |
| */ |
| public static FieldCacheStats getUninvertedStats() { |
| CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries(); |
| long totalBytesUsed = 0; |
| String[] info = new String[entries.length]; |
| for (int i = 0; i < entries.length; i++) { |
| info[i] = entries[i].toString(); |
| totalBytesUsed += entries[i].getValue().ramBytesUsed(); |
| } |
| String totalSize = RamUsageEstimator.humanReadableUnits(totalBytesUsed); |
| return new FieldCacheStats(totalSize, info); |
| } |
| |
| public static int getUninvertedStatsSize() { |
| return FieldCache.DEFAULT.getCacheEntries().length; |
| } |
| |
| /** |
| * Return information about the backing cache |
| * @lucene.internal |
| */ |
| public static class FieldCacheStats { |
| public String totalSize; |
| public String[] info; |
| |
| public FieldCacheStats(String totalSize, String[] info) { |
| this.totalSize = totalSize; |
| this.info = info; |
| } |
| |
| } |
| } |