| package org.apache.lucene.uninverting; |
| |
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Map; |
| |
| import org.apache.lucene.document.IntField; // javadocs |
| import org.apache.lucene.document.LongField; // javadocs |
| import org.apache.lucene.document.FloatField; // javadocs |
| import org.apache.lucene.document.DoubleField; // javadocs |
| import org.apache.lucene.document.BinaryDocValuesField; // javadocs |
| import org.apache.lucene.document.NumericDocValuesField; // javadocs |
| import org.apache.lucene.document.SortedDocValuesField; // javadocs |
| import org.apache.lucene.document.SortedSetDocValuesField; // javadocs |
| import org.apache.lucene.document.StringField; // javadocs |
| import org.apache.lucene.index.AtomicReader; |
| import org.apache.lucene.index.BinaryDocValues; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.FilterAtomicReader; |
| import org.apache.lucene.index.FilterDirectoryReader; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.uninverting.FieldCache.CacheEntry; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.NumericUtils; |
| |
| /** |
| * A FilterReader that exposes <i>indexed</i> values as if they also had |
| * docvalues. |
| * <p> |
| * This is accomplished by "inverting the inverted index" or "uninversion". |
| * <p> |
| * The uninversion process happens lazily: upon the first request for the |
| * field's docvalues (e.g. via {@link AtomicReader#getNumericDocValues(String)} |
| * or similar), it will create the docvalues on-the-fly if needed and cache it, |
| * based on the core cache key of the wrapped AtomicReader. |
| */ |
| public class UninvertingReader extends FilterAtomicReader { |
| |
| /** |
| * Specifies the type of uninversion to apply for the field. |
| */ |
| public static enum Type { |
| /** |
| * Single-valued Integer, (e.g. indexed with {@link IntField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link NumericDocValuesField}. |
| */ |
| INTEGER, |
| /** |
| * Single-valued Long, (e.g. indexed with {@link LongField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link NumericDocValuesField}. |
| */ |
| LONG, |
| /** |
| * Single-valued Float, (e.g. indexed with {@link FloatField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link NumericDocValuesField}. |
| */ |
| FLOAT, |
| /** |
| * Single-valued Double, (e.g. indexed with {@link DoubleField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link NumericDocValuesField}. |
| */ |
| DOUBLE, |
| /** |
| * Single-valued Binary, (e.g. indexed with {@link StringField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link BinaryDocValuesField}. |
| */ |
| BINARY, |
| /** |
| * Single-valued Binary, (e.g. indexed with {@link StringField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link SortedDocValuesField}. |
| */ |
| SORTED, |
| /** |
| * Multi-valued Binary, (e.g. indexed with {@link StringField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link SortedSetDocValuesField}. |
| */ |
| SORTED_SET_BINARY, |
| /** |
| * Multi-valued Integer, (e.g. indexed with {@link IntField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link SortedSetDocValuesField}. |
| */ |
| SORTED_SET_INTEGER, |
| /** |
| * Multi-valued Float, (e.g. indexed with {@link FloatField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link SortedSetDocValuesField}. |
| */ |
| SORTED_SET_FLOAT, |
| /** |
| * Multi-valued Long, (e.g. indexed with {@link LongField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link SortedSetDocValuesField}. |
| */ |
| SORTED_SET_LONG, |
| /** |
| * Multi-valued Double, (e.g. indexed with {@link DoubleField}) |
| * <p> |
| * Fields with this type act as if they were indexed with |
| * {@link SortedSetDocValuesField}. |
| */ |
| SORTED_SET_DOUBLE |
| } |
| |
| /** |
| * Wraps a provided DirectoryReader. Note that for convenience, the returned reader |
| * can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)}) |
| * and so on. |
| */ |
| public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) { |
| return new UninvertingDirectoryReader(in, mapping); |
| } |
| |
| static class UninvertingDirectoryReader extends FilterDirectoryReader { |
| final Map<String,Type> mapping; |
| |
| public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) { |
| super(in, new FilterDirectoryReader.SubReaderWrapper() { |
| @Override |
| public AtomicReader wrap(AtomicReader reader) { |
| return new UninvertingReader(reader, mapping); |
| } |
| }); |
| this.mapping = mapping; |
| } |
| |
| @Override |
| protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) { |
| return new UninvertingDirectoryReader(in, mapping); |
| } |
| } |
| |
| final Map<String,Type> mapping; |
| final FieldInfos fieldInfos; |
| |
| /** |
| * Create a new UninvertingReader with the specified mapping |
| * <p> |
| * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)} |
| * instead. |
| * |
| * @lucene.internal |
| */ |
| public UninvertingReader(AtomicReader in, Map<String,Type> mapping) { |
| super(in); |
| this.mapping = mapping; |
| ArrayList<FieldInfo> filteredInfos = new ArrayList<>(); |
| for (FieldInfo fi : in.getFieldInfos()) { |
| FieldInfo.DocValuesType type = fi.getDocValuesType(); |
| if (fi.isIndexed() && !fi.hasDocValues()) { |
| Type t = mapping.get(fi.name); |
| if (t != null) { |
| switch(t) { |
| case INTEGER: |
| case LONG: |
| case FLOAT: |
| case DOUBLE: |
| type = FieldInfo.DocValuesType.NUMERIC; |
| break; |
| case BINARY: |
| type = FieldInfo.DocValuesType.BINARY; |
| break; |
| case SORTED: |
| type = FieldInfo.DocValuesType.SORTED; |
| break; |
| case SORTED_SET_BINARY: |
| case SORTED_SET_INTEGER: |
| case SORTED_SET_FLOAT: |
| case SORTED_SET_LONG: |
| case SORTED_SET_DOUBLE: |
| type = FieldInfo.DocValuesType.SORTED_SET; |
| break; |
| default: |
| throw new AssertionError(); |
| } |
| } |
| } |
| filteredInfos.add(new FieldInfo(fi.name, fi.isIndexed(), fi.number, fi.hasVectors(), fi.omitsNorms(), |
| fi.hasPayloads(), fi.getIndexOptions(), type, fi.getNormType(), null)); |
| } |
| fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()])); |
| } |
| |
| @Override |
| public FieldInfos getFieldInfos() { |
| return fieldInfos; |
| } |
| |
| @Override |
| public NumericDocValues getNumericDocValues(String field) throws IOException { |
| Type v = mapping.get(field); |
| if (v != null) { |
| switch (v) { |
| case INTEGER: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_INT_PARSER, true); |
| case FLOAT: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_FLOAT_PARSER, true); |
| case LONG: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_LONG_PARSER, true); |
| case DOUBLE: return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, true); |
| default: |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v); |
| } |
| } |
| return super.getNumericDocValues(field); |
| } |
| |
| @Override |
| public BinaryDocValues getBinaryDocValues(String field) throws IOException { |
| Type v = mapping.get(field); |
| if (v == Type.BINARY) { |
| return FieldCache.DEFAULT.getTerms(in, field, true); |
| } else if (v != null && v != Type.SORTED) { |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v); |
| } else { |
| return in.getBinaryDocValues(field); |
| } |
| } |
| |
| @Override |
| public SortedDocValues getSortedDocValues(String field) throws IOException { |
| Type v = mapping.get(field); |
| if (v == Type.SORTED) { |
| return FieldCache.DEFAULT.getTermsIndex(in, field); |
| } else if (v != null) { |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v); |
| } else { |
| return in.getSortedDocValues(field); |
| } |
| } |
| |
| @Override |
| public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { |
| Type v = mapping.get(field); |
| if (v != null) { |
| switch (v) { |
| case SORTED_SET_INTEGER: |
| case SORTED_SET_FLOAT: |
| return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX); |
| case SORTED_SET_LONG: |
| case SORTED_SET_DOUBLE: |
| return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX); |
| case SORTED_SET_BINARY: |
| return FieldCache.DEFAULT.getDocTermOrds(in, field, null); |
| default: |
| if (v != Type.SORTED) { |
| throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + v); |
| } |
| } |
| } |
| return in.getSortedSetDocValues(field); |
| } |
| |
| @Override |
| public Bits getDocsWithField(String field) throws IOException { |
| if (mapping.containsKey(field)) { |
| return FieldCache.DEFAULT.getDocsWithField(in, field); |
| } else { |
| return in.getDocsWithField(field); |
| } |
| } |
| |
| @Override |
| public Object getCoreCacheKey() { |
| return in.getCoreCacheKey(); |
| } |
| |
| @Override |
| public Object getCombinedCoreAndDeletesKey() { |
| return in.getCombinedCoreAndDeletesKey(); |
| } |
| |
| @Override |
| public String toString() { |
| return "Uninverting(" + in.toString() + ")"; |
| } |
| |
| /** |
| * Return information about the backing cache |
| * @lucene.internal |
| */ |
| public static String[] getUninvertedStats() { |
| CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries(); |
| String[] info = new String[entries.length]; |
| for (int i = 0; i < entries.length; i++) { |
| info[i] = entries[i].toString(); |
| } |
| return info; |
| } |
| } |