// blob: 7f5b67394921de36710bbad1e0074d8f5f055efc [file] [log] [blame]
package org.apache.lucene.uninverting;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import org.apache.lucene.document.IntField; // javadocs
import org.apache.lucene.document.LongField; // javadocs
import org.apache.lucene.document.FloatField; // javadocs
import org.apache.lucene.document.DoubleField; // javadocs
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
import org.apache.lucene.document.NumericDocValuesField; // javadocs
import org.apache.lucene.document.SortedDocValuesField; // javadocs
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs
import org.apache.lucene.document.StringField; // javadocs
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.uninverting.FieldCache.CacheEntry;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
/**
* A FilterReader that exposes <i>indexed</i> values as if they also had
* docvalues.
* <p>
* This is accomplished by "inverting the inverted index" or "uninversion".
* <p>
* The uninversion process happens lazily: upon the first request for the
* field's docvalues (e.g. via {@link AtomicReader#getNumericDocValues(String)}
* or similar), it will create the docvalues on-the-fly if needed and cache it,
* based on the core cache key of the wrapped AtomicReader.
*/
public class UninvertingReader extends FilterAtomicReader {
/**
* Specifies the type of uninversion to apply for a field, i.e. which kind of
* docvalues an indexed-only field will appear to have.
* <p>
* The four single-valued numeric types are reported with the NUMERIC docvalues
* type, {@link #BINARY} with BINARY, {@link #SORTED} with SORTED, and every
* {@code SORTED_SET_*} type with SORTED_SET.
*/
public static enum Type {
/**
* Single-valued Integer (e.g. indexed with {@link IntField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}; values are obtained with
* {@code FieldCache.NUMERIC_UTILS_INT_PARSER}.
*/
INTEGER,
/**
* Single-valued Long (e.g. indexed with {@link LongField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}; values are obtained with
* {@code FieldCache.NUMERIC_UTILS_LONG_PARSER}.
*/
LONG,
/**
* Single-valued Float (e.g. indexed with {@link FloatField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}; values are obtained with
* {@code FieldCache.NUMERIC_UTILS_FLOAT_PARSER}.
*/
FLOAT,
/**
* Single-valued Double (e.g. indexed with {@link DoubleField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link NumericDocValuesField}; values are obtained with
* {@code FieldCache.NUMERIC_UTILS_DOUBLE_PARSER}.
*/
DOUBLE,
/**
* Single-valued Binary (e.g. indexed with {@link StringField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link BinaryDocValuesField}; values are obtained with
* {@code FieldCache.DEFAULT.getTerms}.
*/
BINARY,
/**
* Single-valued Binary (e.g. indexed with {@link StringField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedDocValuesField}; values are obtained with
* {@code FieldCache.DEFAULT.getTermsIndex}.
*/
SORTED,
/**
* Multi-valued Binary (e.g. indexed with {@link StringField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}; values are obtained with
* {@code FieldCache.DEFAULT.getDocTermOrds} and no term prefix.
*/
SORTED_SET_BINARY,
/**
* Multi-valued Integer (e.g. indexed with {@link IntField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}; values are obtained with
* {@code FieldCache.DEFAULT.getDocTermOrds} and
* {@code FieldCache.INT32_TERM_PREFIX}.
*/
SORTED_SET_INTEGER,
/**
* Multi-valued Float (e.g. indexed with {@link FloatField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}; values are obtained with
* {@code FieldCache.DEFAULT.getDocTermOrds} and
* {@code FieldCache.INT32_TERM_PREFIX}.
*/
SORTED_SET_FLOAT,
/**
* Multi-valued Long (e.g. indexed with {@link LongField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}; values are obtained with
* {@code FieldCache.DEFAULT.getDocTermOrds} and
* {@code FieldCache.INT64_TERM_PREFIX}.
*/
SORTED_SET_LONG,
/**
* Multi-valued Double (e.g. indexed with {@link DoubleField}).
* <p>
* Fields with this type act as if they were indexed with
* {@link SortedSetDocValuesField}; values are obtained with
* {@code FieldCache.DEFAULT.getDocTermOrds} and
* {@code FieldCache.INT64_TERM_PREFIX}.
*/
SORTED_SET_DOUBLE
}
/**
 * Returns a {@link DirectoryReader} view of {@code in} whose segment readers
 * are each wrapped in an {@link UninvertingReader} configured with
 * {@code mapping}.
 * <p>
 * For convenience the returned reader can be used like any other
 * DirectoryReader, e.g. passed to
 * {@link DirectoryReader#openIfChanged(DirectoryReader)}.
 *
 * @param in      the reader to wrap
 * @param mapping field name to uninversion type
 * @return the wrapping reader
 */
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) {
  final DirectoryReader uninverting = new UninvertingDirectoryReader(in, mapping);
  return uninverting;
}
/**
 * A {@link FilterDirectoryReader} that wraps every sub-reader in an
 * {@link UninvertingReader} sharing one field-to-type mapping.
 */
static class UninvertingDirectoryReader extends FilterDirectoryReader {
  /** Field name to uninversion type, handed to each wrapped sub-reader. */
  final Map<String,Type> mapping;

  /**
   * Wraps {@code in}, applying {@code mapping} to each of its sub-readers.
   */
  public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) {
    super(in, uninvertingWrapper(mapping));
    this.mapping = mapping;
  }

  /**
   * Builds the sub-reader wrapper; static so it can be evaluated inside the
   * {@code super(...)} call before this instance is initialized.
   */
  private static FilterDirectoryReader.SubReaderWrapper uninvertingWrapper(final Map<String,Type> mapping) {
    return new FilterDirectoryReader.SubReaderWrapper() {
      @Override
      public AtomicReader wrap(AtomicReader reader) {
        return new UninvertingReader(reader, mapping);
      }
    };
  }

  /** Re-wraps a replacement DirectoryReader with the same mapping. */
  @Override
  protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) {
    return new UninvertingDirectoryReader(in, mapping);
  }
}
/** Field name to uninversion {@link Type}, exactly as passed to the constructor (not copied). */
final Map<String,Type> mapping;
/**
* Field infos built in the constructor from the wrapped reader's infos: fields that are
* indexed, lack docvalues and have a mapping entry report the docvalues type implied by
* their {@link Type}.
*/
final FieldInfos fieldInfos;
/**
 * Create a new UninvertingReader with the specified mapping.
 * <p>
 * The wrapped reader's field infos are copied; any field that is indexed,
 * has no docvalues and has a non-null entry in {@code mapping} is reported
 * with the docvalues type that corresponds to its {@link Type}.
 * <p>
 * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
 * instead.
 *
 * @param in      the segment reader to wrap
 * @param mapping field name to uninversion type
 *
 * @lucene.internal
 */
public UninvertingReader(AtomicReader in, Map<String,Type> mapping) {
  super(in);
  this.mapping = mapping;
  final ArrayList<FieldInfo> rewritten = new ArrayList<>();
  for (FieldInfo info : in.getFieldInfos()) {
    FieldInfo.DocValuesType dvType = info.getDocValuesType();
    final boolean indexedWithoutDocValues = info.isIndexed() && !info.hasDocValues();
    if (indexedWithoutDocValues) {
      final Type requested = mapping.get(info.name);
      if (requested != null) {
        dvType = docValuesTypeFor(requested);
      }
    }
    // Same field info as the wrapped reader's, except for the docvalues type
    // and the attributes map, which is passed as null.
    final FieldInfo copy = new FieldInfo(
        info.name,
        info.isIndexed(),
        info.number,
        info.hasVectors(),
        info.omitsNorms(),
        info.hasPayloads(),
        info.getIndexOptions(),
        dvType,
        info.getNormType(),
        null);
    rewritten.add(copy);
  }
  final FieldInfo[] asArray = rewritten.toArray(new FieldInfo[rewritten.size()]);
  fieldInfos = new FieldInfos(asArray);
}

/** Translates an uninversion type into the docvalues type it stands in for. */
private static FieldInfo.DocValuesType docValuesTypeFor(Type requested) {
  switch (requested) {
    case INTEGER:
    case LONG:
    case FLOAT:
    case DOUBLE:
      return FieldInfo.DocValuesType.NUMERIC;
    case BINARY:
      return FieldInfo.DocValuesType.BINARY;
    case SORTED:
      return FieldInfo.DocValuesType.SORTED;
    case SORTED_SET_BINARY:
    case SORTED_SET_INTEGER:
    case SORTED_SET_FLOAT:
    case SORTED_SET_LONG:
    case SORTED_SET_DOUBLE:
      return FieldInfo.DocValuesType.SORTED_SET;
    default:
      throw new AssertionError();
  }
}
/**
 * Returns the field infos computed at construction time, in which mapped
 * indexed-only fields report a docvalues type.
 */
@Override
public FieldInfos getFieldInfos() {
  return this.fieldInfos;
}
/**
 * Returns numeric docvalues for {@code field}.
 * <p>
 * Fields without a mapping are delegated to the superclass. Fields mapped
 * to INTEGER, FLOAT, LONG or DOUBLE are served by
 * {@code FieldCache.DEFAULT.getNumerics} with the matching parser.
 *
 * @throws IllegalStateException if the field is mapped to a non-numeric type
 */
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
  final Type mapped = mapping.get(field);
  if (mapped == null) {
    return super.getNumericDocValues(field);
  }
  if (mapped == Type.INTEGER) {
    return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_INT_PARSER, true);
  } else if (mapped == Type.FLOAT) {
    return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_FLOAT_PARSER, true);
  } else if (mapped == Type.LONG) {
    return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_LONG_PARSER, true);
  } else if (mapped == Type.DOUBLE) {
    return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.NUMERIC_UTILS_DOUBLE_PARSER, true);
  }
  throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + mapped);
}
/**
 * Returns binary docvalues for {@code field}.
 * <p>
 * A field mapped to BINARY is served by {@code FieldCache.DEFAULT.getTerms}.
 * A field with no mapping, or mapped to SORTED, is delegated to the wrapped
 * reader.
 *
 * @throws IllegalStateException if the field is mapped to any other type
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
  final Type mapped = mapping.get(field);
  if (mapped == Type.BINARY) {
    return FieldCache.DEFAULT.getTerms(in, field, true);
  }
  if (mapped == null || mapped == Type.SORTED) {
    return in.getBinaryDocValues(field);
  }
  throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + mapped);
}
/**
 * Returns sorted docvalues for {@code field}.
 * <p>
 * A field mapped to SORTED is served by
 * {@code FieldCache.DEFAULT.getTermsIndex}; a field with no mapping is
 * delegated to the wrapped reader.
 *
 * @throws IllegalStateException if the field is mapped to any other type
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
  final Type mapped = mapping.get(field);
  if (mapped == null) {
    return in.getSortedDocValues(field);
  }
  if (mapped != Type.SORTED) {
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + mapped);
  }
  return FieldCache.DEFAULT.getTermsIndex(in, field);
}
/**
 * Returns sorted-set docvalues for {@code field}.
 * <p>
 * A field with no mapping, or mapped to SORTED, is delegated to the wrapped
 * reader. The {@code SORTED_SET_*} types are served by
 * {@code FieldCache.DEFAULT.getDocTermOrds}: with
 * {@code INT32_TERM_PREFIX} for INTEGER/FLOAT, with
 * {@code INT64_TERM_PREFIX} for LONG/DOUBLE, and with no prefix for BINARY.
 *
 * @throws IllegalStateException if the field is mapped to any other type
 */
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
  final Type mapped = mapping.get(field);
  if (mapped == null || mapped == Type.SORTED) {
    return in.getSortedSetDocValues(field);
  }
  if (mapped == Type.SORTED_SET_INTEGER || mapped == Type.SORTED_SET_FLOAT) {
    return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
  }
  if (mapped == Type.SORTED_SET_LONG || mapped == Type.SORTED_SET_DOUBLE) {
    return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
  }
  if (mapped == Type.SORTED_SET_BINARY) {
    return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
  }
  throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + mapped);
}
/**
 * Returns the bits marking which documents have a value for {@code field}.
 * <p>
 * Fields with a non-null mapping are answered by
 * {@code FieldCache.DEFAULT.getDocsWithField}; all other fields are
 * delegated to the wrapped reader.
 *
 * @param field the field name
 * @throws IOException if the underlying call fails with one
 */
@Override
public Bits getDocsWithField(String field) throws IOException {
  // Use get() != null rather than containsKey(): the constructor and the other
  // docvalues getters treat a null mapping value as "not mapped", so a key
  // bound to null must not be routed through the FieldCache here either.
  final Type mapped = mapping.get(field);
  if (mapped != null) {
    return FieldCache.DEFAULT.getDocsWithField(in, field);
  } else {
    return in.getDocsWithField(field);
  }
}
/**
 * Returns the core cache key of the wrapped reader, so this wrapper and the
 * reader it wraps share one key.
 */
@Override
public Object getCoreCacheKey() {
  final Object wrappedKey = in.getCoreCacheKey();
  return wrappedKey;
}
/**
 * Returns the combined core-and-deletes key of the wrapped reader unchanged.
 */
@Override
public Object getCombinedCoreAndDeletesKey() {
  final Object wrappedKey = in.getCombinedCoreAndDeletesKey();
  return wrappedKey;
}
/**
 * Returns the wrapped reader's string form enclosed in
 * {@code Uninverting(...)}.
 */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder("Uninverting(");
  text.append(in.toString());
  text.append(")");
  return text.toString();
}
/**
 * Return information about the backing cache: one string per entry
 * currently reported by {@code FieldCache.DEFAULT.getCacheEntries()}, in
 * the same order.
 *
 * @lucene.internal
 */
public static String[] getUninvertedStats() {
  final CacheEntry[] cached = FieldCache.DEFAULT.getCacheEntries();
  final String[] descriptions = new String[cached.length];
  int slot = 0;
  for (CacheEntry entry : cached) {
    descriptions[slot++] = entry.toString();
  }
  return descriptions;
}
}