blob: 563a01d14df3374e3e4d6ad404d2f35255c4ae8d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.ltr.feature;
import java.io.IOException;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
/**
* This feature returns the value of a field in the current document.
* The field must have stored="true" or docValues="true" properties.
* Example configuration:
* <pre>{
"name": "rawHits",
"class": "org.apache.solr.ltr.feature.FieldValueFeature",
"params": {
"field": "hits"
}
}</pre>
*
* <p>There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use.
* The chosen scorer depends on the field attributes.</p>
*
* <p>FieldValueFeatureScorer (FVFS): used for stored=true, no matter if docValues=true or docValues=false</p>
*
* <p>NumericDocValuesFVFS: used for stored=false and docValues=true, if docValueType == NUMERIC</p>
* <p>SortedDocValuesFVFS: used for stored=false and docValues=true, if docValueType == SORTED</p>
*
* <p>DefaultValueFVFS: used for stored=false and docValues=true, a fallback scorer that is used on segments
* where no document has a value set in the field of this feature</p>
*/
public class FieldValueFeature extends Feature {
/** Name of the field whose value this feature returns. */
private String field;

/** Single-element set containing {@link #field}; kept in sync by {@link #setField(String)}. */
private Set<String> fieldAsSet;

/**
 * Returns the name of the field this feature reads.
 *
 * @return the configured field name
 */
public String getField() {
  return this.field;
}

/**
 * Sets the field to read and rebuilds the cached single-element field set.
 *
 * @param field name of the field whose value is used as the feature value
 */
public void setField(String field) {
  this.fieldAsSet = Collections.singleton(field);
  this.field = field;
}
/**
 * Builds the parameter map of this feature: the map returned by
 * {@code defaultParamsToMap()} with the {@code "field"} entry added.
 *
 * @return the parameter map including the configured field name
 */
@Override
public LinkedHashMap<String,Object> paramsToMap() {
  final LinkedHashMap<String,Object> serialized = defaultParamsToMap();
  serialized.put("field", this.field);
  return serialized;
}
/**
 * Checks that a non-empty field name has been configured.
 *
 * @throws FeatureException if the field is {@code null} or empty
 */
@Override
protected void validate() throws FeatureException {
  final boolean fieldProvided = field != null && !field.isEmpty();
  if (fieldProvided) {
    return;
  }
  throw new FeatureException(getClass().getSimpleName()+
      ": field must be provided");
}
/**
 * Creates the feature; both arguments are passed straight through to the superclass.
 *
 * @param name the feature name
 * @param params the feature parameters
 */
public FieldValueFeature(final String name, final Map<String,Object> params) {
  super(name, params);
}
/**
 * Creates the weight for this feature. The {@code needsScores} flag is not
 * used here; all other arguments are handed to {@link FieldValueFeatureWeight}.
 *
 * @return a new {@link FieldValueFeatureWeight}
 * @throws IOException as declared by the overridden method
 */
@Override
public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores,
    SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi)
    throws IOException {
  final FeatureWeight fieldValueWeight =
      new FieldValueFeatureWeight(searcher, request, originalQuery, efi);
  return fieldValueWeight;
}
public class FieldValueFeatureWeight extends FeatureWeight {
/** Schema definition of the feature's field, or {@code null} when it could not be looked up. */
private final SchemaField schemaField;

/**
 * Creates the weight and resolves the schema field when the searcher exposes a schema.
 *
 * @param searcher the searcher; only a {@link SolrIndexSearcher} provides a schema
 * @param request the current request, passed to the superclass
 * @param originalQuery the original query, passed to the superclass
 * @param efi external feature information, passed to the superclass
 */
public FieldValueFeatureWeight(IndexSearcher searcher,
    SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) {
  super(FieldValueFeature.this, searcher, request, originalQuery, efi);
  // some tests pass a null or a non-SolrIndexSearcher searcher
  this.schemaField = (searcher instanceof SolrIndexSearcher)
      ? ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field)
      : null;
}
/**
 * Picks the FeatureScorer for one segment. Fields that are not stored but have
 * docValues are scored from docValues; everything else (including an unknown
 * schema field) is scored from stored fields.
 *
 * @param context the segment this FeatureScorer is working with
 * @return FeatureScorer for the current segment and field
 * @throws IOException as defined by abstract class Feature
 * @throws IllegalArgumentException if the segment's docValues type for the field is
 *         neither NUMERIC, SORTED nor NONE
 */
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
  final boolean docValuesOnly =
      schemaField != null && !schemaField.stored() && schemaField.hasDocValues();
  if (!docValuesOnly) {
    return new FieldValueFeatureScorer(this, context,
        DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
  }

  final FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
  final DocValuesType type = (info == null) ? DocValuesType.NONE : info.getDocValuesType();
  switch (type) {
    case NUMERIC:
      return new NumericDocValuesFieldValueFeatureScorer(this, context,
          DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType().getNumberType());
    case SORTED:
      return new SortedDocValuesFieldValueFeatureScorer(this, context,
          DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
    case NONE:
      // Using a fallback feature scorer because this segment has no documents with a doc value for the current field
      return new DefaultValueFieldValueFeatureScorer(this, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
    default:
      throw new IllegalArgumentException("Doc values type " + type.name() + " of field " + field
          + " is not supported");
  }
}
/**
 * A FeatureScorer that reads the stored value for a field.
 *
 * <p>Numeric stored values are returned as float, single-character boolean tokens
 * (see BoolField) map to 1 / 0, and every other case yields the default value.</p>
 */
public class FieldValueFeatureScorer extends FeatureScorer {
  LeafReaderContext context = null;

  /**
   * @param weight the weight this scorer belongs to
   * @param context the segment whose stored fields are read
   * @param itr iterator positioned on the document to score
   */
  public FieldValueFeatureScorer(FeatureWeight weight,
      LeafReaderContext context, DocIdSetIterator itr) {
    super(weight, itr);
    this.context = context;
  }

  /**
   * Loads the stored field of the current document and converts it to a float.
   *
   * @return the numeric value, 1 / 0 for boolean tokens, otherwise the default value
   * @throws FeatureException if the stored document cannot be read
   */
  @Override
  public float score() throws IOException {
    try {
      final Document document = context.reader().document(itr.docID(),
          fieldAsSet);
      final IndexableField indexableField = document.getField(field);
      if (indexableField == null) {
        return getDefaultValue();
      }
      final Number number = indexableField.numericValue();
      if (number != null) {
        return number.floatValue();
      }
      final String string = indexableField.stringValue();
      // stringValue() may be null when the stored value is neither numeric nor
      // text (e.g. binary); fall through to the default value instead of throwing
      // a NullPointerException.
      if (string != null && string.length() == 1) {
        // boolean values in the index are encoded with a single char
        // contained in TRUE_TOKEN or FALSE_TOKEN (see BoolField)
        final char token = string.charAt(0);
        if (token == BoolField.TRUE_TOKEN[0]) {
          return 1;
        }
        if (token == BoolField.FALSE_TOKEN[0]) {
          return 0;
        }
      }
    } catch (final IOException e) {
      throw new FeatureException(
          e.toString() + ": " +
              "Unable to extract feature for "
              + name, e);
    }
    return getDefaultValue();
  }

  /**
   * @return always {@link Float#POSITIVE_INFINITY}
   */
  @Override
  public float getMaxScore(int upTo) throws IOException {
    return Float.POSITIVE_INFINITY;
  }
}
/**
 * A FeatureScorer that reads the numeric docValues for a field
 */
public final class NumericDocValuesFieldValueFeatureScorer extends FeatureScorer {
  private final NumericDocValues docValues;
  private final NumberType numberType;

  /**
   * @param weight the weight this scorer belongs to
   * @param context the segment whose numeric docValues are read
   * @param itr iterator positioned on the document to score
   * @param numberType number type of the schema field, used to decode the raw long value
   * @throws IllegalArgumentException if the docValues cannot be obtained; the
   *         underlying IOException is attached as cause
   */
  public NumericDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
      final DocIdSetIterator itr, final NumberType numberType) {
    super(weight, itr);
    this.numberType = numberType;

    NumericDocValues docValues;
    try {
      docValues = DocValues.getNumeric(context.reader(), field);
    } catch (IOException e) {
      // keep the original failure as cause so the root problem stays diagnosable
      throw new IllegalArgumentException("Could not read numeric docValues for field " + field, e);
    }
    this.docValues = docValues;
  }

  /**
   * @return the decoded docValue of the current document, or the default value
   *         if the document has no value for the field
   */
  @Override
  public float score() throws IOException {
    if (docValues.advanceExact(itr.docID())) {
      return readNumericDocValues();
    }
    return FieldValueFeature.this.getDefaultValue();
  }

  /**
   * Read the numeric value for a field and convert the different number types to float.
   *
   * @return The numeric value that the docValues contain for the current document
   * @throws IOException if docValues cannot be read
   */
  private float readNumericDocValues() throws IOException {
    if (NumberType.FLOAT.equals(numberType)) {
      // convert float value that was stored as long back to float
      return Float.intBitsToFloat((int) docValues.longValue());
    } else if (NumberType.DOUBLE.equals(numberType)) {
      // handle double value conversion
      return (float) Double.longBitsToDouble(docValues.longValue());
    }
    // just take the long value
    return docValues.longValue();
  }

  /**
   * @return always {@link Float#POSITIVE_INFINITY}
   */
  @Override
  public float getMaxScore(int upTo) throws IOException {
    return Float.POSITIVE_INFINITY;
  }
}
/**
 * A FeatureScorer that reads the sorted docValues for a field
 */
public final class SortedDocValuesFieldValueFeatureScorer extends FeatureScorer {
  private final SortedDocValues docValues;

  /**
   * @param weight the weight this scorer belongs to
   * @param context the segment whose sorted docValues are read
   * @param itr iterator positioned on the document to score
   * @throws IllegalArgumentException if the docValues cannot be obtained; the
   *         underlying IOException is attached as cause
   */
  public SortedDocValuesFieldValueFeatureScorer(final FeatureWeight weight, final LeafReaderContext context,
      final DocIdSetIterator itr) {
    super(weight, itr);

    SortedDocValues docValues;
    try {
      docValues = DocValues.getSorted(context.reader(), field);
    } catch (IOException e) {
      // keep the original failure as cause so the root problem stays diagnosable
      throw new IllegalArgumentException("Could not read sorted docValues for field " + field, e);
    }
    this.docValues = docValues;
  }

  /**
   * @return 1 / 0 for a boolean token, otherwise the default value (also when the
   *         document has no value for the field)
   */
  @Override
  public float score() throws IOException {
    if (docValues.advanceExact(itr.docID())) {
      int ord = docValues.ordValue();
      return readSortedDocValues(docValues.lookupOrd(ord));
    }
    return FieldValueFeature.this.getDefaultValue();
  }

  /**
   * Interprets the bytesRef as a true / false token. Any other content, including
   * number strings, is not parsed and results in the default value.
   *
   * @param bytesRef the value of the field that should be used as score
   * @return 1 for the true token, 0 for the false token, otherwise the default value
   */
  private float readSortedDocValues(BytesRef bytesRef) {
    String string = bytesRef.utf8ToString();
    if (string.length() == 1) {
      // boolean values in the index are encoded with a single char
      // contained in TRUE_TOKEN or FALSE_TOKEN (see BoolField)
      if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
        return 1;
      }
      if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
        return 0;
      }
    }
    return FieldValueFeature.this.getDefaultValue();
  }

  /**
   * @return always {@link Float#POSITIVE_INFINITY}
   */
  @Override
  public float getMaxScore(int upTo) throws IOException {
    return Float.POSITIVE_INFINITY;
  }
}
/**
 * A FeatureScorer whose score is always the feature's default value.
 *
 * It serves as the fallback for segments in which no document carries a doc value for the field.
 * Without it the FieldValueFeatureScorer would be used instead; that scorer would return the same
 * default value, but only after a pointless attempt to load stored fields that do not exist.
 */
public final class DefaultValueFieldValueFeatureScorer extends FeatureScorer {

  /**
   * @param weight the weight this scorer belongs to
   * @param itr iterator over the documents to score
   */
  public DefaultValueFieldValueFeatureScorer(final FeatureWeight weight, final DocIdSetIterator itr) {
    super(weight, itr);
  }

  /**
   * @return the default value of the enclosing feature, independent of the current document
   */
  @Override
  public float score() throws IOException {
    final float fallback = FieldValueFeature.this.getDefaultValue();
    return fallback;
  }

  /**
   * @return always {@link Float#POSITIVE_INFINITY}
   */
  @Override
  public float getMaxScore(int upTo) throws IOException {
    final float unbounded = Float.POSITIVE_INFINITY;
    return unbounded;
  }
}
}
}