blob: 068273d9298498813a5243712981b18564abd807 [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.solr.schema;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.util.RefCounted;
import org.junit.Before;
import org.junit.BeforeClass;
import static org.hamcrest.CoreMatchers.instanceOf;
public class TestSortableTextField extends SolrTestCaseJ4 {
/**
 * Filler text made of {@code "x"} repeated
 * {@code SortableTextField.DEFAULT_MAX_CHARS_FOR_DOC_VALUES} times, so its length equals that
 * constant. Tests in this class combine it with short terms to build very long field values.
 */
protected static final String BIG_CONST =
    StringUtils.repeat("x", SortableTextField.DEFAULT_MAX_CHARS_FOR_DOC_VALUES);
public static void create() throws Exception {
// Schema sanity checks: confirm that the field types and fields the tests rely on still have
// the docValues configuration they assume, and fail with a "schema got changed?" hint if not.
// NOTE(review): no core initialization and no closing braces for the loops below are visible
// in this view -- confirm against the full file.
// sanity check our fields & types...
// these should all use docValues (either explicitly or implicitly)...
for (String n : Arrays.asList("keyword_stxt",
"whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) {
// each name is looked up as a field *type* and its "docValues" property must be true
FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(n);
assertEquals("type " + ft.getTypeName() + " should have docvalues - schema got changed?",
true, ft.getNamedPropertyValues(true).get("docValues")) ;
// same expectation at the field level, over a wider list of names
for (String n : Arrays.asList("keyword_stxt", "keyword_dv_stxt",
"whitespace_stxt", "whitespace_nois_stxt",
"whitespace_f_stxt", "whitespace_l_stxt")) {
SchemaField sf = h.getCore().getLatestSchema().getField(n);
assertTrue("field " + sf.getName() + " should have docvalues - schema got changed?",
sf.hasDocValues()) ;
{ // this field should *NOT* have docValues .. should behave like a plain old TextField
SchemaField sf = h.getCore().getLatestSchema().getField("whitespace_nodv_stxt");
assertFalse("field " + sf.getName() + " should not have docvalues - schema got changed?",
sf.hasDocValues()) ;
public void cleanup() throws Exception {
// NOTE(review): the body of this method is not visible in this view. Presumably it resets the
// index between tests (org.junit.Before is imported by this file) -- confirm against the full file.
/**
 * Indexes four small documents and checks basic searching and sorting.
 *
 * <p>Covers: sorting matches for {@code cat} and {@code brown} in both directions on each of the
 * sort fields, searching on the field without docValues, the error raised when sorting on that
 * field, and where the document with no {@code whitespace_f_stxt} value (id 4) lands when sorting
 * on {@code whitespace_f_stxt} versus {@code whitespace_l_stxt}.
 *
 * @throws Exception declared by the signature
 */
// NOTE(review): the closing ")" of each assertion, the last argument of assertQEx, and any commit
// between indexing and querying are not visible in this view -- confirm against the full file.
public void testSimple() throws Exception {
assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?", "whitespace_f_stxt", "aaa bbb"));
assertU(adoc("id","2", "whitespace_stxt", "how now brown dog ?", "whitespace_f_stxt", "bbb aaa"));
assertU(adoc("id","3", "whitespace_stxt", "how now brown cat ?", "whitespace_f_stxt", "xxx yyy"));
assertU(adoc("id","4", "whitespace_stxt", "dog and cat" /* no val for whitespace_f_stxt */));
// search & sort
// NOTE: even if the field is indexed=false, should still be able to sort on it
for (String sortf : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt", "whitespace_plain_str")) {
// "cat" matches docs 3 and 4; ascending puts "dog and cat" (4) before "how now brown cat ?" (3)
assertQ(req("q", "whitespace_stxt:cat", "sort", sortf + " asc")
, "//*[@numFound='2']"
, "//result/doc[1]/str[@name='id'][.=4]"
, "//result/doc[2]/str[@name='id'][.=3]"
assertQ(req("q", "whitespace_stxt:cat", "sort", sortf + " desc")
, "//*[@numFound='2']"
, "//result/doc[1]/str[@name='id'][.=3]"
, "//result/doc[2]/str[@name='id'][.=4]"
// "brown" matches docs 1-3; ascending order of the full values is cat (3), cow (1), dog (2)
assertQ(req("q", "whitespace_stxt:brown", "sort", sortf + " asc")
, "//*[@numFound='3']"
, "//result/doc[1]/str[@name='id'][.=3]"
, "//result/doc[2]/str[@name='id'][.=1]"
, "//result/doc[3]/str[@name='id'][.=2]"
assertQ(req("q", "whitespace_stxt:brown", "sort", sortf + " desc")
, "//*[@numFound='3']"
, "//result/doc[1]/str[@name='id'][.=2]"
, "//result/doc[2]/str[@name='id'][.=1]"
, "//result/doc[3]/str[@name='id'][.=3]"
// we should still be able to search if docValues="false" (but sort on a diff field)
assertQ(req("q","whitespace_nodv_stxt:cat", "sort", sortf + " asc")
, "//*[@numFound='2']"
, "//result/doc[1]/str[@name='id'][.=4]"
, "//result/doc[2]/str[@name='id'][.=3]"
// attempting to sort on docValues="false" field should give an error...
assertQEx("attempting to sort on docValues=false field should give an error",
"when docValues=\"false\"",
req("q","*:*", "sort", "whitespace_nodv_stxt asc"),
// sortMissing - whitespace_f_stxt copyField to whitespace_l_stxt
// on whitespace_f_stxt the doc with no value (id 4) is expected first in both directions
assertQ(req("q","*:*", "sort", "whitespace_f_stxt asc")
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=4]"
, "//result/doc[2]/str[@name='id'][.=1]"
, "//result/doc[3]/str[@name='id'][.=2]"
, "//result/doc[4]/str[@name='id'][.=3]"
assertQ(req("q","*:*", "sort", "whitespace_f_stxt desc")
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=4]"
, "//result/doc[2]/str[@name='id'][.=3]"
, "//result/doc[3]/str[@name='id'][.=2]"
, "//result/doc[4]/str[@name='id'][.=1]"
// on whitespace_l_stxt the doc with no value (id 4) is expected last in both directions
assertQ(req("q","*:*", "sort", "whitespace_l_stxt asc")
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=1]"
, "//result/doc[2]/str[@name='id'][.=2]"
, "//result/doc[3]/str[@name='id'][.=3]"
, "//result/doc[4]/str[@name='id'][.=4]"
assertQ(req("q","*:*", "sort", "whitespace_l_stxt desc")
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=3]"
, "//result/doc[2]/str[@name='id'][.=2]"
, "//result/doc[3]/str[@name='id'][.=1]"
, "//result/doc[4]/str[@name='id'][.=4]"
/**
 * Indexes four documents (two with an identical value) and checks that faceting returns whole
 * field values as buckets, for every combination of facet field and search field listed below.
 *
 * <p>Each combination is checked twice: once with {@code facet.field} and once with a
 * {@code json.facet} terms facet using {@code mincount:0}.
 *
 * @throws Exception declared by the signature
 */
// NOTE(review): the closing ")" of each assertion and the loop closers are not visible in this view.
public void testSimpleSearchAndFacets() throws Exception {
assertU(adoc("id","1", "whitespace_stxt", "how now brown cow ?"));
assertU(adoc("id","2", "whitespace_stxt", "how now brown cow ?"));
assertU(adoc("id","3", "whitespace_stxt", "holy cow !"));
assertU(adoc("id","4", "whitespace_stxt", "dog and cat"));
// NOTE: even if the field is indexed=false, should still be able to facet on it
for (String facet : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt",
"whitespace_m_stxt", "whitespace_plain_str")) {
for (String search : Arrays.asList("whitespace_stxt", "whitespace_nodv_stxt",
"whitespace_m_stxt", "whitespace_plain_txt")) {
// facet.field
// "cow" matches docs 1-3, so the expected counts are 2, 1 and 0 for the three distinct values
final String fpre = "//lst[@name='facet_fields']/lst[@name='"+facet+"']/";
assertQ(req("q", search + ":cow", "rows", "0",
"facet.field", facet, "facet", "true")
, "//*[@numFound='3']"
, fpre + "int[@name='how now brown cow ?'][.=2]"
, fpre + "int[@name='holy cow !'][.=1]"
, fpre + "int[@name='dog and cat'][.=0]"
// json facet
// same expectations, expressed against the bucket list of the json facet named "x"
final String jpre = "//lst[@name='facets']/lst[@name='x']/arr[@name='buckets']/";
assertQ(req("q", search + ":cow", "rows", "0",
"json.facet", "{x:{ type: terms, field:'" + facet + "', mincount:0 }}")
, "//*[@numFound='3']"
, jpre + "lst[str[@name='val'][.='how now brown cow ?']][int[@name='count'][.=2]]"
, jpre + "lst[str[@name='val'][.='holy cow !']][int[@name='count'][.=1]]"
, jpre + "lst[str[@name='val'][.='dog and cat']][int[@name='count'][.=0]]"
/**
 * White-box check of what ends up in the index: obtains the newest searcher's
 * {@link LeafReader} and, for each of the common fields, asserts that field info exists, that the
 * docValues type is {@link DocValuesType#SORTED}, and that both sorted docValues and terms are
 * available.
 *
 * @throws Exception declared by the signature
 */
public void testWhiteboxIndexReader() throws Exception {
// NOTE(review): the opening of the add-document call that the following field/value arguments
// belong to is not visible in this view -- confirm against the full file.
"whitespace_stxt", "how now brown cow ?",
"whitespace_m_stxt", "xxx",
"whitespace_m_stxt", "yyy",
"whitespace_f_stxt", "aaa bbb",
"keyword_stxt", "Blarggghhh!"));
// the searcher is reference counted, hence the try/finally around its use
final RefCounted<SolrIndexSearcher> searcher = h.getCore().getNewestSearcher(false);
try {
final LeafReader r = searcher.get().getSlowAtomicReader();
// common cases...
for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt",
"whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) {
assertNotNull("FieldInfos: " + field, r.getFieldInfos().fieldInfo(field));
assertEquals("DocValuesType: " + field,
DocValuesType.SORTED, r.getFieldInfos().fieldInfo(field).getDocValuesType());
assertNotNull("DocValues: " + field, r.getSortedDocValues(field));
assertNotNull("Terms: " + field, r.terms(field));
// special cases...
// NOTE(review): no special-case assertions are visible in this view.
} finally {
// NOTE(review): the body of this null check is not visible here; presumably it releases the
// reference-counted searcher -- confirm against the full file.
if (null != searcher) {
/**
 * White-box check of which {@link IndexableField} instances are created for a value, per field.
 *
 * <p>Common fields are expected to yield a {@link Field} followed by a
 * {@link SortedDocValuesField}; {@code whitespace_nois_stxt} only the docValues field;
 * {@code whitespace_nodv_stxt} only the {@link Field}; and {@code whitespace_m_stxt} a
 * {@link Field} followed by a {@link SortedSetDocValuesField}.
 *
 * @throws Exception declared by the signature
 */
public void testWhiteboxCreateFields() throws Exception {
List<IndexableField> values = null;
// common case...
for (String field : Arrays.asList("keyword_stxt", "keyword_dv_stxt",
"whitespace_stxt", "whitespace_f_stxt", "whitespace_l_stxt")) {
values = createIndexableFields(field);
assertEquals(field, 2, values.size());
assertThat(field, values.get(0), instanceOf(Field.class));
assertThat(field, values.get(1), instanceOf(SortedDocValuesField.class));
// special cases...
// only a docValues field is expected here
values = createIndexableFields("whitespace_nois_stxt");
assertEquals(1, values.size());
assertThat(values.get(0), instanceOf(SortedDocValuesField.class));
// only a plain Field is expected here
values = createIndexableFields("whitespace_nodv_stxt");
assertEquals(1, values.size());
assertThat(values.get(0), instanceOf(Field.class));
// the docValues part is expected to be the sorted-set flavour here
values = createIndexableFields("whitespace_m_stxt");
assertEquals(2, values.size());
assertThat(values.get(0), instanceOf(Field.class));
assertThat(values.get(1), instanceOf(SortedSetDocValuesField.class));
/**
 * Looks up {@code fieldName} in the core's latest schema and asks the field's type to create the
 * indexable fields for the fixed value {@code "dummy value"}.
 *
 * @param fieldName name of a field in the schema
 * @return the fields returned by the field type's {@code createFields} for that value
 */
private List<IndexableField> createIndexableFields(String fieldName) {
SchemaField sf = h.getCore().getLatestSchema().getField(fieldName);
return sf.getType().createFields(sf, "dummy value");
/**
 * Checks how sorting behaves on fields whose names indicate a limit on the characters kept for
 * docValues ({@code max3}, {@code max6}, {@code max0}, {@code maxNeg}), and on very long values
 * built from {@link #BIG_CONST}.
 *
 * <p>Every term must remain searchable regardless of the limit. When the sort values cannot
 * distinguish two documents, the secondary {@code id} sort is expected to decide the order.
 *
 * @throws Exception declared by the signature
 */
// NOTE(review): the actual limits are defined in the schema, which is not visible here; the
// closing ")" of each assertion and the loop closers are also missing from this view.
public void testMaxCharsSort() throws Exception {
assertU(adoc("id","1", "whitespace_stxt", "aaa bbb ccc ddd"));
assertU(adoc("id","2", "whitespace_stxt", "aaa bbb xxx yyy"));
assertU(adoc("id","3", "whitespace_stxt", "aaa bbb ccc xxx"));
assertU(adoc("id","4", "whitespace_stxt", "aaa"));
// all terms should be searchable in all fields, even if the docvalues are limited
for (String searchF : Arrays.asList("whitespace_stxt", "whitespace_plain_txt",
"whitespace_max3_stxt", "whitespace_max6_stxt",
"whitespace_max0_stxt", "whitespace_maxNeg_stxt")) {
// maxChars of 0 or neg should be equivalent to no max at all
for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_plain_str",
"whitespace_max0_stxt", "whitespace_maxNeg_stxt")) {
// "ccc" matches docs 1 and 3; on the full values "...ccc xxx" (3) sorts after "...ccc ddd" (1)
assertQ(req("q", searchF + ":ccc", "sort", sortF + " desc, id asc")
, "//*[@numFound='2']"
, "//result/doc[1]/str[@name='id'][.=3]"
, "//result/doc[2]/str[@name='id'][.=1]"
assertQ(req("q", searchF + ":ccc", "sort", sortF + " asc, id desc")
, "//*[@numFound='2']"
, "//result/doc[1]/str[@name='id'][.=1]"
, "//result/doc[2]/str[@name='id'][.=3]"
// sorting on a maxChars limited fields should force tie breaker
for (String dir : Arrays.asList("asc", "desc")) {
// for max3, dir shouldn't matter - should always tie..
// (all four values start with "aaa", so only the id tie breaker orders them)
assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+dir+", id desc") // max3, id desc
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=4]"
, "//result/doc[2]/str[@name='id'][.=3]"
, "//result/doc[3]/str[@name='id'][.=2]"
, "//result/doc[4]/str[@name='id'][.=1]"
assertQ(req("q", "*:*", "sort", "whitespace_max3_stxt "+dir+", id asc") // max3, id asc
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=1]"
, "//result/doc[2]/str[@name='id'][.=2]"
, "//result/doc[3]/str[@name='id'][.=3]"
, "//result/doc[4]/str[@name='id'][.=4]"
// for max6, docs 1-3 share their first six characters ("aaa bb") while doc 4 is just "aaa",
// so doc 4 is placed by the sort field itself and docs 1-3 by the id tie breaker
assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id desc") // max6 asc, id desc
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=4]" // no tiebreaker needed
, "//result/doc[2]/str[@name='id'][.=3]"
, "//result/doc[3]/str[@name='id'][.=2]"
, "//result/doc[4]/str[@name='id'][.=1]"
assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt asc, id asc") // max6 asc, id asc
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=4]" // no tiebreaker needed
, "//result/doc[2]/str[@name='id'][.=1]"
, "//result/doc[3]/str[@name='id'][.=2]"
, "//result/doc[4]/str[@name='id'][.=3]"
assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id desc") // max6 desc, id desc
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=3]"
, "//result/doc[2]/str[@name='id'][.=2]"
, "//result/doc[3]/str[@name='id'][.=1]"
, "//result/doc[4]/str[@name='id'][.=4]" // no tiebreaker needed
assertQ(req("q", "*:*", "sort", "whitespace_max6_stxt desc, id asc") // max6 desc, id asc
, "//*[@numFound='4']"
, "//result/doc[1]/str[@name='id'][.=1]"
, "//result/doc[2]/str[@name='id'][.=2]"
, "//result/doc[3]/str[@name='id'][.=3]"
, "//result/doc[4]/str[@name='id'][.=4]" // no tiebreaker needed
// sanity check that the default max is working....
// both values share the BIG_CONST prefix and differ only after it
assertU(adoc("id","5", "whitespace_stxt", BIG_CONST + " aaa zzz"));
assertU(adoc("id","6", "whitespace_stxt", BIG_CONST + " bbb zzz "));
// for these fields, the tie breaker should be the only thing that matters, regardless of direction...
for (String sortF : Arrays.asList("whitespace_stxt", "whitespace_nois_stxt")) {
for (String dir : Arrays.asList("asc", "desc")) {
assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + dir + ", id asc")
, "//*[@numFound='2']"
, "//result/doc[1]/str[@name='id'][.=5]"
, "//result/doc[2]/str[@name='id'][.=6]"
assertQ(req("q", "whitespace_stxt:zzz", "sort", sortF + " " + dir + ", id desc")
, "//*[@numFound='2']"
, "//result/doc[1]/str[@name='id'][.=6]"
, "//result/doc[2]/str[@name='id'][.=5]"
* test how various permutations of useDocValuesAsStored and maxCharsForDocValues interact
public void testUseDocValuesAsStored() throws Exception {
// Three phases: (1) a handful of types/fields must default to useDocValuesAsStored=false;
// (2) every type whose name ends in "_has_usedvs" must report true; (3) every field whose name
// ends in "_usedvs" is checked against its naming convention, then indexed with a long value
// that must either be rejected or be added and later found via the collected xpaths.
// NOTE(review): the increments of num_types_found / num_fields_found, the population of
// xpaths, and several closers (including the rest of the assertTrue call inside the "expect"
// loop) are not visible in this view -- confirm against the full file.
// the expected failure message is registered up front so it is not treated as unexpected
ignoreException("when useDocValuesAsStored=true \\(length=");
// first things first...
// unlike most field types, SortableTextField should default to useDocValuesAsStored==false
// (check a handful that should have the default behavior)
for (String n : Arrays.asList("keyword_stxt", "whitespace_max0_stxt", "whitespace_max6_stxt")) {
// each name is used both as a type name and as a field name
FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(n);
assertEquals("type " + ft.getTypeName() + " should not default to useDocValuesAsStored",
false, ft.useDocValuesAsStored()) ;
SchemaField sf = h.getCore().getLatestSchema().getField(n);
assertEquals("field " + sf.getName() + " should not default to useDocValuesAsStored",
false, sf.useDocValuesAsStored()) ;
// but it should be possible to set useDocValuesAsStored=true explicitly on types...
int num_types_found = 0;
for (Map.Entry<String,FieldType> entry : h.getCore().getLatestSchema().getFieldTypes().entrySet()) {
if (entry.getKey().endsWith("_has_usedvs")) {
FieldType ft = entry.getValue();
assertEquals("type " + ft.getTypeName() + " has unexpected useDocValuesAsStored value",
true, ft.useDocValuesAsStored()) ;
assertEquals("sanity check: wrong number of *_has_usedvs types found -- schema changed?",
2, num_types_found);
// ...and it should be possible to set/override useDocValuesAsStored=true on fields...
int num_fields_found = 0;
List<String> xpaths = new ArrayList<>(42);
for (Map.Entry<String,SchemaField> entry : h.getCore().getLatestSchema().getFields().entrySet()) {
if (entry.getKey().endsWith("_usedvs")) {
final SchemaField sf = entry.getValue();
final String name = sf.getName();
// some sanity check before we move on with the rest of our testing...
assertFalse("schema change? field should not be stored=true: " + name, sf.stored());
// XOR: the name must end in exactly one of the two suffixes
final boolean usedvs = name.endsWith("_has_usedvs");
assertTrue("schema change broke assumptions: field must be '*_has_usedvs' or '*_negates_usedvs': " +
name, usedvs ^ name.endsWith("_negates_usedvs"));
// XOR: the name must start with exactly one of the two prefixes
final boolean max6 = name.startsWith("max6_");
assertTrue("schema change broke assumptions: field must be 'max6_*' or 'max0_*': " +
name, max6 ^ name.startsWith("max0_"));
assertEquals("Unexpected useDocValuesAsStored value for field: " + name,
usedvs, sf.useDocValuesAsStored()) ;
// the running field count doubles as the document id for this field
final String docid = ""+num_fields_found;
if (usedvs && max6) {
// if useDocValuesAsStored==true and maxCharsForDocValues=N then longer values should fail
final String doc = adoc("id", docid, name, "apple pear orange");
SolrException ex = expectThrows(SolrException.class, () -> { assertU(doc); });
// the error message is expected to name both the field and the limit
for (String expect : Arrays.asList("field " + name,
"maxCharsForDocValues=6")) {
assertTrue("exception must mention " + expect + ": " + ex.getMessage(),
} else {
// otherwise (useDocValuesAsStored==false *OR* maxCharsForDocValues=0) any value
// should be fine when adding a doc and we should be able to search for it later...
final String val = docid + " apple pear orange " + BIG_CONST;
assertU(adoc("id", docid, name, val));
String doc_xpath = "//result/doc[str[@name='id'][.='"+docid+"']]";
if (usedvs) {
// ...and if it *does* usedvs, then we should definitely see our value when searching...
doc_xpath = doc_xpath + "[str[@name='"+name+"'][.='"+val+"']]";
} else {
// ...but if not, then we should definitely not see any value for our field...
doc_xpath = doc_xpath + "[not(str[@name='"+name+"'])]";
assertEquals("sanity check: wrong number of *_usedvs fields found -- schema changed?",
6, num_fields_found);
// check all our expected docs can be found (with the expected values)
assertQ(req("q", "*:*", "fl", "*"), xpaths.toArray(new String[xpaths.size()]));
* tests that a SortableTextField using KeywordTokenizer (w/docValues) behaves exactly the same as
* StrFields that it's copied to for querying and sorting
public void testRandomStrEquivalentBehavior() throws Exception {
// Indexes a random number of docs with random simple strings in "keyword_stxt" only, then
// checks that (a) an exact-value query for one remembered "magic" value finds the same doc
// through every field in test_fields, and (b) random id-range queries return identical
// results whichever of those fields is used as the primary sort.
// NOTE(review): the other fields in test_fields are never written directly here; presumably
// the schema copies keyword_stxt into them -- confirm. Several closers and the trailing
// arguments of the per-document assertEquals calls are also missing from this view.
final List<String> test_fields = Arrays.asList("keyword_stxt", "keyword_dv_stxt",
"keyword_s_dv", "keyword_s");
// we use embedded client instead of assertQ: we want to compare the responses from multiple requests
// NOTE(review): the client is deliberately left unclosed (resource warning suppressed);
// presumably because it wraps the shared test core -- confirm.
@SuppressWarnings("resource") final SolrClient client = new EmbeddedSolrServer(h.getCore());
final int numDocs = atLeast(100);
// id of the document whose value will be remembered and queried for later
final int magicIdx = TestUtil.nextInt(random(), 1, numDocs);
String magic = null;
for (int i = 1; i <= numDocs; i++) {
// ideally we'd test all "realistic" unicode string, but EmbeddedSolrServer uses XML request writer
// and has no option to change this so ctrl-characters break the request
final String val = TestUtil.randomSimpleString(random(), 100);
if (i == magicIdx) {
magic = val;
assertEquals(0, client.add(sdoc("id", ""+i, "keyword_stxt", val)).getStatus());
assertEquals(0, client.commit().getStatus());
// query for magic term should match same doc regardless of field (reminder: keyword tokenizer)
// (we need the filter in the unlikely event that the magic value was randomly picked twice)
for (String f : test_fields) {
final SolrDocumentList results = client.query(params("q", "{!field f="+f+" v=$v}",
"v", magic,
"fq", "id:" + magicIdx )).getResults();
assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + results,
1L, results.getNumFound());
final SolrDocument doc = results.get(0);
assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + doc,
""+magicIdx, doc.getFieldValue("id"));
assertEquals(f + ": Query ("+magic+") filtered by id: " + magicIdx + " ==> " + doc,
magic, doc.getFieldValue(f));
// do some random id range queries using each of the test fields for sorting. results should be identical
final int numQ = atLeast(10);
for (int i = 0; i < numQ; i++) {
// pick a random inclusive range 1 <= lo <= hi <= numDocs-1 and a random sort direction
final int hi = TestUtil.nextInt(random(), 1, numDocs-1);
final int lo = TestUtil.nextInt(random(), 1, hi);
final boolean fwd = random().nextBoolean();
// results from the previously tried sort field, used as the baseline for comparison
SolrDocumentList previous = null;
String prevField = null;
for (String f : test_fields) {
final SolrDocumentList results = client.query(params("q","id_i:["+lo+" TO "+hi+"]",
"sort", f + (fwd ? " asc" : " desc") +
// secondary on id for determinism
", id asc")
// an inclusive range [lo, hi] must match exactly hi - lo + 1 documents
assertEquals(results.toString(), (1L + hi - lo), results.getNumFound());
if (null != previous) {
assertEquals(prevField + " vs " + f,
previous.getNumFound(), results.getNumFound());
for (int d = 0; d < results.size(); d++) {
assertEquals(prevField + " vs " + f + ": " + d,
assertEquals(prevField + " vs " + f + ": " + d,
previous = results;
prevField = f;