| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.solr.update; |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.index.IndexableField; |
| import org.apache.lucene.index.AtomicReader; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| import org.apache.lucene.search.similarities.DefaultSimilarity; |
| import org.apache.solr.SolrTestCaseJ4; |
| import org.apache.solr.common.params.CommonParams; |
| import org.apache.solr.common.SolrException; |
| import org.apache.solr.common.SolrInputDocument; |
| import org.apache.solr.common.SolrInputField; |
| import org.apache.solr.core.SolrCore; |
| import org.apache.solr.search.SolrIndexSearcher; |
| import org.apache.solr.search.DocList; |
| import org.apache.solr.schema.FieldType; |
| import org.apache.solr.schema.IndexSchema; |
| import org.apache.solr.request.SolrQueryRequest; |
| import org.apache.solr.response.SolrQueryResponse; |
| import org.apache.solr.response.ResultContext; |
| |
| import org.junit.BeforeClass; |
| import org.junit.Test; |
| |
| /** |
| * |
| * |
| */ |
| public class DocumentBuilderTest extends SolrTestCaseJ4 { |
| |
| @BeforeClass |
| public static void beforeClass() throws Exception { |
| initCore("solrconfig.xml", "schema.xml"); |
| } |
| |
| @Test |
| public void testBuildDocument() throws Exception |
| { |
| SolrCore core = h.getCore(); |
| |
| // undefined field |
| try { |
| SolrInputDocument doc = new SolrInputDocument(); |
| doc.setField( "unknown field", 12345, 1.0f ); |
| DocumentBuilder.toDocument( doc, core.getLatestSchema() ); |
| fail( "should throw an error" ); |
| } |
| catch( SolrException ex ) { |
| assertEquals( "should be bad request", 400, ex.code() ); |
| } |
| } |
| |
| @Test |
| public void testNullField() |
| { |
| SolrCore core = h.getCore(); |
| |
| // make sure a null value is not indexed |
| SolrInputDocument doc = new SolrInputDocument(); |
| doc.addField( "name", null, 1.0f ); |
| Document out = DocumentBuilder.toDocument( doc, core.getLatestSchema() ); |
| assertNull( out.get( "name" ) ); |
| } |
| |
| @Test |
| public void testExceptions() |
| { |
| SolrCore core = h.getCore(); |
| |
| // make sure a null value is not indexed |
| SolrInputDocument doc = new SolrInputDocument(); |
| doc.addField( "id", "123", 1.0f ); |
| doc.addField( "unknown", "something", 1.0f ); |
| try { |
| DocumentBuilder.toDocument( doc, core.getLatestSchema() ); |
| fail( "added an unknown field" ); |
| } |
| catch( Exception ex ) { |
| assertTrue( "should have document ID", ex.getMessage().indexOf( "doc=123" ) > 0 ); |
| } |
| doc.remove( "unknown" ); |
| |
| |
| doc.addField( "weight", "not a number", 1.0f ); |
| try { |
| DocumentBuilder.toDocument( doc, core.getLatestSchema() ); |
| fail( "invalid 'float' field value" ); |
| } |
| catch( Exception ex ) { |
| assertTrue( "should have document ID", ex.getMessage().indexOf( "doc=123" ) > 0 ); |
| assertTrue( "cause is number format", ex.getCause() instanceof NumberFormatException ); |
| } |
| |
| // now make sure it is OK |
| doc.setField( "weight", "1.34", 1.0f ); |
| DocumentBuilder.toDocument( doc, core.getLatestSchema() ); |
| } |
| |
| @Test |
| public void testMultiField() throws Exception { |
| SolrCore core = h.getCore(); |
| |
| // make sure a null value is not indexed |
| SolrInputDocument doc = new SolrInputDocument(); |
| doc.addField( "home", "2.2,3.3", 1.0f ); |
| Document out = DocumentBuilder.toDocument( doc, core.getLatestSchema() ); |
| assertNotNull( out.get( "home" ) );//contains the stored value and term vector, if there is one |
| assertNotNull( out.getField( "home_0" + FieldType.POLY_FIELD_SEPARATOR + "double" ) ); |
| assertNotNull( out.getField( "home_1" + FieldType.POLY_FIELD_SEPARATOR + "double" ) ); |
| } |
| |
| @Test |
| public void testCopyFieldWithDocumentBoost() { |
| SolrCore core = h.getCore(); |
| IndexSchema schema = core.getLatestSchema(); |
| assertFalse(schema.getField("title").omitNorms()); |
| assertTrue(schema.getField("title_stringNoNorms").omitNorms()); |
| SolrInputDocument doc = new SolrInputDocument(); |
| doc.setDocumentBoost(3f); |
| doc.addField( "title", "mytitle"); |
| Document out = DocumentBuilder.toDocument( doc, schema ); |
| assertNotNull( out.get( "title_stringNoNorms" ) ); |
| assertTrue("title_stringNoNorms has the omitNorms attribute set to true, if the boost is different than 1.0, it will fail",1.0f == out.getField( "title_stringNoNorms" ).boost() ); |
| assertTrue("It is OK that title has a boost of 3",3.0f == out.getField( "title" ).boost() ); |
| } |
| |
| |
| @Test |
| public void testCopyFieldWithFieldBoost() { |
| SolrCore core = h.getCore(); |
| IndexSchema schema = core.getLatestSchema(); |
| assertFalse(schema.getField("title").omitNorms()); |
| assertTrue(schema.getField("title_stringNoNorms").omitNorms()); |
| SolrInputDocument doc = new SolrInputDocument(); |
| doc.addField( "title", "mytitle", 3.0f ); |
| Document out = DocumentBuilder.toDocument( doc, schema ); |
| assertNotNull( out.get( "title_stringNoNorms" ) ); |
| assertTrue("title_stringNoNorms has the omitNorms attribute set to true, if the boost is different than 1.0, it will fail",1.0f == out.getField( "title_stringNoNorms" ).boost() ); |
| assertTrue("It is OK that title has a boost of 3",3.0f == out.getField( "title" ).boost() ); |
| } |
| |
| @Test |
| public void testWithPolyFieldsAndFieldBoost() { |
| SolrCore core = h.getCore(); |
| IndexSchema schema = core.getLatestSchema(); |
| assertFalse(schema.getField("store").omitNorms()); |
| assertTrue(schema.getField("store_0_coordinate").omitNorms()); |
| assertTrue(schema.getField("store_1_coordinate").omitNorms()); |
| assertFalse(schema.getField("amount").omitNorms()); |
| assertTrue(schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").omitNorms()); |
| assertTrue(schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").omitNorms()); |
| |
| SolrInputDocument doc = new SolrInputDocument(); |
| doc.addField( "store", "40.7143,-74.006", 3.0f ); |
| doc.addField( "amount", "10.5", 3.0f ); |
| Document out = DocumentBuilder.toDocument( doc, schema ); |
| assertNotNull( out.get( "store" ) ); |
| assertNotNull( out.get( "amount" ) ); |
| assertNotNull(out.getField("store_0_coordinate")); |
| //NOTE: As the subtypes have omitNorm=true, they must have boost=1F, otherwise this is going to fail when adding the doc to Lucene. |
| assertTrue(1f == out.getField("store_0_coordinate").boost()); |
| assertTrue(1f == out.getField("store_1_coordinate").boost()); |
| assertTrue(1f == out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").boost()); |
| assertTrue(1f == out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").boost()); |
| } |
| |
| @Test |
| public void testWithPolyFieldsAndDocumentBoost() { |
| SolrCore core = h.getCore(); |
| IndexSchema schema = core.getLatestSchema(); |
| assertFalse(schema.getField("store").omitNorms()); |
| assertTrue(schema.getField("store_0_coordinate").omitNorms()); |
| assertTrue(schema.getField("store_1_coordinate").omitNorms()); |
| assertFalse(schema.getField("amount").omitNorms()); |
| assertTrue(schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").omitNorms()); |
| assertTrue(schema.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").omitNorms()); |
| |
| SolrInputDocument doc = new SolrInputDocument(); |
| doc.setDocumentBoost(3.0f); |
| doc.addField( "store", "40.7143,-74.006"); |
| doc.addField( "amount", "10.5"); |
| Document out = DocumentBuilder.toDocument( doc, schema ); |
| assertNotNull( out.get( "store" ) ); |
| assertNotNull(out.getField("store_0_coordinate")); |
| //NOTE: As the subtypes have omitNorm=true, they must have boost=1F, otherwise this is going to fail when adding the doc to Lucene. |
| assertTrue(1f == out.getField("store_0_coordinate").boost()); |
| assertTrue(1f == out.getField("store_1_coordinate").boost()); |
| assertTrue(1f == out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_currency").boost()); |
| assertTrue(1f == out.getField("amount" + FieldType.POLY_FIELD_SEPARATOR + "_amount_raw").boost()); |
| } |
| |
| /** |
| * Its ok to boost a field if it has norms |
| */ |
| public void testBoost() throws Exception { |
| XmlDoc xml = new XmlDoc(); |
| xml.xml = "<doc>" |
| + "<field name=\"id\">0</field>" |
| + "<field name=\"title\" boost=\"3.0\">mytitle</field>" |
| + "</doc>"; |
| assertNull(h.validateUpdate(add(xml, new String[0]))); |
| } |
| |
| public void testMultiValuedFieldAndDocBoosts() throws Exception { |
| SolrCore core = h.getCore(); |
| IndexSchema schema = core.getLatestSchema(); |
| SolrInputDocument doc = new SolrInputDocument(); |
| doc.setDocumentBoost(3.0f); |
| SolrInputField field = new SolrInputField( "foo_t" ); |
| field.addValue( "summer time" , 1.0f ); |
| field.addValue( "in the city" , 5.0f ); // using boost |
| field.addValue( "living is easy" , 1.0f ); |
| doc.put( field.getName(), field ); |
| |
| Document out = DocumentBuilder.toDocument( doc, schema ); |
| IndexableField[] outF = out.getFields( field.getName() ); |
| assertEquals("wrong number of field values", |
| 3, outF.length); |
| |
| // since Lucene no longer has native documnt boosts, we should find |
| // the doc boost multiplied into the boost o nthe first field value |
| // all other field values should be 1.0f |
| // (lucene will multiply all of the field boosts later) |
| assertEquals(15.0f, outF[0].boost(), 0.0f); |
| assertEquals(1.0f, outF[1].boost(), 0.0f); |
| assertEquals(1.0f, outF[2].boost(), 0.0f); |
| |
| } |
| |
| public void testCopyFieldsAndFieldBoostsAndDocBoosts() throws Exception { |
| SolrCore core = h.getCore(); |
| IndexSchema schema = core.getLatestSchema(); |
| SolrInputDocument doc = new SolrInputDocument(); |
| |
| final float DOC_BOOST = 3.0F; |
| doc.setDocumentBoost(DOC_BOOST); |
| doc.addField("id", "42"); |
| |
| SolrInputField inTitle = new SolrInputField( "title" ); |
| inTitle.addValue( "titleA" , 2.0F ); |
| inTitle.addValue( "titleB" , 7.0F ); |
| final float TITLE_BOOST = 2.0F * 7.0F; |
| assertEquals(TITLE_BOOST, inTitle.getBoost(), 0.0F); |
| doc.put( inTitle.getName(), inTitle ); |
| |
| SolrInputField inFoo = new SolrInputField( "foo_t" ); |
| inFoo.addValue( "summer time" , 1.0F ); |
| inFoo.addValue( "in the city" , 5.0F ); |
| inFoo.addValue( "living is easy" , 11.0F ); |
| final float FOO_BOOST = 1.0F * 5.0F * 11.0F; |
| assertEquals(FOO_BOOST, inFoo.getBoost(), 0.0F); |
| doc.put( inFoo.getName(), inFoo ); |
| |
| Document out = DocumentBuilder.toDocument( doc, schema ); |
| |
| IndexableField[] outTitle = out.getFields( inTitle.getName() ); |
| assertEquals("wrong number of title values", |
| 2, outTitle.length); |
| |
| IndexableField[] outNoNorms = out.getFields( "title_stringNoNorms" ); |
| assertEquals("wrong number of nonorms values", |
| 2, outNoNorms.length); |
| |
| IndexableField[] outFoo = out.getFields( inFoo.getName() ); |
| assertEquals("wrong number of foo values", |
| 3, outFoo.length); |
| |
| IndexableField[] outText = out.getFields( "text" ); |
| assertEquals("wrong number of text values", |
| 5, outText.length); |
| |
| // since Lucene no longer has native document boosts, we should find |
| // the doc boost multiplied into the boost on the first field value |
| // of each field. All other field values should be 1.0f |
| // (lucene will multiply all of the field value boosts later) |
| assertEquals(TITLE_BOOST * DOC_BOOST, outTitle[0].boost(), 0.0F); |
| assertEquals(1.0F, outTitle[1].boost(), 0.0F); |
| // |
| assertEquals(FOO_BOOST * DOC_BOOST, outFoo[0].boost(), 0.0F); |
| assertEquals(1.0F, outFoo[1].boost(), 0.0F); |
| assertEquals(1.0F, outFoo[2].boost(), 0.0F); |
| // |
| assertEquals(TITLE_BOOST * DOC_BOOST, outText[0].boost(), 0.0F); |
| assertEquals(1.0F, outText[1].boost(), 0.0F); |
| assertEquals(FOO_BOOST, outText[2].boost(), 0.0F); |
| assertEquals(1.0F, outText[3].boost(), 0.0F); |
| assertEquals(1.0F, outText[4].boost(), 0.0F); |
| |
| // copyField dest with no norms should not have recieved any boost |
| assertEquals(1.0F, outNoNorms[0].boost(), 0.0F); |
| assertEquals(1.0F, outNoNorms[1].boost(), 0.0F); |
| |
| // now index that SolrInputDocument to check the computed norms |
| |
| assertU(adoc(doc)); |
| assertU(commit()); |
| |
| SolrQueryRequest req = req("q", "id:42"); |
| try { |
| // very hack-ish |
| |
| SolrQueryResponse rsp = new SolrQueryResponse(); |
| core.execute(core.getRequestHandler(req.getParams().get(CommonParams.QT)), req, rsp); |
| |
| DocList dl = ((ResultContext) rsp.getValues().get("response")).docs; |
| assertTrue("can't find the doc we just added", 1 == dl.size()); |
| int docid = dl.iterator().nextDoc(); |
| |
| SolrIndexSearcher searcher = req.getSearcher(); |
| AtomicReader reader = SlowCompositeReaderWrapper.wrap(searcher.getTopReaderContext().reader()); |
| |
| assertTrue("similarity doesn't extend DefaultSimilarity, " + |
| "config or defaults have changed since test was written", |
| searcher.getSimilarity() instanceof DefaultSimilarity); |
| |
| DefaultSimilarity sim = (DefaultSimilarity) searcher.getSimilarity(); |
| |
| NumericDocValues titleNorms = reader.getNormValues("title"); |
| NumericDocValues fooNorms = reader.getNormValues("foo_t"); |
| NumericDocValues textNorms = reader.getNormValues("text"); |
| |
| assertEquals(expectedNorm(sim, 2, TITLE_BOOST * DOC_BOOST), |
| titleNorms.get(docid)); |
| |
| assertEquals(expectedNorm(sim, 8-3, FOO_BOOST * DOC_BOOST), |
| fooNorms.get(docid)); |
| |
| assertEquals(expectedNorm(sim, 2 + 8-3, |
| TITLE_BOOST * FOO_BOOST * DOC_BOOST), |
| textNorms.get(docid)); |
| |
| } finally { |
| req.close(); |
| } |
| } |
| |
| /** |
| * Given a length, and boost returns the expected encoded norm |
| */ |
| private static byte expectedNorm(final DefaultSimilarity sim, |
| final int length, final float boost) { |
| |
| return sim.encodeNormValue(boost / ((float) Math.sqrt(length))); |
| |
| } |
| |
| |
| public void testBoostOmitNorms() throws Exception { |
| XmlDoc xml = new XmlDoc(); |
| // explicitly boosting a field if that omits norms is not ok |
| xml.xml = "<doc>" |
| + "<field name=\"id\">ignore_exception</field>" |
| + "<field name=\"title_stringNoNorms\" boost=\"3.0\">mytitle</field>" |
| + "</doc>"; |
| try { |
| assertNull(h.validateUpdate(add(xml, new String[0]))); |
| fail("didn't get expected exception for boosting omit norms field"); |
| } catch (SolrException expected) { |
| // expected exception |
| } |
| // boosting a field that is copied to another field that omits norms is ok |
| xml.xml = "<doc>" |
| + "<field name=\"id\">42</field>" |
| + "<field name=\"title\" boost=\"3.0\">mytitle</field>" |
| + "</doc>"; |
| assertNull(h.validateUpdate(add(xml, new String[0]))); |
| } |
| |
| /** |
| * Its ok to supply a document boost even if a field omits norms |
| */ |
| public void testDocumentBoostOmitNorms() throws Exception { |
| XmlDoc xml = new XmlDoc(); |
| xml.xml = "<doc boost=\"3.0\">" |
| + "<field name=\"id\">2</field>" |
| + "<field name=\"title_stringNoNorms\">mytitle</field>" |
| + "</doc>"; |
| assertNull(h.validateUpdate(add(xml, new String[0]))); |
| } |
| |
| } |