/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.schema;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Field;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.schema.PreAnalyzedField.PreAnalyzedParser;
import org.junit.BeforeClass;
import org.junit.Test;

| public class PreAnalyzedFieldTest extends SolrTestCaseJ4 { |
| |
| private static final String[] valid = { |
| "1 one two three", // simple parsing |
| "1 one two three ", // spurious spaces |
| "1 one,s=123,e=128,i=22 two three,s=20,e=22,y=foobar", // attribs |
| "1 \\ one\\ \\,,i=22,a=\\, two\\=\n\r\t\\n,\\ =\\ \\", // escape madness |
| "1 ,i=22 ,i=33,s=2,e=20 , ", // empty token text, non-empty attribs |
| "1 =This is the stored part with \\= \n \\n \t \\t escapes.=one two three \u0001ąćęłńóśźż", // stored plus token stream |
| "1 ==", // empty stored, no token stream |
| "1 =this is a test.=", // stored + empty token stream |
| "1 one,p=deadbeef two,p=0123456789abcdef three" // payloads |
| }; |
| |
| private static final String[] validParsed = { |
| "1 one,s=0,e=3 two,s=4,e=7 three,s=8,e=13", |
| "1 one,s=1,e=4 two,s=6,e=9 three,s=12,e=17", |
| "1 one,i=22,s=123,e=128,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar", |
| "1 \\ one\\ \\,,i=22,s=0,e=6 two\\=\\n\\r\\t\\n,i=1,s=7,e=15 \\\\,i=1,s=17,e=18", |
| "1 i=22,s=0,e=0 i=33,s=2,e=20 i=1,s=2,e=2", |
| "1 =This is the stored part with = \n \\n \t \\t escapes.=one,s=0,e=3 two,s=4,e=7 three,s=8,e=13 \u0001ąćęłńóśźż,s=15,e=25", |
| "1 ==", |
| "1 =this is a test.=", |
| "1 one,p=deadbeef,s=0,e=3 two,p=0123456789abcdef,s=4,e=7 three,s=8,e=13" |
| }; |
| |
| private static final String[] invalidSimple = { |
| "one two three", // missing version # |
| "2 one two three", // invalid version # |
| "1 o,ne two", // missing escape |
| "1 one t=wo", // missing escape |
| "1 one,, two", // missing attribs, unescaped comma |
| "1 one,s ", // missing attrib value |
| "1 one,s= val", // missing attrib value, unescaped space |
| "1 one,s=,val", // unescaped comma |
| "1 =", // unescaped equals |
| "1 =stored ", // unterminated stored |
| "1 ===" // empty stored (ok), but unescaped = in token stream |
| }; |
| |
| private static final String validJson |
| = json("{'v':'1','str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]}"); |
| |
| private static final String[] invalidJson = { |
| json("'v':'1','str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]"), // missing enclosing object |
| json("{'str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]}"), // missing version # |
| json("{'v':'2','str':'stored-value','tokens':[{'t':'a'},{'t':'b'},{'t':'c'}]}"), // invalid version # |
| json("{'v':'1','str':'stored-value','tokens':[{}]}"), // single token no attribs |
| json("{'v':'1','str':'stored-value','tokens':[{'t'}]}"), // missing attrib value |
| }; |
| |
| SchemaField field = null; |
| int props = |
| FieldProperties.INDEXED | FieldProperties.STORED; |
| |
| @BeforeClass |
| public static void beforeClass() throws Exception { |
| initCore("solrconfig-minimal.xml","schema-preanalyzed.xml"); |
| } |
| |
| @Override |
| public void setUp() throws Exception { |
| super.setUp(); |
| field = new SchemaField("content", new TextField(), props, null); |
| } |
| |
| @Test |
| public void testValidSimple() { |
| PreAnalyzedField paf = new PreAnalyzedField(); |
| // use Simple format |
| HashMap<String,String> args = new HashMap<>(); |
| args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName()); |
| paf.init(h.getCore().getLatestSchema(), args); |
| PreAnalyzedParser parser = new SimplePreAnalyzedParser(); |
| for (int i = 0; i < valid.length; i++) { |
| String s = valid[i]; |
| try { |
| Field f = (Field)paf.fromString(field, s); |
| //System.out.println(" - toString: '" + sb.toString() + "'"); |
| assertEquals(validParsed[i], parser.toFormattedString(f)); |
| } catch (Exception e) { |
| e.printStackTrace(); |
| fail("Should pass: '" + s + "', exception: " + e); |
| } |
| } |
| } |
| |
| private String addTwoDocs(int firstId, String field) { |
| return "<add>\n" |
| + doc("id", Integer.toString(firstId), field, |
| json("{'v':'1','str':'document one','tokens':[{'t':'one'},{'t':'two'},{'t':'three','i':100}]}")) |
| + doc("id", Integer.toString(firstId + 1), field, |
| json("{'v':'1','str':'document two','tokens':[{'t':'eleven'},{'t':'twelve'},{'t':'thirteen','i':110}]}")) |
| + "</add>\n"; |
| } |
| |
| @Test |
| public void testIndexAndQueryNoSchemaAnalyzer() throws Exception { |
| assertU(addTwoDocs(1, "pre_no_analyzer")); |
| assertU(commit()); |
| assertQ(req("q", "id:(1 2)", "sort", "id asc") |
| ,"//result[@numFound='2']" |
| ,"//result/doc[1]/str[@name='id'][.='1']" |
| ,"//result/doc[1]/str[@name='pre_no_analyzer'][.='document one']" |
| ,"//result/doc[2]/str[@name='id'][.='2']" |
| ,"//result/doc[2]/str[@name='pre_no_analyzer'][.='document two']" |
| ); |
| assertQ(req("q", "{!field f='pre_no_analyzer'}{'v':'1','tokens':[{'t':'two'}]}") |
| ,"//result[@numFound='1']" |
| ); |
| assertQ(req("q", "{!field f='pre_no_analyzer'}{'v':'1','tokens':[{'t':'eleven'},{'t':'twelve'}]}") |
| ,"//result[@numFound='1']" |
| ); |
| } |
| |
| @Test |
| public void testIndexAndQueryWithSchemaAnalyzer() { |
| assertU(addTwoDocs(3, "pre_with_analyzer")); |
| assertU(commit()); |
| assertQ(req("q", "id:(3 4)", "sort", "id asc") |
| ,"//result[@numFound='2']" |
| ,"//result/doc[1]/str[@name='id'][.='3']" |
| ,"//result/doc[1]/str[@name='pre_with_analyzer'][.='document one']" |
| ,"//result/doc[2]/str[@name='id'][.='4']" |
| ,"//result/doc[2]/str[@name='pre_with_analyzer'][.='document two']" |
| ); |
| assertQ(req("q", "pre_with_analyzer:(+two +three)"), "//result[@numFound='1']"); |
| assertQ(req("q", "pre_with_analyzer:(+eleven +twelve)"), "//result[@numFound='1']"); |
| } |
| |
| @Test |
| public void testIndexAndQueryWithSchemaQueryAnalyzer() { |
| assertU(addTwoDocs(5, "pre_with_query_analyzer")); |
| assertU(commit()); |
| assertQ(req("q", "id:(5 6)", "sort", "id asc") |
| ,"//result[@numFound='2']" |
| ,"//result/doc[1]/str[@name='id'][.='5']" |
| ,"//result/doc[1]/str[@name='pre_with_query_analyzer'][.='document one']" |
| ,"//result/doc[2]/str[@name='id'][.='6']" |
| ,"//result/doc[2]/str[@name='pre_with_query_analyzer'][.='document two']" |
| ); |
| assertQ(req("q", "pre_with_query_analyzer:one,two"), "//result[@numFound='1']"); |
| assertQ(req("q", "pre_with_query_analyzer:eleven,twelve"), "//result[@numFound='1']"); |
| } |
| |
| @Test |
| public void testInvalidSimple() { |
| PreAnalyzedField paf = new PreAnalyzedField(); |
| paf.init(h.getCore().getLatestSchema(), Collections.<String,String>emptyMap()); |
| for (String s : invalidSimple) { |
| try { |
| paf.fromString(field, s); |
| fail("should fail: '" + s + "'"); |
| } catch (Exception e) { |
| // |
| } |
| } |
| } |
| |
| public void testInvalidJson() throws Exception { |
| PreAnalyzedField paf = new PreAnalyzedField(); |
| paf.init(h.getCore().getLatestSchema(), Collections.emptyMap()); |
| Analyzer preAnalyzer = paf.getIndexAnalyzer(); |
| for (String s: invalidJson) { |
| TokenStream stream = null; |
| try { |
| stream = preAnalyzer.tokenStream("dummy", s); |
| stream.reset(); // exception should be triggered here. |
| fail("should fail: '" + s + "'"); |
| } catch (Exception e) { |
| // expected |
| } finally { |
| if (stream != null) { |
| stream.close(); |
| } |
| } |
| } |
| // make sure the analyzer can now handle properly formatted input |
| TokenStream stream = preAnalyzer.tokenStream("dummy", validJson); |
| CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class); |
| stream.reset(); |
| while (stream.incrementToken()) { |
| assertFalse("zero-length token", termAttr.length() == 0); |
| } |
| stream.end(); |
| stream.close(); |
| } |
| |
| // "1 =test ąćęłńóśźż \u0001=one,i=22,s=123,e=128,p=deadbeef,y=word two,i=1,s=5,e=8,y=word three,i=1,s=20,e=22,y=foobar" |
| |
| private static final String jsonValid = "{\"v\":\"1\",\"str\":\"test ąćęłńóśźż\",\"tokens\":[" + |
| "{\"e\":128,\"i\":22,\"p\":\"DQ4KDQsODg8=\",\"s\":123,\"t\":\"one\",\"y\":\"word\"}," + |
| "{\"e\":8,\"i\":1,\"s\":5,\"t\":\"two\",\"y\":\"word\"}," + |
| "{\"e\":22,\"i\":1,\"s\":20,\"t\":\"three\",\"y\":\"foobar\"}" + |
| "]}"; |
| |
| @Test |
| public void testParsers() throws Exception { |
| PreAnalyzedField paf = new PreAnalyzedField(); |
| // use Simple format |
| HashMap<String,String> args = new HashMap<>(); |
| args.put(PreAnalyzedField.PARSER_IMPL, SimplePreAnalyzedParser.class.getName()); |
| paf.init(h.getCore().getLatestSchema(), args); |
| { |
| Field f = (Field)paf.fromString(field, valid[0]); |
| } |
| |
| // use JSON format |
| args.put(PreAnalyzedField.PARSER_IMPL, JsonPreAnalyzedParser.class.getName()); |
| paf.init(h.getCore().getLatestSchema(), args); |
| expectThrows(Exception.class, () -> paf.fromString(field, valid[0])); |
| |
| byte[] deadbeef = new byte[]{(byte)0xd, (byte)0xe, (byte)0xa, (byte)0xd, (byte)0xb, (byte)0xe, (byte)0xe, (byte)0xf}; |
| PreAnalyzedParser parser = new JsonPreAnalyzedParser(); |
| |
| { |
| Field f = (Field)paf.fromString(field, jsonValid); |
| assertEquals(jsonValid, parser.toFormattedString(f)); |
| } |
| } |
| } |