| <?xml version="1.0" ?> |
| |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <config> |
| <!-- Lucene compatibility version: read from the tests.luceneMatchVersion property, falling back to LATEST. --> |
| <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion> |
| <!-- Index settings are pulled in via XInclude from a separate snippet file. --> |
| <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="solrconfig.snippet.randomindexconfig.xml"/> |
| <!-- Request handlers registered at /select and /update; neither carries any nested configuration. --> |
| <requestHandler name="/select" class="solr.SearchHandler"/> |
| <requestHandler name="/update" class="solr.UpdateRequestHandler"/> |
| <!-- Directory factory class: read from the solr.directoryFactory property, falling back to solr.RAMDirectoryFactory. --> |
| <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/> |
| <!-- Schema factory; presumably loads the schema from a classic schema file in the config set (verify). --> |
| <schemaFactory class="ClassicIndexSchemaFactory"/> |
| |
| <!-- Chain "extract-single": one processor whose "source" is a single plain field name (source1_s) and whose "dest" is a literal field name (dest_s). --> |
| <updateRequestProcessorChain name="extract-single"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <!-- Model file and analysis field type; presumably resolved from the test config set and its schema, which are not visible here (verify). --> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <str name="source">source1_s</str> |
| <str name="dest">dest_s</str> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- Chain "extract-single-regex": one plain source field (source1_s); the destination is given as a pattern/replacement pair rather than a literal name. --> |
| <updateRequestProcessorChain name="extract-single-regex"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <str name="source">source1_s</str> |
| <!-- The pattern captures the "_s" suffix as group 1 and the replacement reuses it as $1; presumably applied to the source field name, so source1_s would map to dest_s (confirm against the factory). --> |
| <lst name="dest"> |
| <str name="pattern">source\d(_s)</str> |
| <str name="replacement">dest$1</str> |
| </lst> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- Chain "extract-multi": two source fields listed as repeated <str name="source"> entries (source1_s, source2_s) with one literal destination field (dest_s). --> |
| <updateRequestProcessorChain name="extract-multi"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <!-- The "source" key is deliberately repeated, once per field name. --> |
| <str name="source">source1_s</str> |
| <str name="source">source2_s</str> |
| <str name="dest">dest_s</str> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- Chain "extract-multi-regex": two source fields listed as repeated <str name="source"> entries; the destination is given as a pattern/replacement pair. --> |
| <updateRequestProcessorChain name="extract-multi-regex"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <str name="source">source1_s</str> |
| <str name="source">source2_s</str> |
| <!-- Group 1 of the pattern is the "_s" suffix, reused as $1 in the replacement; presumably both source names map to dest_s (confirm against the factory). --> |
| <lst name="dest"> |
| <str name="pattern">source\d(_s)</str> |
| <str name="replacement">dest$1</str> |
| </lst> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- Chain "extract-array": two source fields listed inside a single <arr name="source"> element (source1_s, source2_s) with one literal destination field (dest_s). --> |
| <updateRequestProcessorChain name="extract-array"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <!-- Array form of "source": the field names are unnamed <str> children of the <arr>. --> |
| <arr name="source"> |
| <str>source1_s</str> |
| <str>source2_s</str> |
| </arr> |
| <str name="dest">dest_s</str> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- Chain "extract-array-regex": two source fields listed inside a single <arr name="source"> element; the destination is given as a pattern/replacement pair. --> |
| <updateRequestProcessorChain name="extract-array-regex"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <arr name="source"> |
| <str>source1_s</str> |
| <str>source2_s</str> |
| </arr> |
| <!-- Group 1 of the pattern is the "_s" suffix, reused as $1 in the replacement; presumably both source names map to dest_s (confirm against the factory). --> |
| <lst name="dest"> |
| <str name="pattern">source\d(_s)</str> |
| <str name="replacement">dest$1</str> |
| </lst> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- Chain "extract-selector": "source" is a <lst> selector (a fieldRegex plus a nested exclude) instead of explicit field names; the destination is the literal field dest_s. --> |
| <updateRequestProcessorChain name="extract-selector"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <!-- Presumably selects fields whose names match source\d_.* except those matching source0_.* (confirm selector semantics against the factory). --> |
| <lst name="source"> |
| <str name="fieldRegex">source\d_.*</str> |
| <lst name="exclude"> |
| <str name="fieldRegex">source0_.*</str> |
| </lst> |
| </lst> |
| <str name="dest">dest_s</str> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- Chain "extract-selector-regex": "source" is a <lst> selector (a fieldRegex plus a nested exclude) and the destination is given as a pattern/replacement pair. --> |
| <updateRequestProcessorChain name="extract-selector-regex"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <!-- Presumably selects fields whose names match source\d_.* except those matching source0_.* (confirm selector semantics against the factory). --> |
| <lst name="source"> |
| <str name="fieldRegex">source\d_.*</str> |
| <lst name="exclude"> |
| <str name="fieldRegex">source0_.*</str> |
| </lst> |
| </lst> |
| <!-- Group 1 of the pattern is the "_s" suffix, reused as $1 in the replacement. --> |
| <lst name="dest"> |
| <str name="pattern">source\d(_s)</str> |
| <str name="replacement">dest$1</str> |
| </lst> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- Chain "extract-regex-replaceall": source fields are selected by the regex foo.*; the destination pattern x(\d) is not anchored, so it can match more than once inside one field name. --> |
| <updateRequestProcessorChain name="extract-regex-replaceall"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <lst name="source"> |
| <str name="fieldRegex">foo.*</str> |
| </lst> |
| <lst name="dest"> |
| <!-- unbounded pattern that can be replaced multiple times in field name --> |
| <!-- Each "x" followed by a digit is rewritten to "y" plus that same digit (group 1). --> |
| <str name="pattern">x(\d)</str> |
| <str name="replacement">y$1</str> |
| </lst> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- Chain "extract-regex-replaceall-with-entity-type": source fields are selected by the regex foo.*; the destination replacement combines an {EntityType} token with the unanchored pattern x(\d). --> |
| <updateRequestProcessorChain name="extract-regex-replaceall-with-entity-type"> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <lst name="source"> |
| <str name="fieldRegex">foo.*</str> |
| </lst> |
| <lst name="dest"> |
| <!-- unbounded pattern that can be replaced multiple times in field name --> |
| <str name="pattern">x(\d)</str> |
| <!-- NOTE(review): {EntityType} looks like a placeholder the processor fills in with the type of each extracted entity; confirm against the factory javadocs. --> |
| <str name="replacement">{EntityType}_y$1</str> |
| </lst> |
| </processor> |
| </updateRequestProcessorChain> |
| |
| <!-- example used in OpenNLPExtractNamedEntitiesUpdateProcessorFactory javadocs --> |
| <!-- Chain "multiple-extract": five processors in sequence, each showing a different way to configure "source" and "dest". All of them use the "solr." class shorthand used by the other chains in this file. --> |
| <updateRequestProcessorChain name="multiple-extract"> |
| <!-- 1: one plain source field (text) with a literal destination field (people_s). --> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <str name="source">text</str> |
| <str name="dest">people_s</str> |
| </processor> |
| <!-- 2: source fields listed in an <arr> (title, subtitle) with a literal destination field (titular_people). --> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <arr name="source"> |
| <str>title</str> |
| <str>subtitle</str> |
| </arr> |
| <str name="dest">titular_people</str> |
| </processor> |
| <!-- 3: source given as a selector: fieldRegex .*_txt$ with an exclude on the field name notes_txt; literal destination field (people_s). --> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <lst name="source"> |
| <str name="fieldRegex">.*_txt$</str> |
| <lst name="exclude"> |
| <str name="fieldName">notes_txt</str> |
| </lst> |
| </lst> |
| <str name="dest">people_s</str> |
| </processor> |
| <!-- 4: source selected by the anchored regex ^desc(.*)s$; the destination reuses group 1 of the same pattern in key_desc$1_people. --> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <lst name="source"> |
| <str name="fieldRegex">^desc(.*)s$</str> |
| </lst> |
| <lst name="dest"> |
| <str name="pattern">^desc(.*)s$</str> |
| <str name="replacement">key_desc$1_people</str> |
| </lst> |
| </processor> |
| <!-- 5: one plain source field (summary); the literal destination name contains an {EntityType} token. NOTE(review): presumably replaced with the type of each extracted entity; confirm against the factory javadocs. --> |
| <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"> |
| <str name="modelFile">en-test-ner.bin</str> |
| <str name="analyzerFieldType">opennlp-en-tokenization</str> |
| <str name="source">summary</str> |
| <str name="dest">summary_{EntityType}_s</str> |
| </processor> |
| </updateRequestProcessorChain> |
| </config> |