| <?xml version="1.0" encoding="UTF-8" ?> |
| <!-- |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| |
| <schema name="example-DIH-atom" version="1.6"> |
| <!-- Unique key: documents are identified by the required "id" field declared below. --> |
| <uniqueKey>id</uniqueKey> |
| |
| <!-- Explicit document fields; each one is both indexed and stored. --> |
| <field name="id" type="string" |
|        stored="true" indexed="true" required="true"/> |
| <!-- Analyzed text (text_en_splitting chain). --> |
| <field name="title" type="text_en_splitting" |
|        stored="true" indexed="true"/> |
| <field name="author" type="string" |
|        stored="true" indexed="true"/> |
| <!-- The only explicit document field here that may hold several values per document. --> |
| <field name="category" type="string" |
|        stored="true" indexed="true" multiValued="true"/> |
| <field name="link" type="string" |
|        stored="true" indexed="true"/> |
| <!-- Analyzed text (text_en_splitting chain). --> |
| <field name="summary" type="text_en_splitting" |
|        stored="true" indexed="true"/> |
| <!-- Integer value (pint, i.e. solr.IntPointField in this schema). --> |
| <field name="rank" type="pint" |
|        stored="true" indexed="true"/> |
| |
| <!-- Any field whose name ends in "_dt" is typed as pdate (solr.DatePointField in this schema). --> |
| <dynamicField name="*_dt" type="pdate" |
|               stored="true" indexed="true"/> |
| |
| <!-- Catch-all search field: the copyField rules in this schema aggregate every |
|      "useful to search as text" field into it. It receives values from several |
|      source fields (hence multiValued) and is index-only (stored="false"). --> |
| <field name="text" type="text_en_splitting" |
|        multiValued="true" stored="false" indexed="true"/> |
| |
| <!-- Index-only field analyzed with the url_only type; a copyField rule in this |
|      schema fills it from "summary". --> |
| <field name="urls" type="url_only" |
|        stored="false" indexed="true"/> |
| |
| |
| <!-- Feed the catch-all "text" field from the searchable source fields. --> |
| <copyField dest="text" source="id"/> |
| <copyField dest="text" source="title"/> |
| <copyField dest="text" source="author"/> |
| <copyField dest="text" source="category"/> |
| <copyField dest="text" source="summary"/> |
| |
| <!-- "summary" is additionally copied into "urls", whose analyzer extracts the URLs for faceting. --> |
| <copyField dest="urls" source="summary"/> |
| |
| <!-- Primitive (non-analyzed) field types; docValues are enabled on all three. --> |
| <!-- string: solr.StrField, with sortMissingLast switched on. --> |
| <fieldType name="string" |
|            class="solr.StrField" |
|            docValues="true" |
|            sortMissingLast="true"/> |
| <!-- pint: integer values backed by solr.IntPointField. --> |
| <fieldType name="pint" |
|            class="solr.IntPointField" |
|            docValues="true"/> |
| <!-- pdate: date values backed by solr.DatePointField. --> |
| <fieldType name="pdate" |
|            class="solr.DatePointField" |
|            docValues="true"/> |
| |
| |
| <!-- A text field with defaults appropriate for English, plus |
| aggressive word-splitting and autophrase features enabled. |
| This field is just like text_en (not defined in this schema), except it adds |
| WordDelimiterGraphFilter to enable splitting and matching of |
| words on case-change, alphanumeric boundaries, and |
| non-alphanumeric chars. This means certain compound word |
| cases will work, for example query "wi fi" will match |
| document "WiFi" or "wi-fi". |
| --> |
| <fieldType name="text_en_splitting" |
|            class="solr.TextField" |
|            autoGeneratePhraseQueries="true" |
|            positionIncrementGap="100"> |
|   <!-- Index-time analysis chain. Filters run in the order listed. --> |
|   <analyzer type="index"> |
|     <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|     <!-- in this example, we will only use synonyms at query time, so the index-time variant stays disabled |
|     <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|     --> |
|     <!-- Case insensitive stop word removal. --> |
|     <filter class="solr.StopFilterFactory" |
|             words="lang/stopwords_en.txt" |
|             ignoreCase="true"/> |
|     <!-- Split on case changes and delimiters; at index time the catenated |
|          word and number forms are generated as well (catenateWords/Numbers="1"). --> |
|     <filter class="solr.WordDelimiterGraphFilterFactory" |
|             splitOnCaseChange="1" |
|             generateWordParts="1" generateNumberParts="1" |
|             catenateWords="1" catenateNumbers="1" catenateAll="0"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|     <!-- Terms listed in protwords.txt are marked as protected right before the stemmer runs. --> |
|     <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|     <filter class="solr.PorterStemFilterFactory"/> |
|     <!-- Final index-time step: flatten the token graph. --> |
|     <filter class="solr.FlattenGraphFilterFactory"/> |
|   </analyzer> |
|   <!-- Query-time analysis chain. Compared with the index chain: synonyms from |
|        synonyms.txt are expanded here, catenation is switched off, and there is |
|        no FlattenGraphFilter step. --> |
|   <analyzer type="query"> |
|     <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|     <filter class="solr.SynonymGraphFilterFactory" |
|             synonyms="synonyms.txt" |
|             expand="true" |
|             ignoreCase="true"/> |
|     <!-- Same case insensitive stop word list as at index time. --> |
|     <filter class="solr.StopFilterFactory" words="lang/stopwords_en.txt" ignoreCase="true"/> |
|     <filter class="solr.WordDelimiterGraphFilterFactory" |
|             splitOnCaseChange="1" |
|             generateWordParts="1" generateNumberParts="1" |
|             catenateWords="0" catenateNumbers="0" catenateAll="0"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|     <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/> |
|     <filter class="solr.PorterStemFilterFactory"/> |
|   </analyzer> |
| </fieldType> |
| |
| <!-- url_only: field type that extracts the URLs from the text. |
|      Analysis does not change the stored representation, so this type is only |
|      useful for faceting. Searching for URLs through it is not terribly useful |
|      either, because URLs contain too many special symbols. |
| --> |
| <fieldType name="url_only" |
|            class="solr.TextField" |
|            positionIncrementGap="100"> |
|   <!-- Index time: tokenize with the URL/email-aware tokenizer, then keep only |
|        the token types listed in url_types.txt (useWhitelist="true"). --> |
|   <analyzer type="index"> |
|     <tokenizer class="solr.UAX29URLEmailTokenizerFactory" |
|                maxTokenLength="255"/> |
|     <filter class="solr.TypeTokenFilterFactory" |
|             types="url_types.txt" |
|             useWhitelist="true"/> |
|   </analyzer> |
|   <!-- Query time: the keyword tokenizer is the only analysis step. --> |
|   <analyzer type="query"> |
|     <tokenizer class="solr.KeywordTokenizerFactory"/> |
|   </analyzer> |
| </fieldType> |
| |
| </schema> |