blob: 9dea3618b0848224e43d319d212a298489058de0 [file] [log] [blame]
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- The Solr schema file. This file should be named "schema.xml" and
should be located where the classloader for the Solr webapp can find it.
This schema is used for testing, and as such has everything and the
kitchen sink thrown in. See example/solr/conf/schema.xml for a
more concise example.
-->
<schema name="test" version="1.0">
<!--
Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
-->
<fieldType name="int" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="float" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="long" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<fieldType name="double" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" positionIncrementGap="0"/>
<!--
Numeric field types that index each value at various levels of precision
to accelerate range queries when the number of values between the range
endpoints is large. See the javadoc for LegacyNumericRangeQuery for internal
implementation details.
Smaller precisionStep values (specified in bits) will lead to more tokens
indexed per value, slightly larger index size, and faster range queries.
A precisionStep of 0 disables indexing at different precision levels.
-->
<fieldType name="tint" class="${solr.tests.IntegerFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tfloat" class="${solr.tests.FloatFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tlong" class="${solr.tests.LongFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<fieldType name="tdouble" class="${solr.tests.DoubleFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="8" positionIncrementGap="0"/>
<!-- Field type demonstrating an Analyzer failure -->
<fieldType name="failtype1" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
</fieldType>
<!-- Demonstrating ignoreCaseChange -->
<fieldType name="wdf_nocase" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="wdf_preserve" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- HighlitText optimizes storage for (long) columns which will be highlit -->
<fieldType name="highlittext" class="solr.TextField" compressThreshold="345"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<!-- format for date is 1995-12-31T23:59:59.999Z and only the fractional
seconds part (.999) is optional.
-->
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" sortMissingLast="true" omitNorms="true"/>
<!-- solr.TextField allows the specification of custom
text analyzers specified as a tokenizer and a list
of token filters.
-->
<fieldType name="text" class="solr.TextField">
<analyzer>
<tokenizer class="solr.ClassicTokenizerFactory"/>
<filter class="solr.ClassicFilterFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="nametext" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
</fieldType>
<!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
<fieldType name="keywordtok" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
</analyzer>
</fieldType>
<fieldType name="standardtok" class="solr.TextField">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="lettertok" class="solr.TextField">
<analyzer>
<tokenizer class="solr.LetterTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="whitetok" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="HTMLstandardtok" class="solr.TextField">
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="HTMLwhitetok" class="solr.TextField">
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardtokfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.ClassicTokenizerFactory"/>
<filter class="solr.ClassicFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="standardfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.ClassicFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="lowerfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="patternreplacefilt" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
<filter class="solr.PatternReplaceFilterFactory"
pattern="([^a-zA-Z])" replacement="_" replace="all"
/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
</analyzer>
</fieldType>
<fieldType name="porterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<!-- fieldType name="snowballfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
</analyzer>
</fieldType -->
<fieldType name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="stopfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldType>
<fieldType name="custstopfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldType>
<fieldType name="lengthfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldType>
<fieldType name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldType name="skutype1" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldType name="skutype2" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldType name="syn" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"/>
<filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldType>
<!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
synonyms "better"
-->
<fieldType name="dedup" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory"
synonyms="synonyms.txt" expand="true"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory"
synonyms="synonyms.txt" expand="true"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="unstored" class="solr.StrField" indexed="true" stored="false"/>
<fieldType name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- Point Fields -->
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="name" type="nametext" indexed="true" stored="true"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="subject" type="text" indexed="true" stored="true"/>
<field name="title" type="nametext" indexed="true" stored="true"/>
<field name="weight" type="float" indexed="true" stored="true"/>
<field name="bday" type="date" indexed="true" stored="true"/>
<field name="title_stemmed" type="text" indexed="true" stored="false"/>
<field name="title_lettertok" type="lettertok" indexed="true" stored="false"/>
<field name="syn" type="syn" indexed="true" stored="true"/>
<!-- to test property inheritance and overriding -->
<field name="shouldbeunstored" type="unstored"/>
<field name="shouldbestored" type="unstored" stored="true"/>
<field name="shouldbeunindexed" type="unstored" indexed="false" stored="true"/>
<!-- test different combinations of indexed and stored -->
<field name="bind" type="boolean" indexed="true" stored="false"/>
<field name="bsto" type="boolean" indexed="false" stored="true"/>
<field name="bindsto" type="boolean" indexed="true" stored="true"/>
<field name="isto" type="int" indexed="false" stored="true"/>
<field name="iind" type="int" indexed="true" stored="false"/>
<field name="ssto" type="string" indexed="false" stored="true"/>
<field name="sind" type="string" indexed="true" stored="false"/>
<field name="sindsto" type="string" indexed="true" stored="true"/>
<!-- test combinations of term vector settings -->
<field name="test_basictv" type="text" termVectors="true"/>
<field name="test_notv" type="text" termVectors="false"/>
<field name="test_postv" type="text" termVectors="true" termPositions="true"/>
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
<field name="test_posofftv" type="text" termVectors="true"
termPositions="true" termOffsets="true"/>
<!-- test highlit field settings -->
<field name="test_hlt" type="highlittext" indexed="true"/>
<field name="test_hlt_off" type="highlittext" indexed="true"/>
<!-- fields to test individual tokenizers and tokenfilters -->
<field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
<field name="lettertok" type="lettertok" indexed="true" stored="true"/>
<field name="whitetok" type="whitetok" indexed="true" stored="true"/>
<field name="HTMLwhitetok" type="HTMLwhitetok" indexed="true" stored="true"/>
<field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
<field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
<field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
<field name="patternreplacefilt" type="patternreplacefilt" indexed="true" stored="true"/>
<field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
<field name="engporterfilt" type="engporterfilt" indexed="true" stored="true"/>
<field name="custengporterfilt" type="custengporterfilt" indexed="true" stored="true"/>
<field name="stopfilt" type="stopfilt" indexed="true" stored="true"/>
<field name="custstopfilt" type="custstopfilt" indexed="true" stored="true"/>
<field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
<field name="dedup" type="dedup" indexed="true" stored="true"/>
<field name="wdf_nocase" type="wdf_nocase" indexed="true" stored="true"/>
<field name="wdf_preserve" type="wdf_preserve" indexed="true" stored="true"/>
<field name="numberpartfail" type="failtype1" indexed="true" stored="true"/>
<field name="nullfirst" type="string" indexed="true" stored="true" sortMissingFirst="true"/>
<field name="subword" type="subword" indexed="true" stored="true"/>
<field name="sku1" type="skutype1" indexed="true" stored="true"/>
<field name="sku2" type="skutype2" indexed="true" stored="true"/>
<field name="textgap" type="textgap" indexed="true" stored="true"/>
<field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
<field name="intDefault" type="int" indexed="true" stored="true" default="42" multiValued="false"/>
<field name="extractedDate" type="date" indexed="true" stored="true" multiValued="true"/>
<field name="extractedContent" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="extractedProducer" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="extractedCreator" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="extractedKeywords" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="extractedAuthor" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="extractedLanguage" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="resourceName" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="extractionLiteralMV" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="extractionLiteral" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="defaultExtr" type="string" indexed="true" stored="false"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
a "*" only at the start or the end.
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
Longer patterns will be matched first. if equal size patterns
both match, the first appearing in the schema will be used.
-->
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*_s1" type="string" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="float" indexed="true" stored="true"/>
<dynamicField name="*_d" type="double" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_dt1" type="date" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<dynamicField name="t_*" type="text" indexed="true" stored="true"/>
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
termVectors="true" termPositions="true" termOffsets="true"/>
<dynamicField name="stream_*" type="text" indexed="true" stored="true"/>
<dynamicField name="Content*" type="text" indexed="true" stored="true"/>
<!-- special fields for dynamic copyField test -->
<dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>
<dynamicField name="*_dynamic" type="string" indexed="true" stored="true"/>
<!-- for testing to ensure that longer patterns are matched first -->
<dynamicField name="*aa" type="string" indexed="true" stored="true"/>
<dynamicField name="*aaa" type="int" indexed="false" stored="true"/>
<!-- ignored because not stored or indexed -->
<dynamicField name="ignored_*" type="text" indexed="false" stored="false"/>
<dynamicField name="*_ii" type="pint" indexed="false" stored="false" useDocValuesAsStored="true"/>
<dynamicField name="*_iis" type="pint" indexed="false" stored="false" useDocValuesAsStored="true"/>
<dynamicField name="*_ff" type="pfloat" indexed="false" stored="false" useDocValuesAsStored="false"/>
<uniqueKey>id</uniqueKey>
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field different
ways, or to add multiple fields to the same field for easier/faster searching.
-->
<copyField source="title" dest="title_stemmed"/>
<copyField source="title" dest="title_lettertok"/>
<copyField source="title" dest="text"/>
<copyField source="subject" dest="text"/>
<copyField source="*_t" dest="text"/>
<!-- dynamic destination -->
<copyField source="*_dynamic" dest="dynamic_*"/>
</schema>