| <?xml version="1.0" encoding="UTF-8" ?> |
| <!-- |
| This software was produced for the U. S. Government |
| under Contract No. W15P7T-11-C-F600, and is |
| subject to the Rights in Noncommercial Computer Software |
| and Noncommercial Computer Software Documentation |
| Clause 252.227-7014 (JUN 1995) |
| |
| Copyright 2013 The MITRE Corporation. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| --> |
| <schema name="minimal" version="1.6"> |
| |
| <fields> |
|   <!-- Document key; referenced by the uniqueKey element. --> |
|   <field name="id" type="string" required="true" docValues="true"/> |
|   <!-- Raw name value; the copyField rules below fan it out to each name_tag* field. --> |
|   <field name="name" type="string"/> |
| |
|   <!-- The tagger has no use for term frequencies, positions, or norms. Should any of |
|        these fields also have to serve general search, do not exclude those stats. --> |
|   <field name="name_tag" type="tag" stored="false"/> |
|   <field name="name_tagStop" type="tagStop" stored="false"/> |
|   <field name="name_tagWDF" type="tagWDF" stored="false"/> |
|   <!--<field name="name_tagPartial" type="tagPartial" stored="false"/>--> |
|   <field name="name_tagXml" type="tagXml" stored="false"/> |
|   <field name="name_tagAttribute" type="tagAttribute" stored="false"/> |
| |
|   <!-- Populate every tagger field from the single "name" source field. --> |
|   <copyField source="name" dest="name_tag"/> |
|   <copyField source="name" dest="name_tagStop"/> |
|   <copyField source="name" dest="name_tagWDF"/> |
|   <!--<copyField source="name" dest="name_tagPartial"/>--> |
|   <copyField source="name" dest="name_tagXml"/> |
|   <copyField source="name" dest="name_tagAttribute"/> |
| |
|   <!-- Catch-all: any field name not declared above is an indexed, stored string. --> |
|   <dynamicField name="*" type="string" stored="true" indexed="true"/> |
| </fields> |
| |
| <!-- Documents are keyed by the required "id" field declared in the fields section. --> |
| <uniqueKey>id</uniqueKey> |
| |
| <types> |
| <!-- Plain string type; used by "id", "name" and the catch-all dynamic field. --> |
| <fieldType class="solr.StrField" name="string"/> |
| |
| <!-- Numeric types; no field declared in this schema references them. --> |
| <fieldType class="solr.TrieLongField" name="long" positionIncrementGap="0" precisionStep="0"/> |
| <fieldType class="solr.TrieIntField" name="int" positionIncrementGap="0" precisionStep="0"/> |
| |
| <!-- Baseline tagger type: standard tokenization plus lowercasing, stored without term freqs, positions, or norms. --> |
| <fieldType name="tag" |
|            class="solr.TextField" |
|            positionIncrementGap="100" |
|            postingsFormat="FST50" |
|            omitNorms="true" |
|            omitTermFreqAndPositions="true"> |
|   <!-- Index chain: ConcatenateGraphFilter comes last, after lowercasing. --> |
|   <analyzer type="index"> |
|     <tokenizer class="solr.StandardTokenizerFactory"/> |
|     <!-- ASCII folding is usually a good idea, but for our tests let's leave it off: |
|          <filter class="solr.ASCIIFoldingFilterFactory"/> --> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
| |
|     <filter class="solr.ConcatenateGraphFilterFactory" preservePositionIncrements="false"/> |
|   </analyzer> |
|   <!-- Query chain: same tokenizer and lowercasing, without the concatenation step. --> |
|   <analyzer type="query"> |
|     <tokenizer class="solr.StandardTokenizerFactory"/> |
|     <!--<filter class="solr.ASCIIFoldingFilterFactory"/>--> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|   </analyzer> |
| </fieldType> |
| |
| <!-- Same as "tag", with a stop filter added to both the index and query chains. --> |
| <fieldType name="tagStop" |
|            class="solr.TextField" |
|            positionIncrementGap="100" |
|            postingsFormat="FST50" |
|            omitNorms="true" |
|            omitTermFreqAndPositions="true"> |
|   <analyzer type="index"> |
|     <tokenizer class="solr.StandardTokenizerFactory"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|     <!-- No word list configured: falls back to the default English stopwords. --> |
|     <filter class="solr.StopFilterFactory"/> |
| |
|     <filter class="solr.ConcatenateGraphFilterFactory" preservePositionIncrements="false"/> |
|   </analyzer> |
|   <analyzer type="query"> |
|     <tokenizer class="solr.StandardTokenizerFactory"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|     <!-- No word list configured: falls back to the default English stopwords. --> |
|     <filter class="solr.StopFilterFactory"/> |
|   </analyzer> |
| </fieldType> |
| |
| <!-- Whitespace-tokenized variant that runs a WordDelimiterGraphFilter, producing stacked/synonym tokens. --> |
| <fieldType name="tagWDF" |
|            class="solr.TextField" |
|            positionIncrementGap="100" |
|            postingsFormat="FST50" |
|            omitNorms="true" |
|            omitTermFreqAndPositions="true"> |
|   <!-- Index chain: word and number parts are generated AND catenated. --> |
|   <analyzer type="index"> |
|     <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|     <filter class="solr.WordDelimiterGraphFilterFactory" |
|             generateWordParts="1" |
|             generateNumberParts="1" |
|             catenateWords="1" |
|             catenateNumbers="1" |
|             catenateAll="0"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
| |
|     <filter class="solr.ConcatenateGraphFilterFactory" preservePositionIncrements="false"/> |
|   </analyzer> |
|   <!-- Query chain: parts are generated, but all catenation options are off. --> |
|   <analyzer type="query"> |
|     <tokenizer class="solr.WhitespaceTokenizerFactory"/> |
|     <filter class="solr.WordDelimiterGraphFilterFactory" |
|             generateWordParts="1" |
|             generateNumberParts="1" |
|             catenateWords="0" |
|             catenateNumbers="0" |
|             catenateAll="0"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|   </analyzer> |
| </fieldType> |
| |
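| <!-- Disabled. Problem: XML 1.0 cannot represent the character U+001F (\u001F), not even as a character reference; presumably it was meant as the tokenSeparator value below. |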
| <fieldType name="tagPartial" class="solr.TextField" positionIncrementGap="100" |
| postingsFormat="FST50" omitTermFreqAndPositions="true" omitNorms="true"> |
| <analyzer type="index"> |
| <tokenizer class="solr.StandardTokenizerFactory" /> |
| <filter class="solr.LowerCaseFilterFactory"/> |
| |
| <filter class="solr.ShingleFilterFactory" tokenSeparator="" |
| outputUnigramsIfNoShingles="true" maxShingleSize="10" /> |
| </analyzer> |
| <analyzer type="query"> |
| <tokenizer class="solr.StandardTokenizerFactory" /> |
| <filter class="solr.LowerCaseFilterFactory"/> |
| </analyzer> |
| </fieldType> |
| --> |
| |
| <!-- Same chains as "tag", except that the query side first strips markup with HTMLStripCharFilter. --> |
| <fieldType name="tagXml" |
|            class="solr.TextField" |
|            positionIncrementGap="100" |
|            postingsFormat="FST50" |
|            omitNorms="true" |
|            omitTermFreqAndPositions="true"> |
|   <analyzer type="index"> |
|     <tokenizer class="solr.StandardTokenizerFactory"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
| |
|     <filter class="solr.ConcatenateGraphFilterFactory" preservePositionIncrements="false"/> |
|   </analyzer> |
|   <analyzer type="query"> |
|     <!-- The one addition relative to the "tag" query chain. --> |
|     <charFilter class="solr.HTMLStripCharFilterFactory"/> |
|     <tokenizer class="solr.StandardTokenizerFactory"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|   </analyzer> |
| </fieldType> |
| |
| <!-- Tagger type used to exercise the TaggingAttribute. As with the other tag* types, |
|      term frequencies, positions, and norms are omitted because the tagger does not need them. --> |
| <fieldType name="tagAttribute" class="solr.TextField" positionIncrementGap="100" |
| postingsFormat="FST50" omitTermFreqAndPositions="true" omitNorms="true"> |
|   <analyzer type="index"> |
|     <tokenizer class="solr.StandardTokenizerFactory" /> |
|     <filter class="solr.ASCIIFoldingFilterFactory"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
| |
|     <filter class="solr.ConcatenateGraphFilterFactory" preservePositionIncrements="false" /> |
|   </analyzer> |
|   <analyzer type="query"> |
|     <!-- maxTokenLength of 32 is just for tests; it bumps the position increment. --> |
|     <tokenizer class="solr.StandardTokenizerFactory" |
|     maxTokenLength="32"/> |
|     <!-- |
|     NOTE: This uses the WordLengthTaggingFilterFactory to test the |
|     TaggingAttribute. The WordLengthTaggingFilter sets the TaggingAttribute |
|     for words based on their length. The attribute is ignored at indexing |
|     time, but the Tagger uses it to start tags only at words whose length is |
|     equal to or greater than the configured minLength. |
|     --> |
|     <filter class="org.apache.solr.handler.tagger.WordLengthTaggingFilterFactory" minLength="4"/> |
|     <filter class="solr.ASCIIFoldingFilterFactory"/> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|   </analyzer> |
| </fieldType> |
| |
| <!-- General-purpose text type: standard tokenization plus lowercasing on both sides; the stop and synonym filters are left disabled. --> |
| <fieldType name="text_general" |
|            class="solr.TextField" |
|            positionIncrementGap="100"> |
|   <analyzer type="index"> |
|     <tokenizer class="solr.StandardTokenizerFactory"/> |
|     <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> --> |
|     <!-- Synonyms would only be applied at query time in this example: |
|     <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/> |
|     --> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|   </analyzer> |
|   <analyzer type="query"> |
|     <tokenizer class="solr.StandardTokenizerFactory"/> |
|     <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" /> --> |
|     <!-- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> --> |
|     <filter class="solr.LowerCaseFilterFactory"/> |
|   </analyzer> |
| </fieldType> |
| |
| </types> |
| |
| |
| </schema> |