| <?xml version="1.0" encoding="UTF-8" ?> |
| <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor |
| license agreements. See the NOTICE file distributed with this work for additional |
| information regarding copyright ownership. The ASF licenses this file to |
| You under the Apache License, Version 2.0 (the "License"); you may not use |
| this file except in compliance with the License. You may obtain a copy of |
| the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required |
| by applicable law or agreed to in writing, software distributed under the |
| License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS |
| OF ANY KIND, either express or implied. See the License for the specific |
| language governing permissions and limitations under the License. --> |
| <schema name="shingle-topic-model" version="1.3"> |
| <types> |
| <!-- Primitive (non-analyzed) identifier and string types. --> |
| <fieldType name="uuid" class="solr.UUIDField" indexed="true"/> |
| <fieldType name="string" class="solr.StrField" omitNorms="true" sortMissingLast="true"/> |
| |
| <!-- Trie-encoded numeric and date types. All of them omit norms; tint and |
| tfloat use precisionStep="0" while tdate uses precisionStep="6". --> |
| <fieldType name="tint" class="solr.TrieIntField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/> |
| <fieldType name="tfloat" class="solr.TrieFloatField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/> |
| <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/> |
| |
| <!-- Type backed by solr.RandomSortField. --> |
| <fieldType name="random" class="solr.RandomSortField" indexed="true"/> |
| |
| <!-- Analyzed text type: standard tokenization, lower-casing, then word |
| shingles of at most 2 tokens. The index-time and query-time analyzers |
| differ only in how the shingle filter treats unigrams. --> |
| <fieldType name="text" class="solr.TextField"> |
| <!-- Index time: emit the unigrams as well as the 2-token shingles. --> |
| <analyzer type="index"> |
| <tokenizer class="solr.StandardTokenizerFactory" /> |
| <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" --> |
| <!-- words="stopwords_en.txt" enablePositionIncrements="false" /> --> |
| <filter class="solr.LowerCaseFilterFactory" /> |
| <!-- Shingles help improve the quality of the model, but they increase |
| the size of the index. --> |
| <filter class="solr.ShingleFilterFactory" maxShingleSize="2" outputUnigrams="true"/> |
| </analyzer> |
| <!-- Query time: unigram output is switched off (outputUnigrams="false"); |
| outputUnigramIfNoNgram="true" keeps a unigram only when no shingle can be |
| formed, e.g. for a single-token query. --> |
| <analyzer type="query"> |
| <tokenizer class="solr.StandardTokenizerFactory" /> |
| <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" --> |
| <!-- words="stopwords_en.txt" enablePositionIncrements="false" /> --> |
| <!-- <filter class="solr.SynonymFilterFactory" --> |
| <!-- synonyms="synonyms.txt" ignoreCase="true" expand="true"/> --> |
| <filter class="solr.LowerCaseFilterFactory" /> |
| <filter class="solr.ShingleFilterFactory" maxShingleSize="2" |
| outputUnigrams="false" outputUnigramIfNoNgram="true"/> |
| </analyzer> |
| </fieldType> |
| |
| </types> |
| |
| <fields> |
| <!-- Physical (automated) primary key. Each topic is stored as two Solr |
| entries so that stored attributes (such as the estimated predictive accuracy |
| and the broader-topic links) can be partially updated while preserving the |
| previous version of the statistical model. --> |
| <field name="entry_id" type="string" indexed="true" stored="true" required="true"/> |
| |
| <!-- Logical primary key; mandatory for all entries. --> |
| <field name="concept" type="string" indexed="true" stored="true" required="true"/> |
| |
| <!-- Kind of entry: either 'model' or 'metadata'. --> |
| <field name="entry_type" type="string" indexed="true" stored="true" required="true"/> |
| |
| <!-- Mandatory classifier model attribute when entry_type == 'model'. |
| Indexed but not stored; term vectors are kept, without positions or offsets. --> |
| <field name="classifier_features" type="text" indexed="true" stored="false" |
| termVectors="true" termPositions="false" termOffsets="false"/> |
| |
| <!-- Classifier model stored attributes when entry_type == 'metadata'. --> |
| <field name="model_entry_id" type="string" indexed="true" stored="true"/> |
| <field name="primary_topic" type="string" indexed="true" stored="true"/> |
| <field name="broader" type="string" indexed="true" stored="true" multiValued="true"/> |
| <field name="last_update_dt" type="tdate" indexed="true" stored="true"/> |
| |
| <!-- Accuracy evaluation of the model (across CV folds). --> |
| <field name="precision" type="tfloat" indexed="true" stored="true" multiValued="true"/> |
| <field name="recall" type="tfloat" indexed="true" stored="true" multiValued="true"/> |
| <field name="last_evaluation_dt" type="tdate" indexed="true" stored="true"/> |
| <field name="positive_support" type="tint" indexed="false" stored="true" multiValued="true"/> |
| <field name="negative_support" type="tint" indexed="false" stored="true" multiValued="true"/> |
| |
| <!-- Ids of some false positive and false negative examples (accumulated |
| over several CV folds). Stored only, not indexed. --> |
| <field name="false_positives" type="string" indexed="false" stored="true" multiValued="true"/> |
| <field name="false_negatives" type="string" indexed="false" stored="true" multiValued="true"/> |
| |
| </fields> |
| |
| <!-- Each Solr entry is uniquely identified by its entry_id field. --> |
| <uniqueKey>entry_id</uniqueKey> |
| <!-- Field searched when a query does not name a field explicitly. --> |
| <defaultSearchField>classifier_features</defaultSearchField> |
| <!-- Query terms are combined with AND unless the query specifies an operator. --> |
| <solrQueryParser defaultOperator="AND" /> |
| </schema> |