enhancement-engines/topic/engine/src/main/resources/shingle-topic-model/conf/solrconfig.xml - stanbol - Git at Google

 <?xml version="1.0" encoding="UTF-8" ?>
 <!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements.  See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
 -->

 <!--
      For more details about configurations options that may appear in
      this file, see http://wiki.apache.org/solr/SolrConfigXml.
 -->
 <config>
   <!-- In all configuration below, a prefix of "solr." for class names
        is an alias that causes solr to search appropriate packages,
        including org.apache.solr.(search|update|request|core|analysis)

        You may also specify a fully qualified Java classname if you
        have your own custom plugins.
     -->

   <!-- Abort On Configuration Error

        Set this to 'false' if you want Solr to continue working after
        it has encountered a severe configuration error.  In a
        production environment, you may want Solr to keep working even
        if one handler is misconfigured.

        The value is read from a system property and falls back to
        'true' when the property is not set, so you can also disable
        it at startup with:

          -Dsolr.abortOnConfigurationError=false
     -->
   <abortOnConfigurationError>${solr.abortOnConfigurationError:true}</abortOnConfigurationError>

   <!-- Controls what version of Lucene various components of Solr
        adhere to.  Generally, you want to use the latest version to
        get all bug fixes and improvements.  It is highly recommended
        that you fully re-index after changing this setting, as it can
        affect both how text is indexed and how it is queried.
     -->
   <luceneMatchVersion>LUCENE_42</luceneMatchVersion>

   <!-- Index data directory, taken from the solr.data.dir system
        property; the value is empty when the property is not set.
        NOTE(review): presumably an empty value makes Solr fall back to
        its default data location - confirm.
     -->
   <dataDir>${solr.data.dir:}</dataDir>

   <!-- Directory factory.  The implementation class can be overridden
        with the solr.directoryFactory system property and defaults to
        solr.NRTCachingDirectoryFactory.
     -->
   <directoryFactory name="DirectoryFactory"
                     class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>


   <indexConfig>
     <!-- Write Lock Timeout

          Maximum time, in milliseconds, an IndexWriter waits for the
          write lock.  Default: 1000.
       -->
     <writeLockTimeout>5000</writeLockTimeout>

     <!-- maxFieldLength no longer exists as of 4.0.  For similar behavior
          add a LimitTokenCountFilterFactory to the fieldType definition,
          e.g.
            <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/>
       -->
   </indexConfig>

    <!-- The default high-performance update handler.

         In this configuration the transaction log, the hard auto commit
         and the soft auto commit are all deactivated: the corresponding
         elements below are commented out, so nothing is committed
         automatically by this handler.
      -->

   <updateHandler class="solr.DirectUpdateHandler2">
     <!-- Transaction log: deactivated.
     <updateLog>
       <str name="dir">${solr.ulog.dir:}</str>
     </updateLog>
     -->

     <!-- Hard auto commit: deactivated.
      <autoCommit>
        <maxTime>15000</maxTime>
        <openSearcher>false</openSearcher>
      </autoCommit>
      -->
      <!-- Soft auto commit: deactivated.
        <autoSoftCommit>
          <maxTime>1000</maxTime>
        </autoSoftCommit>
       -->
   </updateHandler>

   <query>
     <!-- Upper bound on the number of clauses in a single boolean query. -->
     <maxBooleanClauses>1024</maxBooleanClauses>

     <!-- Searcher caches.

          The filterCache and queryResultCache are configured with an
          autowarmCount of 512; the documentCache is not autowarmed
          (autowarmCount="0").
       -->
     <filterCache class="solr.FastLRUCache"
                  size="2048"
                  initialSize="1024"
                  autowarmCount="512"/>
     <queryResultCache class="solr.LRUCache"
                      size="2048"
                      initialSize="1024"
                      autowarmCount="512"/>
     <documentCache class="solr.LRUCache"
                    size="4096"
                    initialSize="1024"
                    autowarmCount="0"/>

     <!-- The fieldValueCache is not configured explicitly; example:
        <fieldValueCache class="solr.FastLRUCache"
                         size="512"
                         autowarmCount="128"
                         showItems="32" />
       -->
     <enableLazyFieldLoading>true</enableLazyFieldLoading>

    <!-- Result Window Size

         An optimization for use with the queryResultCache.  When a search
         is requested, a superset of the requested number of document ids
         is collected.  For example, if a search for a particular query
         requests matching documents 10 through 19, and
         queryResultWindowSize is 50, then documents 0 through 49 will be
         collected and cached.  Any further requests in that range can be
         satisfied via the cache.  This configuration uses a window of 20.
      -->
    <queryResultWindowSize>20</queryResultWindowSize>

    <!-- Maximum number of documents to cache for any entry in the
         queryResultCache.
      -->
    <queryResultMaxDocsCached>200</queryResultMaxDocsCached>

     <!-- QuerySenderListener takes an array of NamedList and executes a
          local query request for each NamedList in sequence.

          No queries are configured for the newSearcher event: the
          examples inside the array below are commented out.
       -->
     <listener event="newSearcher" class="solr.QuerySenderListener">
       <arr name="queries">
         <!--
            <lst><str name="q">solr</str><str name="sort">price asc</str></lst>
            <lst><str name="q">rocks</str><str name="sort">weight asc</str></lst>
           -->
       </arr>
     </listener>
     <!-- NOTE(review): the firstSearcher query below looks like placeholder
          text rather than a query that is meaningful for this index.
          Consider replacing it with representative queries or removing
          it - confirm.
       -->
     <listener event="firstSearcher" class="solr.QuerySenderListener">
       <arr name="queries">
         <lst>
           <str name="q">static firstSearcher warming in solrconfig.xml</str>
         </lst>
       </arr>
     </listener>

     <!-- Use Cold Searcher

          If a search request comes in and there is no current
          registered searcher, then immediately register the still
          warming searcher and use it.  If "false", then all requests
          will block until the first searcher is done warming.
       -->
     <useColdSearcher>false</useColdSearcher>

     <!-- Max Warming Searchers

          Maximum number of searchers that may be warming in the
          background concurrently.  An error is returned if this limit
          is exceeded.

          Recommended values are 1-2 for read-only slaves, higher for
          masters without cache warming.
       -->
     <maxWarmingSearchers>2</maxWarmingSearchers>

   </query>

   <!-- Request Dispatcher

        handleSelect="false": "/select" requests are served by the
        request handler registered under that name.

        enableRemoteStreaming lets a request make Solr fetch content
        from arbitrary URLs or local files (stream.url / stream.file).
        For any instance reachable by untrusted clients this is a
        server-side request forgery and file disclosure risk, so it is
        disabled unless explicitly requested at startup with:

          -Dsolr.enableRemoteStreaming=true

        The upload limits are expressed in KB.  never304="true" makes
        Solr never answer with an HTTP 304 (Not Modified) response.
     -->
   <requestDispatcher handleSelect="false">
     <requestParsers enableRemoteStreaming="${solr.enableRemoteStreaming:false}"
                     multipartUploadLimitInKB="2048000"
                     formdataUploadLimitInKB="2048"/>
     <httpCaching never304="true"/>
   </requestDispatcher>

   <!-- Request Handlers

        http://wiki.apache.org/solr/SolrRequestHandler
     -->
   <!-- SearchHandler

        http://wiki.apache.org/solr/SearchHandler
     -->
   <requestHandler name="/select"
                   class="solr.SearchHandler">
     <!-- Defaults for query parameters; a parameter supplied in the
          request overrides the value given here.
       -->
     <lst name="defaults">
       <str name="echoParams">explicit</str>
       <int name="rows">10</int>
     </lst>
   </requestHandler>

   <!-- MoreLikeThis handler, used for similarity queries and topic
        classification.  Declared with startup="lazy". -->
   <requestHandler name="/mlt"
                   class="solr.MoreLikeThisHandler"
                   startup="lazy"/>

   <!-- Search handler that answers with indented JSON by default and
        uses "text" as its default field. -->
   <requestHandler name="/query"
                   class="solr.SearchHandler">
     <lst name="defaults">
       <str name="echoParams">explicit</str>
       <str name="wt">json</str>
       <str name="indent">true</str>
       <str name="df">text</str>
     </lst>
   </requestHandler>


   <!-- Realtime get handler, guaranteed to return the latest stored fields of
        any document, without the need to commit or open a new searcher.  The
        current implementation relies on the updateLog feature being enabled.

        NOTE(review): the <updateLog> element of the updateHandler in this
        file is commented out, so the realtime guarantee described above
        presumably does not hold here - confirm whether this handler is
        still needed.
     -->
   <requestHandler name="/get" class="solr.RealTimeGetHandler">
      <lst name="defaults">
        <str name="omitHeader">true</str>
        <str name="wt">json</str>
        <str name="indent">true</str>
      </lst>
   </requestHandler>


   <!-- Update Request Handler.

        http://wiki.apache.org/solr/UpdateXmlMessages

     -->
   <requestHandler name="/update"
                   class="solr.UpdateRequestHandler"/>

   <!-- Kept for backward compatibility with clients that still use
        /update/json and /update/csv -->
   <requestHandler name="/update/json"
                   class="solr.JsonUpdateRequestHandler">
     <lst name="defaults">
       <str name="stream.contentType">application/json</str>
     </lst>
   </requestHandler>
   <requestHandler name="/update/csv"
                   class="solr.CSVRequestHandler">
     <lst name="defaults">
       <str name="stream.contentType">application/csv</str>
     </lst>
   </requestHandler>

   <!-- Solr Cell Update Request Handler

        http://wiki.apache.org/solr/ExtractingRequestHandler

     -->
   <requestHandler name="/update/extract"
                   class="solr.extraction.ExtractingRequestHandler"
                   startup="lazy">
     <lst name="defaults">
       <str name="lowernames">true</str>
       <str name="uprefix">ignored_</str>

       <!-- Attribute capturing is on: keep the hrefs of links, but
            ignore the attributes of div elements -->
       <str name="captureAttr">true</str>
       <str name="fmap.a">links</str>
       <str name="fmap.div">ignored_</str>
     </lst>
   </requestHandler>


   <!-- Field Analysis Request Handler

        RequestHandler that provides much the same functionality as
        analysis.jsp.  It allows multiple field types and field names
        to be specified in the same request and outputs index-time and
        query-time analysis for each of them.

        Request parameters are:
        analysis.fieldname - field name whose analyzers are to be used
        analysis.fieldtype - field type whose analyzers are to be used
        analysis.fieldvalue - text for index-time analysis
        q (or analysis.q) - text for query-time analysis
        analysis.showmatch (true|false) - When set to true and when
            query analysis is performed, the tokens produced by the
            field value analysis will be marked as "matched" for every
            token that is produced by the query analysis
    -->
   <requestHandler name="/analysis/field"
                   startup="lazy"
                   class="solr.FieldAnalysisRequestHandler" />


   <!-- Document Analysis Handler

        http://wiki.apache.org/solr/AnalysisRequestHandler

        An analysis handler that provides a breakdown of the analysis
        process of provided documents. This handler expects a (single)
        content stream with the following format:

        <docs>
          <doc>
            <field name="id">1</field>
            <field name="name">The Name</field>
            <field name="text">The Text Value</field>
          </doc>
          <doc>...</doc>
          <doc>...</doc>
          ...
        </docs>

     Note: Each document must contain a field which serves as the
     unique key. This key is used in the returned response to associate
     an analysis breakdown to the analyzed document.

     Like the FieldAnalysisRequestHandler, this handler also supports
     query analysis by sending either an "analysis.query" or "q"
     request parameter that holds the query text to be analyzed. It
     also supports the "analysis.showmatch" parameter which when set to
     true, all field tokens that match the query tokens will be marked
     as a "match".
   -->
   <requestHandler name="/analysis/document"
                   startup="lazy"
                   class="solr.DocumentAnalysisRequestHandler"/>

   <!-- Admin Handlers

        Admin Handlers - This will register all the standard admin
        RequestHandlers.
     -->
   <requestHandler name="/admin/" class="solr.admin.AdminHandlers"/>

   <!-- Ping / health check handler -->
   <requestHandler name="/admin/ping"
                   class="solr.PingRequestHandler">
     <!-- The ping query is declared as an invariant -->
     <lst name="invariants">
       <str name="q">solrpingquery</str>
     </lst>
     <lst name="defaults">
       <str name="echoParams">all</str>
     </lst>
     <!-- Optionally, a "healthcheckFile" can be configured to enable or
          disable the PingRequestHandler.  Relative paths are resolved
          against the data dir.
       -->
     <!-- <str name="healthcheckFile">server-enabled.txt</str> -->
   </requestHandler>

   <!-- Debugging aid: echoes the contents of the request back to the client -->
   <requestHandler name="/debug/dump"
                   class="solr.DumpRequestHandler">
     <lst name="defaults">
       <str name="echoParams">explicit</str>
       <str name="echoHandler">true</str>
     </lst>
   </requestHandler>

   <!-- Solr Replication

        The SolrReplicationHandler supports replicating indexes from a
        "master" used for indexing to "slaves" used for queries.

        http://wiki.apache.org/solr/SolrReplication

        It is also necessary for SolrCloud to function (in Cloud mode, the
        replication handler is used to bulk transfer segments when nodes
        are added or need to recover).

        https://wiki.apache.org/solr/SolrCloud/

        In this configuration neither a master nor a slave section is
        active: both examples below are commented out.
     -->
   <requestHandler name="/replication" class="solr.ReplicationHandler" >
     <!--
        To enable simple master/slave replication, uncomment one of the
        sections below, depending on whether this Solr instance should be
        the "master" or a "slave".  If this instance is a "slave" you will
        also need to fill in the masterUrl to point to a real machine.
     -->
     <!--
        <lst name="master">
          <str name="replicateAfter">commit</str>
          <str name="replicateAfter">startup</str>
          <str name="confFiles">schema.xml,stopwords.txt</str>
        </lst>
     -->
     <!--
        <lst name="slave">
          <str name="masterUrl">http://your-master-hostname:8983/solr</str>
          <str name="pollInterval">00:00:60</str>
        </lst>
     -->
   </requestHandler>
    <!-- Spell Check

         The spell check component can return a list of alternative spelling
         suggestions.

         http://wiki.apache.org/solr/SpellCheckComponent

         NOTE(review): this component refers to the field type "textSpell"
         and to the field "name"; both must exist in the schema used with
         this configuration - confirm.
      -->
   <searchComponent name="spellcheck" class="solr.SpellCheckComponent">

     <str name="queryAnalyzerFieldType">textSpell</str>

     <!-- Multiple "Spell Checkers" can be declared and used by this
          component
       -->

     <!-- A spellchecker built from a field of the main index -->
     <lst name="spellchecker">
       <str name="name">default</str>
       <str name="field">name</str>
       <str name="classname">solr.DirectSolrSpellChecker</str>
       <!-- the spellcheck distance measure used; the default is the internal levenshtein -->
       <str name="distanceMeasure">internal</str>
       <!-- minimum accuracy needed to be considered a valid spellcheck suggestion -->
       <float name="accuracy">0.5</float>
       <!-- the maximum number of edits considered when enumerating terms: can be 1 or 2 -->
       <int name="maxEdits">2</int>
       <!-- the minimum shared prefix when enumerating terms -->
       <int name="minPrefix">1</int>
       <!-- maximum number of inspections per result -->
       <int name="maxInspections">5</int>
       <!-- minimum length of a query term to be considered for correction -->
       <int name="minQueryLength">4</int>
       <!-- maximum fraction of documents a query term can appear in and still be considered for correction -->
       <float name="maxQueryFrequency">0.01</float>
       <!-- uncomment this to require suggestions to occur in 1% of the documents
         <float name="thresholdTokenFrequency">.01</float>
       -->
     </lst>

     <!-- A spellchecker that can break or combine words.  See the "/spell" handler for usage -->
     <lst name="spellchecker">
       <str name="name">wordbreak</str>
       <str name="classname">solr.WordBreakSolrSpellChecker</str>
       <str name="field">name</str>
       <str name="combineWords">true</str>
       <str name="breakWords">true</str>
       <int name="maxChanges">10</int>
     </lst>

     <!-- A spellchecker that uses a different distance measure -->
     <!--
        <lst name="spellchecker">
          <str name="name">jarowinkler</str>
          <str name="field">spell</str>
          <str name="classname">solr.DirectSolrSpellChecker</str>
          <str name="distanceMeasure">
            org.apache.lucene.search.spell.JaroWinklerDistance
          </str>
        </lst>
      -->

     <!-- A spellchecker that uses an alternate comparator

          comparatorClass can be one of:
           1. score (default)
           2. freq (Frequency first, then score)
           3. A fully qualified class name
       -->
     <!--
        <lst name="spellchecker">
          <str name="name">freq</str>
          <str name="field">lowerfilt</str>
          <str name="classname">solr.DirectSolrSpellChecker</str>
          <str name="comparatorClass">freq</str>
        </lst>
       -->

     <!-- A spellchecker that reads the list of words from a file -->
     <!--
        <lst name="spellchecker">
          <str name="classname">solr.FileBasedSpellChecker</str>
          <str name="name">file</str>
          <str name="sourceLocation">spellings.txt</str>
          <str name="characterEncoding">UTF-8</str>
          <str name="spellcheckIndexDir">spellcheckerFile</str>
        </lst>
       -->
   </searchComponent>

   <!-- A request handler for demonstrating the spellcheck component.

        NOTE: This is purely as an example.  The whole purpose of the
        SpellCheckComponent is to hook it into the request handler that
        handles your normal user queries so that a separate request is
        not needed to get suggestions.

        IN OTHER WORDS, THERE IS REALLY GOOD CHANCE THE SETUP BELOW IS
        NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM!

        See http://wiki.apache.org/solr/SpellCheckComponent for details
        on the request parameters.
     -->
   <requestHandler name="/spell"
                   class="solr.SearchHandler"
                   startup="lazy">
     <lst name="defaults">
       <str name="df">text</str>
       <!-- Two dictionaries are listed: suggestions from the 'default'
            and the 'wordbreak' spellcheckers are combined, and
            collations (re-written queries) can mix corrections from
            both of them -->
       <str name="spellcheck.dictionary">default</str>
       <str name="spellcheck.dictionary">wordbreak</str>
       <str name="spellcheck">on</str>
       <str name="spellcheck.extendedResults">true</str>
       <str name="spellcheck.count">10</str>
       <str name="spellcheck.alternativeTermCount">5</str>
       <str name="spellcheck.maxResultsForSuggest">5</str>
       <str name="spellcheck.collate">true</str>
       <str name="spellcheck.collateExtendedResults">true</str>
       <str name="spellcheck.maxCollationTries">10</str>
       <str name="spellcheck.maxCollations">5</str>
     </lst>
     <!-- Run the spellcheck component after the standard components -->
     <arr name="last-components">
       <str>spellcheck</str>
     </arr>
   </requestHandler>

   <!-- Term Vector Component

        http://wiki.apache.org/solr/TermVectorComponent
     -->
   <searchComponent name="tvComponent"
                    class="solr.TermVectorComponent"/>


   <!-- Clustering Component

        http://wiki.apache.org/solr/ClusteringComponent

        You'll need to set the solr.clustering.enabled system property
        when running solr to run with clustering enabled:

             java -Dsolr.clustering.enabled=true -jar start.jar

     -->
   <!-- The component is only enabled when the solr.clustering.enabled
        system property is set to true; the enable attribute defaults
        to false otherwise. -->
   <searchComponent name="clustering"
                    enable="${solr.clustering.enabled:false}"
                    class="solr.clustering.ClusteringComponent" >
     <!-- Declare an engine -->
     <lst name="engine">
       <!-- The name; only one engine can be named "default" -->
       <str name="name">default</str>

       <!-- Class name of the Carrot2 clustering algorithm.

            Currently available algorithms are:

            * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
            * org.carrot2.clustering.stc.STCClusteringAlgorithm
            * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm

            See http://project.carrot2.org/algorithms.html for the
            algorithms' characteristics.
         -->
       <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>

       <!-- Overriding values for Carrot2 default algorithm attributes.

            For a description of all available attributes, see:
            http://download.carrot2.org/stable/manual/#chapter.components.
            Use the attribute key as the name attribute of the str
            elements below.  These can be further overridden for
            individual requests by specifying the attribute key as the
            request parameter name and the attribute value as the
            parameter value.
         -->
       <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>

       <!-- Location of Carrot2 lexical resources.

            A directory from which to load Carrot2-specific stop words
            and stop labels.  Absolute or relative to the Solr config
            directory.  If a specific resource (e.g. stopwords.en) is
            present in the specified dir, it will completely override the
            corresponding default one that ships with Carrot2.

            For an overview of Carrot2 lexical resources, see:
            http://download.carrot2.org/head/manual/#chapter.lexical-resources
         -->
       <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>

       <!-- The language to assume for the documents.

            For a list of allowed values, see:
            http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
        -->
       <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
     </lst>
     <!-- A second engine, named "stc", that uses the STC algorithm -->
     <lst name="engine">
       <str name="name">stc</str>
       <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
     </lst>
   </searchComponent>

   <!-- A request handler for demonstrating the clustering component

        This is purely as an example.

        In reality you will likely want to add the component to your
        already specified request handlers.
     -->
   <requestHandler name="/clustering"
                   class="solr.SearchHandler"
                   startup="lazy"
                   enable="${solr.clustering.enabled:false}">
     <lst name="defaults">
        <!-- Clustering is switched on and uses the "default" engine -->
        <bool name="clustering">true</bool>
        <str name="clustering.engine">default</str>
        <bool name="clustering.results">true</bool>
        <!-- Field holding the title -->
        <str name="carrot.title">name</str>
        <str name="carrot.url">id</str>
        <!-- Field whose content is clustered -->
        <str name="carrot.snippet">features</str>
        <!-- Summaries are produced -->
        <bool name="carrot.produceSummary">true</bool>
        <!-- Maximum number of labels per cluster (not set) -->
        <!--<int name="carrot.numDescriptions">5</int>-->
        <!-- Sub clusters are not produced -->
        <bool name="carrot.outputSubClusters">false</bool>

        <!-- Query parsing defaults -->
        <str name="defType">edismax</str>
        <str name="qf">
          text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
        </str>
        <str name="q.alt">*:*</str>
        <str name="rows">10</str>
        <str name="fl">*,score</str>
     </lst>
     <!-- Run the clustering component after the standard components -->
     <arr name="last-components">
       <str>clustering</str>
     </arr>
   </requestHandler>

   <!-- Terms Component

        http://wiki.apache.org/solr/TermsComponent

        A component to return terms and document frequency of those
        terms
     -->
   <searchComponent name="terms"
                    class="solr.TermsComponent"/>

   <!-- Example request handler exposing the terms component; "terms" is
        the only component it runs -->
   <requestHandler name="/terms"
                   class="solr.SearchHandler"
                   startup="lazy">
     <lst name="defaults">
       <bool name="terms">true</bool>
       <bool name="distrib">false</bool>
     </lst>
     <arr name="components">
       <str>terms</str>
     </arr>
   </requestHandler>


   <!-- Update Processors

        Chains of Update Processor Factories for dealing with Update
        Requests can be declared, and then used by name in Update
        Request Processors

        http://wiki.apache.org/solr/UpdateRequestProcessor

     -->

   <!-- Response Writers -->

   <queryResponseWriter name="json"
                        class="solr.JSONResponseWriter">
     <!-- JSON responses are written as plain text so that they are easy
          to read in *any* browser (done for the purposes of the
          tutorial).  Remove this override if you expect a MIME type of
          "application/json".
       -->
     <str name="content-type">text/plain; charset=UTF-8</str>
   </queryResponseWriter>

   <!-- Custom response writers can be declared as needed... -->
   <queryResponseWriter name="velocity"
                        class="solr.VelocityResponseWriter"
                        startup="lazy"/>

   <!-- The XSLT response writer transforms the XML output with any xslt
        file found in Solr's conf/xslt directory.  Changes to xslt files
        are checked for every xsltCacheLifetimeSeconds.
     -->
   <queryResponseWriter name="xslt"
                        class="solr.XSLTResponseWriter">
     <int name="xsltCacheLifetimeSeconds">5</int>
   </queryResponseWriter>

   <!-- Query Parsers

        http://wiki.apache.org/solr/SolrQuerySyntax

        Multiple QParserPlugins can be registered by name, and then
        used in either the "defType" param for the QueryComponent (used
        by SearchHandler) or in LocalParams
     -->

   <!-- Legacy configuration for the admin interface; its default query
        is *:* -->
   <admin>
     <defaultQuery>*:*</defaultQuery>
   </admin>


 </config>