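LUCENE-3456: use MockTokenizer instead of WhitespaceTokenizer in test configs, and call end()/close() on consumed TokenStreams in QueryElevationComponent, SpellCheckComponent and SpellingQueryConverter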
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/branch_3x@1175532 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/solr/contrib/clustering/src/test-files/clustering/solr/conf/schema.xml b/solr/contrib/clustering/src/test-files/clustering/solr/conf/schema.xml
index 9614be4..76dfecc 100644
--- a/solr/contrib/clustering/src/test-files/clustering/solr/conf/schema.xml
+++ b/solr/contrib/clustering/src/test-files/clustering/solr/conf/schema.xml
@@ -147,7 +147,7 @@
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
@@ -160,7 +160,7 @@
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
@@ -180,7 +180,7 @@
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
@@ -196,7 +196,7 @@
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
diff --git a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/conf/dataimport-schema-no-unique-key.xml b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/conf/dataimport-schema-no-unique-key.xml
index b1ec8be..e502f74 100644
--- a/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/conf/dataimport-schema-no-unique-key.xml
+++ b/solr/contrib/dataimporthandler-extras/src/test-files/dihextras/solr/conf/dataimport-schema-no-unique-key.xml
@@ -147,7 +147,7 @@
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
@@ -160,7 +160,7 @@
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
@@ -172,7 +172,7 @@
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
diff --git a/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-schema.xml b/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-schema.xml
index d39d7fb..8ab5a94 100644
--- a/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-schema.xml
+++ b/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-schema.xml
@@ -147,7 +147,7 @@
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
@@ -160,7 +160,7 @@
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
@@ -172,7 +172,7 @@
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
@@ -188,7 +188,7 @@
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
diff --git a/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-solr_id-schema.xml b/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-solr_id-schema.xml
index 734e24b..738bb09 100644
--- a/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-solr_id-schema.xml
+++ b/solr/contrib/dataimporthandler/src/test-files/dih/solr/conf/dataimport-solr_id-schema.xml
@@ -147,7 +147,7 @@
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
@@ -160,7 +160,7 @@
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
@@ -172,7 +172,7 @@
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
@@ -188,7 +188,7 @@
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!--<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>-->
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
diff --git a/solr/contrib/extraction/src/test-files/extraction/solr/conf/schema.xml b/solr/contrib/extraction/src/test-files/extraction/solr/conf/schema.xml
index 65546c4..e012c66 100644
--- a/solr/contrib/extraction/src/test-files/extraction/solr/conf/schema.xml
+++ b/solr/contrib/extraction/src/test-files/extraction/solr/conf/schema.xml
@@ -76,7 +76,7 @@
<!-- Field type demonstrating an Analyzer failure -->
<fieldtype name="failtype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -85,7 +85,7 @@
<!-- Demonstrating ignoreCaseChange -->
<fieldtype name="wdf_nocase" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -93,7 +93,7 @@
<fieldtype name="wdf_preserve" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -152,7 +152,7 @@
<analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="whitetok" class="solr.TextField">
- <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+ <analyzer><tokenizer class="solr.MockTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLstandardtok" class="solr.TextField">
<analyzer>
@@ -163,7 +163,7 @@
<fieldtype name="HTMLwhitetok" class="solr.TextField">
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardtokfilt" class="solr.TextField">
@@ -174,13 +174,13 @@
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.ClassicFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -197,57 +197,57 @@
</fieldtype>
<fieldtype name="porterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<!-- fieldtype name="snowballfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
</analyzer>
</fieldtype -->
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldtype>
<fieldtype name="custstopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="lengthfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldtype>
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
@@ -258,12 +258,12 @@
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldtype name="skutype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -272,12 +272,12 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="skutype2" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -286,7 +286,7 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="syn" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
@@ -296,7 +296,7 @@
-->
<fieldtype name="dedup" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" expand="true" />
<filter class="solr.PorterStemFilterFactory"/>
@@ -309,7 +309,7 @@
<fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
diff --git a/solr/contrib/uima/src/test-files/uima/solr/conf/schema.xml b/solr/contrib/uima/src/test-files/uima/solr/conf/schema.xml
index 85d15ef..5dd69d2 100644
--- a/solr/contrib/uima/src/test-files/uima/solr/conf/schema.xml
+++ b/solr/contrib/uima/src/test-files/uima/solr/conf/schema.xml
@@ -230,7 +230,7 @@
<fieldType name="text_ws" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <tokenizer class="solr.MockTokenizerFactory" />
</analyzer>
</fieldType>
@@ -244,7 +244,7 @@
<fieldType name="text" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <tokenizer class="solr.MockTokenizerFactory" />
<!--
in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory"
@@ -265,7 +265,7 @@
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <tokenizer class="solr.MockTokenizerFactory" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true" />
<filter class="solr.StopFilterFactory" ignoreCase="true"
@@ -287,7 +287,7 @@
<fieldType name="textTight" class="solr.TextField"
positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <tokenizer class="solr.MockTokenizerFactory" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="false" />
<filter class="solr.StopFilterFactory" ignoreCase="true"
@@ -314,7 +314,7 @@
<fieldType name="textgen" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <tokenizer class="solr.MockTokenizerFactory" />
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt" enablePositionIncrements="true" />
<filter class="solr.WordDelimiterFilterFactory"
@@ -323,7 +323,7 @@
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <tokenizer class="solr.MockTokenizerFactory" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true" />
<filter class="solr.StopFilterFactory" ignoreCase="true"
@@ -344,7 +344,7 @@
<fieldType name="text_rev" class="solr.TextField"
positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <tokenizer class="solr.MockTokenizerFactory" />
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt" enablePositionIncrements="true" />
<filter class="solr.WordDelimiterFilterFactory"
@@ -356,7 +356,7 @@
maxFractionAsterisk="0.33" />
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <tokenizer class="solr.MockTokenizerFactory" />
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true" />
<filter class="solr.StopFilterFactory" ignoreCase="true"
@@ -374,7 +374,7 @@
positionIncrementGap="100" > <analyzer> <charFilter
class="solr.MappingCharFilterFactory"
mapping="mapping-ISOLatin1Accent.txt"/> <tokenizer
- class="solr.WhitespaceTokenizerFactory"/> </analyzer> </fieldType>
+ class="solr.MockTokenizerFactory"/> </analyzer> </fieldType>
-->
<!--
@@ -424,7 +424,7 @@
<fieldtype name="payloads" stored="false" indexed="true"
class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <tokenizer class="solr.MockTokenizerFactory" />
<!--
The DelimitedPayloadTokenFilter can put payloads on tokens...
for example, a token of "foo|1.4" would be indexed as "foo"
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
index 1b4a8c6..3d091c6 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java
@@ -303,6 +303,8 @@
while( tokens.incrementToken() ) {
norm.append( termAtt.buffer(), 0, termAtt.length() );
}
+ tokens.end();
+ tokens.close();
return norm.toString();
}
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
index 8c51cc9..809614e 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@@ -493,6 +493,8 @@
token.setPositionIncrement(posIncAtt.getPositionIncrement());
result.add(token);
}
+ ts.end();
+ ts.close();
return result;
}
diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java b/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
index 688f4a6..635fae5 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
@@ -123,6 +123,8 @@
token.setPositionIncrement(posIncAtt.getPositionIncrement());
result.add(token);
}
+ stream.end();
+ stream.close();
} catch (IOException e) {
}
}
diff --git a/solr/core/src/test-files/solr/conf/schema-copyfield-test.xml b/solr/core/src/test-files/solr/conf/schema-copyfield-test.xml
index 2a6fe0d..f257f57 100644
--- a/solr/core/src/test-files/solr/conf/schema-copyfield-test.xml
+++ b/solr/core/src/test-files/solr/conf/schema-copyfield-test.xml
@@ -76,7 +76,7 @@
<!-- Field type demonstrating an Analyzer failure -->
<fieldtype name="failtype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -85,7 +85,7 @@
<!-- Demonstrating ignoreCaseChange -->
<fieldtype name="wdf_nocase" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -144,7 +144,7 @@
<analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="whitetok" class="solr.TextField">
- <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+ <analyzer><tokenizer class="solr.MockTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLstandardtok" class="solr.TextField">
<analyzer>
@@ -155,7 +155,7 @@
<fieldtype name="HTMLwhitetok" class="solr.TextField">
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardtokfilt" class="solr.TextField">
@@ -166,13 +166,13 @@
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -189,57 +189,57 @@
</fieldtype>
<fieldtype name="porterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<!-- fieldtype name="snowballfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
</analyzer>
</fieldtype -->
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldtype>
<fieldtype name="custstopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="lengthfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldtype>
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
@@ -250,12 +250,12 @@
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldtype name="skutype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -264,12 +264,12 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="skutype2" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -278,7 +278,7 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="syn" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
@@ -288,7 +288,7 @@
-->
<fieldtype name="dedup" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" expand="true" />
<filter class="solr.PorterStemFilterFactory"/>
@@ -301,7 +301,7 @@
<fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
diff --git a/solr/core/src/test-files/solr/conf/schema-required-fields.xml b/solr/core/src/test-files/solr/conf/schema-required-fields.xml
index 871c995..d52fa47 100644
--- a/solr/core/src/test-files/solr/conf/schema-required-fields.xml
+++ b/solr/core/src/test-files/solr/conf/schema-required-fields.xml
@@ -76,7 +76,7 @@
<!-- Field type demonstrating an Analyzer failure -->
<fieldtype name="failtype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -135,7 +135,7 @@
<analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="whitetok" class="solr.TextField">
- <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+ <analyzer><tokenizer class="solr.MockTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLstandardtok" class="solr.TextField">
<analyzer>
@@ -146,7 +146,7 @@
<fieldtype name="HTMLwhitetok" class="solr.TextField">
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardtokfilt" class="solr.TextField">
@@ -157,13 +157,13 @@
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -180,57 +180,57 @@
</fieldtype>
<fieldtype name="porterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<!-- fieldtype name="snowballfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
</analyzer>
</fieldtype -->
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldtype>
<fieldtype name="custstopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="lengthfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldtype>
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
@@ -241,12 +241,12 @@
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldtype name="skutype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -255,12 +255,12 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="skutype2" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -269,7 +269,7 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="syn" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
@@ -279,7 +279,7 @@
-->
<fieldtype name="dedup" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" expand="true" />
<filter class="solr.PorterStemFilterFactory"/>
@@ -292,7 +292,7 @@
<fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
diff --git a/solr/core/src/test-files/solr/conf/schema-reversed.xml b/solr/core/src/test-files/solr/conf/schema-reversed.xml
index f4cea9e..21c805a 100644
--- a/solr/core/src/test-files/solr/conf/schema-reversed.xml
+++ b/solr/core/src/test-files/solr/conf/schema-reversed.xml
@@ -29,28 +29,28 @@
<fieldtype name="text" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="srev" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="rev" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="false"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"
@@ -58,7 +58,7 @@
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
diff --git a/solr/core/src/test-files/solr/conf/schema-stop-keep.xml b/solr/core/src/test-files/solr/conf/schema-stop-keep.xml
index 13229f2..e3d7839 100644
--- a/solr/core/src/test-files/solr/conf/schema-stop-keep.xml
+++ b/solr/core/src/test-files/solr/conf/schema-stop-keep.xml
@@ -34,7 +34,7 @@
<fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
<fieldtype name="stop-one" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stop-1.txt"/>
@@ -43,7 +43,7 @@
</fieldtype>
<fieldtype name="stop-two" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"
words="stop-1.txt,stop-2.txt"/>
diff --git a/solr/core/src/test-files/solr/conf/schema-trie.xml b/solr/core/src/test-files/solr/conf/schema-trie.xml
index fd6f0d6..f74c1d3 100644
--- a/solr/core/src/test-files/solr/conf/schema-trie.xml
+++ b/solr/core/src/test-files/solr/conf/schema-trie.xml
@@ -156,7 +156,7 @@
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
@@ -169,7 +169,7 @@
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
@@ -181,7 +181,7 @@
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
@@ -197,7 +197,7 @@
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
diff --git a/solr/core/src/test-files/solr/conf/schema.xml b/solr/core/src/test-files/solr/conf/schema.xml
index a50c3ea..df052ff 100644
--- a/solr/core/src/test-files/solr/conf/schema.xml
+++ b/solr/core/src/test-files/solr/conf/schema.xml
@@ -86,7 +86,7 @@
<!-- Field type demonstrating an Analyzer failure -->
<fieldtype name="failtype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -95,7 +95,7 @@
<!-- Demonstrating ignoreCaseChange -->
<fieldtype name="wdf_nocase" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -103,7 +103,7 @@
<fieldtype name="wdf_preserve" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -165,7 +165,7 @@
<analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="whitetok" class="solr.TextField">
- <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+ <analyzer><tokenizer class="solr.MockTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLstandardtok" class="solr.TextField">
<analyzer>
@@ -176,7 +176,7 @@
<fieldtype name="HTMLwhitetok" class="solr.TextField">
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardtokfilt" class="solr.TextField">
@@ -187,19 +187,19 @@
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerpunctfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt" expand="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
@@ -223,43 +223,43 @@
</fieldtype>
<fieldtype name="porterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<!-- fieldtype name="snowballfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
</analyzer>
</fieldtype -->
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldtype>
<fieldtype name="custstopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="lengthfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldtype>
@@ -267,20 +267,20 @@
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
@@ -290,14 +290,14 @@
<fieldtype name="numericsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
@@ -307,12 +307,12 @@
<fieldtype name="protectedsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -321,12 +321,12 @@
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldtype name="skutype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -335,12 +335,12 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="skutype2" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -349,7 +349,7 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="syn" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="old_synonyms.txt"/>
</analyzer>
</fieldtype>
@@ -359,7 +359,7 @@
-->
<fieldtype name="dedup" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="old_synonyms.txt" expand="true" />
<filter class="solr.PorterStemFilterFactory"/>
@@ -372,7 +372,7 @@
<fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -394,7 +394,7 @@
<!-- omitPositions example -->
<fieldType name="nopositions" class="solr.TextField" omitPositions="true">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
</types>
diff --git a/solr/core/src/test-files/solr/conf/schema11.xml b/solr/core/src/test-files/solr/conf/schema11.xml
index d8f37a7..2766af1 100755
--- a/solr/core/src/test-files/solr/conf/schema11.xml
+++ b/solr/core/src/test-files/solr/conf/schema11.xml
@@ -147,7 +147,7 @@
<!-- A text field that only splits on whitespace for exact matching of words -->
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
@@ -160,7 +160,7 @@
-->
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
@@ -172,7 +172,7 @@
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
@@ -188,7 +188,7 @@
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
diff --git a/solr/core/src/test-files/solr/conf/schema12.xml b/solr/core/src/test-files/solr/conf/schema12.xml
index 7006482..8fac8a1 100755
--- a/solr/core/src/test-files/solr/conf/schema12.xml
+++ b/solr/core/src/test-files/solr/conf/schema12.xml
@@ -79,7 +79,7 @@
<!-- Field type demonstrating an Analyzer failure -->
<fieldtype name="failtype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -88,7 +88,7 @@
<!-- Demonstrating ignoreCaseChange -->
<fieldtype name="wdf_nocase" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -96,7 +96,7 @@
<fieldtype name="wdf_preserve" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -118,7 +118,7 @@
<fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true" >
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
@@ -130,7 +130,7 @@
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
@@ -148,7 +148,7 @@
<!-- field type that doesn't generate phrases from unquoted multiple tokens per analysis unit -->
<fieldType name="text_np" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
@@ -160,7 +160,7 @@
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
@@ -200,7 +200,7 @@
<analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="whitetok" class="solr.TextField">
- <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+ <analyzer><tokenizer class="solr.MockTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLstandardtok" class="solr.TextField">
<analyzer>
@@ -211,7 +211,7 @@
<fieldtype name="HTMLwhitetok" class="solr.TextField">
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardtokfilt" class="solr.TextField">
@@ -222,13 +222,13 @@
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -245,51 +245,51 @@
</fieldtype>
<fieldtype name="porterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldtype>
<fieldtype name="custstopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="lengthfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldtype>
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
@@ -299,14 +299,14 @@
<fieldtype name="numericsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
@@ -316,12 +316,12 @@
<fieldtype name="protectedsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -330,12 +330,12 @@
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldtype name="skutype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -344,12 +344,12 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="skutype2" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -358,7 +358,7 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="syn" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
@@ -367,7 +367,7 @@
-->
<fieldType name="text_sw" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
@@ -378,7 +378,7 @@
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
@@ -394,7 +394,7 @@
-->
<fieldtype name="dedup" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" expand="true" />
<filter class="solr.EnglishPorterFilterFactory"/>
@@ -407,7 +407,7 @@
<fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
diff --git a/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
index 7fb673f..d15f6c6 100644
--- a/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
@@ -17,6 +17,7 @@
package org.apache.solr.handler;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.solr.client.solrj.request.DocumentAnalysisRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
@@ -252,8 +253,8 @@
NamedList<NamedList<Object>> whitetokResult = documentResult.get("whitetok");
assertNotNull("an analysis for the 'whitetok' field should be returned", whitetokResult);
queryResult = whitetokResult.get("query");
- tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.WhitespaceTokenizer");
- assertNotNull("Expecting the 'WhitespaceTokenizer' to be applied on the query for the 'whitetok' field", tokenList);
+ tokenList = (List<NamedList>) queryResult.get(MockTokenizer.class.getName());
+ assertNotNull("Expecting the 'MockTokenizer' to be applied on the query for the 'whitetok' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "word", 0, 7, 1, new int[]{1}, null, false));
indexResult = whitetokResult.get("index");
diff --git a/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
index 833c5f9..60bbc42 100644
--- a/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
@@ -17,6 +17,7 @@
package org.apache.solr.handler;
+import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.KeywordTokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.solr.common.params.AnalysisParams;
@@ -261,10 +262,10 @@
indexPart = whitetok.get("index");
assertNotNull("expecting an index token analysis for field 'whitetok'", indexPart);
- assertEquals("expecting only WhitespaceTokenizer to be applied", 1, indexPart.size());
- tokenList = indexPart.get(WhitespaceTokenizer.class.getName());
- assertNotNull("expecting only WhitespaceTokenizer to be applied", tokenList);
- assertEquals("expecting WhitespaceTokenizer to produce 10 tokens", 10, tokenList.size());
+ assertEquals("expecting only MockTokenizer to be applied", 1, indexPart.size());
+ tokenList = indexPart.get(MockTokenizer.class.getName());
+ assertNotNull("expecting only MockTokenizer to be applied", tokenList);
+ assertEquals("expecting MockTokenizer to produce 10 tokens", 10, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("the", null, "word", 0, 3, 1, new int[]{1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("quick", null, "word", 4, 9, 2, new int[]{2}, null, false));
assertToken(tokenList.get(2), new TokenInfo("red", null, "word", 10, 13, 3, new int[]{3}, null, false));
@@ -278,10 +279,10 @@
queryPart = whitetok.get("query");
assertNotNull("expecting a query token analysis for field 'whitetok'", queryPart);
- assertEquals("expecting only WhitespaceTokenizer to be applied", 1, queryPart.size());
- tokenList = queryPart.get(WhitespaceTokenizer.class.getName());
- assertNotNull("expecting only WhitespaceTokenizer to be applied", tokenList);
- assertEquals("expecting WhitespaceTokenizer to produce 2 tokens", 2, tokenList.size());
+ assertEquals("expecting only MockTokenizer to be applied", 1, queryPart.size());
+ tokenList = queryPart.get(MockTokenizer.class.getName());
+ assertNotNull("expecting only MockTokenizer to be applied", tokenList);
+ assertEquals("expecting MockTokenizer to produce 2 tokens", 2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "word", 0, 3, 1, new int[]{1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "word", 4, 9, 2, new int[]{2}, null, false));
@@ -328,8 +329,8 @@
assertEquals(" whátëvêr ", indexPart.get("org.apache.solr.analysis.HTMLStripCharFilter"));
assertEquals(" whatever ", indexPart.get("org.apache.lucene.analysis.MappingCharFilter"));
- List<NamedList> tokenList = (List<NamedList>)indexPart.get("org.apache.lucene.analysis.WhitespaceTokenizer");
- assertNotNull("Expecting WhitespaceTokenizer analysis breakdown", tokenList);
+ List<NamedList> tokenList = (List<NamedList>)indexPart.get(MockTokenizer.class.getName());
+ assertNotNull("Expecting MockTokenizer analysis breakdown", tokenList);
assertEquals(tokenList.size(), 1);
assertToken(tokenList.get(0), new TokenInfo("whatever", null, "word", 12, 20, 1, new int[]{1}, null, false));
}
@@ -353,8 +354,8 @@
NamedList<List<NamedList>> indexPart = textType.get("index");
assertNotNull("expecting an index token analysis for field type 'skutype1'", indexPart);
- List<NamedList> tokenList = indexPart.get("org.apache.lucene.analysis.WhitespaceTokenizer");
- assertNotNull("Expcting WhitespaceTokenizer analysis breakdown", tokenList);
+ List<NamedList> tokenList = indexPart.get(MockTokenizer.class.getName());
+ assertNotNull("Expcting MockTokenizer analysis breakdown", tokenList);
assertEquals(4, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("hi,", null, "word", 0, 3, 1, new int[]{1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("3456-12", null, "word", 4, 11, 2, new int[]{2}, null, false));
diff --git a/solr/solrj/src/test-files/solrj/solr/conf/schema.xml b/solr/solrj/src/test-files/solrj/solr/conf/schema.xml
index 37408d0..b51dbc5 100644
--- a/solr/solrj/src/test-files/solrj/solr/conf/schema.xml
+++ b/solr/solrj/src/test-files/solrj/solr/conf/schema.xml
@@ -86,7 +86,7 @@
<!-- Field type demonstrating an Analyzer failure -->
<fieldtype name="failtype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -95,7 +95,7 @@
<!-- Demonstrating ignoreCaseChange -->
<fieldtype name="wdf_nocase" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -103,7 +103,7 @@
<fieldtype name="wdf_preserve" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -165,7 +165,7 @@
<analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="whitetok" class="solr.TextField">
- <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+ <analyzer><tokenizer class="solr.MockTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLstandardtok" class="solr.TextField">
<analyzer>
@@ -176,7 +176,7 @@
<fieldtype name="HTMLwhitetok" class="solr.TextField">
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardtokfilt" class="solr.TextField">
@@ -187,19 +187,19 @@
</fieldtype>
<fieldtype name="standardfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerpunctfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt" expand="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="1" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
@@ -223,43 +223,43 @@
</fieldtype>
<fieldtype name="porterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<!-- fieldtype name="snowballfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SnowballPorterFilterFactory"/>
</analyzer>
</fieldtype -->
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldtype>
<fieldtype name="custstopfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="lengthfilt" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldtype>
@@ -267,20 +267,20 @@
<analyzer>
<charFilter class="solr.HTMLStripCharFilterFactory"/>
<charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
<fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
@@ -290,14 +290,14 @@
<fieldtype name="numericsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
@@ -307,12 +307,12 @@
<fieldtype name="protectedsubword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -321,12 +321,12 @@
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldtype name="skutype1" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -335,12 +335,12 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="skutype2" class="solr.TextField">
<analyzer type="index">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
@@ -349,7 +349,7 @@
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="syn" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="old_synonyms.txt"/>
</analyzer>
</fieldtype>
@@ -359,7 +359,7 @@
-->
<fieldtype name="dedup" class="solr.TextField">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="old_synonyms.txt" expand="true" />
<filter class="solr.PorterStemFilterFactory"/>
@@ -372,7 +372,7 @@
<fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
@@ -394,7 +394,7 @@
<!-- omitPositions example -->
<fieldType name="nopositions" class="solr.TextField" omitPositions="true">
<analyzer>
- <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <tokenizer class="solr.MockTokenizerFactory"/>
</analyzer>
</fieldType>
</types>
diff --git a/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java b/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java
new file mode 100644
index 0000000..e2512cb
--- /dev/null
+++ b/solr/test-framework/src/java/org/apache/solr/analysis/MockTokenizerFactory.java
@@ -0,0 +1,61 @@
+package org.apache.solr.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Reader;
+import java.util.Map;
+
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
+
+/**
+ * Factory for {@link MockTokenizer} for testing purposes. Reads the optional "pattern" and "enableChecks" arguments in {@link #init(Map)}.
+ */
+public class MockTokenizerFactory extends BaseTokenizerFactory {
+ int pattern; // MockTokenizer pattern constant chosen in init(): WHITESPACE, KEYWORD or SIMPLE
+ boolean enableChecks; // passed to MockTokenizer.setEnableChecks() for each tokenizer built by create()
+
+ @Override
+ public void init(Map<String,String> args) {
+ super.init(args);
+ String patternArg = args.get("pattern"); // optional; compared case-insensitively below
+ if (patternArg == null) {
+ patternArg = "whitespace"; // fallback used when no "pattern" argument is supplied
+ }
+
+ if ("whitespace".equalsIgnoreCase(patternArg)) {
+ pattern = MockTokenizer.WHITESPACE;
+ } else if ("keyword".equalsIgnoreCase(patternArg)) {
+ pattern = MockTokenizer.KEYWORD;
+ } else if ("simple".equalsIgnoreCase(patternArg)) {
+ pattern = MockTokenizer.SIMPLE;
+ } else {
+ throw new RuntimeException("invalid pattern!"); // any name other than whitespace/keyword/simple is rejected
+ }
+
+ enableChecks = getBoolean("enableChecks", true); // NOTE(review): 'true' looks like the default when "enableChecks" is unset - confirm in the inherited getBoolean
+ }
+
+
+ @Override
+ public Tokenizer create(Reader input) {
+ MockTokenizer t = new MockTokenizer(input, pattern, false); // NOTE(review): third argument is presumably MockTokenizer's lowerCase flag - confirm against its constructor
+ t.setEnableChecks(enableChecks);
+ return t;
+ }
+}
\ No newline at end of file