blob: 600fe29ed94a93a4ca5eafae5e0b9d9b81f52e4a [file] [log] [blame]
{
"Lucene.Net.Analysis.Cn.Smart.AnalyzerProfile.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.AnalyzerProfile.html",
"title": "Class AnalyzerProfile | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class AnalyzerProfile Manages analysis data configuration for SmartChineseAnalyzer SmartChineseAnalyzer has a built-in dictionary and stopword list out-of-box. NOTE: To use an alternate dicationary than the built-in one, put the \"bigramdict.dct\" and \"coredict.dct\" files in a subdirectory of your application named \"smartcn-data\". This subdirectory can be placed in any directory up to and including the root directory (if the OS permission allows). To place the files in an alternate location, set an environment variable named \"smartcn.data.dir\" with the name of the directory the \"bigramdict.dct\" and \"coredict.dct\" files can be located within. The default \"bigramdict.dct\" and \"coredict.dct\" files can be found at: https://issues.apache.org/jira/browse/LUCENE-1629 . This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object AnalyzerProfile Inherited Members System.Object.Equals(System.Object) System.Object.Equals(System.Object, System.Object) System.Object.GetHashCode() System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) System.Object.ToString() Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public static class AnalyzerProfile Fields | Improve this Doc View Source ANALYSIS_DATA_DIR Global indicating the configured analysis data directory Declaration public static string ANALYSIS_DATA_DIR Field Value Type Description System.String"
},
"Lucene.Net.Analysis.Cn.Smart.CharType.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.CharType.html",
"title": "Enum CharType | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Enum CharType Internal SmartChineseAnalyzer character type constants. This is a Lucene.NET EXPERIMENTAL API, use at your own risk Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public enum CharType Fields Name Description DELIMITER Punctuation Characters DIGIT Numeric Digits FULLWIDTH_DIGIT Full-Width alphanumeric characters FULLWIDTH_LETTER Full-Width letters HANZI Han Ideographs LETTER Letters OTHER Other (not fitting any of the other categories) SPACE_LIKE Characters that act as a space"
},
"Lucene.Net.Analysis.Cn.Smart.Hhmm.HHMMSegmenter.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.Hhmm.HHMMSegmenter.html",
"title": "Class HHMMSegmenter | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class HHMMSegmenter Finds the optimal segmentation of a sentence into Chinese words This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object HHMMSegmenter Inherited Members System.Object.Equals(System.Object) System.Object.Equals(System.Object, System.Object) System.Object.GetHashCode() System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) System.Object.ToString() Namespace : Lucene.Net.Analysis.Cn.Smart.Hhmm Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public class HHMMSegmenter Methods | Improve this Doc View Source Process(String) Return a list of SegToken representing the best segmentation of a sentence Declaration public virtual IList<SegToken> Process(string sentence) Parameters Type Name Description System.String sentence input sentence Returns Type Description System.Collections.Generic.IList < SegToken > best segmentation as a IList{SegToken}"
},
"Lucene.Net.Analysis.Cn.Smart.Hhmm.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.Hhmm.html",
"title": "Namespace Lucene.Net.Analysis.Cn.Smart.Hhmm | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Namespace Lucene.Net.Analysis.Cn.Smart.Hhmm <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> SmartChineseAnalyzer Hidden Markov Model package. This is a Lucene.NET EXPERIMENTAL API, use at your own risk Classes HHMMSegmenter Finds the optimal segmentation of a sentence into Chinese words This is a Lucene.NET EXPERIMENTAL API, use at your own risk SegToken SmartChineseAnalyzer internal token This is a Lucene.NET EXPERIMENTAL API, use at your own risk SegTokenFilter Filters a SegToken by converting full-width latin to half-width, then lowercasing latin. Additionally, all punctuation is converted into COMMON_DELIMITER This is a Lucene.NET EXPERIMENTAL API, use at your own risk"
},
"Lucene.Net.Analysis.Cn.Smart.Hhmm.SegToken.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.Hhmm.SegToken.html",
"title": "Class SegToken | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class SegToken SmartChineseAnalyzer internal token This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object SegToken Inherited Members System.Object.Equals(System.Object, System.Object) System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) System.Object.ToString() Namespace : Lucene.Net.Analysis.Cn.Smart.Hhmm Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public class SegToken Constructors | Improve this Doc View Source SegToken(Char[], Int32, Int32, WordType, Int32) Create a new SegToken from a character array. Declaration public SegToken(char[] idArray, int start, int end, WordType wordType, int weight) Parameters Type Name Description System.Char [] idArray character array containing text System.Int32 start start offset of SegToken in original sentence System.Int32 end end offset of SegToken in original sentence WordType wordType WordType of the text System.Int32 weight word frequency Properties | Improve this Doc View Source CharArray Character array containing token text Declaration public char[] CharArray { get; set; } Property Value Type Description System.Char [] | Improve this Doc View Source EndOffset end offset into original sentence Declaration public int EndOffset { get; set; } Property Value Type Description System.Int32 | Improve this Doc View Source Index during segmentation, this is used to store the index of the token in the token list table Declaration public int Index { get; set; } Property Value Type Description System.Int32 | Improve this Doc View Source StartOffset start offset into original sentence Declaration public int StartOffset { get; set; } Property Value Type Description System.Int32 | Improve this Doc View Source Weight word frequency Declaration public int Weight { get; set; } Property Value Type Description System.Int32 | Improve this Doc View Source WordType WordType of the text Declaration public WordType WordType { get; 
set; } Property Value Type Description WordType Methods | Improve this Doc View Source Equals(Object) System.Object.Equals(System.Object) Declaration public override bool Equals(object obj) Parameters Type Name Description System.Object obj Returns Type Description System.Boolean Overrides System.Object.Equals(System.Object) | Improve this Doc View Source GetHashCode() System.Object.GetHashCode() Declaration public override int GetHashCode() Returns Type Description System.Int32 Overrides System.Object.GetHashCode()"
},
"Lucene.Net.Analysis.Cn.Smart.Hhmm.SegTokenFilter.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.Hhmm.SegTokenFilter.html",
"title": "Class SegTokenFilter | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class SegTokenFilter Filters a SegToken by converting full-width latin to half-width, then lowercasing latin. Additionally, all punctuation is converted into COMMON_DELIMITER This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object SegTokenFilter Inherited Members System.Object.Equals(System.Object) System.Object.Equals(System.Object, System.Object) System.Object.GetHashCode() System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) System.Object.ToString() Namespace : Lucene.Net.Analysis.Cn.Smart.Hhmm Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public class SegTokenFilter Methods | Improve this Doc View Source Filter(SegToken) Filter an input SegToken Full-width latin will be converted to half-width, then all latin will be lowercased. All punctuation is converted into COMMON_DELIMITER Declaration public virtual SegToken Filter(SegToken token) Parameters Type Name Description SegToken token Input SegToken . Returns Type Description SegToken Normalized SegToken ."
},
"Lucene.Net.Analysis.Cn.Smart.HMMChineseTokenizer.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.HMMChineseTokenizer.html",
"title": "Class HMMChineseTokenizer | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class HMMChineseTokenizer Tokenizer for Chinese or mixed Chinese-English text. The analyzer uses probabilistic knowledge to find the optimal word segmentation for Simplified Chinese text. The text is first broken into sentences, then each sentence is segmented into words. Inheritance System.Object Lucene.Net.Util.AttributeSource Lucene.Net.Analysis.TokenStream Lucene.Net.Analysis.Tokenizer Lucene.Net.Analysis.Util.SegmentingTokenizerBase HMMChineseTokenizer Implements System.IDisposable Inherited Members Lucene.Net.Analysis.Util.SegmentingTokenizerBase.BUFFERMAX Lucene.Net.Analysis.Util.SegmentingTokenizerBase.m_buffer Lucene.Net.Analysis.Util.SegmentingTokenizerBase.m_offset Lucene.Net.Analysis.Util.SegmentingTokenizerBase.IncrementToken() Lucene.Net.Analysis.Util.SegmentingTokenizerBase.End() Lucene.Net.Analysis.Util.SegmentingTokenizerBase.IsSafeEnd(System.Char) Lucene.Net.Analysis.Tokenizer.m_input Tokenizer.CorrectOffset(Int32) Tokenizer.SetReader(TextReader) Lucene.Net.Analysis.TokenStream.Dispose() Lucene.Net.Util.AttributeSource.GetAttributeFactory() Lucene.Net.Util.AttributeSource.GetAttributeClassesEnumerator() Lucene.Net.Util.AttributeSource.GetAttributeImplsEnumerator() Lucene.Net.Util.AttributeSource.AddAttributeImpl(Lucene.Net.Util.Attribute) Lucene.Net.Util.AttributeSource.AddAttribute<T>() Lucene.Net.Util.AttributeSource.HasAttributes Lucene.Net.Util.AttributeSource.HasAttribute<T>() Lucene.Net.Util.AttributeSource.GetAttribute<T>() Lucene.Net.Util.AttributeSource.ClearAttributes() Lucene.Net.Util.AttributeSource.CaptureState() Lucene.Net.Util.AttributeSource.RestoreState(Lucene.Net.Util.AttributeSource.State) Lucene.Net.Util.AttributeSource.GetHashCode() AttributeSource.Equals(Object) AttributeSource.ReflectAsString(Boolean) Lucene.Net.Util.AttributeSource.ReflectWith(Lucene.Net.Util.IAttributeReflector) Lucene.Net.Util.AttributeSource.CloneAttributes() Lucene.Net.Util.AttributeSource.CopyTo(Lucene.Net.Util.AttributeSource) 
Lucene.Net.Util.AttributeSource.ToString() System.Object.Equals(System.Object, System.Object) System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public class HMMChineseTokenizer : SegmentingTokenizerBase, IDisposable Constructors | Improve this Doc View Source HMMChineseTokenizer(AttributeSource.AttributeFactory, TextReader) Creates a new HMMChineseTokenizer , supplying the Lucene.Net.Util.AttributeSource.AttributeFactory Declaration public HMMChineseTokenizer(AttributeSource.AttributeFactory factory, TextReader reader) Parameters Type Name Description Lucene.Net.Util.AttributeSource.AttributeFactory factory System.IO.TextReader reader | Improve this Doc View Source HMMChineseTokenizer(TextReader) Creates a new HMMChineseTokenizer Declaration public HMMChineseTokenizer(TextReader reader) Parameters Type Name Description System.IO.TextReader reader Methods | Improve this Doc View Source Dispose(Boolean) Releases resources used by the HMMChineseTokenizer and if overridden in a derived class, optionally releases unmanaged resources. Declaration protected override void Dispose(bool disposing) Parameters Type Name Description System.Boolean disposing true to release both managed and unmanaged resources; false to release only unmanaged resources. 
Overrides Tokenizer.Dispose(Boolean) | Improve this Doc View Source IncrementWord() Declaration protected override bool IncrementWord() Returns Type Description System.Boolean Overrides Lucene.Net.Analysis.Util.SegmentingTokenizerBase.IncrementWord() | Improve this Doc View Source Reset() Declaration public override void Reset() Overrides Lucene.Net.Analysis.Util.SegmentingTokenizerBase.Reset() | Improve this Doc View Source SetNextSentence(Int32, Int32) Declaration protected override void SetNextSentence(int sentenceStart, int sentenceEnd) Parameters Type Name Description System.Int32 sentenceStart System.Int32 sentenceEnd Overrides Lucene.Net.Analysis.Util.SegmentingTokenizerBase.SetNextSentence(System.Int32, System.Int32) Implements System.IDisposable"
},
"Lucene.Net.Analysis.Cn.Smart.HMMChineseTokenizerFactory.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.HMMChineseTokenizerFactory.html",
"title": "Class HMMChineseTokenizerFactory | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class HMMChineseTokenizerFactory Factory for HMMChineseTokenizer Note: this class will currently emit tokens for punctuation. So you should either add a Lucene.Net.Analysis.Miscellaneous.WordDelimiterFilter after to remove these (with concatenate off), or use the SmartChinese stoplist with a StopFilterFactory via: words=\"org/apache/lucene/analysis/cn/smart/stopwords.txt\" This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object Lucene.Net.Analysis.Util.AbstractAnalysisFactory Lucene.Net.Analysis.Util.TokenizerFactory HMMChineseTokenizerFactory Inherited Members Lucene.Net.Analysis.Util.TokenizerFactory.ForName(System.String, System.Collections.Generic.IDictionary<System.String, System.String>) Lucene.Net.Analysis.Util.TokenizerFactory.LookupClass(System.String) Lucene.Net.Analysis.Util.TokenizerFactory.AvailableTokenizers Lucene.Net.Analysis.Util.TokenizerFactory.ReloadTokenizers() Lucene.Net.Analysis.Util.TokenizerFactory.Create(System.IO.TextReader) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM Lucene.Net.Analysis.Util.AbstractAnalysisFactory.m_luceneMatchVersion Lucene.Net.Analysis.Util.AbstractAnalysisFactory.OriginalArgs Lucene.Net.Analysis.Util.AbstractAnalysisFactory.AssureMatchVersion() Lucene.Net.Analysis.Util.AbstractAnalysisFactory.LuceneMatchVersion Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Require(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Require(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Require(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>, System.Boolean) 
Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireInt32(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetInt32(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Int32) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireBoolean(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetBoolean(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireSingle(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetSingle(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Single) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireChar(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetChar(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Char) 
Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetSet(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetPattern(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetCulture(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Globalization.CultureInfo) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetWordSet(Lucene.Net.Analysis.Util.IResourceLoader, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetLines(Lucene.Net.Analysis.Util.IResourceLoader, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetSnowballWordSet(Lucene.Net.Analysis.Util.IResourceLoader, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.SplitFileNames(System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetClassArg() Lucene.Net.Analysis.Util.AbstractAnalysisFactory.IsExplicitLuceneMatchVersion System.Object.Equals(System.Object) System.Object.Equals(System.Object, System.Object) System.Object.GetHashCode() System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) System.Object.ToString() Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public sealed class HMMChineseTokenizerFactory : TokenizerFactory Constructors | Improve this Doc View Source HMMChineseTokenizerFactory(IDictionary<String, String>) Creates a new HMMChineseTokenizerFactory Declaration public HMMChineseTokenizerFactory(IDictionary<string, string> args) Parameters Type Name Description System.Collections.Generic.IDictionary < System.String , System.String > args Methods | Improve this Doc View Source Create(AttributeSource.AttributeFactory, TextReader) Declaration public override Tokenizer Create(AttributeSource.AttributeFactory factory, 
TextReader reader) Parameters Type Name Description Lucene.Net.Util.AttributeSource.AttributeFactory factory System.IO.TextReader reader Returns Type Description Lucene.Net.Analysis.Tokenizer Overrides Lucene.Net.Analysis.Util.TokenizerFactory.Create(Lucene.Net.Util.AttributeSource.AttributeFactory, System.IO.TextReader)"
},
"Lucene.Net.Analysis.Cn.Smart.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.html",
"title": "Namespace Lucene.Net.Analysis.Cn.Smart | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Namespace Lucene.Net.Analysis.Cn.Smart <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. --> Analyzer for Simplified Chinese, which indexes words. This is a Lucene.NET EXPERIMENTAL API, use at your own risk Three analyzers are provided for Chinese, each of which treats Chinese text in a different way. * StandardAnalyzer: Index unigrams (individual Chinese characters) as a token. * CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens. * SmartChineseAnalyzer (in this package): Index words (attempt to segment Chinese text into words) as tokens. Example phrase: \"我是中国人\" 1. StandardAnalyzer: 我-是-中-国-人 2. CJKAnalyzer: 我是-是中-中国-国人 3. SmartChineseAnalyzer: 我-是-中国-人 Classes AnalyzerProfile Manages analysis data configuration for SmartChineseAnalyzer SmartChineseAnalyzer has a built-in dictionary and stopword list out-of-box. NOTE: To use an alternate dicationary than the built-in one, put the \"bigramdict.dct\" and \"coredict.dct\" files in a subdirectory of your application named \"smartcn-data\". This subdirectory can be placed in any directory up to and including the root directory (if the OS permission allows). 
To place the files in an alternate location, set an environment variable named \"smartcn.data.dir\" with the name of the directory the \"bigramdict.dct\" and \"coredict.dct\" files can be located within. The default \"bigramdict.dct\" and \"coredict.dct\" files can be found at: https://issues.apache.org/jira/browse/LUCENE-1629 . This is a Lucene.NET EXPERIMENTAL API, use at your own risk HMMChineseTokenizer Tokenizer for Chinese or mixed Chinese-English text. The analyzer uses probabilistic knowledge to find the optimal word segmentation for Simplified Chinese text. The text is first broken into sentences, then each sentence is segmented into words. HMMChineseTokenizerFactory Factory for HMMChineseTokenizer Note: this class will currently emit tokens for punctuation. So you should either add a Lucene.Net.Analysis.Miscellaneous.WordDelimiterFilter after to remove these (with concatenate off), or use the SmartChinese stoplist with a StopFilterFactory via: words=\"org/apache/lucene/analysis/cn/smart/stopwords.txt\" This is a Lucene.NET EXPERIMENTAL API, use at your own risk SentenceTokenizer Tokenizes input text into sentences. The output tokens can then be broken into words with WordTokenFilter This is a Lucene.NET EXPERIMENTAL API, use at your own risk SmartChineseAnalyzer SmartChineseAnalyzer is an analyzer for Chinese or mixed Chinese-English text. The analyzer uses probabilistic knowledge to find the optimal word segmentation for Simplified Chinese text. The text is first broken into sentences, then each sentence is segmented into words. Segmentation is based upon the Hidden Markov Model . A large training corpus was used to calculate Chinese word frequency probability. This analyzer requires a dictionary to provide statistical data. SmartChineseAnalyzer has an included dictionary out-of-box. The included dictionary data is from ICTCLAS1.0 . Thanks to ICTCLAS for their hard work, and for contributing the data under the Apache 2 License! 
This is a Lucene.NET EXPERIMENTAL API, use at your own risk SmartChineseSentenceTokenizerFactory Factory for the SmartChineseAnalyzer SentenceTokenizer This is a Lucene.NET EXPERIMENTAL API, use at your own risk SmartChineseWordTokenFilterFactory Factory for the SmartChineseAnalyzer WordTokenFilter Note: this class will currently emit tokens for punctuation. So you should either add a Lucene.Net.Analysis.Miscellaneous.WordDelimiterFilter after to remove these (with concatenate off), or use the SmartChinese stoplist with a Lucene.Net.Analysis.Core.StopFilterFactory via: words=\"org/apache/lucene/analysis/cn/smart/stopwords.txt\" This is a Lucene.NET EXPERIMENTAL API, use at your own risk Utility SmartChineseAnalyzer utility constants and methods This is a Lucene.NET EXPERIMENTAL API, use at your own risk WordTokenFilter A Lucene.Net.Analysis.TokenFilter that breaks sentences into words. This is a Lucene.NET EXPERIMENTAL API, use at your own risk Enums CharType Internal SmartChineseAnalyzer character type constants. This is a Lucene.NET EXPERIMENTAL API, use at your own risk WordType Internal SmartChineseAnalyzer token type constants This is a Lucene.NET EXPERIMENTAL API, use at your own risk"
},
"Lucene.Net.Analysis.Cn.Smart.SentenceTokenizer.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.SentenceTokenizer.html",
"title": "Class SentenceTokenizer | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class SentenceTokenizer Tokenizes input text into sentences. The output tokens can then be broken into words with WordTokenFilter This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object Lucene.Net.Util.AttributeSource Lucene.Net.Analysis.TokenStream Lucene.Net.Analysis.Tokenizer SentenceTokenizer Implements System.IDisposable Inherited Members Lucene.Net.Analysis.Tokenizer.m_input Tokenizer.Dispose(Boolean) Tokenizer.CorrectOffset(Int32) Tokenizer.SetReader(TextReader) Lucene.Net.Analysis.TokenStream.Dispose() Lucene.Net.Util.AttributeSource.GetAttributeFactory() Lucene.Net.Util.AttributeSource.GetAttributeClassesEnumerator() Lucene.Net.Util.AttributeSource.GetAttributeImplsEnumerator() Lucene.Net.Util.AttributeSource.AddAttributeImpl(Lucene.Net.Util.Attribute) Lucene.Net.Util.AttributeSource.AddAttribute<T>() Lucene.Net.Util.AttributeSource.HasAttributes Lucene.Net.Util.AttributeSource.HasAttribute<T>() Lucene.Net.Util.AttributeSource.GetAttribute<T>() Lucene.Net.Util.AttributeSource.ClearAttributes() Lucene.Net.Util.AttributeSource.CaptureState() Lucene.Net.Util.AttributeSource.RestoreState(Lucene.Net.Util.AttributeSource.State) Lucene.Net.Util.AttributeSource.GetHashCode() AttributeSource.Equals(Object) AttributeSource.ReflectAsString(Boolean) Lucene.Net.Util.AttributeSource.ReflectWith(Lucene.Net.Util.IAttributeReflector) Lucene.Net.Util.AttributeSource.CloneAttributes() Lucene.Net.Util.AttributeSource.CopyTo(Lucene.Net.Util.AttributeSource) Lucene.Net.Util.AttributeSource.ToString() System.Object.Equals(System.Object, System.Object) System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax [Obsolete(\"Use HMMChineseTokenizer instead\")] public sealed class SentenceTokenizer : Tokenizer, IDisposable Constructors | Improve this Doc View Source 
SentenceTokenizer(AttributeSource.AttributeFactory, TextReader) Declaration public SentenceTokenizer(AttributeSource.AttributeFactory factory, TextReader reader) Parameters Type Name Description Lucene.Net.Util.AttributeSource.AttributeFactory factory System.IO.TextReader reader | Improve this Doc View Source SentenceTokenizer(TextReader) Declaration public SentenceTokenizer(TextReader reader) Parameters Type Name Description System.IO.TextReader reader Methods | Improve this Doc View Source End() Declaration public override void End() Overrides Lucene.Net.Analysis.TokenStream.End() | Improve this Doc View Source IncrementToken() Declaration public override bool IncrementToken() Returns Type Description System.Boolean Overrides Lucene.Net.Analysis.TokenStream.IncrementToken() | Improve this Doc View Source Reset() Declaration public override void Reset() Overrides Lucene.Net.Analysis.Tokenizer.Reset() Implements System.IDisposable"
},
"Lucene.Net.Analysis.Cn.Smart.SmartChineseAnalyzer.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.SmartChineseAnalyzer.html",
"title": "Class SmartChineseAnalyzer | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class SmartChineseAnalyzer SmartChineseAnalyzer is an analyzer for Chinese or mixed Chinese-English text. The analyzer uses probabilistic knowledge to find the optimal word segmentation for Simplified Chinese text. The text is first broken into sentences, then each sentence is segmented into words. Segmentation is based upon the Hidden Markov Model . A large training corpus was used to calculate Chinese word frequency probability. This analyzer requires a dictionary to provide statistical data. SmartChineseAnalyzer has an included dictionary out-of-box. The included dictionary data is from ICTCLAS1.0 . Thanks to ICTCLAS for their hard work, and for contributing the data under the Apache 2 License! This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object Lucene.Net.Analysis.Analyzer SmartChineseAnalyzer Implements System.IDisposable Inherited Members Analyzer.NewAnonymous(Func<String, TextReader, TokenStreamComponents>) Analyzer.NewAnonymous(Func<String, TextReader, TokenStreamComponents>, ReuseStrategy) Analyzer.NewAnonymous(Func<String, TextReader, TokenStreamComponents>, Func<String, TextReader, TextReader>) Analyzer.NewAnonymous(Func<String, TextReader, TokenStreamComponents>, Func<String, TextReader, TextReader>, ReuseStrategy) Analyzer.GetTokenStream(String, TextReader) Analyzer.GetTokenStream(String, String) Analyzer.InitReader(String, TextReader) Analyzer.GetPositionIncrementGap(String) Analyzer.GetOffsetGap(String) Lucene.Net.Analysis.Analyzer.Strategy Lucene.Net.Analysis.Analyzer.Dispose() Analyzer.Dispose(Boolean) Lucene.Net.Analysis.Analyzer.GLOBAL_REUSE_STRATEGY Lucene.Net.Analysis.Analyzer.PER_FIELD_REUSE_STRATEGY System.Object.Equals(System.Object) System.Object.Equals(System.Object, System.Object) System.Object.GetHashCode() System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) System.Object.ToString() Namespace : Lucene.Net.Analysis.Cn.Smart 
Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public sealed class SmartChineseAnalyzer : Analyzer, IDisposable Constructors | Improve this Doc View Source SmartChineseAnalyzer(LuceneVersion) Create a new SmartChineseAnalyzer , using the default stopword list. Declaration public SmartChineseAnalyzer(LuceneVersion matchVersion) Parameters Type Name Description Lucene.Net.Util.LuceneVersion matchVersion | Improve this Doc View Source SmartChineseAnalyzer(LuceneVersion, CharArraySet) Create a new SmartChineseAnalyzer , using the provided Lucene.Net.Analysis.Util.CharArraySet of stopwords. Note: the set should include punctuation, unless you want to index punctuation! Declaration public SmartChineseAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords) Parameters Type Name Description Lucene.Net.Util.LuceneVersion matchVersion Lucene.Net.Analysis.Util.CharArraySet stopWords Lucene.Net.Analysis.Util.CharArraySet of stopwords to use. | Improve this Doc View Source SmartChineseAnalyzer(LuceneVersion, Boolean) Create a new SmartChineseAnalyzer , optionally using the default stopword list. The included default stopword list is simply a list of punctuation. If you do not use this list, punctuation will not be removed from the text! Declaration public SmartChineseAnalyzer(LuceneVersion matchVersion, bool useDefaultStopWords) Parameters Type Name Description Lucene.Net.Util.LuceneVersion matchVersion System.Boolean useDefaultStopWords true to use the default stopword list. 
Methods | Improve this Doc View Source CreateComponents(String, TextReader) Declaration protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader) Parameters Type Name Description System.String fieldName System.IO.TextReader reader Returns Type Description Lucene.Net.Analysis.TokenStreamComponents Overrides Analyzer.CreateComponents(String, TextReader) | Improve this Doc View Source GetDefaultStopSet() Returns an unmodifiable instance of the default stop-words set. Declaration public static CharArraySet GetDefaultStopSet() Returns Type Description Lucene.Net.Analysis.Util.CharArraySet An unmodifiable instance of the default stop-words set. Implements System.IDisposable"
},
"Lucene.Net.Analysis.Cn.Smart.SmartChineseSentenceTokenizerFactory.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.SmartChineseSentenceTokenizerFactory.html",
"title": "Class SmartChineseSentenceTokenizerFactory | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class SmartChineseSentenceTokenizerFactory Factory for the SmartChineseAnalyzer SentenceTokenizer This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object Lucene.Net.Analysis.Util.AbstractAnalysisFactory Lucene.Net.Analysis.Util.TokenizerFactory SmartChineseSentenceTokenizerFactory Inherited Members Lucene.Net.Analysis.Util.TokenizerFactory.ForName(System.String, System.Collections.Generic.IDictionary<System.String, System.String>) Lucene.Net.Analysis.Util.TokenizerFactory.LookupClass(System.String) Lucene.Net.Analysis.Util.TokenizerFactory.AvailableTokenizers Lucene.Net.Analysis.Util.TokenizerFactory.ReloadTokenizers() Lucene.Net.Analysis.Util.TokenizerFactory.Create(System.IO.TextReader) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM Lucene.Net.Analysis.Util.AbstractAnalysisFactory.m_luceneMatchVersion Lucene.Net.Analysis.Util.AbstractAnalysisFactory.OriginalArgs Lucene.Net.Analysis.Util.AbstractAnalysisFactory.AssureMatchVersion() Lucene.Net.Analysis.Util.AbstractAnalysisFactory.LuceneMatchVersion Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Require(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Require(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Require(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>) 
Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireInt32(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetInt32(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Int32) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireBoolean(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetBoolean(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireSingle(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetSingle(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Single) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireChar(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetChar(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Char) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetSet(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetPattern(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) 
Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetCulture(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Globalization.CultureInfo) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetWordSet(Lucene.Net.Analysis.Util.IResourceLoader, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetLines(Lucene.Net.Analysis.Util.IResourceLoader, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetSnowballWordSet(Lucene.Net.Analysis.Util.IResourceLoader, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.SplitFileNames(System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetClassArg() Lucene.Net.Analysis.Util.AbstractAnalysisFactory.IsExplicitLuceneMatchVersion System.Object.Equals(System.Object) System.Object.Equals(System.Object, System.Object) System.Object.GetHashCode() System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) System.Object.ToString() Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax [Obsolete(\"Use HMMChineseTokenizerFactory instead\")] public class SmartChineseSentenceTokenizerFactory : TokenizerFactory Constructors | Improve this Doc View Source SmartChineseSentenceTokenizerFactory(IDictionary<String, String>) Creates a new SmartChineseSentenceTokenizerFactory Declaration public SmartChineseSentenceTokenizerFactory(IDictionary<string, string> args) Parameters Type Name Description System.Collections.Generic.IDictionary < System.String , System.String > args Methods | Improve this Doc View Source Create(AttributeSource.AttributeFactory, TextReader) Declaration public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input) Parameters Type Name Description Lucene.Net.Util.AttributeSource.AttributeFactory factory System.IO.TextReader input Returns Type Description Lucene.Net.Analysis.Tokenizer Overrides 
Lucene.Net.Analysis.Util.TokenizerFactory.Create(Lucene.Net.Util.AttributeSource.AttributeFactory, System.IO.TextReader)"
},
"Lucene.Net.Analysis.Cn.Smart.SmartChineseWordTokenFilterFactory.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.SmartChineseWordTokenFilterFactory.html",
"title": "Class SmartChineseWordTokenFilterFactory | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class SmartChineseWordTokenFilterFactory Factory for the SmartChineseAnalyzer WordTokenFilter Note: this class will currently emit tokens for punctuation. So you should either add a Lucene.Net.Analysis.Miscellaneous.WordDelimiterFilter after to remove these (with concatenate off), or use the SmartChinese stoplist with a Lucene.Net.Analysis.Core.StopFilterFactory via: words=\"org/apache/lucene/analysis/cn/smart/stopwords.txt\" This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object Lucene.Net.Analysis.Util.AbstractAnalysisFactory Lucene.Net.Analysis.Util.TokenFilterFactory SmartChineseWordTokenFilterFactory Inherited Members Lucene.Net.Analysis.Util.TokenFilterFactory.ForName(System.String, System.Collections.Generic.IDictionary<System.String, System.String>) Lucene.Net.Analysis.Util.TokenFilterFactory.LookupClass(System.String) Lucene.Net.Analysis.Util.TokenFilterFactory.AvailableTokenFilters Lucene.Net.Analysis.Util.TokenFilterFactory.ReloadTokenFilters() Lucene.Net.Analysis.Util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM Lucene.Net.Analysis.Util.AbstractAnalysisFactory.m_luceneMatchVersion Lucene.Net.Analysis.Util.AbstractAnalysisFactory.OriginalArgs Lucene.Net.Analysis.Util.AbstractAnalysisFactory.AssureMatchVersion() Lucene.Net.Analysis.Util.AbstractAnalysisFactory.LuceneMatchVersion Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Require(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Require(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Require(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>, System.Boolean) 
Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.Get(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Collections.Generic.ICollection<System.String>, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireInt32(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetInt32(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Int32) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireBoolean(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetBoolean(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireSingle(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetSingle(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Single) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.RequireChar(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetChar(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Char) 
Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetSet(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetPattern(System.Collections.Generic.IDictionary<System.String, System.String>, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetCulture(System.Collections.Generic.IDictionary<System.String, System.String>, System.String, System.Globalization.CultureInfo) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetWordSet(Lucene.Net.Analysis.Util.IResourceLoader, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetLines(Lucene.Net.Analysis.Util.IResourceLoader, System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetSnowballWordSet(Lucene.Net.Analysis.Util.IResourceLoader, System.String, System.Boolean) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.SplitFileNames(System.String) Lucene.Net.Analysis.Util.AbstractAnalysisFactory.GetClassArg() Lucene.Net.Analysis.Util.AbstractAnalysisFactory.IsExplicitLuceneMatchVersion System.Object.Equals(System.Object) System.Object.Equals(System.Object, System.Object) System.Object.GetHashCode() System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) System.Object.ToString() Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax [Obsolete(\"Use HMMChineseTokenizerFactory instead\")] public class SmartChineseWordTokenFilterFactory : TokenFilterFactory Constructors | Improve this Doc View Source SmartChineseWordTokenFilterFactory(IDictionary<String, String>) Creates a new SmartChineseWordTokenFilterFactory Declaration public SmartChineseWordTokenFilterFactory(IDictionary<string, string> args) Parameters Type Name Description System.Collections.Generic.IDictionary < System.String , System.String > args Methods | Improve this Doc View Source Create(TokenStream) Declaration public override TokenStream 
Create(TokenStream input) Parameters Type Name Description Lucene.Net.Analysis.TokenStream input Returns Type Description Lucene.Net.Analysis.TokenStream Overrides Lucene.Net.Analysis.Util.TokenFilterFactory.Create(Lucene.Net.Analysis.TokenStream)"
},
"Lucene.Net.Analysis.Cn.Smart.Utility.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.Utility.html",
"title": "Class Utility | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class Utility SmartChineseAnalyzer utility constants and methods This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object Utility Inherited Members System.Object.Equals(System.Object) System.Object.Equals(System.Object, System.Object) System.Object.GetHashCode() System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) System.Object.ToString() Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public static class Utility Fields | Improve this Doc View Source COMMON_DELIMITER Delimiters will be filtered to this character by SegTokenFilter Declaration public static readonly char[] COMMON_DELIMITER Field Value Type Description System.Char [] | Improve this Doc View Source END_CHAR_ARRAY Declaration public static readonly char[] END_CHAR_ARRAY Field Value Type Description System.Char [] | Improve this Doc View Source MAX_FREQUENCE Maximum bigram frequency (used in the smoothing function). Declaration public static readonly int MAX_FREQUENCE Field Value Type Description System.Int32 | Improve this Doc View Source NUMBER_CHAR_ARRAY Declaration public static readonly char[] NUMBER_CHAR_ARRAY Field Value Type Description System.Char [] | Improve this Doc View Source SPACES Space-like characters that need to be skipped: such as space, tab, newline, carriage return. Declaration public static readonly string SPACES Field Value Type Description System.String | Improve this Doc View Source START_CHAR_ARRAY Declaration public static readonly char[] START_CHAR_ARRAY Field Value Type Description System.Char [] | Improve this Doc View Source STRING_CHAR_ARRAY Declaration public static readonly char[] STRING_CHAR_ARRAY Field Value Type Description System.Char [] Methods | Improve this Doc View Source CompareArray(Char[], Int32, Char[], Int32) Compare two arrays starting at the specified offsets. 
Declaration public static int CompareArray(char[] larray, int lstartIndex, char[] rarray, int rstartIndex) Parameters Type Name Description System.Char [] larray left array System.Int32 lstartIndex start offset into larray System.Char [] rarray right array System.Int32 rstartIndex start offset into rarray Returns Type Description System.Int32 0 if the arrays are equal, 1 if larray > rarray , -1 if larray < rarray | Improve this Doc View Source CompareArrayByPrefix(Char[], Int32, Char[], Int32) Compare two arrays, starting at the specified offsets, but treating shortArray as a prefix to longArray . As long as shortArray is a prefix of longArray , return 0. Otherwise, behave as CompareArray(Char[], Int32, Char[], Int32) . Declaration public static int CompareArrayByPrefix(char[] shortArray, int shortIndex, char[] longArray, int longIndex) Parameters Type Name Description System.Char [] shortArray prefix array System.Int32 shortIndex offset into shortArray System.Char [] longArray long array (word) System.Int32 longIndex offset into longArray Returns Type Description System.Int32 0 if shortArray is a prefix of longArray , otherwise act as CompareArray(Char[], Int32, Char[], Int32) . | Improve this Doc View Source GetCharType(Char) Return the internal CharType constant of a given character. Declaration public static CharType GetCharType(char ch) Parameters Type Name Description System.Char ch input character Returns Type Description CharType Constant from CharType describing the character type. See Also CharType"
},
"Lucene.Net.Analysis.Cn.Smart.WordTokenFilter.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.WordTokenFilter.html",
"title": "Class WordTokenFilter | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Class WordTokenFilter A Lucene.Net.Analysis.TokenFilter that breaks sentences into words. This is a Lucene.NET EXPERIMENTAL API, use at your own risk Inheritance System.Object Lucene.Net.Util.AttributeSource Lucene.Net.Analysis.TokenStream Lucene.Net.Analysis.TokenFilter WordTokenFilter Implements System.IDisposable Inherited Members Lucene.Net.Analysis.TokenFilter.m_input Lucene.Net.Analysis.TokenFilter.End() Lucene.Net.Analysis.TokenStream.Dispose() Lucene.Net.Util.AttributeSource.GetAttributeFactory() Lucene.Net.Util.AttributeSource.GetAttributeClassesEnumerator() Lucene.Net.Util.AttributeSource.GetAttributeImplsEnumerator() Lucene.Net.Util.AttributeSource.AddAttributeImpl(Lucene.Net.Util.Attribute) Lucene.Net.Util.AttributeSource.AddAttribute<T>() Lucene.Net.Util.AttributeSource.HasAttributes Lucene.Net.Util.AttributeSource.HasAttribute<T>() Lucene.Net.Util.AttributeSource.GetAttribute<T>() Lucene.Net.Util.AttributeSource.ClearAttributes() Lucene.Net.Util.AttributeSource.CaptureState() Lucene.Net.Util.AttributeSource.RestoreState(Lucene.Net.Util.AttributeSource.State) Lucene.Net.Util.AttributeSource.GetHashCode() AttributeSource.Equals(Object) AttributeSource.ReflectAsString(Boolean) Lucene.Net.Util.AttributeSource.ReflectWith(Lucene.Net.Util.IAttributeReflector) Lucene.Net.Util.AttributeSource.CloneAttributes() Lucene.Net.Util.AttributeSource.CopyTo(Lucene.Net.Util.AttributeSource) Lucene.Net.Util.AttributeSource.ToString() System.Object.Equals(System.Object, System.Object) System.Object.GetType() System.Object.MemberwiseClone() System.Object.ReferenceEquals(System.Object, System.Object) Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax [Obsolete(\"Use HMMChineseTokenizer instead.\")] public sealed class WordTokenFilter : TokenFilter, IDisposable Constructors | Improve this Doc View Source WordTokenFilter(TokenStream) Construct a new WordTokenFilter . 
Declaration public WordTokenFilter(TokenStream input) Parameters Type Name Description Lucene.Net.Analysis.TokenStream input Lucene.Net.Analysis.TokenStream of sentences. Methods | Improve this Doc View Source Dispose(Boolean) Releases resources used by the WordTokenFilter and if overridden in a derived class, optionally releases unmanaged resources. Declaration protected override void Dispose(bool disposing) Parameters Type Name Description System.Boolean disposing true to release both managed and unmanaged resources; false to release only unmanaged resources. Overrides TokenFilter.Dispose(Boolean) | Improve this Doc View Source IncrementToken() Declaration public override bool IncrementToken() Returns Type Description System.Boolean Overrides Lucene.Net.Analysis.TokenStream.IncrementToken() | Improve this Doc View Source Reset() Declaration public override void Reset() Overrides Lucene.Net.Analysis.TokenFilter.Reset() Implements System.IDisposable"
},
"Lucene.Net.Analysis.Cn.Smart.WordType.html": {
"href": "Lucene.Net.Analysis.Cn.Smart.WordType.html",
"title": "Enum WordType | Apache Lucene.NET 4.8.0-beta00013 Documentation",
"keywords": "Enum WordType Internal SmartChineseAnalyzer token type constants This is a Lucene.NET EXPERIMENTAL API, use at your own risk Namespace : Lucene.Net.Analysis.Cn.Smart Assembly : Lucene.Net.Analysis.SmartCn.dll Syntax public enum WordType Fields Name Description CHINESE_WORD Chinese Word DELIMITER Punctuation Symbol FULLWIDTH_NUMBER Full-Width Alphanumeric FULLWIDTH_STRING Full-Width String NUMBER ASCII Alphanumeric SENTENCE_BEGIN Start of a Sentence SENTENCE_END End of a Sentence STRING ASCII String"
}
}