src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs - lucenenet - Git at Google

 using J2N.Text;
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
 using System.Globalization;
 using System.IO;
 using System.Linq;
 using System.Text;
 using System.Text.RegularExpressions;

 namespace Lucene.Net.Analysis.Pt
 {
     /*
 	 * Licensed to the Apache Software Foundation (ASF) under one or more
 	 * contributor license agreements.  See the NOTICE file distributed with
 	 * this work for additional information regarding copyright ownership.
 	 * The ASF licenses this file to You under the Apache License, Version 2.0
 	 * (the "License"); you may not use this file except in compliance with
 	 * the License.  You may obtain a copy of the License at
 	 *
 	 *     http://www.apache.org/licenses/LICENSE-2.0
 	 *
 	 * Unless required by applicable law or agreed to in writing, software
 	 * distributed under the License is distributed on an "AS IS" BASIS,
 	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 	 * See the License for the specific language governing permissions and
 	 * limitations under the License.
 	 */

     /// <summary>
     /// Base class for stemmers that use a set of RSLP-like stemming steps.
     /// <para>
     /// RSLP (Removedor de Sufixos da Lingua Portuguesa) is an algorithm designed
     /// originally for stemming the Portuguese language, described in the paper
     /// <c>A Stemming Algorithm for the Portuguese Language</c>, Orengo et al.
     /// </para>
     /// <para>
     /// Since this time a plural-only modification (RSLP-S) as well as a modification
     /// for the Galician language have been implemented. This class parses a configuration
     /// file that describes <see cref="Step"/>s, where each <see cref="Step"/> contains a set of <see cref="Rule"/>s.
     /// </para>
     /// <para>
     /// The general rule format is:
     /// <code>{ "suffix", N, "replacement", { "exception1", "exception2", ...}}</code>
     /// where:
     /// <list type="bullet">
     ///   <item><description><c>suffix</c> is the suffix to be removed (such as "inho").</description></item>
     ///   <item><description><c>N</c> is the min stem size, where stem is defined as the candidate stem
     ///       after removing the suffix (but before appending the replacement!)</description></item>
     ///   <item><description><c>replacement</c> is an optional string to append after removing the suffix.
     ///       This can be the empty string.</description></item>
     ///   <item><description><c>exceptions</c> is an optional list of exceptions, patterns that should
     ///       not be stemmed. These patterns can be specified as whole word or suffix (ends-with)
     ///       patterns, depending upon the exceptions format flag in the step header.</description></item>
     /// </list>
     /// </para>
     /// <para>
     /// A step is an ordered list of rules, with a structure in this format:
     /// <blockquote>{ "name", N, B, { "cond1", "cond2", ... }
     ///               ... rules ... };
     /// </blockquote>
     /// where:
     /// <list type="bullet">
     ///   <item><description><c>name</c> is a name for the step (such as "Plural").</description></item>
     ///   <item><description><c>N</c> is the min word size. Words that are less than this length bypass
     ///       the step completely, as an optimization. Note: N can be zero, in this case this
     ///       implementation will automatically calculate the appropriate value from the underlying
     ///       rules.</description></item>
     ///   <item><description><c>B</c> is a "boolean" flag specifying how exceptions in the rules are matched.
     ///       A value of 1 indicates whole-word pattern matching, a value of 0 indicates that
     ///       exceptions are actually suffixes and should be matched with ends-with.</description></item>
     ///   <item><description><c>conds</c> are an optional list of conditions to enter the step at all. If
     ///       the list is non-empty, then a word must end with one of these conditions or it will
     ///       bypass the step completely as an optimization.</description></item>
     /// </list>
     /// </para>
     /// <a href="http://www.inf.ufrgs.br/~viviane/rslp/index.htm">RSLP description</a>
     /// @lucene.internal
     /// </summary>
     public abstract class RSLPStemmerBase
     {
         /// <summary>
         /// The simplest kind of rule: strip a suffix (subject to a minimum stem length)
         /// and optionally write a replacement in its place. It carries no exception list.
         /// </summary>
         protected class Rule
         {
             protected internal readonly char[] m_suffix;
             protected readonly char[] m_replacement;
             protected internal readonly int m_min;

             /// <summary>
             /// Builds a rule from its textual description. </summary>
             /// <param name="suffix"> the suffix this rule removes </param>
             /// <param name="min"> smallest stem length (word length minus suffix length) for which the rule may fire </param>
             /// <param name="replacement"> text written where the suffix was; may be empty </param>
             public Rule(string suffix, int min, string replacement)
             {
                 m_suffix = suffix.ToCharArray();
                 m_replacement = replacement.ToCharArray();
                 m_min = min;
             }

             /// <summary>
             /// Tests whether this rule applies to the first <paramref name="len"/> chars of <paramref name="s"/>.
             /// </summary>
             /// <returns> true when the remaining stem is at least <c>m_min</c> long and
             /// <c>StemmerUtil.EndsWith</c> reports the suffix at the end of the word. </returns>
             public virtual bool Matches(char[] s, int len)
             {
                 int stemLength = len - m_suffix.Length;
                 if (stemLength < m_min)
                 {
                     // Stem would be too short; the suffix check is skipped entirely.
                     return false;
                 }

                 return StemmerUtil.EndsWith(s, len, m_suffix);
             }

             /// <summary>
             /// Fires the rule in place: the replacement (if any) is copied into <paramref name="s"/>
             /// starting where the suffix began. No capacity check is performed here.
             /// </summary>
             /// <returns> the new valid length of the word: stem length plus replacement length. </returns>
             public virtual int Replace(char[] s, int len)
             {
                 int stemLength = len - m_suffix.Length;
                 int replacementLength = m_replacement.Length;

                 if (replacementLength != 0)
                 {
                     Array.Copy(m_replacement, 0, s, stemLength, replacementLength);
                 }

                 return stemLength + replacementLength;
             }
         }

         /// <summary>
         /// A <see cref="Rule"/> that additionally refuses to fire on a fixed set of whole words.
         /// </summary>
         protected class RuleWithSetExceptions : Rule
         {
             protected readonly CharArraySet m_exceptions;

             /// <summary>
             /// Creates the rule and validates that every exception ends with <paramref name="suffix"/>
             /// (an exception that does not could never be reached by this rule).
             /// </summary>
             /// <exception cref="Exception"> if an exception word does not end with the suffix </exception>
             public RuleWithSetExceptions(string suffix, int min, string replacement, string[] exceptions)
                 : base(suffix, min, replacement)
             {
                 foreach (string word in exceptions)
                 {
                     if (word.EndsWith(suffix, StringComparison.Ordinal))
                     {
                         continue;
                     }

                     throw new Exception("useless exception '" + word + "' does not end with '" + suffix + "'");
                 }

                 m_exceptions = new CharArraySet(
 #pragma warning disable 612, 618
                     LuceneVersion.LUCENE_CURRENT,
 #pragma warning restore 612, 618
                     exceptions, false);
             }

             /// <returns> true if the base rule matches and the whole word (the first
             /// <paramref name="len"/> chars of <paramref name="s"/>) is not in the exception set. </returns>
             public override bool Matches(char[] s, int len)
             {
                 if (!base.Matches(s, len))
                 {
                     return false;
                 }

                 bool isException = m_exceptions.Contains(s, 0, len);
                 return !isException;
             }
         }

         /// <summary>
         /// A <see cref="Rule"/> whose exceptions are themselves suffixes: the rule does not
         /// fire on any word ending with one of them.
         /// </summary>
         protected class RuleWithSuffixExceptions : Rule
         {
             // TODO: use a more efficient datastructure: automaton?
             protected readonly char[][] m_exceptions;

             /// <summary>
             /// Creates the rule, checking first that every exception ends with <paramref name="suffix"/>,
             /// then storing each exception as a char array.
             /// </summary>
             /// <exception cref="Exception"> if an exception does not end with the suffix </exception>
             public RuleWithSuffixExceptions(string suffix, int min, string replacement, string[] exceptions)
                 : base(suffix, min, replacement)
             {
                 foreach (string candidate in exceptions)
                 {
                     if (candidate.EndsWith(suffix, StringComparison.Ordinal))
                     {
                         continue;
                     }

                     throw new Exception("warning: useless exception '" + candidate + "' does not end with '" + suffix + "'");
                 }

                 m_exceptions = Array.ConvertAll(exceptions, candidate => candidate.ToCharArray());
             }

             /// <returns> true if the base rule matches and the word ends with none of the
             /// exceptional suffixes (linear scan). </returns>
             public override bool Matches(char[] s, int len)
             {
                 if (!base.Matches(s, len))
                 {
                     return false;
                 }

                 foreach (char[] exceptionalSuffix in m_exceptions)
                 {
                     if (StemmerUtil.EndsWith(s, len, exceptionalSuffix))
                     {
                         return false;
                     }
                 }

                 return true;
             }
         }

         /// <summary>
         /// A named, ordered group of rules, guarded by a minimum word length and an
         /// optional list of entry suffixes.
         /// </summary>
         protected class Step
         {
             protected internal readonly string m_name;
             protected readonly Rule[] m_rules;
             protected readonly int m_min;
             protected readonly char[][] m_suffixes;

             /// <summary>
             /// Creates a step. </summary>
             /// <param name="name"> the step's name </param>
             /// <param name="rules"> rules, tried in array order </param>
             /// <param name="min"> minimum word size; 0 means "derive it from the rules" </param>
             /// <param name="suffixes"> optional entry conditions; null or empty means no condition </param>
             public Step(string name, Rule[] rules, int min, string[] suffixes)
             {
                 m_name = name;
                 m_rules = rules;
                 m_min = min == 0 ? ShortestMatchableLength(rules) : min;

                 bool hasConditions = suffixes != null && suffixes.Length != 0;
                 m_suffixes = hasConditions
                     ? Array.ConvertAll(suffixes, suffix => suffix.ToCharArray())
                     : null;
             }

             /// <summary>
             /// Smallest value of (min stem length + suffix length) over all rules, i.e. the
             /// shortest word any rule could fire on; <see cref="int.MaxValue"/> when there are no rules.
             /// </summary>
             private static int ShortestMatchableLength(Rule[] rules)
             {
                 int shortest = int.MaxValue;
                 foreach (Rule rule in rules)
                 {
                     int needed = rule.m_min + rule.m_suffix.Length;
                     if (needed < shortest)
                     {
                         shortest = needed;
                     }
                 }
                 return shortest;
             }

             /// <summary>
             /// Runs the step on the first <paramref name="len"/> chars of <paramref name="s"/>:
             /// the first rule that matches is fired and no further rules are tried.
             /// </summary>
             /// <returns> the new valid length, or <paramref name="len"/> unchanged when the word is
             /// too short, fails the entry condition, or no rule matches. </returns>
             public virtual int Apply(char[] s, int len)
             {
                 if (len < m_min || !PassesEntryCondition(s, len))
                 {
                     return len;
                 }

                 foreach (Rule rule in m_rules)
                 {
                     if (rule.Matches(s, len))
                     {
                         return rule.Replace(s, len);
                     }
                 }

                 return len;
             }

             /// <summary>
             /// True when the step has no entry suffixes, or the word ends with at least one of them.
             /// </summary>
             private bool PassesEntryCondition(char[] s, int len)
             {
                 if (m_suffixes == null)
                 {
                     return true;
                 }

                 foreach (char[] condition in m_suffixes)
                 {
                     if (StemmerUtil.EndsWith(s, len, condition))
                     {
                         return true;
                     }
                 }

                 return false;
             }
         }

         /// <summary>
         /// Reads an RSLP description from a resource and parses every step it contains.
         /// The resource is opened through <c>IOUtils.GetDecodingReader</c> as UTF-8 and is
         /// disposed before returning.
         /// </summary>
         /// <param name="clazz"> type handed to <c>IOUtils.GetDecodingReader</c> (presumably used to locate the resource) </param>
         /// <param name="resource"> name of the resource to read </param>
         /// <returns> the parsed <see cref="Step"/>s keyed by step name; a later step with the same
         /// name replaces an earlier one. </returns>
         protected static IDictionary<string, Step> Parse(Type clazz, string resource)
         {
             IDictionary<string, Step> stepsByName = new Dictionary<string, Step>();

             using (TextReader reader = IOUtils.GetDecodingReader(clazz, resource, Encoding.UTF8))
             {
                 // Each significant line at this level is a step header; ParseStep consumes the step's rules.
                 for (string header = ReadLine(reader); header != null; header = ReadLine(reader))
                 {
                     Step parsed = ParseStep(reader, header);
                     stepsByName[parsed.m_name] = parsed;
                 }
             }

             return stepsByName;
         }

         // Line-level grammar of the description file. Every pattern is anchored to the whole
         // (already trimmed) line.

         // Step header:  { "name", N, 0|1, { ... },
         // Groups: 1 = name, 2 = digits of N, 3 = the 0/1 flag, 4 = raw text between the inner braces.
         private static readonly Regex headerPattern = new Regex("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*(0|1),\\s*\\{(.*)\\},\\s*$", RegexOptions.Compiled);
         // Strip-only rule:  { "suffix", N }  followed by "," or "};"
         // Groups: 1 = suffix, 2 = digits of N.
         private static readonly Regex stripPattern = new Regex("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+)\\s*\\}\\s*(,|(\\}\\s*;))$", RegexOptions.Compiled);
         // Rule with replacement:  { "suffix", N, "replacement"}  followed by "," or "};"
         // Note: no whitespace is accepted between the closing quote and the closing brace.
         // Groups: 1 = suffix, 2 = digits of N, 3 = replacement.
         private static readonly Regex repPattern = new Regex("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\"\\}\\s*(,|(\\}\\s*;))$", RegexOptions.Compiled);
         // Rule with exceptions:  { "suffix", N, "replacement", { ... } }  followed by "," or "};"
         // Groups: 1 = suffix, 2 = digits of N, 3 = replacement, 4 = raw text between the inner braces.
         private static readonly Regex excPattern = new Regex("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\",\\s*\\{(.*)\\}\\s*\\}\\s*(,|(\\}\\s*;))$", RegexOptions.Compiled);

         /// <summary>
         /// Parses one step: the header line that was already read, followed by the rule
         /// lines that are consumed from <paramref name="r"/>.
         /// </summary>
         /// <param name="r"> reader positioned just after the header line </param>
         /// <param name="header"> the trimmed header line, expected to match <c>headerPattern</c> </param>
         /// <returns> the parsed <see cref="Step"/> </returns>
         /// <exception cref="Exception"> if <paramref name="header"/> is not a valid step header </exception>
         private static Step ParseStep(TextReader r, string header)
         {
             Match matcher = headerPattern.Match(header);
             if (!matcher.Success)
             {
                 // TextReader exposes no line number, so report the offending text itself
                 // rather than ending the message with a dangling "at line ".
                 throw new Exception("Illegal Step header specified: '" + header + "'");
             }

             string name = matcher.Groups[1].Value;
             int min = int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture);
             // 0 = exceptions are suffixes, 1 = exceptions are whole words (see ParseRules).
             int type = int.Parse(matcher.Groups[3].Value, CultureInfo.InvariantCulture);
             string[] suffixes = ParseList(matcher.Groups[4].Value);
             Rule[] rules = ParseRules(r, type);
             return new Step(name, rules, min, suffixes);
         }

         /// <summary>
         /// Reads rule lines from <paramref name="r"/> until one ends with <c>;</c>, which
         /// terminates the step.
         /// </summary>
         /// <param name="r"> reader positioned at the first rule line of a step </param>
         /// <param name="type"> exception style for rules that list exceptions: 0 builds a
         /// <see cref="RuleWithSuffixExceptions"/>, any other value a <see cref="RuleWithSetExceptions"/> </param>
         /// <returns> the rules in file order </returns>
         /// <exception cref="Exception"> if a line matches none of the rule formats, or the input
         /// ends before a line terminated by <c>;</c> is seen </exception>
         private static Rule[] ParseRules(TextReader r, int type)
         {
             List<Rule> rules = new List<Rule>();
             string line;
             while ((line = ReadLine(r)) != null)
             {
                 Match matcher;
                 if ((matcher = stripPattern.Match(line)).Success)
                 {
                     // { "suffix", N } : strip only, empty replacement.
                     rules.Add(new Rule(
                         matcher.Groups[1].Value,
                         int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture),
                         ""));
                 }
                 else if ((matcher = repPattern.Match(line)).Success)
                 {
                     // { "suffix", N, "replacement" }
                     rules.Add(new Rule(
                         matcher.Groups[1].Value,
                         int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture),
                         matcher.Groups[3].Value));
                 }
                 else if ((matcher = excPattern.Match(line)).Success)
                 {
                     // { "suffix", N, "replacement", { exceptions } }
                     string suffix = matcher.Groups[1].Value;
                     int min = int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture);
                     string replacement = matcher.Groups[3].Value;
                     string[] exceptions = ParseList(matcher.Groups[4].Value);

                     if (type == 0)
                     {
                         rules.Add(new RuleWithSuffixExceptions(suffix, min, replacement, exceptions));
                     }
                     else
                     {
                         rules.Add(new RuleWithSetExceptions(suffix, min, replacement, exceptions));
                     }
                 }
                 else
                 {
                     // No line number is available from TextReader; show the offending line instead.
                     throw new Exception("Illegal Step rule specified: '" + line + "'");
                 }

                 if (line.EndsWith(";", StringComparison.Ordinal))
                 {
                     return rules.ToArray();
                 }
             }

             // End of input without a terminating ';'. Returning null here would only
             // resurface later as a NullReferenceException inside Step, so fail now.
             throw new Exception("Unterminated Step: end of input reached before a rule line ending with ';'");
         }

         /// <summary>
         /// Splits the raw text found between a pair of braces into its quoted items.
         /// </summary>
         /// <param name="s"> comma-separated quoted strings, e.g. <c>"a", "b"</c> </param>
         /// <returns> the unquoted items, or <c>null</c> when <paramref name="s"/> is empty </returns>
         private static string[] ParseList(string s)
         {
             if (s.Length == 0)
             {
                 return null;
             }

             // TrimEnd() on string[] is an extension method (presumably from J2N.Text, dropping
             // trailing empty entries the way Java's String.split does - confirm against J2N).
             string[] rawItems = s.Split(',').TrimEnd();
             return Array.ConvertAll(rawItems, raw => ParseString(raw.Trim()));
         }

         /// <summary>
         /// Drops the first and last character of <paramref name="s"/> (the surrounding quotes
         /// of a quoted item).
         /// </summary>
         /// <exception cref="ArgumentOutOfRangeException"> if <paramref name="s"/> has fewer than two characters </exception>
         private static string ParseString(string s)
         {
             int innerLength = s.Length - 2;
             return s.Substring(1, innerLength);
         }

         /// <summary>
         /// Returns the next significant line from <paramref name="r"/>, trimmed. Lines that are
         /// empty after trimming, or whose first non-blank character is <c>#</c>, are skipped.
         /// </summary>
         /// <returns> the trimmed line, or <c>null</c> at end of input </returns>
         private static string ReadLine(TextReader r)
         {
             for (string raw = r.ReadLine(); raw != null; raw = r.ReadLine())
             {
                 string trimmed = raw.Trim();
                 bool skip = trimmed.Length == 0 || trimmed[0] == '#';
                 if (!skip)
                 {
                     return trimmed;
                 }
             }

             return null;
         }
     }
 }
	using J2N.Text;
	using Lucene.Net.Analysis.Util;
	using Lucene.Net.Util;
	using System;
	using System.Collections.Generic;
	using System.Globalization;
	using System.IO;
	using System.Linq;
	using System.Text;
	using System.Text.RegularExpressions;

	namespace Lucene.Net.Analysis.Pt
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/// <summary>
	/// Base class for stemmers that use a set of RSLP-like stemming steps.
	/// <para>
	/// RSLP (Removedor de Sufixos da Lingua Portuguesa) is an algorithm designed
	/// originally for stemming the Portuguese language, described in the paper
	/// <c>A Stemming Algorithm for the Portuguese Language</c>, Orengo et al.
	/// </para>
	/// <para>
	/// Since this time a plural-only modification (RSLP-S) as well as a modification
	/// for the Galician language have been implemented. This class parses a configuration
	/// file that describes <see cref="Step"/>s, where each <see cref="Step"/> contains a set of <see cref="Rule"/>s.
	/// </para>
	/// <para>
	/// The general rule format is:
	/// <code>{ "suffix", N, "replacement", { "exception1", "exception2", ...}}</code>
	/// where:
	/// <list type="bullet">
	/// <item><description><c>suffix</c> is the suffix to be removed (such as "inho").</description></item>
	/// <item><description><c>N</c> is the min stem size, where stem is defined as the candidate stem
	/// after removing the suffix (but before appending the replacement!)</description></item>
	/// <item><description><c>replacement</c> is an optional string to append after removing the suffix.
	/// This can be the empty string.</description></item>
	/// <item><description><c>exceptions</c> is an optional list of exceptions, patterns that should
	/// not be stemmed. These patterns can be specified as whole word or suffix (ends-with)
	/// patterns, depending upon the exceptions format flag in the step header.</description></item>
	/// </list>
	/// </para>
	/// <para>
	/// A step is an ordered list of rules, with a structure in this format:
	/// <blockquote>{ "name", N, B, { "cond1", "cond2", ... }
	/// ... rules ... };
	/// </blockquote>
	/// where:
	/// <list type="bullet">
	/// <item><description><c>name</c> is a name for the step (such as "Plural").</description></item>
	/// <item><description><c>N</c> is the min word size. Words that are less than this length bypass
	/// the step completely, as an optimization. Note: N can be zero, in this case this
	/// implementation will automatically calculate the appropriate value from the underlying
	/// rules.</description></item>
	/// <item><description><c>B</c> is a "boolean" flag specifying how exceptions in the rules are matched.
	/// A value of 1 indicates whole-word pattern matching, a value of 0 indicates that
	/// exceptions are actually suffixes and should be matched with ends-with.</description></item>
	/// <item><description><c>conds</c> are an optional list of conditions to enter the step at all. If
	/// the list is non-empty, then a word must end with one of these conditions or it will
	/// bypass the step completely as an optimization.</description></item>
	/// </list>
	/// </para>
	/// <a href="http://www.inf.ufrgs.br/~viviane/rslp/index.htm">RSLP description</a>
	/// @lucene.internal
	/// </summary>
	public abstract class RSLPStemmerBase
	{
	/// <summary>
	/// The simplest kind of rule: strip a suffix (subject to a minimum stem length)
	/// and optionally write a replacement in its place. It carries no exception list.
	/// </summary>
	protected class Rule
	{
		protected internal readonly char[] m_suffix;
		protected readonly char[] m_replacement;
		protected internal readonly int m_min;

		/// <summary>
		/// Builds a rule from its textual description. </summary>
		/// <param name="suffix"> the suffix this rule removes </param>
		/// <param name="min"> smallest stem length (word length minus suffix length) for which the rule may fire </param>
		/// <param name="replacement"> text written where the suffix was; may be empty </param>
		public Rule(string suffix, int min, string replacement)
		{
			m_suffix = suffix.ToCharArray();
			m_replacement = replacement.ToCharArray();
			m_min = min;
		}

		/// <summary>
		/// Tests whether this rule applies to the first <paramref name="len"/> chars of <paramref name="s"/>.
		/// </summary>
		/// <returns> true when the remaining stem is at least <c>m_min</c> long and
		/// <c>StemmerUtil.EndsWith</c> reports the suffix at the end of the word. </returns>
		public virtual bool Matches(char[] s, int len)
		{
			int stemLength = len - m_suffix.Length;
			if (stemLength < m_min)
			{
				// Stem would be too short; the suffix check is skipped entirely.
				return false;
			}

			return StemmerUtil.EndsWith(s, len, m_suffix);
		}

		/// <summary>
		/// Fires the rule in place: the replacement (if any) is copied into <paramref name="s"/>
		/// starting where the suffix began. No capacity check is performed here.
		/// </summary>
		/// <returns> the new valid length of the word: stem length plus replacement length. </returns>
		public virtual int Replace(char[] s, int len)
		{
			int stemLength = len - m_suffix.Length;
			int replacementLength = m_replacement.Length;

			if (replacementLength != 0)
			{
				Array.Copy(m_replacement, 0, s, stemLength, replacementLength);
			}

			return stemLength + replacementLength;
		}
	}

	/// <summary>
	/// A <see cref="Rule"/> that additionally refuses to fire on a fixed set of whole words.
	/// </summary>
	protected class RuleWithSetExceptions : Rule
	{
		protected readonly CharArraySet m_exceptions;

		/// <summary>
		/// Creates the rule and validates that every exception ends with <paramref name="suffix"/>
		/// (an exception that does not could never be reached by this rule).
		/// </summary>
		/// <exception cref="Exception"> if an exception word does not end with the suffix </exception>
		public RuleWithSetExceptions(string suffix, int min, string replacement, string[] exceptions)
			: base(suffix, min, replacement)
		{
			foreach (string word in exceptions)
			{
				if (word.EndsWith(suffix, StringComparison.Ordinal))
				{
					continue;
				}

				throw new Exception("useless exception '" + word + "' does not end with '" + suffix + "'");
			}

			m_exceptions = new CharArraySet(
	#pragma warning disable 612, 618
				LuceneVersion.LUCENE_CURRENT,
	#pragma warning restore 612, 618
				exceptions, false);
		}

		/// <returns> true if the base rule matches and the whole word (the first
		/// <paramref name="len"/> chars of <paramref name="s"/>) is not in the exception set. </returns>
		public override bool Matches(char[] s, int len)
		{
			if (!base.Matches(s, len))
			{
				return false;
			}

			bool isException = m_exceptions.Contains(s, 0, len);
			return !isException;
		}
	}

	/// <summary>
	/// A <see cref="Rule"/> whose exceptions are themselves suffixes: the rule does not
	/// fire on any word ending with one of them.
	/// </summary>
	protected class RuleWithSuffixExceptions : Rule
	{
		// TODO: use a more efficient datastructure: automaton?
		protected readonly char[][] m_exceptions;

		/// <summary>
		/// Creates the rule, checking first that every exception ends with <paramref name="suffix"/>,
		/// then storing each exception as a char array.
		/// </summary>
		/// <exception cref="Exception"> if an exception does not end with the suffix </exception>
		public RuleWithSuffixExceptions(string suffix, int min, string replacement, string[] exceptions)
			: base(suffix, min, replacement)
		{
			foreach (string candidate in exceptions)
			{
				if (candidate.EndsWith(suffix, StringComparison.Ordinal))
				{
					continue;
				}

				throw new Exception("warning: useless exception '" + candidate + "' does not end with '" + suffix + "'");
			}

			m_exceptions = Array.ConvertAll(exceptions, candidate => candidate.ToCharArray());
		}

		/// <returns> true if the base rule matches and the word ends with none of the
		/// exceptional suffixes (linear scan). </returns>
		public override bool Matches(char[] s, int len)
		{
			if (!base.Matches(s, len))
			{
				return false;
			}

			foreach (char[] exceptionalSuffix in m_exceptions)
			{
				if (StemmerUtil.EndsWith(s, len, exceptionalSuffix))
				{
					return false;
				}
			}

			return true;
		}
	}

	/// <summary>
	/// A named, ordered group of rules, guarded by a minimum word length and an
	/// optional list of entry suffixes.
	/// </summary>
	protected class Step
	{
		protected internal readonly string m_name;
		protected readonly Rule[] m_rules;
		protected readonly int m_min;
		protected readonly char[][] m_suffixes;

		/// <summary>
		/// Creates a step. </summary>
		/// <param name="name"> the step's name </param>
		/// <param name="rules"> rules, tried in array order </param>
		/// <param name="min"> minimum word size; 0 means "derive it from the rules" </param>
		/// <param name="suffixes"> optional entry conditions; null or empty means no condition </param>
		public Step(string name, Rule[] rules, int min, string[] suffixes)
		{
			this.m_name = name;
			this.m_rules = rules;
			if (min == 0)
			{
				// Derive the shortest word any rule could fire on:
				// min stem length + suffix length, minimised over all rules.
				min = int.MaxValue;
				foreach (Rule r in rules)
				{
					min = Math.Min(min, r.m_min + r.m_suffix.Length);
				}
			}
			this.m_min = min;

			// The scraped text had the markdown-escaped "\|\|" here, which is not valid C#.
			if (suffixes == null || suffixes.Length == 0)
			{
				this.m_suffixes = null;
			}
			else
			{
				this.m_suffixes = new char[suffixes.Length][];
				for (int i = 0; i < suffixes.Length; i++)
				{
					this.m_suffixes[i] = suffixes[i].ToCharArray();
				}
			}
		}

		/// <summary>
		/// Runs the step on the first <paramref name="len"/> chars of <paramref name="s"/>:
		/// the first rule that matches is fired and no further rules are tried.
		/// </summary>
		/// <returns> the new valid length, or <paramref name="len"/> unchanged when the word is
		/// too short, fails the entry condition, or no rule matches. </returns>
		public virtual int Apply(char[] s, int len)
		{
			if (len < m_min)
			{
				return len;
			}

			if (m_suffixes != null)
			{
				// Entry condition: the word must end with at least one of the listed suffixes.
				bool found = false;

				for (int i = 0; i < m_suffixes.Length; i++)
				{
					if (StemmerUtil.EndsWith(s, len, m_suffixes[i]))
					{
						found = true;
						break;
					}
				}

				if (!found)
				{
					return len;
				}
			}

			for (int i = 0; i < m_rules.Length; i++)
			{
				if (m_rules[i].Matches(s, len))
				{
					return m_rules[i].Replace(s, len);
				}
			}

			return len;
		}
	}

	/// <summary>
	/// Reads an RSLP description from a resource and parses every step it contains.
	/// The resource is opened through <c>IOUtils.GetDecodingReader</c> as UTF-8 and is
	/// disposed before returning.
	/// </summary>
	/// <param name="clazz"> type handed to <c>IOUtils.GetDecodingReader</c> (presumably used to locate the resource) </param>
	/// <param name="resource"> name of the resource to read </param>
	/// <returns> the parsed <see cref="Step"/>s keyed by step name; a later step with the same
	/// name replaces an earlier one. </returns>
	protected static IDictionary<string, Step> Parse(Type clazz, string resource)
	{
		IDictionary<string, Step> stepsByName = new Dictionary<string, Step>();

		using (TextReader reader = IOUtils.GetDecodingReader(clazz, resource, Encoding.UTF8))
		{
			// Each significant line at this level is a step header; ParseStep consumes the step's rules.
			for (string header = ReadLine(reader); header != null; header = ReadLine(reader))
			{
				Step parsed = ParseStep(reader, header);
				stepsByName[parsed.m_name] = parsed;
			}
		}

		return stepsByName;
	}

	// Line-level grammar of the description file. Every pattern is anchored to the whole
	// (already trimmed) line. The scraped text had lost the '*' quantifiers and contained
	// the invalid escape "\|"; the patterns below are the restored, working forms.

	// Step header:  { "name", N, 0|1, { ... },
	// Groups: 1 = name, 2 = digits of N, 3 = the 0/1 flag, 4 = raw text between the inner braces.
	private static readonly Regex headerPattern = new Regex("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*(0|1),\\s*\\{(.*)\\},\\s*$", RegexOptions.Compiled);
	// Strip-only rule:  { "suffix", N }  followed by "," or "};"
	// Groups: 1 = suffix, 2 = digits of N.
	private static readonly Regex stripPattern = new Regex("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+)\\s*\\}\\s*(,|(\\}\\s*;))$", RegexOptions.Compiled);
	// Rule with replacement:  { "suffix", N, "replacement"}  followed by "," or "};"
	// Groups: 1 = suffix, 2 = digits of N, 3 = replacement.
	private static readonly Regex repPattern = new Regex("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\"\\}\\s*(,|(\\}\\s*;))$", RegexOptions.Compiled);
	// Rule with exceptions:  { "suffix", N, "replacement", { ... } }  followed by "," or "};"
	// Groups: 1 = suffix, 2 = digits of N, 3 = replacement, 4 = raw text between the inner braces.
	private static readonly Regex excPattern = new Regex("^\\{\\s*\"([^\"]*)\",\\s*([0-9]+),\\s*\"([^\"]*)\",\\s*\\{(.*)\\}\\s*\\}\\s*(,|(\\}\\s*;))$", RegexOptions.Compiled);

	/// <summary>
	/// Parses one step: the header line that was already read, followed by the rule
	/// lines that are consumed from <paramref name="r"/>.
	/// </summary>
	/// <param name="r"> reader positioned just after the header line </param>
	/// <param name="header"> the trimmed header line, expected to match <c>headerPattern</c> </param>
	/// <returns> the parsed <see cref="Step"/> </returns>
	/// <exception cref="Exception"> if <paramref name="header"/> is not a valid step header </exception>
	private static Step ParseStep(TextReader r, string header)
	{
		Match matcher = headerPattern.Match(header);
		if (!matcher.Success)
		{
			// TextReader exposes no line number, so report the offending text itself
			// rather than ending the message with a dangling "at line ".
			throw new Exception("Illegal Step header specified: '" + header + "'");
		}

		string name = matcher.Groups[1].Value;
		int min = int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture);
		// 0 = exceptions are suffixes, 1 = exceptions are whole words (see ParseRules).
		int type = int.Parse(matcher.Groups[3].Value, CultureInfo.InvariantCulture);
		string[] suffixes = ParseList(matcher.Groups[4].Value);
		Rule[] rules = ParseRules(r, type);
		return new Step(name, rules, min, suffixes);
	}

	/// <summary>
	/// Reads rule lines from <paramref name="r"/> until one ends with <c>;</c>, which
	/// terminates the step.
	/// </summary>
	/// <param name="r"> reader positioned at the first rule line of a step </param>
	/// <param name="type"> exception style for rules that list exceptions: 0 builds a
	/// <see cref="RuleWithSuffixExceptions"/>, any other value a <see cref="RuleWithSetExceptions"/> </param>
	/// <returns> the rules in file order </returns>
	/// <exception cref="Exception"> if a line matches none of the rule formats, or the input
	/// ends before a line terminated by <c>;</c> is seen </exception>
	private static Rule[] ParseRules(TextReader r, int type)
	{
		List<Rule> rules = new List<Rule>();
		string line;
		while ((line = ReadLine(r)) != null)
		{
			Match matcher;
			if ((matcher = stripPattern.Match(line)).Success)
			{
				// { "suffix", N } : strip only, empty replacement.
				rules.Add(new Rule(
					matcher.Groups[1].Value,
					int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture),
					""));
			}
			else if ((matcher = repPattern.Match(line)).Success)
			{
				// { "suffix", N, "replacement" }
				rules.Add(new Rule(
					matcher.Groups[1].Value,
					int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture),
					matcher.Groups[3].Value));
			}
			else if ((matcher = excPattern.Match(line)).Success)
			{
				// { "suffix", N, "replacement", { exceptions } }
				string suffix = matcher.Groups[1].Value;
				int min = int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture);
				string replacement = matcher.Groups[3].Value;
				string[] exceptions = ParseList(matcher.Groups[4].Value);

				if (type == 0)
				{
					rules.Add(new RuleWithSuffixExceptions(suffix, min, replacement, exceptions));
				}
				else
				{
					rules.Add(new RuleWithSetExceptions(suffix, min, replacement, exceptions));
				}
			}
			else
			{
				// No line number is available from TextReader; show the offending line instead.
				throw new Exception("Illegal Step rule specified: '" + line + "'");
			}

			if (line.EndsWith(";", StringComparison.Ordinal))
			{
				return rules.ToArray();
			}
		}

		// End of input without a terminating ';'. Returning null here would only
		// resurface later as a NullReferenceException inside Step, so fail now.
		throw new Exception("Unterminated Step: end of input reached before a rule line ending with ';'");
	}

	/// <summary>
	/// Splits the raw text found between a pair of braces into its quoted items.
	/// </summary>
	/// <param name="s"> comma-separated quoted strings, e.g. <c>"a", "b"</c> </param>
	/// <returns> the unquoted items, or <c>null</c> when <paramref name="s"/> is empty </returns>
	private static string[] ParseList(string s)
	{
		if (s.Length == 0)
		{
			return null;
		}

		// TrimEnd() on string[] is an extension method (presumably from J2N.Text, dropping
		// trailing empty entries the way Java's String.split does - confirm against J2N).
		string[] rawItems = s.Split(',').TrimEnd();
		return Array.ConvertAll(rawItems, raw => ParseString(raw.Trim()));
	}

	/// <summary>
	/// Drops the first and last character of <paramref name="s"/> (the surrounding quotes
	/// of a quoted item).
	/// </summary>
	/// <exception cref="ArgumentOutOfRangeException"> if <paramref name="s"/> has fewer than two characters </exception>
	private static string ParseString(string s)
	{
		int innerLength = s.Length - 2;
		return s.Substring(1, innerLength);
	}

	/// <summary>
	/// Returns the next significant line from <paramref name="r"/>, trimmed. Lines that are
	/// empty after trimming, or whose first non-blank character is <c>#</c>, are skipped.
	/// </summary>
	/// <returns> the trimmed line, or <c>null</c> at end of input </returns>
	private static string ReadLine(TextReader r)
	{
		for (string raw = r.ReadLine(); raw != null; raw = r.ReadLine())
		{
			string trimmed = raw.Trim();
			bool skip = trimmed.Length == 0 || trimmed[0] == '#';
			if (!skip)
			{
				return trimmed;
			}
		}

		return null;
	}
	}
	}