// src/Lucene.Net.Suggest/Suggest/FileDictionary.cs - lucenenet - Git at Google

 using Lucene.Net.Search.Spell;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
 using System.Globalization;
 using System.IO;
 using System.Text;

 namespace Lucene.Net.Search.Suggest
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     /// <summary>
     /// Dictionary represented by a text file.
     ///
     /// <para>Format allowed: 1 entry per line:</para>
     /// <para>An entry can be: </para>
     /// <list type="number">
     /// <item><description>suggestion</description></item>
     /// <item><description>suggestion <see cref="fieldDelimiter"/> weight</description></item>
     /// <item><description>suggestion <see cref="fieldDelimiter"/> weight <see cref="fieldDelimiter"/> payload</description></item>
     /// </list>
     /// where the default <see cref="fieldDelimiter"/> is <see cref="DEFAULT_FIELD_DELIMITER"/> (a tab)
     /// <para>
     /// <b>NOTE:</b>
     /// <list type="number">
     /// <item><description>In order to have payload enabled, the first entry has to have a payload</description></item>
     /// <item><description>If the weight for an entry is not specified then a value of 1 is used</description></item>
     /// <item><description>A payload cannot be specified without having the weight specified for an entry</description></item>
     /// <item><description>If the payload for an entry is not specified (assuming payload is enabled)
     ///  then an empty payload is returned</description></item>
     /// <item><description>An entry cannot have more than two <see cref="fieldDelimiter"/>s</description></item>
     /// <item><description>Empty fields are discarded when a line is split, so consecutive
     ///  <see cref="fieldDelimiter"/>s collapse into one</description></item>
     /// <item><description>A line without any non-empty field (for example a blank line) yields an empty
     ///  suggestion with a weight of 1</description></item>
     /// </list>
     /// </para>
     /// <c>Example:</c><para/>
     /// word1 word2 TAB 100 TAB payload1<para/>
     /// word3 TAB 101<para/>
     /// word4 word3 TAB 102<para/>
     /// </summary>
     public class FileDictionary : IDictionary
     {

         /// <summary>
         /// Tab-delimited fields are most common thus the default, but one can override this via the constructor
         /// </summary>
         public const string DEFAULT_FIELD_DELIMITER = "\t";
         private readonly TextReader @in; // LUCENENET: marked readonly
         // Most recently read line; written by FileEnumerator.
         private string line;
         // Set once the reader returned null (end of input); the reader is disposed at that point.
         private bool done = false;
         private readonly string fieldDelimiter;

         /// <summary>
         /// Creates a dictionary based on an inputstream.
         /// Using <see cref="DEFAULT_FIELD_DELIMITER"/> as the
         /// field separator in a line.
         /// <para>
         /// NOTE: content is treated as UTF-8
         /// </para>
         /// </summary>
         /// <param name="dictFile">Stream holding the dictionary entries, one per line.</param>
         public FileDictionary(Stream dictFile)
             : this(dictFile, DEFAULT_FIELD_DELIMITER)
         {
         }

         /// <summary>
         /// Creates a dictionary based on a reader.
         /// Using <see cref="DEFAULT_FIELD_DELIMITER"/> as the
         /// field separator in a line.
         /// </summary>
         /// <param name="reader">Reader supplying the dictionary entries, one per line.</param>
         public FileDictionary(TextReader reader)
             : this(reader, DEFAULT_FIELD_DELIMITER)
         {
         }

         /// <summary>
         /// Creates a dictionary based on a reader.
         /// Using <paramref name="fieldDelimiter"/> to separate out the
         /// fields in a line.
         /// </summary>
         /// <param name="reader">Reader supplying the dictionary entries, one per line.</param>
         /// <param name="fieldDelimiter">String that separates the fields of an entry.</param>
         public FileDictionary(TextReader reader, string fieldDelimiter)
         {
             @in = reader;
             this.fieldDelimiter = fieldDelimiter;
         }

         /// <summary>
         /// Creates a dictionary based on an inputstream.
         /// Using <paramref name="fieldDelimiter"/> to separate out the
         /// fields in a line.
         /// <para>
         /// NOTE: content is treated as UTF-8
         /// </para>
         /// </summary>
         /// <param name="dictFile">Stream holding the dictionary entries, one per line.</param>
         /// <param name="fieldDelimiter">String that separates the fields of an entry.</param>
         public FileDictionary(Stream dictFile, string fieldDelimiter)
         {
             @in = IOUtils.GetDecodingReader(dictFile, Encoding.UTF8);
             this.fieldDelimiter = fieldDelimiter;
         }

         /// <summary>
         /// Creates an enumerator over the entries of this dictionary.
         /// The first line is read from the underlying reader as part of this call.
         /// <para>
         /// NOTE: the reader and the end-of-input state live on this dictionary, not on the
         /// enumerator, so every enumerator returned from this method consumes the same input.
         /// </para>
         /// </summary>
         /// <returns>An enumerator positioned before the first entry.</returns>
         /// <exception cref="Exception">Wraps an <see cref="IOException"/> raised while reading the first line.</exception>
         public virtual IInputEnumerator GetEntryEnumerator()
         {
             try
             {
                 return new FileEnumerator(this);
             }
             catch (IOException e)
             {
                 throw new Exception(e.ToString(), e);
             }
         }

         /// <summary>
         /// Enumerates the entries of the owning <see cref="FileDictionary"/> line by line,
         /// exposing the term via <see cref="Current"/> and the weight/payload of that term
         /// via <see cref="Weight"/> and <see cref="Payload"/>.
         /// </summary>
         internal sealed class FileEnumerator : IInputEnumerator
         {
             private readonly FileDictionary outerInstance;

             // The delimiter wrapped once in the array form string.Split expects,
             // so no array is allocated per line.
             private readonly string[] delimiters;

             internal long curWeight;
             internal readonly BytesRef spare = new BytesRef();
             internal BytesRef curPayload = new BytesRef();
             internal bool isFirstLine = true;
             internal bool hasPayloads = false;
             private BytesRef current;

             /// <summary>
             /// Reads and parses the first line of the dictionary. Whether payloads are
             /// enabled for the whole enumeration is decided here: they are enabled only
             /// if the first line carries three fields. When the input is empty the
             /// dictionary is marked as done and its reader is disposed.
             /// </summary>
             /// <param name="outerInstance">The dictionary whose reader is consumed.</param>
             /// <exception cref="ArgumentException">The first line has more than three fields.</exception>
             /// <exception cref="FormatException">The weight of the first line is not numeric.</exception>
             internal FileEnumerator(FileDictionary outerInstance)
             {
                 this.outerInstance = outerInstance;
                 delimiters = new string[] { outerInstance.fieldDelimiter };
                 outerInstance.line = outerInstance.@in.ReadLine();
                 if (outerInstance.line == null)
                 {
                     outerInstance.done = true;
                     IOUtils.Dispose(outerInstance.@in);
                 }
                 else
                 {
                     ParseLine(outerInstance.line, true);
                 }
             }

             /// <summary>Weight of the current entry; 1 when the entry did not specify one.</summary>
             public long Weight => curWeight;

             /// <summary>
             /// Term of the current entry, or <c>null</c> before the first successful
             /// <see cref="MoveNext"/> and after the input is exhausted. The returned
             /// instance is reused and overwritten by the next <see cref="MoveNext"/>.
             /// </summary>
             public BytesRef Current => current;

             /// <summary>
             /// Advances to the next entry. The first call hands out the line that was already
             /// parsed by the constructor; later calls read and parse one more line each.
             /// When the reader is exhausted the dictionary is marked as done and the reader is disposed.
             /// </summary>
             /// <returns><c>true</c> if an entry is available; <c>false</c> at end of input.</returns>
             /// <exception cref="ArgumentException">The line has more than three fields.</exception>
             /// <exception cref="FormatException">The weight of the line is not numeric.</exception>
             public bool MoveNext()
             {
                 if (outerInstance.done)
                 {
                     current = null;
                     return false;
                 }
                 if (isFirstLine)
                 {
                     // The constructor already parsed this line into spare/curWeight/curPayload.
                     isFirstLine = false;
                     current = spare;
                     return true;
                 }
                 outerInstance.line = outerInstance.@in.ReadLine();
                 if (outerInstance.line == null)
                 {
                     outerInstance.done = true;
                     IOUtils.Dispose(outerInstance.@in);
                     current = null;
                     return false;
                 }
                 ParseLine(outerInstance.line, false);
                 current = spare;
                 return true;
             }

             /// <summary>
             /// Splits one line into term, weight and payload and stores them in
             /// <see cref="spare"/>, <see cref="curWeight"/> and <see cref="curPayload"/>.
             /// </summary>
             /// <param name="entryLine">The raw line; must not be <c>null</c>.</param>
             /// <param name="firstLine"><c>true</c> for the first line of the input, which is
             /// the only line allowed to switch payloads on.</param>
             /// <exception cref="ArgumentException">The line has more than three fields.</exception>
             private void ParseLine(string entryLine, bool firstLine)
             {
                 string[] fields = entryLine.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);
                 if (fields.Length > 3)
                 {
                     throw new ArgumentException("More than 3 fields in one line");
                 }
                 if (firstLine && fields.Length == 3)
                 {
                     hasPayloads = true;
                 }

                 // RemoveEmptyEntries produces a zero-length array for a blank or delimiter-only
                 // line; treat that as an empty term rather than indexing past the end of the array.
                 spare.CopyChars(fields.Length > 0 ? fields[0] : string.Empty);

                 if (fields.Length >= 2)
                 {
                     ReadWeight(fields[1]);
                 }
                 else
                 {
                     curWeight = 1;
                 }

                 if (hasPayloads)
                 {
                     if (fields.Length == 3)
                     {
                         curPayload.CopyChars(fields[2]);
                     }
                     else
                     {
                         // Payloads are enabled but this entry has none: expose an empty payload.
                         curPayload = new BytesRef();
                     }
                 }
             }

             /// <summary>Always <c>null</c>.</summary>
             public IComparer<BytesRef> Comparer => null;

             /// <summary>
             /// Payload of the current entry, or <c>null</c> when payloads are not enabled
             /// (i.e. the first line did not carry one).
             /// </summary>
             public BytesRef Payload
                 => (hasPayloads) ? curPayload : null;


             /// <summary><c>true</c> if the first line of the input carried a payload.</summary>
             public bool HasPayloads => hasPayloads;

             /// <summary>
             /// Parses <paramref name="weight"/> into <see cref="curWeight"/> using the invariant
             /// culture. An integer is tried first; otherwise the text is parsed as a floating
             /// point number and its fractional part is dropped by the cast to <see cref="long"/>.
             /// </summary>
             /// <param name="weight">Textual weight taken from the second field of a line.</param>
             /// <exception cref="FormatException">The text is neither an integer nor a floating point number.</exception>
             internal void ReadWeight(string weight)
             {
                 // LUCENENET specific - don't use exception, use TryParse
                 if (!long.TryParse(weight, NumberStyles.Integer, CultureInfo.InvariantCulture, out curWeight))
                 {
                     try
                     {
                         // keep reading floats for bw compat
                         curWeight = (long)double.Parse(weight, NumberStyles.Float, CultureInfo.InvariantCulture);
                     }
                     catch (FormatException e)
                     {
                         // LUCENENET TODO: This is just so we can see what string and what culture was being tested when parsing failed,
                         // to try to reproduce the conditions of the failure.
                         // NOTE: the message reports the current culture although parsing above uses the invariant culture.
                         throw new FormatException($"Weight '{weight}' could not be parsed to long or double in culture '{CultureInfo.CurrentCulture.Name}'.", e);
                     }
                 }
             }

             /// <summary>Always <c>null</c>; this enumerator does not provide contexts.</summary>
             public ICollection<BytesRef> Contexts => null;

             /// <summary>Always <c>false</c>.</summary>
             public bool HasContexts => false;
         }
     }
 }
	using Lucene.Net.Search.Spell;
	using Lucene.Net.Util;
	using System;
	using System.Collections.Generic;
	using System.Globalization;
	using System.IO;
	using System.Text;

	namespace Lucene.Net.Search.Suggest
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/// <summary>
	/// Dictionary represented by a text file.
	///
	/// <para>Format allowed: 1 entry per line:</para>
	/// <para>An entry can be: </para>
	/// <list type="number">
	/// <item><description>suggestion</description></item>
	/// <item><description>suggestion <see cref="fieldDelimiter"/> weight</description></item>
	/// <item><description>suggestion <see cref="fieldDelimiter"/> weight <see cref="fieldDelimiter"/> payload</description></item>
	/// </list>
	/// where the default <see cref="fieldDelimiter"/> is <see cref="DEFAULT_FIELD_DELIMITER"/> (a tab)
	/// <para>
	/// <b>NOTE:</b>
	/// <list type="number">
	/// <item><description>In order to have payload enabled, the first entry has to have a payload</description></item>
	/// <item><description>If the weight for an entry is not specified then a value of 1 is used</description></item>
	/// <item><description>A payload cannot be specified without having the weight specified for an entry</description></item>
	/// <item><description>If the payload for an entry is not specified (assuming payload is enabled)
	/// then an empty payload is returned</description></item>
	/// <item><description>An entry cannot have more than two <see cref="fieldDelimiter"/>s</description></item>
	/// <item><description>Empty fields are discarded when a line is split, so consecutive
	/// <see cref="fieldDelimiter"/>s collapse into one</description></item>
	/// <item><description>A line without any non-empty field (for example a blank line) yields an empty
	/// suggestion with a weight of 1</description></item>
	/// </list>
	/// </para>
	/// <c>Example:</c><para/>
	/// word1 word2 TAB 100 TAB payload1<para/>
	/// word3 TAB 101<para/>
	/// word4 word3 TAB 102<para/>
	/// </summary>
	public class FileDictionary : IDictionary
	{

		/// <summary>
		/// Tab-delimited fields are most common thus the default, but one can override this via the constructor
		/// </summary>
		public const string DEFAULT_FIELD_DELIMITER = "\t";
		private readonly TextReader @in; // LUCENENET: marked readonly
		// Most recently read line; written by FileEnumerator.
		private string line;
		// Set once the reader returned null (end of input); the reader is disposed at that point.
		private bool done = false;
		private readonly string fieldDelimiter;

		/// <summary>
		/// Creates a dictionary based on an inputstream.
		/// Using <see cref="DEFAULT_FIELD_DELIMITER"/> as the
		/// field separator in a line.
		/// <para>
		/// NOTE: content is treated as UTF-8
		/// </para>
		/// </summary>
		/// <param name="dictFile">Stream holding the dictionary entries, one per line.</param>
		public FileDictionary(Stream dictFile)
			: this(dictFile, DEFAULT_FIELD_DELIMITER)
		{
		}

		/// <summary>
		/// Creates a dictionary based on a reader.
		/// Using <see cref="DEFAULT_FIELD_DELIMITER"/> as the
		/// field separator in a line.
		/// </summary>
		/// <param name="reader">Reader supplying the dictionary entries, one per line.</param>
		public FileDictionary(TextReader reader)
			: this(reader, DEFAULT_FIELD_DELIMITER)
		{
		}

		/// <summary>
		/// Creates a dictionary based on a reader.
		/// Using <paramref name="fieldDelimiter"/> to separate out the
		/// fields in a line.
		/// </summary>
		/// <param name="reader">Reader supplying the dictionary entries, one per line.</param>
		/// <param name="fieldDelimiter">String that separates the fields of an entry.</param>
		public FileDictionary(TextReader reader, string fieldDelimiter)
		{
			@in = reader;
			this.fieldDelimiter = fieldDelimiter;
		}

		/// <summary>
		/// Creates a dictionary based on an inputstream.
		/// Using <paramref name="fieldDelimiter"/> to separate out the
		/// fields in a line.
		/// <para>
		/// NOTE: content is treated as UTF-8
		/// </para>
		/// </summary>
		/// <param name="dictFile">Stream holding the dictionary entries, one per line.</param>
		/// <param name="fieldDelimiter">String that separates the fields of an entry.</param>
		public FileDictionary(Stream dictFile, string fieldDelimiter)
		{
			@in = IOUtils.GetDecodingReader(dictFile, Encoding.UTF8);
			this.fieldDelimiter = fieldDelimiter;
		}

		/// <summary>
		/// Creates an enumerator over the entries of this dictionary.
		/// The first line is read from the underlying reader as part of this call.
		/// <para>
		/// NOTE: the reader and the end-of-input state live on this dictionary, not on the
		/// enumerator, so every enumerator returned from this method consumes the same input.
		/// </para>
		/// </summary>
		/// <returns>An enumerator positioned before the first entry.</returns>
		/// <exception cref="Exception">Wraps an <see cref="IOException"/> raised while reading the first line.</exception>
		public virtual IInputEnumerator GetEntryEnumerator()
		{
			try
			{
				return new FileEnumerator(this);
			}
			catch (IOException e)
			{
				throw new Exception(e.ToString(), e);
			}
		}

		/// <summary>
		/// Enumerates the entries of the owning <see cref="FileDictionary"/> line by line,
		/// exposing the term via <see cref="Current"/> and the weight/payload of that term
		/// via <see cref="Weight"/> and <see cref="Payload"/>.
		/// </summary>
		internal sealed class FileEnumerator : IInputEnumerator
		{
			private readonly FileDictionary outerInstance;

			// The delimiter wrapped once in the array form string.Split expects,
			// so no array is allocated per line.
			private readonly string[] delimiters;

			internal long curWeight;
			internal readonly BytesRef spare = new BytesRef();
			internal BytesRef curPayload = new BytesRef();
			internal bool isFirstLine = true;
			internal bool hasPayloads = false;
			private BytesRef current;

			/// <summary>
			/// Reads and parses the first line of the dictionary. Whether payloads are
			/// enabled for the whole enumeration is decided here: they are enabled only
			/// if the first line carries three fields. When the input is empty the
			/// dictionary is marked as done and its reader is disposed.
			/// </summary>
			/// <param name="outerInstance">The dictionary whose reader is consumed.</param>
			/// <exception cref="ArgumentException">The first line has more than three fields.</exception>
			/// <exception cref="FormatException">The weight of the first line is not numeric.</exception>
			internal FileEnumerator(FileDictionary outerInstance)
			{
				this.outerInstance = outerInstance;
				delimiters = new string[] { outerInstance.fieldDelimiter };
				outerInstance.line = outerInstance.@in.ReadLine();
				if (outerInstance.line == null)
				{
					outerInstance.done = true;
					IOUtils.Dispose(outerInstance.@in);
				}
				else
				{
					ParseLine(outerInstance.line, true);
				}
			}

			/// <summary>Weight of the current entry; 1 when the entry did not specify one.</summary>
			public long Weight => curWeight;

			/// <summary>
			/// Term of the current entry, or <c>null</c> before the first successful
			/// <see cref="MoveNext"/> and after the input is exhausted. The returned
			/// instance is reused and overwritten by the next <see cref="MoveNext"/>.
			/// </summary>
			public BytesRef Current => current;

			/// <summary>
			/// Advances to the next entry. The first call hands out the line that was already
			/// parsed by the constructor; later calls read and parse one more line each.
			/// When the reader is exhausted the dictionary is marked as done and the reader is disposed.
			/// </summary>
			/// <returns><c>true</c> if an entry is available; <c>false</c> at end of input.</returns>
			/// <exception cref="ArgumentException">The line has more than three fields.</exception>
			/// <exception cref="FormatException">The weight of the line is not numeric.</exception>
			public bool MoveNext()
			{
				if (outerInstance.done)
				{
					current = null;
					return false;
				}
				if (isFirstLine)
				{
					// The constructor already parsed this line into spare/curWeight/curPayload.
					isFirstLine = false;
					current = spare;
					return true;
				}
				outerInstance.line = outerInstance.@in.ReadLine();
				if (outerInstance.line == null)
				{
					outerInstance.done = true;
					IOUtils.Dispose(outerInstance.@in);
					current = null;
					return false;
				}
				ParseLine(outerInstance.line, false);
				current = spare;
				return true;
			}

			/// <summary>
			/// Splits one line into term, weight and payload and stores them in
			/// <see cref="spare"/>, <see cref="curWeight"/> and <see cref="curPayload"/>.
			/// </summary>
			/// <param name="entryLine">The raw line; must not be <c>null</c>.</param>
			/// <param name="firstLine"><c>true</c> for the first line of the input, which is
			/// the only line allowed to switch payloads on.</param>
			/// <exception cref="ArgumentException">The line has more than three fields.</exception>
			private void ParseLine(string entryLine, bool firstLine)
			{
				string[] fields = entryLine.Split(delimiters, StringSplitOptions.RemoveEmptyEntries);
				if (fields.Length > 3)
				{
					throw new ArgumentException("More than 3 fields in one line");
				}
				if (firstLine && fields.Length == 3)
				{
					hasPayloads = true;
				}

				// RemoveEmptyEntries produces a zero-length array for a blank or delimiter-only
				// line; treat that as an empty term rather than indexing past the end of the array.
				spare.CopyChars(fields.Length > 0 ? fields[0] : string.Empty);

				if (fields.Length >= 2)
				{
					ReadWeight(fields[1]);
				}
				else
				{
					curWeight = 1;
				}

				if (hasPayloads)
				{
					if (fields.Length == 3)
					{
						curPayload.CopyChars(fields[2]);
					}
					else
					{
						// Payloads are enabled but this entry has none: expose an empty payload.
						curPayload = new BytesRef();
					}
				}
			}

			/// <summary>Always <c>null</c>.</summary>
			public IComparer<BytesRef> Comparer => null;

			/// <summary>
			/// Payload of the current entry, or <c>null</c> when payloads are not enabled
			/// (i.e. the first line did not carry one).
			/// </summary>
			public BytesRef Payload
				=> (hasPayloads) ? curPayload : null;


			/// <summary><c>true</c> if the first line of the input carried a payload.</summary>
			public bool HasPayloads => hasPayloads;

			/// <summary>
			/// Parses <paramref name="weight"/> into <see cref="curWeight"/> using the invariant
			/// culture. An integer is tried first; otherwise the text is parsed as a floating
			/// point number and its fractional part is dropped by the cast to <see cref="long"/>.
			/// </summary>
			/// <param name="weight">Textual weight taken from the second field of a line.</param>
			/// <exception cref="FormatException">The text is neither an integer nor a floating point number.</exception>
			internal void ReadWeight(string weight)
			{
				// LUCENENET specific - don't use exception, use TryParse
				if (!long.TryParse(weight, NumberStyles.Integer, CultureInfo.InvariantCulture, out curWeight))
				{
					try
					{
						// keep reading floats for bw compat
						curWeight = (long)double.Parse(weight, NumberStyles.Float, CultureInfo.InvariantCulture);
					}
					catch (FormatException e)
					{
						// LUCENENET TODO: This is just so we can see what string and what culture was being tested when parsing failed,
						// to try to reproduce the conditions of the failure.
						// NOTE: the message reports the current culture although parsing above uses the invariant culture.
						throw new FormatException($"Weight '{weight}' could not be parsed to long or double in culture '{CultureInfo.CurrentCulture.Name}'.", e);
					}
				}
			}

			/// <summary>Always <c>null</c>; this enumerator does not provide contexts.</summary>
			public ICollection<BytesRef> Contexts => null;

			/// <summary>Always <c>false</c>.</summary>
			public bool HasContexts => false;
		}
	}
	}