|  | using System; | 
|  |  | 
|  | namespace Lucene.Net.Search.Spell | 
|  | { | 
|  | /* | 
|  | * Licensed to the Apache Software Foundation (ASF) under one or more | 
|  | * contributor license agreements.  See the NOTICE file distributed with | 
|  | * this work for additional information regarding copyright ownership. | 
|  | * The ASF licenses this file to You under the Apache License, Version 2.0 | 
|  | * (the "License"); you may not use this file except in compliance with | 
|  | * the License.  You may obtain a copy of the License at | 
|  | * | 
|  | *     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | * | 
|  | * Unless required by applicable law or agreed to in writing, software | 
|  | * distributed under the License is distributed on an "AS IS" BASIS, | 
|  | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | * See the License for the specific language governing permissions and | 
|  | * limitations under the License. | 
|  | */ | 
|  |  | 
|  | /// <summary> | 
|  | /// Levenstein edit distance class. | 
|  | /// </summary> | 
|  | // LUCENENET NOTE: This class is misspelled: It should be Levenshtein | 
|  | public sealed class LevensteinDistance : IStringDistance | 
|  | { | 
|  |  | 
|  | /// <summary> | 
|  | /// Optimized to run a bit faster than the static GetDistance(). | 
|  | /// In one benchmark times were 5.3sec using ctr vs 8.5sec w/ static method, thus 37% faster. | 
|  | /// </summary> | 
|  | public LevensteinDistance() | 
|  | { | 
|  | } | 
|  |  | 
|  |  | 
|  | //***************************** | 
|  | // Compute Levenshtein distance: see org.apache.commons.lang.StringUtils#getLevenshteinDistance(String, String) | 
|  | //***************************** | 
|  | public float GetDistance(string target, string other) | 
|  | { | 
|  | char[] sa; | 
|  | int n; | 
|  | int[] p; //'previous' cost array, horizontally | 
|  | int[] d; // cost array, horizontally | 
|  | int[] _d; //placeholder to assist in swapping p and d | 
|  |  | 
|  | /* | 
|  | The difference between this impl. and the previous is that, rather | 
|  | than creating and retaining a matrix of size s.length()+1 by t.length()+1, | 
|  | we maintain two single-dimensional arrays of length s.length()+1.  The first, d, | 
|  | is the 'current working' distance array that maintains the newest distance cost | 
|  | counts as we iterate through the characters of String s.  Each time we increment | 
|  | the index of String t we are comparing, d is copied to p, the second int[].  Doing so | 
|  | allows us to retain the previous cost counts as required by the algorithm (taking | 
|  | the minimum of the cost count to the left, up one, and diagonally up and to the left | 
|  | of the current cost count being calculated).  (Note that the arrays aren't really | 
|  | copied anymore, just switched...this is clearly much better than cloning an array | 
|  | or doing a System.arraycopy() each time  through the outer loop.) | 
|  |  | 
|  | Effectively, the difference between the two implementations is this one does not | 
|  | cause an out of memory condition when calculating the LD over two very large strings. | 
|  | */ | 
|  |  | 
|  | sa = target.ToCharArray(); | 
|  | n = sa.Length; | 
|  | p = new int[n + 1]; | 
|  | d = new int[n + 1]; | 
|  |  | 
|  | int m = other.Length; | 
|  | if (n == 0 || m == 0) | 
|  | { | 
|  | if (n == m) | 
|  | { | 
|  | return 1; | 
|  | } | 
|  | else | 
|  | { | 
|  | return 0; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | // indexes into strings s and t | 
|  | int i; // iterates through s | 
|  | int j; // iterates through t | 
|  |  | 
|  | char t_j; // jth character of t | 
|  |  | 
|  | int cost; // cost | 
|  |  | 
|  | for (i = 0; i <= n; i++) | 
|  | { | 
|  | p[i] = i; | 
|  | } | 
|  |  | 
|  | for (j = 1; j <= m; j++) | 
|  | { | 
|  | t_j = other[j - 1]; | 
|  | d[0] = j; | 
|  |  | 
|  | for (i = 1; i <= n; i++) | 
|  | { | 
|  | cost = sa[i - 1] == t_j ? 0 : 1; | 
|  | // minimum of cell to the left+1, to the top+1, diagonally left and up +cost | 
|  | d[i] = Math.Min(Math.Min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost); | 
|  | } | 
|  |  | 
|  | // copy current distance counts to 'previous row' distance counts | 
|  | _d = p; | 
|  | p = d; | 
|  | d = _d; | 
|  | } | 
|  |  | 
|  | // our last action in the above loop was to switch d and p, so p now | 
|  | // actually has the most recent cost counts | 
|  | return 1.0f - ((float)p[n] / Math.Max(other.Length, sa.Length)); | 
|  | } | 
|  |  | 
|  | public override int GetHashCode() | 
|  | { | 
|  | unchecked | 
|  | { | 
|  | return 163 * this.GetType().GetHashCode(); | 
|  | } | 
|  | } | 
|  |  | 
|  | public override bool Equals(object obj) | 
|  | { | 
|  | if (this == obj) | 
|  | { | 
|  | return true; | 
|  | } | 
|  | if (null == obj) | 
|  | { | 
|  | return false; | 
|  | } | 
|  | return (this.GetType() == obj.GetType()); | 
|  | } | 
|  |  | 
|  | public override string ToString() | 
|  | { | 
|  | return "levenstein"; | 
|  | } | 
|  | } | 
|  | } |