Levenshtein Distance (编辑距离) 算法详解
2010-09-30 22:35:59 来源:WEB开发网天看到万仓一黍发的计算字符串的相似度(VB2005),感兴趣研究了一下,看了半天也没搞懂(惭愧惭愧),上网google了一下,大多也都是实现,没什么解释,几经波折还是在wikipedia找到了详细的算法解释,首先还是C#实现代码:
c#实现代码using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
namespace WordCompare
{
class Program
{
static void Main(string[] args)
{
var fromString = "English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.";
var toString = "English is West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.English is a West Germanic language that developed in England during the Anglo-Saxon era. As a result of the military, economic, scientific, political, and cultural influence of the British Empire during the 18th, 19th, and early 20th centuries, and of the United States since the mid 20th century,[7][8][9][10] it has become the lingua franca in many parts of the world.[11][12] It is used extensively as a second language and as an official language in Commonwealth countries and many international organisations.a";
Stopwatch watch = new Stopwatch();
watch.Start();
var result = CompareStrings(fromString, toString);
watch.Stop();
Console.WriteLine("The result is {0}, spent {1} milliseconds.", result, watch.ElapsedMilliseconds);
}
private static int CompareStrings(string fromString, string toString)
{
var fLength = fromString.Length;
var tLength = toString.Length;
// pre verify the simplest condition
if (fLength == 0)
{
return tLength;
}
if (tLength == 0)
{
return fLength;
}
// prepare the martix
var martix = new int[fLength + 1, tLength + 1];
for (int i = 0; i <= fLength; i++)
{
martix[i, 0] = i;
}
for (int j = 0; j <= tLength; j++)
{
martix[0, j] = j;
}
// compare the chars
for (int i = 1; i <= fLength; i++)
{
var tempF = fromString[i - 1];
var cost = 0;
for (int j = 1; j <= tLength; j++)
{
var tempT = toString[j - 1];
if (tempT == tempF)
{
cost = 0;
}
else
{
cost = 1;
}
var valueAbove = martix[i - 1, j] + 1;
var valueLeft = martix[i, j - 1] + 1;
// left corner
var valueDiag = martix[i - 1, j - 1] + cost;
// find the minimum from the three vars above
var cellValue = valueAbove < valueLeft ? (valueDiag < valueAbove ? valueDiag : valueAbove) : (valueDiag < valueLeft ? valueDiag : valueLeft);
martix[i, j] = cellValue;
}
}
var result = martix[fLength, tLength];
return result;
}
}
}
Tags:Levenshtein Distance 编辑
编辑录入:爽爽 [复制链接] [打 印]更多精彩
赞助商链接