看到的一段计算文本匹配程度(相似度)的程序,记一下。
/// <summary>
/// Computes the cosine similarity between the character-frequency vectors of two strings.
/// Each distinct <see cref="char"/> (UTF-16 code unit) is one dimension, and its number of
/// occurrences in the string is the component along that dimension.
/// </summary>
/// <param name="txt1">First text to compare.</param>
/// <param name="txt2">Second text to compare.</param>
/// <returns>
/// A value in the range [0, 1]: 1 when both strings have proportional character frequencies
/// (for example, identical strings), 0 when they share no characters. Returns 0 when either
/// string is empty, because the cosine is undefined for a zero-length vector.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="txt1"/> or <paramref name="txt2"/> is <c>null</c>.
/// </exception>
public static double Sim(string txt1, string txt2)
{
    if (txt1 == null)
    {
        throw new ArgumentNullException(nameof(txt1));
    }

    if (txt2 == null)
    {
        throw new ArgumentNullException(nameof(txt2));
    }

    // An empty string yields a zero vector; dividing by its norm would produce NaN.
    if (txt1.Length == 0 || txt2.Length == 0)
    {
        return 0;
    }

    Dictionary<char, int> counts1 = CountChars(txt1);
    Dictionary<char, int> counts2 = CountChars(txt2);

    // Accumulate in double so that products of large counts cannot overflow int.
    double dot = 0;
    double normSquared1 = 0;
    double normSquared2 = 0;

    foreach (KeyValuePair<char, int> entry in counts1)
    {
        normSquared1 += (double)entry.Value * entry.Value;

        // Characters missing from the other string contribute 0 to the dot product.
        int otherCount;
        if (counts2.TryGetValue(entry.Key, out otherCount))
        {
            dot += (double)entry.Value * otherCount;
        }
    }

    foreach (int count in counts2.Values)
    {
        normSquared2 += (double)count * count;
    }

    // Take a single square root of the product: Sqrt(a) * Sqrt(b) accumulates two rounding
    // errors and can push the result for identical strings slightly above 1.
    double cos = dot / Math.Sqrt(normSquared1 * normSquared2);

    // Guard against any residual floating-point drift above the mathematical maximum.
    return Math.Min(cos, 1.0);
}

/// <summary>
/// Counts how many times each character occurs in <paramref name="text"/>.
/// </summary>
private static Dictionary<char, int> CountChars(string text)
{
    Dictionary<char, int> counts = new Dictionary<char, int>();
    foreach (char c in text)
    {
        int current;
        counts.TryGetValue(c, out current);
        counts[c] = current + 1;
    }

    return counts;
}
|
请发表评论