package com.thunisoft.in4.cm.associate.util;
import java.util.List;
import java.util.Map;
/**
* 编辑距离算法
*
* @author fanxf
*
*/
public class HanziParse {
/**
* 计算矢量距离 Levenshtein Distance(LD)
*
* @param str1
* str1
* @param str2
* str2
* @return ld; 0:完全相等,非零代表相似距離
*/
private static int distance(String str1, String str2) {
int n = str1.length();
int m = str2.length();
if (n == 0) {
return m;
}
if (m == 0) {
return n;
}
/** 二维数组,存放中间距离 */
int[][] distance = new int[n + 1][m + 1];
/** 初始化数据 */
int i = 0;
for (i = 0; i <= n; i++) {
distance[i][0] = i;
}
int j = 0;
for (j = 0; j <= m; j++) {
distance[0][j] = j;
}
/** 计算中间数据 */
int cost = 0;
for (i = 1; i <= n; i++) {
char ch1 = str1.charAt(i - 1);
for (j = 1; j <= m; j++) {
char ch2 = str2.charAt(j - 1);
if (ch1 == ch2) {
cost = 0;
} else {
cost = 1;
}
/** 获取最小值 */
distance[i][j] = min(distance[i - 1][j] + 1,
distance[i][j - 1] + 1, distance[i - 1][j - 1] + cost);
}
}
return distance[n][m];
}
/**
* 计算最小值
*
* @param one
* @param two
* @param three
* @return 返回最小值
*/
private static int min(int one, int two, int three) {
int min = one;
if (two < min) {
min = two;
}
if (three < min) {
min = three;
}
return min;
}
/**
* 计算相似度
*
* @param str1
* str1 待匹配数据
* @param str2
* str2 待匹配数据
* @return sim,匹配相似度
*/
public static float similarity(String str1, String str2) {
int ld = distance(str1, str2);
return 1 - (float) ld / Math.max(str1.length(), str2.length());
}
/**
* 测试
*
* @param args
*/
public static void main(String[] args) {
HanziParse ld = new HanziParse();
float num = ld.similarity("vvv", "vvv");
System.out.println(num);
}
}