# import numpy as np
# from scipy.spatial.distance import pdist  # pdist can compute the Jaccard value directly, but it requires both inputs to have the same length, so it is not used for now
import jieba
def Jaccrad(model, reference):
    """Return the Jaccard similarity coefficient of two sentences.

    Each sentence is segmented with ``jieba.cut`` (default mode) and the
    resulting tokens are de-duplicated into sets. The coefficient is the
    size of the intersection of the two token sets divided by the size of
    their union.

    Args:
        model: The candidate sentence.
        reference: The source sentence the candidate is compared against.

    Returns:
        A float between 0.0 (no shared tokens) and 1.0 (identical token
        sets). When neither sentence yields any token, the two empty sets
        are treated as identical and 1.0 is returned.
    """
    # Sets drop repeated tokens; the score ignores how often a word occurs.
    grams_reference = set(jieba.cut(reference))
    grams_model = set(jieba.cut(model))

    union_size = len(grams_reference | grams_model)
    if union_size == 0:
        # Both inputs produced no tokens; avoid dividing by zero.
        return 1.0

    shared = len(grams_reference & grams_model)
    return shared / union_size
if __name__ == '__main__':
    # Demo: score a sentence against an identical copy and print the result.
    candidate = "香农在信息论中提出的信息熵定义为自信息的期望"
    source = "香农在信息论中提出的信息熵定义为自信息的期望"
    print(Jaccrad(candidate, source))