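"""
k-nearest-neighbour (k-NN) classification walkthrough.

1. Obtain the keys (class labels) and coord_values (feature coordinates).
   The sample uses plain lists, but lists are not suitable for real training
   data; reading the samples from a database is recommended instead.
2. Normalise the coordinates. The author notes not fully understanding why
   this is needed - possibly to prevent wrong matches - but it is what the
   book does.
3. Specify two parameters: the first is the coordinates of the newly added
   case, the second is n, the number of nearest surrounding points to match.
   n should be an odd number.
4. Compute distances, using the Euclidean distance.
   The new case's coordinates are compared with every existing coordinate;
   there is still room for optimisation here, to be updated later.
   Pair each computed distance with its key to form new key-value pairs.
   Sort by distance.
   Take the first n cases.
5. Obtain the key.
   Count the keys of the first n cases.
   The key with the highest count is the group the new case belongs to.
6. Write the new values and their group as a key-value pair into the existing
   key-value queue.
   Input the coordinates of the next new case and return to step 1 ... repeat.
"""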
import operator
# Build the hard-coded sample set and return it as (case_list, case_type).
# NOTE(review): this block was damaged when the file was extracted from a
# syntax-highlighted page: the `<span ...>` fragments are markup residue, the
# brackets in the case_list literal are unbalanced, several rows are missing a
# value, and case_type holds only 3 labels for roughly 8 rows. The lost values
# cannot be recovered from this file - restore them from the original source
# before use. Presumably each row is [length, curvature, colour] with one
# label per row - TODO confirm.
<span style="color: #0000ff">def<span style="color: #000000"> create_case_list():
<span style="color: #008000">#<span style="color: #008000"> 1 = cucumber, 2 = banana, 3 = eggplant, 4 = zucchini
case_list = [[25,3,73732],[27.5,8,127492],[13,6,[16,13,50331049],[17,4,18874516],[22,13762774],[14,1,30473482],[18,38338108<span style="color: #000000">]]
case_type = [1,2,4<span style="color: #000000">]
<span style="color: #0000ff">return<span style="color: #000000"> case_list,case_type
def knn_fun(user_coord, case_coord_list, case_type, take_num):
    """Classify ``user_coord`` by majority vote among its nearest samples.

    Args:
        user_coord: Sequence of numeric feature values for the new case.
        case_coord_list: Known samples; each must have at least
            ``len(user_coord)`` numeric values.
        case_type: Class labels, parallel to ``case_coord_list``. If the two
            lengths differ, only the pairs that exist in both are used.
        take_num: Number of nearest samples that take part in the vote. If it
            exceeds the number of samples, all samples vote.

    Returns:
        The label with the most votes among the nearest samples. On a tie the
        label owning the closest sample wins.

    Raises:
        IndexError: If a sample has fewer values than ``user_coord``, or if
            there is nothing to vote on (no samples, or ``take_num`` < 1).
    """
    coord_len = len(user_coord)

    # Euclidean distance from the new case to every known sample. Indexing by
    # the query's length keeps the IndexError on samples that are too short.
    eu_distance = []
    for coord in case_coord_list:
        squared_sum = sum(
            (user_coord[i] - coord[i]) ** 2 for i in range(coord_len)
        )
        eu_distance.append(squared_sum ** 0.5)

    # sorted() works on Python 2 and 3; zip() returns an iterator on Python 3
    # and therefore has no .sort() method.
    ranked = sorted(zip(eu_distance, case_type))
    nearest_types = [label for _, label in ranked[:max(take_num, 0)]]
    if not nearest_types:
        raise IndexError(
            "knn_fun needs at least one labelled case and take_num >= 1"
        )

    class_count = {}
    for label in nearest_types:
        class_count[label] = class_count.get(label, 0) + 1

    # Walk the labels nearest-first and switch only on a strictly higher
    # count, so ties resolve deterministically to the closest sample's label.
    best = nearest_types[0]
    for label in nearest_types:
        if class_count[label] > class_count[best]:
            best = label
    return best
def auto_norm(case_list):
    """Min-max scale every feature column of ``case_list``.

    Args:
        case_list: List of samples, each a sequence of numeric values. The
            first sample's length decides how many columns are processed.

    Returns:
        A tuple ``(norm_data_list, ranges, min_vals)``:
        ``norm_data_list`` holds the samples rescaled per column as
        ``(value - min) / (max - min)``; ``ranges`` holds ``max - min`` per
        column; ``min_vals`` holds the minimum per column. Empty input yields
        three empty lists.

    Raises:
        IndexError: If a sample is shorter than the first sample.
    """
    if not case_list:
        return [], [], []

    case_len = len(case_list[0])
    columns = [[case[i] for case in case_list] for i in range(case_len)]
    min_vals = [min(column) for column in columns]
    max_vals = [max(column) for column in columns]
    ranges = [high - low for high, low in zip(max_vals, min_vals)]

    norm_data_list = []
    for case in case_list:
        scaled = []
        for i in range(case_len):
            if ranges[i]:
                # float() forces true division even for int data on Python 2.
                scaled.append((case[i] - min_vals[i]) / float(ranges[i]))
            else:
                # A constant column has zero range; map it to 0.0 rather than
                # dividing by zero.
                scaled.append(0.0)
        norm_data_list.append(scaled)
    return norm_data_list, ranges, min_vals
def main():
    """Prompt for three feature values and return the predicted class name.

    Reads length, curvature and colour from standard input, classifies the
    resulting point against the samples from ``create_case_list()`` using the
    3 nearest neighbours, and maps the winning label to its display name.

    Returns:
        The entry of ``result_list`` that corresponds to the predicted label.
    """
    result_list = ['黄瓜', '香蕉', '茄子', '西葫芦']
    # NOTE: on Python 2, input() evaluates the typed text as an expression;
    # prefer raw_input() there if the input is not trusted.
    dimension1 = float(input('长度是: '))
    dimension2 = float(input('弯曲度是: '))
    dimension3 = float(input('颜色是: '))
    case_list, type_list = create_case_list()
    in_coord = [dimension1, dimension2, dimension3]
    # NOTE: features are compared unscaled, so the colour value (orders of
    # magnitude larger than the other two in the sample data) dominates the
    # distance. Normalisation is not applied here.
    # knn_fun takes the labels as its third argument; the previous call
    # omitted them and could not run.
    class_sel_result = knn_fun(in_coord, case_list, type_list, 3)
    # Labels are 1-based while result_list is 0-based.
    return result_list[class_sel_result - 1]
# Run the interactive classifier only when executed as a script.
if __name__ == '__main__':
    a = main()
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print('这货是: %s' % a)