@@ -13,13 +13,25 @@ def anomalyDetection_example():
1313 plt = display_2d_data (X , 'bx' )
1414 plt .title ("origin data" )
1515 plt .show ()
16+ '''多元高斯分布函数,并可视化拟合的边界'''
17+ mu ,sigma2 = estimateGaussian (X ) # 参数估计(求均值和方差)
18+ #print mu,sigma2
19+ p = multivariateGaussian (X ,mu ,sigma2 ) # 多元高斯分布函数
20+ #print p
21+ visualizeFit (X ,mu ,sigma2 ) # 显示图像
1622
17- mu ,sigma2 = estimateGaussian (X )
18- print mu ,sigma2
19- p = multivariateGaussian (X ,mu ,sigma2 )
20- print p
23+ '''选择异常点(在交叉验证CV上训练得到最好的epsilon)'''
24+ Xval = data ['Xval' ]
25+ yval = data ['yval' ]
26+ pval = multivariateGaussian (Xval , mu , sigma2 ) # 计算CV上的概率密度值
27+ epsilon ,F1 = selectThreshold (yval ,pval ) # 选择最优的epsilon临界值
28+ print u'在CV上得到的最好的epsilon是:%e' % epsilon
29+ print u'对应的F1Score值为:%f' % F1
30+ outliers = np .where (p < epsilon ) # 找到小于临界值的异常点,并作图
31+ plt .plot (X [outliers ,0 ],X [outliers ,1 ],'o' ,markeredgecolor = 'r' ,markerfacecolor = 'w' ,markersize = 10. )
32+ plt = display_2d_data (X , 'bx' )
33+ plt .show ()
2134
22- visualizeFit (X ,mu ,sigma2 )
2335
2436
2537
@@ -44,16 +56,49 @@ def multivariateGaussian(X,mu,Sigma2):
4456 k = len (mu )
4557 if (Sigma2 .shape [0 ]> 1 ):
4658 Sigma2 = np .diag (Sigma2 )
47-
59+ '''多元高斯分布函数'''
4860 X = X - mu
4961 argu = (2 * np .pi )** (- k / 2 )* np .linalg .det (Sigma2 )** (- 0.5 )
5062 p = argu * np .exp (- 0.5 * np .sum (np .dot (X ,np .linalg .inv (Sigma2 ))* X ,axis = 1 )) # axis表示每行
5163 return p
5264
# Visualize the fitted Gaussian as contour lines drawn over the data points
def visualizeFit(X, mu, sigma2):
    '''Plot the data set together with contour lines of the fitted Gaussian.

    The density is evaluated by multivariateGaussian on a square grid that
    covers [0, 36) along both axes with a spacing of 0.5, and contour lines
    are drawn at the levels 10**-20, 10**-17, ..., 10**-2.

    Parameters:
        X      -- data points; columns 0 and 1 are used as the plot axes
        mu     -- forwarded unchanged to multivariateGaussian
        sigma2 -- forwarded unchanged to multivariateGaussian

    The function returns nothing; it ends by calling plt.show().
    '''
    # Both axes share the same sample positions: 0, 0.5, ..., 35.5
    ticks = np.arange(0, 36, 0.5)
    X1, X2 = np.meshgrid(ticks, ticks)

    # Flatten the grid into one (x1, x2) row per grid point, evaluate the
    # density there, then fold the result back into the grid's shape.
    gridPoints = np.hstack((X1.reshape(-1, 1), X2.reshape(-1, 1)))
    Z = multivariateGaussian(gridPoints, mu, sigma2)
    Z = Z.reshape(X1.shape)

    plt.plot(X[:, 0], X[:, 1], 'bx')

    # Skip the contour plot when any density value is infinite.
    if not np.isinf(Z).any():
        # contour() takes the plural keywords `colors` / `linewidths`; the
        # singular forms are not contour parameters, so the requested
        # styling was never applied.
        plt.contour(X1, X2, Z, 10. ** np.arange(-20, 0, 3),
                    colors='black', linewidths=.5)

    plt.show()
80+
# Select the best epsilon, i.e. the threshold that maximizes the F1 score
def selectThreshold(yval, pval):
    '''Find the density threshold that gives the highest F1 score.

    A sample is flagged as an anomaly when its density is strictly below
    the threshold. 1000 evenly spaced candidate thresholds between
    min(pval) and max(pval) are tried, and the first one that reaches the
    highest F1 score is kept.

    Parameters:
        yval -- ground-truth labels (1 = anomaly, 0 = normal); any shape,
                it is flattened before use
        pval -- density value of every sample, in the same order as yval;
                any shape, it is flattened before use

    Returns:
        (bestEpsilon, bestF1). Both are 0.0 when pval is empty, when all
        values of pval are identical, or when no candidate threshold
        flags a true anomaly.
    '''
    # Flatten both inputs: comparing a column vector of labels (m, 1) with
    # a 1-D density vector (m,) would broadcast to an (m, m) matrix and
    # corrupt every count below.
    yval = np.asarray(yval).ravel()
    pval = np.asarray(pval).ravel()

    bestEpsilon = 0.
    bestF1 = 0.
    if pval.size == 0:
        return bestEpsilon, bestF1

    lowest = np.min(pval)
    highest = np.max(pval)
    step = (highest - lowest) / 1000.
    if not step > 0:
        # Identical (or NaN) densities leave no range to scan, and
        # np.arange cannot work with a zero step.
        return bestEpsilon, bestF1

    # The label masks do not depend on epsilon, so build them once.
    isAnomaly = yval == 1
    isNormal = yval == 0

    for epsilon in np.arange(lowest, highest, step):
        flagged = pval < epsilon
        tp = float(np.sum(flagged & isAnomaly))
        if tp == 0:
            # Without true positives F1 is 0 (or 0/0), so it can never
            # beat the current best; skipping also avoids dividing by zero.
            continue
        fp = float(np.sum(flagged & isNormal))
        # False negatives are real anomalies that were NOT flagged.
        fn = float(np.sum(~flagged & isAnomaly))
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        F1 = (2 * precision * recall) / (precision + recall)
        if F1 > bestF1:
            bestF1 = F1
            bestEpsilon = epsilon
    return bestEpsilon, bestF1
101+
57102
58103
59104if __name__ == '__main__' :
0 commit comments