Gaussian Density Estimation
$\mu = \frac{1}{n}\sum\limits_{x_i \in X^+} x_i$
$\Sigma = \frac{1}{n}\sum\limits_{x_i \in X^+}(x_i-\mu)(x_i-\mu)^T$
$p(x) = \frac{1}{(2\pi)^{d/2}|\Sigma|^{1/2}}\exp[-\frac{1}{2}(x-\mu)^T\Sigma^{-1}(x-\mu)]$
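These estimates are simple to compute with NumPy/SciPy. Below is a minimal sketch; `X_pos` is a hypothetical (n, d) array of normal-class observations, and the density is evaluated with `scipy.stats.multivariate_normal`.

import numpy as np
from scipy import stats

# hypothetical normal-class data X_pos: (n, d) array
X_pos = np.random.randn(100, 2) + np.array([5.0, 3.0])

mu = X_pos.mean(axis=0)                         # sample mean
Sigma = np.cov(X_pos, rowvar=False, bias=True)  # MLE covariance (divides by n)

# evaluate p(x) at a query point
p_x = stats.multivariate_normal.pdf([5.5, 3.1], mean=mu, cov=Sigma)
print(p_x)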
Maximum likelihood for the Gaussian density function
$\mu = \frac{1}{N}\sum\limits_{i=1}^{N}x_i$
\[\frac{\partial \log L}{\partial \gamma}=-\frac{1}{2}\sum\limits_{i=1}^{N}(x_i-\mu)^2+\frac{N}{2\gamma}=0, \quad \text{where } \gamma = 1/\sigma^2\]
$\sigma^2 = \frac{1}{N}\sum\limits_{i=1}^{N}(x_i-\mu)^2$
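For completeness, the $\mu$ estimate above comes from the same log-likelihood by setting its derivative with respect to $\mu$ to zero:
\[
\log L = \frac{N}{2}\log\gamma - \frac{\gamma}{2}\sum_{i=1}^{N}(x_i-\mu)^2 - \frac{N}{2}\log 2\pi, \qquad
\frac{\partial \log L}{\partial \mu} = \gamma\sum_{i=1}^{N}(x_i-\mu) = 0 \;\Rightarrow\; \mu = \frac{1}{N}\sum_{i=1}^{N}x_i
\]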
Covariance Matrix
Case 1) – Case 4): figures comparing covariance matrix structures (images not preserved), followed by an example figure.
Mixture of Gaussian Density Estimation
$p(x|\lambda)=\sum\limits_{m=1}^{M}w_m\, g(x|\mu_m,\Sigma_m)$
$g(x|\mu_m, \Sigma_m) = \frac{1}{(2\pi)^{d/2}|\Sigma_m|^{1/2}}\exp[-\frac{1}{2}(x-\mu_m)^T\Sigma_{m}^{-1}(x-\mu_m)]$
\[\lambda = \{w_m, \mu_m, \Sigma_m \}, \quad m=1,...,M\]
EM Algorithm for MoG
$\mu_{m}^{new} = \frac{\sum\limits_{i=1}^{N}p(m|x_i,\lambda)\,x_i}{\sum\limits_{i=1}^{N}p(m|x_i,\lambda)}$
$\sigma_{m}^{2(new)} = \frac{\sum\limits_{i=1}^{N}p(m|x_i,\lambda)\,x^2_i}{\sum\limits_{i=1}^{N} p(m|x_i,\lambda)}-\{\mu_{m}^{new}\}^2$
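These updates use the responsibility $p(m|x_i,\lambda)$, which follows from Bayes' rule and is what the E-step code below computes; together with the weight update it completes one EM iteration:
\[
p(m|x_i,\lambda) = \frac{w_m\, g(x_i|\mu_m,\Sigma_m)}{\sum_{k=1}^{M} w_k\, g(x_i|\mu_k,\Sigma_k)}, \qquad
w_m^{new} = \frac{1}{N}\sum_{i=1}^{N} p(m|x_i,\lambda)
\]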
MoG from scratch
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.patches import Ellipse
import scipy as sc
from scipy import stats
data=pd.read_csv('MoGDE.csv')
data
| | x1 | x2 | label |
|---|---|---|---|
| 0 | 5.4 | 3.9 | abnormal |
| 1 | 6.6 | 2.9 | normal |
| 2 | 4.6 | 3.1 | normal |
| 3 | 6.3 | 2.5 | normal |
| 4 | 6.7 | 3.3 | normal |
| 5 | 4.6 | 3.4 | normal |
| 6 | 4.9 | 3.1 | normal |
| 7 | 6.3 | 2.3 | normal |
| 8 | 6.5 | 3.0 | normal |
| 9 | 6.4 | 2.9 | normal |
data.shape
(10, 3)
colordict = {'abnormal': 'red', 'normal': 'blue'}
color = [colordict[c] for c in data.label]
plt.scatter(data.iloc[:,0],data.iloc[:,1],c=color)
X = data.drop('label', axis=1)
y = data['label']
# train on the normal observations only
x = X.loc[y == 'normal', :]
N_var = x.shape[1] # number of variables: 2
N_gaussian = 3 # how many Gaussian components to use?
N = x.shape[0] # number of observations
# allocate initial Mu, Cov, and weight arrays for the EM steps
_mu = np.empty([N_gaussian, N_var])
_cov = np.empty([N_gaussian, N_var, N_var])
_w = np.empty(N_gaussian)
print("initial mu shape =", _mu.shape)
print("initial cov shape =", _cov.shape)
print("initial weight shape =", _w.shape)
initial mu shape = (3, 2)
initial cov shape = (3, 2, 2)
initial weight shape = (3,)
Initialization tricks
Initializing $w$
# weight of each Gaussian component: which distribution should count more?
_w = np.repeat(1.0/N_gaussian,N_gaussian)
# initial w
_w
array([ 0.33333333, 0.33333333, 0.33333333])
Initializing $\mu$
# starting values
_mu = np.empty([N_gaussian, N_var])
for g in range(N_gaussian):
    # random point scaled into the data range
    _mu[g, :] = np.random.random(N_var) * np.max(x, axis=0)
# initial mu
_mu
array([[ 6.27488988, 1.22521044],
[ 5.6468563 , 2.90363138],
[ 1.7517176 , 1.5095235 ]])
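A common alternative, not used in this note, is to initialize each mean at a randomly chosen data point, which guarantees the starting means lie inside the data cloud. A minimal sketch (the `_mu_alt` name is my own):

# hypothetical alternative: pick N_gaussian distinct observations as initial means
idx = np.random.choice(N, N_gaussian, replace=False)
_mu_alt = x.iloc[idx, :].values
print(_mu_alt)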
Initializing $\Sigma$
def PSM_init(n):
    # random symmetric matrix; adding I shifts the eigenvalues to make it positive definite
    M = np.random.rand(n, n)
    M = (M + M.T) / 2
    M = M + np.eye(n)
    return M
_cov = np.array([PSM_init(N_var) for i in range(0, N_gaussian)])
# 3 Gaussians, each with a 2x2 covariance
_cov
array([[[ 1.4940488 , 0.80513997],
[ 0.80513997, 1.06013646]],
[[ 1.08120598, 0.43578471],
[ 0.43578471, 1.02687712]],
[[ 1.52704504, 0.62869138],
[ 0.62869138, 1.30528722]]])
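A quick sanity check (my addition, not in the original): each initial covariance should be positive definite, which the +I shift guarantees for n = 2; for larger n a bigger shift may be needed.

# verify each initial covariance matrix is positive definite
for i, C in enumerate(_cov):
    eig = np.linalg.eigvalsh(C)
    print("component", i, ": eigenvalues =", eig, ", PD =", bool(np.all(eig > 0)))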
EM Algorithm
#EM Algorithm: Expectation
def Estep(x, w, mu, cov):
    # responsibilities: (observations x Gaussians) = (9, 3)
    # stores normalized wj*p(xi)
    w_x = np.zeros((x.shape[0], mu.shape[0]))
    # loop over the observations
    for i in range(x.shape[0]):
        wj_xi = np.zeros(mu.shape[0]) # one entry per Gaussian
        for g in range(mu.shape[0]):
            # wj*p(xi)
            wj_xi[g] = w[g] * sc.stats.multivariate_normal.pdf(x.iloc[i, :], mu[g, :], cov[g, :, :])
        for g in range(wj_xi.shape[0]):
            # normalize so each row sums to 1
            w_x[i, g] = wj_xi[g] / np.sum(wj_xi)
    return w_x
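The double loop is easy to follow but slow. A vectorized sketch of the same E-step (my rewrite, assuming `x` is the DataFrame used above) replaces the inner loops with one pdf call per component:

# vectorized E-step: same responsibilities as Estep above
def Estep_vectorized(x, w, mu, cov):
    dens = np.column_stack([
        w[g] * sc.stats.multivariate_normal.pdf(x, mu[g, :], cov[g, :, :])
        for g in range(mu.shape[0])
    ])  # (N, N_gaussian): w_g * p(x_i | component g)
    return dens / dens.sum(axis=1, keepdims=True)  # normalize each row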
w_x = Estep(x, _w, _mu, _cov)
pd.DataFrame(w_x,columns=['gaussian01','gaussian02','gaussian03'])
| | gaussian01 | gaussian02 | gaussian03 |
|---|---|---|---|
| 0 | 0.208985 | 0.790635 | 0.000380 |
| 1 | 0.001545 | 0.906164 | 0.092292 |
| 2 | 0.305558 | 0.693785 | 0.000657 |
| 3 | 0.093164 | 0.906468 | 0.000368 |
| 4 | 0.000493 | 0.897656 | 0.101851 |
| 5 | 0.003199 | 0.956208 | 0.040594 |
| 6 | 0.432928 | 0.566540 | 0.000532 |
| 7 | 0.137516 | 0.861951 | 0.000533 |
| 8 | 0.146879 | 0.852461 | 0.000660 |
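Each row is a probability distribution over components, so it should sum to one; a quick check:

# responsibilities are normalized per observation
print(np.allclose(w_x.sum(axis=1), 1.0))  # True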
#EM Algorithm: Maximization
def Mstep(w_x, x, mu, cov):
    N = x.shape[0]
    # weight update: average responsibility per component
    new_w = np.sum(w_x, axis=0) / N
    # per-observation contributions to the means: (N, N_gaussian, N_var)
    df_mu = np.zeros((N, mu.shape[0], mu.shape[1]))
    # per-observation contributions to the covariances: (N, N_gaussian, n x n)
    df_cov = np.zeros((N, cov.shape[0], cov.shape[1], cov.shape[2]))
    # containers for the updated mu and Sigma
    new_mu = np.zeros((mu.shape[0], mu.shape[1]))
    new_cov = np.zeros((cov.shape[0], cov.shape[1], cov.shape[2]))
    # mean
    for i in range(N):
        # (N_gaussian x 1) x (1 x N_var) = (N_gaussian, N_var)
        df_mu[i, :, :] = np.outer(w_x[i, :], x.iloc[i, :])
    for g in range(mu.shape[0]):
        new_mu[g, :] = (1 / np.sum(w_x, axis=0)[g]) * np.sum(df_mu, axis=0)[g, :]
    # covariance
    for i in range(N):
        for g in range(mu.shape[0]):
            df_cov[i, g, :, :] = w_x[i, g] * np.outer((x.iloc[i, :] - new_mu[g, :]),
                                                      (x.iloc[i, :] - new_mu[g, :]))
    for g in range(mu.shape[0]):
        new_cov[g, :, :] = (1 / np.sum(w_x, axis=0)[g]) * np.sum(df_cov, axis=0)[g, :, :]
    return new_w, new_mu, new_cov
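As with the E-step, the M-step can be vectorized. A sketch of the same updates (my rewrite; `Mstep_vectorized` is my own name, and it drops the unused `mu`/`cov` arguments):

# vectorized M-step: same weight, mean, and covariance updates as Mstep above
def Mstep_vectorized(w_x, x):
    Xv = x.values                       # (N, d)
    Nk = w_x.sum(axis=0)                # effective number of points per component
    new_w = Nk / Xv.shape[0]
    new_mu = (w_x.T @ Xv) / Nk[:, None]
    new_cov = np.zeros((w_x.shape[1], Xv.shape[1], Xv.shape[1]))
    for g in range(w_x.shape[1]):
        d = Xv - new_mu[g]              # center the data at the component mean
        new_cov[g] = (w_x[:, g, None] * d).T @ d / Nk[g]
    return new_w, new_mu, new_cov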
new_w, new_mu, new_cov = Mstep(w_x, x, _mu, _cov)
print('new_w=',new_w)
print('new_mu=',new_mu)
print('new_cov=',new_cov)
new_w= [ 0.14780731 0.82576305 0.02642964]
new_mu= [[ 6.40088899 2.65210406]
[ 5.82262742 2.9878173 ]
[ 4.67539539 3.22421922]]
new_cov= [[[ 0.02954544 0.03689953]
[ 0.03689953 0.1049337 ]]
[[ 0.76061528 -0.11639223]
[-0.11639223 0.09556273]]
[[ 0.05442243 -0.01663954]
[-0.01663954 0.02579161]]]
Iterating the EM steps
# initialization
iteration = 100
N_gaussian = 2
_w = np.repeat(1.0/N_gaussian, N_gaussian)
_cov = np.array([PSM_init(N_var)/3 for i in range(0, N_gaussian)])
_mu = np.empty([N_gaussian, N_var])
for g in range(N_gaussian):
    _mu[g, :] = np.random.random(N_var) * np.max(x, axis=0)
for k in range(iteration):
    # E step: compute the responsibilities w*p(x) under the current parameters
    w_x = Estep(x, _w, _mu, _cov)
    # M step: re-estimate the weights, means, and covariances
    new_w, new_mu, new_cov = Mstep(w_x, x, _mu, _cov)
    # carry the updated parameters into the next iteration
    _w, _mu, _cov = new_w, new_mu, new_cov
    # compute the log-likelihood
    logL_i = np.zeros((x.shape[0]))
    for i in range(x.shape[0]):
        L = np.zeros(N_gaussian)
        for g in range(N_gaussian):
            L[g] = new_w[g] * sc.stats.multivariate_normal.pdf(x.iloc[i, :], new_mu[g, :],
                                                               new_cov[g, :, :])
        logL_i[i] = np.log(np.sum(L))
    logL = np.sum(logL_i)
    if (k % 20 == 0): print(k, 'th logL=', logL)
0 th logL= -13.1787450112
20 th logL= -1.14361054866
40 th logL= -1.14361054866
60 th logL= -1.14361054866
80 th logL= -1.14361054866
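The log-likelihood plateaus well before iteration 100, so a fixed iteration count wastes work. A hedged sketch of the same loop with an early-stopping tolerance (the `fit_mog` wrapper is my own, built on the Estep/Mstep defined above):

def fit_mog(x, w, mu, cov, max_iter=100, tol=1e-8):
    # run EM until the log-likelihood gain drops below tol
    prev_logL = -np.inf
    for k in range(max_iter):
        w_x = Estep(x, w, mu, cov)
        w, mu, cov = Mstep(w_x, x, mu, cov)
        # log-likelihood under the updated parameters
        logL = sum(np.log(sum(w[g] * sc.stats.multivariate_normal.pdf(
            x.iloc[i, :], mu[g, :], cov[g, :, :]) for g in range(mu.shape[0])))
            for i in range(x.shape[0]))
        if np.abs(logL - prev_logL) < tol:
            break
        prev_logL = logL
    return w, mu, cov, logL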
print ("mu=", new_mu)
print("sigma=",new_cov)
print("weight=",new_w)
mu= [[ 6.4532756 2.81312867]
[ 4.77615462 3.19581029]]
sigma= [[[ 0.04073198 0.03993574]
[ 0.03993574 0.1076881 ]]
[[ 0.15831454 -0.01789133]
[-0.01789133 0.02218995]]]
weight= [ 0.65685372 0.34314628]
# visualization
plotsize = 8
sizeMean = 30
text_size = 16
axis_font = {'fontname': 'Arial', 'size': '24'}
Title_font = {'fontname': 'Arial', 'size': '28'}
color = ['b', 'r', 'g', 'c', 'm', 'y', 'k']
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(X.iloc[np.array(data.label=='normal'), 0], X.iloc[np.array(data.label=='normal'), 1], 'k.', markersize=sizeMean, color=color[0])
ax.plot(X.iloc[np.array(data.label=='abnormal'), 0], X.iloc[np.array(data.label=='abnormal'), 1], 'k.', markersize=sizeMean,color=color[1])
for i in range(N_gaussian):
    # draw each component's standard deviation as an ellipse
    # center of the ellipse
    pos = [new_mu[i, 0], new_mu[i, 1]]
    # the orientation comes from the eigenvectors of the covariance matrix
    w, ve = np.linalg.eig(new_cov[i, 0:2, 0:2])
    # sort so the largest eigenvalue comes first
    order = w.argsort()[::-1]
    w = w[order]
    ve = ve[:, order]
    # angle towards the eigenvector with the largest eigenvalue
    thetaO = np.degrees(np.arctan(ve[1, 0] / ve[0, 0]))
    # width and height of the ellipse from the eigenvalues (i.e., the axis lengths)
    width, height = 2 * np.sqrt(w)
    # make the ellipse
    ellip = Ellipse(xy=pos, width=width, height=height, angle=thetaO)
    ellip.set_alpha(0.5)
    ellip.set_facecolor(color[i])
    ax.add_artist(ellip)
for label in (ax.get_xticklabels() + ax.get_yticklabels()):
    label.set_fontname('Arial')
    label.set_fontsize(text_size)
ax.spines['right'].set_visible(False)
ax.spines['top'].set_visible(False)
ax.xaxis.set_ticks_position('bottom')
ax.yaxis.set_ticks_position('left')
ax.set_ylabel('X2', **axis_font)
ax.set_xlabel('X1', **axis_font)
ax.set_title('Novelty example', y=1.08, **Title_font)
ax.figure.set_size_inches(plotsize, plotsize)
plt.show()
score = list()
for i in range(10):
    # mixture likelihood p(x|lambda) of observation i
    novelty = [new_w[g] * sc.stats.multivariate_normal.pdf(
        X.iloc[i, :], new_mu[g, :], new_cov[g, :, :]) for g in range(N_gaussian)]
    # novelty score (before the log): reciprocal of the likelihood
    score.append(1/np.sum(novelty))
pd.concat([X,data['label'],pd.DataFrame(np.log(score), columns=['Novelty score'])],axis=1)
The novelty score is obtained by taking the log of the reciprocal of the likelihood $p(x|\lambda)$ under the fitted mixture:
\[\text{score}(x) = \log\frac{1}{p(x|\lambda)} = -\log p(x|\lambda), \qquad p(x|\lambda) = \sum\limits_{m=1}^{M}w_m\, g(x|\mu_m,\Sigma_m)\]
| | x1 | x2 | label | Novelty score |
|---|---|---|---|---|
| 0 | 5.4 | 3.9 | abnormal | 16.141871 |
| 1 | 6.6 | 2.9 | normal | -0.394478 |
| 2 | 4.6 | 3.1 | normal | 0.464040 |
| 3 | 6.3 | 2.5 | normal | -0.200547 |
| 4 | 6.7 | 3.3 | normal | 0.502637 |
| 5 | 4.6 | 3.4 | normal | 0.973989 |
| 6 | 4.9 | 3.1 | normal | 0.248448 |
| 7 | 6.3 | 2.3 | normal | 0.566527 |
| 8 | 6.5 | 3.0 | normal | -0.510498 |
| 9 | 6.4 | 2.9 | normal | -0.506506 |
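The abnormal point stands out with a far larger score. To turn scores into labels one would pick a cut-off; a hedged sketch (the threshold value here is hypothetical, and in practice would be tuned on validation data):

# flag observations whose novelty score exceeds a hypothetical cut-off
threshold = 3.0  # assumption: chosen by eye from the table above
pred = np.where(np.log(score) > threshold, 'abnormal', 'normal')
print(pred)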