高斯混合模型(GMM)

原理

有空再更新吧

算法

在这里插入图片描述

实验

生成数据

import numpy as np
import matplotlib.pyplot as plt


def gen_clusters():
    """Sample three 2-D Gaussian clusters (100 points each) and stack them.

    Returns an (300, 2) array rounded to 4 decimal places.
    """
    # (mean, covariance) for each of the three clusters, drawn in order so
    # the RNG stream matches the straight-line version exactly.
    specs = [
        ([0, 0], [[1, 0], [0, 10]]),
        ([10, 10], [[10, 0], [0, 1]]),
        ([10, 0], [[3, 0], [0, 4]]),
    ]
    samples = [np.random.multivariate_normal(mean, cov, 100)
               for mean, cov in specs]
    return np.round(np.vstack(samples), 4)


def show_scatter(data):
    """Draw a labelled scatter plot of the (n, 2) point array *data*."""
    xs, ys = data.T
    plt.scatter(xs, ys)
    plt.axis()
    plt.title("scatter")
    plt.xlabel("x")
    plt.ylabel("y")
    
# Generate the synthetic three-cluster dataset and visualise it.
data = gen_clusters()
show_scatter(data)

在这里插入图片描述

高斯混合模型

class GMM:
    """Gaussian mixture model fitted with the EM algorithm."""

    def __init__(self, k, dim):
        """k: number of mixture components; dim: dimensionality of the data."""
        self.k = k
        self.dim = dim
        # Component means, shape (k, dim); re-initialised from data in fit().
        self.mus = np.random.rand(k, dim)
        # Component covariances, one (dim, dim) matrix per component.
        self.sigmas = [np.eye(dim) for _ in range(k)]
        alphas = np.random.rand(k)
        # Normalise so the mixture weights sum to 1; the original left them
        # unnormalised, making multi_gaussian_prob invalid before fit().
        self.alphas = alphas / alphas.sum()

    def gaussian_prob(self, x, mu, sigma):
        """Multivariate normal density of each row of x under N(mu, sigma).

        x may be a single point (1-D) or a batch (2-D); returns one density
        per row as a 1-D array.
        """
        if x.ndim == 1:
            x = x[np.newaxis, :]
        dim = mu.shape[-1]
        denom = np.sqrt((2 * np.pi) ** dim * np.abs(np.linalg.det(sigma)))
        inv = np.linalg.inv(sigma)  # hoisted: invariant across rows
        dists = x - mu
        num = np.asarray([np.exp(-d @ inv @ d.T / 2) for d in dists])
        return num / denom

    def multi_gaussian_prob(self, x):
        """Mixture density: sum over components of alpha_i * N(x; mu_i, sigma_i)."""
        assert self.dim == x.shape[-1]
        prob = 0
        for mu, sigma, alpha in zip(self.mus, self.sigmas, self.alphas):
            prob += alpha * self.gaussian_prob(x, mu, sigma)
        return prob

    def fit(self, X, steps=10):
        """Fit the mixture to data X (shape (N, dim)) via `steps` EM iterations.

        Returns (mus, sigmas, alphas, z) where z[j, i] is the posterior
        responsibility of component i for sample j.
        """
        N = len(X)
        K = self.k

        # Initialise means from distinct data points: sampling without
        # replacement avoids duplicate initial centres (the original sampled
        # with replacement). Cast to float so the in-place M-step updates
        # keep full precision even when X is an integer array.
        self.mus = np.asarray(X, dtype=float)[np.random.choice(N, K, replace=K > N)]

        z = np.zeros((N, K))

        for _ in range(steps):

            # E-step: posterior responsibility of each component per sample.
            for j, x in enumerate(X):
                for i in range(K):
                    # gaussian_prob returns a length-1 array for a single
                    # point; extract the scalar — assigning a 1-element array
                    # to z[j][i] is an error on modern NumPy.
                    z[j, i] = float(
                        self.alphas[i]
                        * self.gaussian_prob(x, self.mus[i], self.sigmas[i])
                    )
                z[j] /= np.sum(z[j])

            # M-step
            for i in range(K):
                weight = np.sum(z[:, i])  # effective sample count of component i

                # Update means.
                self.mus[i] = np.dot(z[:, i].T, X) / weight

                # Update covariances.
                cov = np.zeros_like(self.sigmas[0])
                for j, x in enumerate(X):
                    d = (x - self.mus[i]).reshape(self.dim, 1)
                    cov += z[j, i] * d @ d.T
                cov /= weight
                self.sigmas[i] = cov

                # Update mixture weights.
                self.alphas[i] = weight / N

        return self.mus, self.sigmas, self.alphas, z
# Fit a 3-component, 2-D mixture to the synthetic data.
model = GMM(k=3,dim=2)
mus, sigmas, alphas, z = model.fit(data)

利用高斯混合模型聚类

# Hard-assign each point to the component with the highest responsibility,
# then plot the points coloured by cluster with the fitted means in red.
labels = np.argmax(z, axis=1)

plt.scatter(data[:,0], data[:,1],c=labels, lw=1)
plt.plot(*mus.T, 'ro')

在这里插入图片描述
这里只迭代了 10 次,所以效果还不是最好。

画出概率密度函数

from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Evaluate the fitted mixture density on a regular 2-D grid.
X = np.arange(-5, 20, 0.2)
Y = np.arange(-10, 15, 0.2)
X, Y = np.meshgrid(X, Y)

points = np.hstack((X.reshape((-1,1)), Y.reshape((-1,1))))

Z = model.multi_gaussian_prob(points).reshape(X.shape)

fig = plt.figure(figsize=(10,10))
# Axes3D(fig) is deprecated — and since matplotlib 3.6 it no longer attaches
# the axes to the figure, yielding a blank plot. Use add_subplot instead.
ax = fig.add_subplot(projection='3d')

ax.view_init(elev=90., azim=-90)  # adjust the 3-D viewing angle (top-down)

ax.plot_surface(X, Y, Z, rstride=1, cstride=1, cmap='rainbow')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

在这里插入图片描述

  • 4
    点赞
  • 1
    评论
  • 3
    收藏
  • 一键三连
    一键三连
  • 扫一扫,分享海报

打赏
文章很值,打赏犒劳作者一下
相关推荐
©️2020 CSDN 皮肤主题: 程序猿惹谁了 设计师:白松林 返回首页

打赏

颹蕭蕭

白嫖?

¥2 ¥4 ¥6 ¥10 ¥20
输入1-500的整数
余额支付 (余额:-- )
扫码支付
扫码支付:¥2
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者