(1)以下matlab代码实现了高斯混合模型:
function [Alpha, Mu, Sigma] = GMM_EM(Data, Alpha0, Mu0, Sigma0)
%% EM
迭代停止条件
loglik_threshold = 1e-10;
%%
初始化参数
[dim, N] = size(Data);
M = size(Mu0,2);
loglik_old = -realmax;
nbStep = 0;
Mu = Mu0;
Sigma = Sigma0;
Alpha = Alpha0;
Epsilon = 0.0001;
while (nbStep < 1200)
nbStep = nbStep+1;
%% E-
步骤
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for i=1:M
% PDF of each point
Pxi(:,i) = GaussPDF(Data, Mu(:,i), Sigma(:,:,i));
end
%
计算后验概率
beta(i|x)
Pix_tmp = repmat(Alpha,[N 1]).*Pxi;
Pix = Pix_tmp ./ (repmat(sum(Pix_tmp,2),[1 M])+realmin);
Beta = sum(Pix);
%% M-
步骤
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for i=1:M
%
更新权值
Alpha(i) = Beta(i) / N;
%
更新均值
Mu(:,i) = Data*Pix(:,i) / Beta(i);
%
更新方差
Data_tmp1 = Data – repmat(Mu(:,i),1,N);
Sigma(:,:,i) = (repmat(Pix(:,i)’,dim, 1) .* Data_tmp1*Data_tmp1′) / Beta(i);
%% Add a tiny variance to avoid numerical instability
Sigma(:,:,i) = Sigma(:,:,i) + 1E-5.*diag(ones(dim,1));
end
%
%% Stopping criterion 1 %%%%%%%%%%%%%%%%%%%%
%
for i=1:M
%Compute the new probability p(x|i)
%
Pxi(:,i) = GaussPDF(Data, Mu(:,i), Sigma(i));
%
end
%Compute the log likelihood
%
F = Pxi*Alpha’;
%
F(find(F<realmin)) = realmin;
%
loglik = mean(log(F));
%Stop the process depending on the increase of the log likelihood
%
if abs((loglik/loglik_old)-1) < loglik_threshold
%
break;
%
end
%
loglik_old = loglik;
%% Stopping criterion 2 %%%%%%%%%%%%%%%%%%%%
v = [sum(abs(Mu – Mu0)), abs(Alpha – Alpha0)];
s = abs(Sigma-Sigma0);
v2 = 0;
for i=1:M
v2 = v2 + det(s(:,:,i));
end
if ((sum(v) + v2) < Epsilon)
break;
end
Mu0 = Mu;
Sigma0 = Sigma;
Alpha0 = Alpha;
end
nbStep
(2)以下代码根据高斯分布函数计算每组数据的概率密度,被GMM_EM函数所调用
function prob = GaussPDF(Data, Mu, Sigma)
%
%
根据高斯分布函数计算每组数据的概率密度
Probability Density Function (PDF)
%
输入
—————————————————————–
%
o Data:
D x N
,
N
个
D
维数据
%
o Mu:
D x 1
,
M
个
Gauss
模型的中心初始值
%
o Sigma: M x M
,每个
Gauss
模型的方差(假设每个方差矩阵都是对角阵,
%
即一个数和单位矩阵的乘积)
% Outputs —————————————————————-
%
o prob:
1 x N array representing the probabilities for the
%
N datapoints.
[dim,N] = size(Data);
Data = Data’ – repmat(Mu’,N,1);
prob = sum((Data*inv(Sigma)).*Data, 2);
prob = exp(-0.5*prob) / sqrt((2*pi)^dim * (abs(det(Sigma))+realmin));
(3)以下是演示代码demo1.m
%
高斯混合模型参数估计示例
(基于
EM
算法)
%
2010
年
11
月
9
日
[data, mu, var, weight] = CreateSample(M, dim, N);
//
生成测试数据
[Alpha, Mu, Sigma] = GMM_EM(Data, Priors, Mu, Sigma)
(4)以下是测试数据生成函数,为demo1.m所调用:
function [data, mu, var, weight] = CreateSample(M, dim, N)
%
生成实验样本集,由
M
组正态分布的数据构成
% % GMM
模型的原理就是仅根据数据估计参数:每组正态分布的均值、方差,
%
以及每个正态分布函数在
GMM
的权重
alpha
。
%
在本函数中,这些参数均为随机生成,
%
%
输入
%
M
:
高斯函数个数
%
dim
:
数据维数
%
N
:
数据总个数
%
返回值
%
data : dim-by-N,
每列为一个数据
%
miu
: dim-by-M,
每组样本的均值,由本函数随机生成
%
var
: 1-by-M,
均方差,由本函数随机生成
%
weight: 1-by-M,
每组的权值,由本函数随机生成
% —————————————————-
%
%
随机生成不同组的方差、均值及权值
weight = rand(1,M);
weight = weight / norm(weight, 1);
%
归一化,保证总合为
1
var = double(mod(int16(rand(1,M)*100),10) + 1);
%
均方差,取
1~10
之间,采用对角矩阵
mu = double(round(randn(dim,M)*100));
%
均值,可以有负数
for(i = 1: M)
if (i ~= M)
n(i) = floor(N*weight(i));
else
n(i) = N – sum(n);
end
end
%
以标准高斯分布生成样本值,并平移到各组相应均值和方差
start = 0;
for (i=1:M)
X = randn(dim, n(i));
X = X.* var(i) + repmat(mu(:,i),1,n(i));
data(:,(start+1):start+n(i)) = X;
start = start + n(i);
end
save(‘d:\data.mat’, ‘data’);
出处:
http://wolfsky2002.blog.163.com/blog/static/10343152010112610221540/