Hi, i would to know, if someone have ever write the code to estimate the
parameter (mixing proportion, mean, a var/cov matrix) of a mixture of two
multivariate normal distribution. I wrote it and it works (it could find
mean and mixing proportion, if I fix the var/cov matrix), while if I fix
anything, it doesn't work. My suspect is that when the algorithm iterates
the var/cov matrix, something wrong happens, but i don't know how solve this
problem.
I enclose my script, if someone would give it a look, I would appreciate so
much.
thank you
daniele
#########################
#Start Script
#########################
library(mvtnorm)
libray(scatterplot3d)
library(MASS)
n=100
m1=c(5,-5)
m2=c(-3,3)
s1=matrix(c(2,1,1,3), 2,2)
s2=matrix(c(4,1,1,6), 2,2)
alpha=0.3
c1 <- mvrnorm(round(n*alpha),m1,s1)
c2 <- mvrnorm(round(n*(1-alpha)),m2,s2)
allval <- rbind(c1,c2)
x <- allval[sample(n,n),]
mixm<-
function(x,m1,m2,s1,s2, alpha)
{
f1<-dmvnorm(x, m1, s1, log=FALSE)
f2<-dmvnorm(x, m2, s2, log=FALSE)
f=alpha*f1+(1-alpha)*f2
f
}
plot(x)
den<-mixm(x,m1,m2,s1,s2,alpha)
scatterplot3d(x[,1],x[,2],den, highlight.3d=TRUE, col.axis="blue",
col.grid="lightblue",angle=120, pch=20)
em2mn<- function(y)
{
n<-length(y[,1])
p<-matrix(0,n,1)
f1<-matrix(0,n,1)
f2<-matrix(0,n,1)
tau<-matrix(0,n,2)
eps<-0.0001
mu01<-c(0,0)
mu02<-c(0,0)
sd01<-matrix(0,2,2)
sd02<-matrix(0,2,2)
cov1<-matrix(0,2,2)
cov2<-matrix(0,2,2)
# 1 inizializzare i valori
alpha0= runif(1,0,1)
for (j in 1:2) {
mu01[j] <- runif(1,min=quantile(y[,j], probs =0.25),
max=quantile(y[,j], probs =0.75))
mu02[j] <- runif(1,min=quantile(y[,j], probs =0.25),
max=quantile(y[,j], probs =0.75))
}
sd01<- var(y[1:round(n/2),])
sd02<- var(y[round(n/2):n,])
#sd01<-s1
#sd02<-s2
#prima iterazione
iter<-0
f1<-dmvnorm(y, mu01, sd01, log=FALSE)
f2<-dmvnorm(y, mu02, sd02, log=FALSE)
p=alpha0*f1+(1-alpha0)*f2
scatterplot3d(y[,1], y[,2], p , highlight.3d=TRUE, col.axis="blue",
col.grid="lightblue",main="val iniz mistura normali
multivariata",
angle=120, pch=20)
#verosimiglianza iniziale
l0<-sum(log(p))
l1<-l0
alpha<-alpha0
mu1<-mu01
mu2<-mu02
sd1<-sd01
sd2<-sd02
for (iter in 1:itermax)
{
#passo E
for (i in 1:n) {
tau[i,1]<-(alpha*f1[i])/p[i]
tau[i,2]<-((1-alpha)*f2[i])/p[i]
}
#passo M
alpha= mean(tau[,1])
mu1=colSums(tau[,1]*y)/sum(tau[,1])
mu2=colSums(tau[,2]*y)/sum(tau[,2])
ycen1<-(y-mu1)
ycen2<-(y-mu2)
cov1<-matrix(0,2,2)
cov2<-matrix(0,2,2)
for (i in 1:n){
cov1<-cov1+ (tau[i,1]*(ycen1[i,])%*%t(ycen1[i,]))
cov2<-cov2+ (tau[i,2]*(ycen2[i,])%*%t(ycen2[i,]))
}
# w1<-sqrt(tau[,1])
# w2<-sqrt(tau[,2])
# ywei1<-w1*ycen1
# ywei2<-w2*ycen2
sd1<-cov1/sum(tau[,1])
sd2<-cov2/sum(tau[,2])
f1<-dmvnorm(y,mu1,sd1,log=FALSE)
f2<-dmvnorm(y,mu2,sd2,log=FALSE)
p<-alpha*f1+(1-alpha)*f2
L<-sum(log(p))
cat(iter,L,"\t",alpha,"\t",mu1,"\t",mu2,"\n")
if (abs(L1-L)<eps) break
L1<-L
}
denfin<-mixm(x,mu1,mu2,sd1,sd2,alpha)
scatterplot3d(x[,1],x[,2],denfin, highlight.3d=TRUE, col.axis="blue",
col.grid="lightblue",main="densità valori
finali",angle=120, pch=20)
return(list(alpha0=alpha0,alpha=alpha,mu1=mu1,mu2=mu2,sd1=sd1,sd2=sd2))
}
em2mn(x)
##############################
end script
##############################
--
Dr. Daniele Riggi, PhD student
University of Milano-Bicocca
Department of Statistics
Building U7, Via Bicocca degli Arcimboldi, 8
20126 Milano, Italy
cell. +39 328 3380690
mailto: daniele.riggi@gmail.com
[[alternative HTML version deleted]]