Jie
2012-Jul-03 17:56 UTC
[R] need help EM algorithm to find MLE of coeff in mixed effects model
Dear All, have a general question about coefficients estimation of the mixed model. I simulated a very basic model: Y|b=X*\beta+Z*b +\sigma^2* diag(ni); b follows N(0,\psi) #i.e. bivariate normal where b is the latent variable, Z and X are ni*2 design matrices, sigma is the error variance, Y are longitudinal data, i.e. there are ni measurements for object i. Parameters are \beta, \sigma, \psi; call them \theta. I wrote a regular EM, the M step is to maximize the log(f(Y,b;\theta)) by taking first order derivatives, setting to 0 and solving the equation. The E step involves the evaluation of E step, using Gauss Hermite approximation. (10 points) All are simulated data. X and Z are naive like cbind(rep(1,m),1:m) After 200 iterations, the estimated \beta converges to the true value while \sigma and \psi do not. Even after one iteration, the \sigma goes up from about 10^0 to about 10^1. I am confused since the \hat{\beta} requires \sigma and \psi from previous iteration. If something wrong then all estimations should be incorrect... Another question is that I calculated the logf(Y;\theta) to see if it increases after updating \theta. Seems decreasing..... I thought the X and Z are linearly dependent would cause some issue but I also changed the X and Z to some random numbers from normal distribution. I also tried ECM, which converges fast but sigma and psi are not close to the desired values. Is this due to the limitation of some methods that I used? Can any one give some help? I am stuck for a week. I can send the code to you. First time to raise a question here. Not sure if it is proper to post all code. Below is the same as the zip file. ########################################################################## # the main R script n=100 beta=c(-0.5,1) vvar=2 #sigma^2=2 psi=matrix(c(1,0.2,0.2,1),2,2) b.m.true=mvrnorm(n=n,mu=c(0,0),Sigma=psi) #100*2 matrix, each row is the b_i Xi=cbind(rnorm(7,mean=2,sd=0.5),log(2:8)) #Xi=cbind(rep(1,7),1:7) y.m=matrix(NA,nrow=n,ncol=nrow(Xi)) #100*7, each row is a y_i Zi=Xi b.list=as.list(data.frame(t(b.m.true))) psi.old=matrix(c(0.5,0.4,0.4,0.5),2,2) #starting psi, beta and var, not exactly the same as the true value beta.old=c(-0.3,0.7) var.old=1.7 gausspar=gauss.quad(10,kind="hermite",alpha=0,beta=0) data.list.wob=list() for (i in 1:n) { data.list.wob[[i]]=list(Xi=Xi,yi=y.m[i,],Zi=Zi) } #compute true loglikelihood and initial loglikelihood truelog=0 for (i in 1:length(data.list.wob)) { truelog=truelog+loglike(data.list.wob[[i]],vvar,beta,psi) } truelog loglikeseq=c() loglikeseq[1]=sum(sapply(data.list.wob,loglike)) ECM=F for (m in 1:300) { Sig.hat=Zi%*%psi.old%*%t(Zi)+var.old*diag(nrow(Zi)) W.hat=psi.old-psi.old%*%t(Zi)%*%solve(Sig.hat)%*%Zi%*%psi.old Sigb=psi.old-psi.old%*%t(Zi)%*%solve(Sig.hat)%*%Zi%*%psi.old det(Sigb)^(-0.5) Y.minus.X.beta=t(t(y.m)-c(Xi%*%beta.old)) miu.m=t(apply(Y.minus.X.beta,MARGIN=1,function(s,B=psi.old%*%t(Zi)%*%solve(Sig.hat)) { B%*%s } )) ### each row is the miu_i tmp1=permutations(length(gausspar$nodes),2,repeats.allowed=T) tmp2=c(tmp1) a.mat=matrix(gausspar$nodes[tmp2],nrow=nrow(tmp1)) #a1,a1 #a1,a2 #... #a10,a9 #a10,a10 a.mat.list=as.list(data.frame(t(a.mat))) tmp1=permutations(length(gausspar$weights),2,repeats.allowed=T) tmp2=c(tmp1) weights.mat=matrix(gausspar$weights[tmp2],nrow=nrow(tmp1)) #w1,w1 #w1,w2 #... #w10,w9 #w10,w10 L=chol(solve(W.hat)) LL=sqrt(2)*solve(L) halfb=t(LL%*%t(a.mat)) # each page of b.array is all values of bi_k and bi_j for b_i b.list=list() for (i in 1:n) { b.list[[i]]=t(t(halfb)+miu.m[i,]) } #generate a list, each page contains Xi,yi,Zi, data.list=list() for (i in 1:n) { data.list[[i]]=list(Xi=Xi,yi=y.m[i,],Zi=Zi,b=b.list[[i]]) } #update sigma^2 t1=proc.time() tempaa=c() tempbb=c() for (j in 1:n) { #tempaa[j]=Eh4new(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) tempbb[j]=Eh4newv2(datai=data.list[[j]],weights.m=weights.mat,beta=beta.old) } var.new=mean(tempbb) if (ECM==T){var.old=var.new} sumXiXi=matrix(rowSums(sapply(data.list,function(s){t(s$Xi)%*%(s$Xi)})),ncol=ncol(Xi)) tempbb=c() for (j in 1:n) { tempbb=cbind(tempbb,Eh2new(data.list[[j]],weights.m=weights.mat)) } beta.new=solve(sumXiXi)%*%rowSums(tempbb) if (ECM==T){beta.old=beta.new} #update psi tempcc=array(NA,c(2,2,n)) sumtempcc=matrix(0,2,2) for (j in 1:n) { tempcc[,,j]=Eh1new(data.list[[j]],weights.m=weights.mat) sumtempcc=sumtempcc+tempcc[,,j] } psi.new=sumtempcc/n #stop if(sum(abs(beta.old-beta.new))+sum(abs(psi.old-psi.new))+sum(abs(var.old-var.new))<0.01) {print("converge, stop");break;} #update var.old=var.new psi.old=psi.new beta.old=beta.new loglikeseq[m+1]=sum(sapply(data.list,loglike)) } # end of M loop ######################################################################## #function to calculate loglikelihood loglike=function(datai=data.list[[i]],vvar=var.old,beta=beta.old,psi=psi.old) { temp1=-0.5*log(det(vvar*diag(nrow(datai$Zi))+datai$Zi%*%psi%*%t(datai$Zi))) temp2=-0.5*t(datai$yi-datai$Xi%*%beta)%*%solve(vvar*diag(nrow(datai$Zi))+datai$Zi%*%psi%*%t(datai$Zi))%*%(datai$yi-datai$Xi%*%beta) return(temp1+temp2) } ####################################################################### #functions to evaluate the conditional expection, saved as Efun v2.R #Eh1new=E(bibi') #Eh2new=E(X'(y-Zbi)) #Eh3new=E(bi'Z'Zbi) #Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) #Eh5new=E(Xibeta-yi)'Zibi #Eh6new=E(bi) Eh1new=function(datai=data.list[[i]], weights.m=weights.mat) { #one way #temp=matrix(0,2,2) #for (i in 1:nrow(bi)) #{ #temp=temp+bi[i,]%*%t(bi[i,])*weights.m[i,1]*weights.m[i,2] #} #print(temp) #the other way bi=datai$b tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2) #quadratic b, so need sqrt #deno=sum(weights.m[,1]*weights.m[,2]) return(t(tempb)%*%tempb/pi) } # end of Eh1 #Eh2new=E(X'(y-Zbi)) Eh2new=function(datai=data.list[[i]], weights.m=weights.mat) { #one way #temp=rep(0,2) #for (j in 1:nrow(bi)) #{ #temp=temp+c(t(datai$Xi)%*%(datai$yi-datai$Zi%*%bi[j,])*weights.m[j,1]*weights.m[j,2]) #} #deno=sum(weights.m[,1]*weights.m[,2]) #print(temp/deno) #another way bi=datai$b tempb=bi*rep(weights.m[,1]*weights.m[,2],2) tt=t(datai$Xi)%*%datai$yi-t(datai$Xi)%*%datai$Zi%*%colSums(tempb)/pi return(c(tt)) } # end of Eh2 #Eh3new=E(b'Z'Zbi) Eh3new=function(datai=data.list[[i]], weights.m=weights.mat) { #one way #deno=sum(weights.m[,1]*weights.m[,2]) #tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2) #quadratic b, so need sqrt #sum(apply(datai$Zi%*%t(tempb),MARGIN=2,function(s){sum(s^2)}))/deno #another way bi=datai$b tempb=bi*rep(sqrt(weights.m[,1]*weights.m[,2]),2) #quadratic b, so need sqrt return(sum(sapply(as.list(data.frame(datai$Zi%*%t(tempb))),function(s){sum(s^2)}))/pi) } # end of Eh3 #Eh4new=E(Y-Xibeta-Zibi)'(Y-Xibeta-Zibi) Eh4new=function(datai=data.list[[i]], weights.m=weights.mat,beta=beta.old) { #one way #temp=0 #bi=datai$b #tt=c() #for (j in 1:nrow(bi)) #{ #tt[j]=sum((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[j,])^2)*weights.m[j,1]*weights.m[j,2] #temp=temp+sum((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[j,])^2)*weights.m[j,1]*weights.m[j,2] #} #temp/deno # another way bi=datai$b tt=sapply(as.list(ata.frame(c(datai$yi-datai$Xi%*%beta)-datai$Zi%*%t(bi))), function(s){sum(s^2)})*(weights.m[,1]*weights.m[,2]) return(sum(tt)/pi) } # end of Eh4 Eh4newv2=function(datai=data.list[[i]], weights.m=weights.mat,beta=beta.old) { bi=datai$b v=weights.m[,1]*weights.m[,2] temp=c() for (i in 1:length(v)) { temp[i]=sum(((datai$yi-datai$Xi%*%beta-datai$Zi%*%bi[i,])*sqrt(v[i]))^2) } return(sum(temp)/pi) } # end of Eh4 #Eh5new=E(Xibeta-yi)'Zib Eh5new=function(datai=data.list[[i]], weights.m=weights.mat,beta=beta.old) { bi=datai$b tempb=bi*rep(weights.m[,1]*weights.m[,2],2) t(datai$Xi%*%beta-datai$yi)%*%(datai$Zi%*%t(tempb)) return(sum(t(datai$Xi%*%beta-datai$yi)%*%(datai$Zi%*%t(tempb)))/pi) }