Hi all, I'm running a monte carlo test of a neural network tool I've developed, and it looks like it's going to take a very long time if I run it in R so I'm interested in translating my code (included below) into something faster like Fortran (which I'll have to learn from scratch). However, as you'll see my code loads the nnet library and uses it quite a bit, and I don't have a good sense of how this impacts the translation process; will I have to translate all the code for the nnet library itself as well? Any pointers would be greatly appreciated! Here's my code: #This code replicates the simulation performed by Rouder et al (2005), #which attempts to test the estimation of weibull distribution parameters #from sample data. In this implementation, their HB estimation method is #replaced by an iterative neural network approach. library(nnet) data.gen=function(iterations,min.sample.size,max.sample.size,min.shift,max.shift,min.scale,max.scale,min.shape,max.shape){ #set up some collection vectors sample.size=vector(mode="numeric",length=iterations) exp.shift=vector(mode="numeric",length=iterations) exp.scale=vector(mode="numeric",length=iterations) exp.shape=vector(mode="numeric",length=iterations) for(i in 1:iterations){ #sample from the parameter space sample.size[i]=round(runif(1,min.sample.size,max.sample.size),digits=0) exp.shift[i]=runif(1,min.shift,max.shift) exp.scale[i]=runif(1,min.scale,max.scale) exp.shape[i]=runif(1,min.shape,max.shape) #generate rt data and record summary stats obs.rt=rweibull(sample.size[i],exp.shape[i],exp.scale[i])+exp.shift[i] if(i==1){ obs.stats=summary(obs.rt) }else{ obs.stats=rbind(obs.stats,summary(obs.rt)) } } row.names(obs.stats)=c(1:iterations) obs.stats=as.data.frame(obs.stats) obs=as.data.frame(cbind(obs.stats,sample.size,exp.shift,exp.scale,exp.shape)) names(obs)=c("min","q1","med","mean","q3","max","samples","exp.shift","exp.scale","exp.shape") return(obs) } #set working directory setwd("E:/Various Data/NNEst/NetWeibull/Rouder data") stadler=read.table("bayest.par") names(stadler)=c("exp.shift","exp.scale","exp.shape") cell.size=20 sim.size=600 #first train initial neural nets training.data=data.gen(1e4,cell.size,cell.size,.1,1,.1,1,1,4) #train nn.shift with error checking ok=F while(ok==F){ nn1.shift=nnet(exp.shift~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) cor.shift=predict(nn.shift,training.data[,c(1:7)],type="raw") temp=hist(cor.shift,plot=F) if(length(temp$counts[temp$counts>0])>10){ ok=T } } #train nn.scale with error checking ok=F while(ok==F){ nn1.scale=nnet(exp.scale~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) cor.scale=predict(nn.scale,training.data[,c(1:7)],type="raw") temp=hist(cor.scale,plot=F) if(length(temp$counts[temp$counts>0])>10){ ok=T } } #train nn.shape with error checking ok=F while(ok==F){ nn1.shape=nnet(exp.shape~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) cor.shape=predict(nn.shape,training.data[,c(1:7)],type="raw") temp=hist(cor.shape,plot=F) if(length(temp$counts[temp$counts>0])>10){ ok=T } } #run simulation obs.stats=matrix(0,80,7) ind.shift.err=matrix(0,80,sim.size) ind.scale.err=matrix(0,80,sim.size) ind.shape.err=matrix(0,80,sim.size) group.shift.err=vector(mode="numeric",length=sim.size) group.scale.err=vector(mode="numeric",length=sim.size) group.shape.err=vector(mode="numeric",length=sim.size) for(i in 1:sim.size){ for(j in 1:80){ obs.stats[j,]=c(summary(rweibull(cell.size,stadler$exp.shape[j],stadler$exp.scale[j])+stadler$exp.shift[j]),cell.size) } obs.stats=as.data.frame(obs.stats) names(obs.stats)=c("min","q1","med","mean","q3","max","samples") #estimation iteration 1 cor.shift=predict(nn1.shift,obs.stats,type="raw") cor.scale=predict(nn1.scale,obs.stats,type="raw") cor.shape=predict(nn1.shape,obs.stats,type="raw") min.obs.samples=min(obs.stats$samples) max.obs.samples=max(obs.stats$samples) min.shift=quantile(cor.shift,seq(0,1,.05))[2] max.shift=quantile(cor.shift,seq(0,1,.05))[20] min.scale=quantile(cor.scale,seq(0,1,.05))[2] max.scale=quantile(cor.scale,seq(0,1,.05))[20] min.shape=quantile(cor.shape,seq(0,1,.05))[2] max.shape=quantile(cor.shape,seq(0,1,.05))[20] #re-train nets to reduced parameter space training.data=data.gen(1e4,min.obs.samples,max.obs.samples,min.shift,max.shift,min.scale,max.scale,min.shape,max.shape) #train nn.shift with error checking ok=F while(ok==F){ nn2.shift=nnet(exp.shift~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) cor.shift=predict(nn2.shift,training.data[,c(1:7)],type="raw") temp=hist(cor.shift,plot=F) if(length(temp$counts[temp$counts>0])>10){ ok=T } } #train nn.scale with error checking ok=F while(ok==F){ nn2.scale=nnet(exp.scale~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) cor.scale=predict(nn2.scale,training.data[,c(1:7)],type="raw") temp=hist(cor.scale,plot=F) if(length(temp$counts[temp$counts>0])>10){ ok=T } } #train nn.shape with error checking ok=F while(ok==F){ nn2.shape=nnet(exp.shape~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) cor.shape=predict(nn2.shape,training.data[,c(1:7)],type="raw") temp=hist(cor.shape,plot=F) if(length(temp$counts[temp$counts>0])>10){ ok=T } } #estimation iteration 2 cor.shift=predict(nn2.shift,obs.stats,type="raw") cor.scale=predict(nn2.scale,obs.stats,type="raw") cor.shape=predict(nn2.shape,obs.stats,type="raw") #record error ind.shift.err[,i]=cor.shift-stadler$exp.shift ind.scale.err[,i]=cor.scale-stadler$exp.scale ind.shape.err[,i]=cor.shape-stadler$exp.shape group.shift.err[i]=mean(cor.shift)-mean(stadler$exp.shift) group.scale.err[i]=mean(cor.scale)-mean(stadler$exp.scale) group.shape.err[i]=mean(cor.shape)-mean(stadler$exp.shape) } results=as.data.frame(rbind(cbind(sd(c(ind.shift.err[,1:162])),sd(c(ind.scale.err[,1:162])),sd(c(ind.shape.err[,1:162]))),cbind(sd(group.shift.err[1:162]),sd(group.scale.err[1:162]),sd(group.shape.err[1:162])))) results -- Mike Lawrence http://arts.uwaterloo.ca/~m4lawren "The road to wisdom? Well, it's plain and simple to express: Err and err and err again, but less and less and less." - Piet Hein
As nnet is done almost entirely in compiled C, you may well find that already most of the computation is in a compiled language. Please look at `Writing R Extensions' and profile your code to find the bottlenecks. While you are looking at that manual, please also consider the section on tidying up your code to make it readable for others. On Mon, 11 Sep 2006, Mike Lawrence wrote:> Hi all, > > I'm running a monte carlo test of a neural network tool I've developed, > and it looks like it's going to take a very long time if I run it in R > so I'm interested in translating my code (included below) into something > faster like Fortran (which I'll have to learn from scratch). However, as > you'll see my code loads the nnet library and uses it quite a bit, and I > don't have a good sense of how this impacts the translation process; > will I have to translate all the code for the nnet library itself as well? > > Any pointers would be greatly appreciated! Here's my code: > > #This code replicates the simulation performed by Rouder et al (2005), > #which attempts to test the estimation of weibull distribution parameters > #from sample data. In this implementation, their HB estimation method is > #replaced by an iterative neural network approach. > > library(nnet) > > data.gen=function(iterations,min.sample.size,max.sample.size,min.shift,max.shift,min.scale,max.scale,min.shape,max.shape){ > #set up some collection vectors > sample.size=vector(mode="numeric",length=iterations) > exp.shift=vector(mode="numeric",length=iterations) > exp.scale=vector(mode="numeric",length=iterations) > exp.shape=vector(mode="numeric",length=iterations) > for(i in 1:iterations){ > #sample from the parameter space > > sample.size[i]=round(runif(1,min.sample.size,max.sample.size),digits=0) > exp.shift[i]=runif(1,min.shift,max.shift) > exp.scale[i]=runif(1,min.scale,max.scale) > exp.shape[i]=runif(1,min.shape,max.shape) > #generate rt data and record summary stats > > obs.rt=rweibull(sample.size[i],exp.shape[i],exp.scale[i])+exp.shift[i] > if(i==1){ > obs.stats=summary(obs.rt) > }else{ > obs.stats=rbind(obs.stats,summary(obs.rt)) > } > } > row.names(obs.stats)=c(1:iterations) > obs.stats=as.data.frame(obs.stats) > > obs=as.data.frame(cbind(obs.stats,sample.size,exp.shift,exp.scale,exp.shape)) > > names(obs)=c("min","q1","med","mean","q3","max","samples","exp.shift","exp.scale","exp.shape") > return(obs) > } > > #set working directory > setwd("E:/Various Data/NNEst/NetWeibull/Rouder data") > > stadler=read.table("bayest.par") > names(stadler)=c("exp.shift","exp.scale","exp.shape") > > cell.size=20 > sim.size=600 > #first train initial neural nets > training.data=data.gen(1e4,cell.size,cell.size,.1,1,.1,1,1,4) > #train nn.shift with error checking > ok=F > while(ok==F){ > > nn1.shift=nnet(exp.shift~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.shift=predict(nn.shift,training.data[,c(1:7)],type="raw") > temp=hist(cor.shift,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #train nn.scale with error checking > ok=F > while(ok==F){ > > nn1.scale=nnet(exp.scale~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.scale=predict(nn.scale,training.data[,c(1:7)],type="raw") > temp=hist(cor.scale,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #train nn.shape with error checking > ok=F > while(ok==F){ > > nn1.shape=nnet(exp.shape~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.shape=predict(nn.shape,training.data[,c(1:7)],type="raw") > temp=hist(cor.shape,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > > > #run simulation > obs.stats=matrix(0,80,7) > ind.shift.err=matrix(0,80,sim.size) > ind.scale.err=matrix(0,80,sim.size) > ind.shape.err=matrix(0,80,sim.size) > group.shift.err=vector(mode="numeric",length=sim.size) > group.scale.err=vector(mode="numeric",length=sim.size) > group.shape.err=vector(mode="numeric",length=sim.size) > for(i in 1:sim.size){ > for(j in 1:80){ > > obs.stats[j,]=c(summary(rweibull(cell.size,stadler$exp.shape[j],stadler$exp.scale[j])+stadler$exp.shift[j]),cell.size) > } > obs.stats=as.data.frame(obs.stats) > names(obs.stats)=c("min","q1","med","mean","q3","max","samples") > #estimation iteration 1 > cor.shift=predict(nn1.shift,obs.stats,type="raw") > cor.scale=predict(nn1.scale,obs.stats,type="raw") > cor.shape=predict(nn1.shape,obs.stats,type="raw") > min.obs.samples=min(obs.stats$samples) > max.obs.samples=max(obs.stats$samples) > min.shift=quantile(cor.shift,seq(0,1,.05))[2] > max.shift=quantile(cor.shift,seq(0,1,.05))[20] > min.scale=quantile(cor.scale,seq(0,1,.05))[2] > max.scale=quantile(cor.scale,seq(0,1,.05))[20] > min.shape=quantile(cor.shape,seq(0,1,.05))[2] > max.shape=quantile(cor.shape,seq(0,1,.05))[20] > #re-train nets to reduced parameter space > > training.data=data.gen(1e4,min.obs.samples,max.obs.samples,min.shift,max.shift,min.scale,max.scale,min.shape,max.shape) > #train nn.shift with error checking > ok=F > while(ok==F){ > > nn2.shift=nnet(exp.shift~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.shift=predict(nn2.shift,training.data[,c(1:7)],type="raw") > temp=hist(cor.shift,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #train nn.scale with error checking > ok=F > while(ok==F){ > > nn2.scale=nnet(exp.scale~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.scale=predict(nn2.scale,training.data[,c(1:7)],type="raw") > temp=hist(cor.scale,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #train nn.shape with error checking > ok=F > while(ok==F){ > > nn2.shape=nnet(exp.shape~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.shape=predict(nn2.shape,training.data[,c(1:7)],type="raw") > temp=hist(cor.shape,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #estimation iteration 2 > cor.shift=predict(nn2.shift,obs.stats,type="raw") > cor.scale=predict(nn2.scale,obs.stats,type="raw") > cor.shape=predict(nn2.shape,obs.stats,type="raw") > #record error > ind.shift.err[,i]=cor.shift-stadler$exp.shift > ind.scale.err[,i]=cor.scale-stadler$exp.scale > ind.shape.err[,i]=cor.shape-stadler$exp.shape > group.shift.err[i]=mean(cor.shift)-mean(stadler$exp.shift) > group.scale.err[i]=mean(cor.scale)-mean(stadler$exp.scale) > group.shape.err[i]=mean(cor.shape)-mean(stadler$exp.shape) > } > > results=as.data.frame(rbind(cbind(sd(c(ind.shift.err[,1:162])),sd(c(ind.scale.err[,1:162])),sd(c(ind.shape.err[,1:162]))),cbind(sd(group.shift.err[1:162]),sd(group.scale.err[1:162]),sd(group.shape.err[1:162])))) > results > >-- Brian D. Ripley, ripley at stats.ox.ac.uk Professor of Applied Statistics, http://www.stats.ox.ac.uk/~ripley/ University of Oxford, Tel: +44 1865 272861 (self) 1 South Parks Road, +44 1865 272866 (PA) Oxford OX1 3TG, UK Fax: +44 1865 272595
I have the impression (after only a quick glance at your code, so I may have missed something) that you are generating multiple datasets and then using them. As a general strategy this does not work very well. You will get slightly improved performance with compiled code, but the real problem is that you grab too much memory and start swapping, and then things will be very slow. It is better to generate a dataset, use it and save partial results, then generate the next dataset, using the same variable name so the memory use does not increase. There are examples of this in the dse2 package in the dse bundle on CRAN. (In fact, you may be able to use some of the structure in that package.) Paul Gilbert Mike Lawrence wrote:> Hi all, > > I'm running a monte carlo test of a neural network tool I've developed, > and it looks like it's going to take a very long time if I run it in R > so I'm interested in translating my code (included below) into something > faster like Fortran (which I'll have to learn from scratch). However, as > you'll see my code loads the nnet library and uses it quite a bit, and I > don't have a good sense of how this impacts the translation process; > will I have to translate all the code for the nnet library itself as well? > > Any pointers would be greatly appreciated! Here's my code: > > #This code replicates the simulation performed by Rouder et al (2005), > #which attempts to test the estimation of weibull distribution parameters > #from sample data. In this implementation, their HB estimation method is > #replaced by an iterative neural network approach. > > library(nnet) > > data.gen=function(iterations,min.sample.size,max.sample.size,min.shift,max.shift,min.scale,max.scale,min.shape,max.shape){ > #set up some collection vectors > sample.size=vector(mode="numeric",length=iterations) > exp.shift=vector(mode="numeric",length=iterations) > exp.scale=vector(mode="numeric",length=iterations) > exp.shape=vector(mode="numeric",length=iterations) > for(i in 1:iterations){ > #sample from the parameter space > > sample.size[i]=round(runif(1,min.sample.size,max.sample.size),digits=0) > exp.shift[i]=runif(1,min.shift,max.shift) > exp.scale[i]=runif(1,min.scale,max.scale) > exp.shape[i]=runif(1,min.shape,max.shape) > #generate rt data and record summary stats > > obs.rt=rweibull(sample.size[i],exp.shape[i],exp.scale[i])+exp.shift[i] > if(i==1){ > obs.stats=summary(obs.rt) > }else{ > obs.stats=rbind(obs.stats,summary(obs.rt)) > } > } > row.names(obs.stats)=c(1:iterations) > obs.stats=as.data.frame(obs.stats) > > obs=as.data.frame(cbind(obs.stats,sample.size,exp.shift,exp.scale,exp.shape)) > > names(obs)=c("min","q1","med","mean","q3","max","samples","exp.shift","exp.scale","exp.shape") > return(obs) > } > > #set working directory > setwd("E:/Various Data/NNEst/NetWeibull/Rouder data") > > stadler=read.table("bayest.par") > names(stadler)=c("exp.shift","exp.scale","exp.shape") > > cell.size=20 > sim.size=600 > #first train initial neural nets > training.data=data.gen(1e4,cell.size,cell.size,.1,1,.1,1,1,4) > #train nn.shift with error checking > ok=F > while(ok==F){ > > nn1.shift=nnet(exp.shift~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.shift=predict(nn.shift,training.data[,c(1:7)],type="raw") > temp=hist(cor.shift,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #train nn.scale with error checking > ok=F > while(ok==F){ > > nn1.scale=nnet(exp.scale~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.scale=predict(nn.scale,training.data[,c(1:7)],type="raw") > temp=hist(cor.scale,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #train nn.shape with error checking > ok=F > while(ok==F){ > > nn1.shape=nnet(exp.shape~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.shape=predict(nn.shape,training.data[,c(1:7)],type="raw") > temp=hist(cor.shape,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > > > #run simulation > obs.stats=matrix(0,80,7) > ind.shift.err=matrix(0,80,sim.size) > ind.scale.err=matrix(0,80,sim.size) > ind.shape.err=matrix(0,80,sim.size) > group.shift.err=vector(mode="numeric",length=sim.size) > group.scale.err=vector(mode="numeric",length=sim.size) > group.shape.err=vector(mode="numeric",length=sim.size) > for(i in 1:sim.size){ > for(j in 1:80){ > > obs.stats[j,]=c(summary(rweibull(cell.size,stadler$exp.shape[j],stadler$exp.scale[j])+stadler$exp.shift[j]),cell.size) > } > obs.stats=as.data.frame(obs.stats) > names(obs.stats)=c("min","q1","med","mean","q3","max","samples") > #estimation iteration 1 > cor.shift=predict(nn1.shift,obs.stats,type="raw") > cor.scale=predict(nn1.scale,obs.stats,type="raw") > cor.shape=predict(nn1.shape,obs.stats,type="raw") > min.obs.samples=min(obs.stats$samples) > max.obs.samples=max(obs.stats$samples) > min.shift=quantile(cor.shift,seq(0,1,.05))[2] > max.shift=quantile(cor.shift,seq(0,1,.05))[20] > min.scale=quantile(cor.scale,seq(0,1,.05))[2] > max.scale=quantile(cor.scale,seq(0,1,.05))[20] > min.shape=quantile(cor.shape,seq(0,1,.05))[2] > max.shape=quantile(cor.shape,seq(0,1,.05))[20] > #re-train nets to reduced parameter space > > training.data=data.gen(1e4,min.obs.samples,max.obs.samples,min.shift,max.shift,min.scale,max.scale,min.shape,max.shape) > #train nn.shift with error checking > ok=F > while(ok==F){ > > nn2.shift=nnet(exp.shift~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.shift=predict(nn2.shift,training.data[,c(1:7)],type="raw") > temp=hist(cor.shift,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #train nn.scale with error checking > ok=F > while(ok==F){ > > nn2.scale=nnet(exp.scale~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.scale=predict(nn2.scale,training.data[,c(1:7)],type="raw") > temp=hist(cor.scale,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #train nn.shape with error checking > ok=F > while(ok==F){ > > nn2.shape=nnet(exp.shape~min+q1+med+mean+q3+max+samples,data=training.data,size=8,linout=T,rang=1e-08,maxit=500,trace=F) > cor.shape=predict(nn2.shape,training.data[,c(1:7)],type="raw") > temp=hist(cor.shape,plot=F) > if(length(temp$counts[temp$counts>0])>10){ > ok=T > } > } > #estimation iteration 2 > cor.shift=predict(nn2.shift,obs.stats,type="raw") > cor.scale=predict(nn2.scale,obs.stats,type="raw") > cor.shape=predict(nn2.shape,obs.stats,type="raw") > #record error > ind.shift.err[,i]=cor.shift-stadler$exp.shift > ind.scale.err[,i]=cor.scale-stadler$exp.scale > ind.shape.err[,i]=cor.shape-stadler$exp.shape > group.shift.err[i]=mean(cor.shift)-mean(stadler$exp.shift) > group.scale.err[i]=mean(cor.scale)-mean(stadler$exp.scale) > group.shape.err[i]=mean(cor.shape)-mean(stadler$exp.shape) > } > > results=as.data.frame(rbind(cbind(sd(c(ind.shift.err[,1:162])),sd(c(ind.scale.err[,1:162])),sd(c(ind.shape.err[,1:162]))),cbind(sd(group.shift.err[1:162]),sd(group.scale.err[1:162]),sd(group.shape.err[1:162])))) > results >=================================================================================== La version fran?aise suit le texte anglais. ------------------------------------------------------------------------------------ This email may contain privileged and/or confidential inform...{{dropped}}
Apparently Analagous Threads
- Training nnet in two ways, trying to understand the performance difference - with (i hope!) commented, minimal, self-contained, reproducible code
- Help on NNET
- About Mcneil Hanley test for a portion of AUC!
- how to get actual value from predict in nnet?
- How to compare areas under ROC curves calculated with ROCR package