It appears that you are trying to partition the dataframe and then do some
operations. It is probably better to use 'split' to generate the set of
indices of the partitions and then do the operations on the subset. Here is
an example that calculate the 'mean' of each partition:
> n <- 20
> x <- data.frame(id=sample(1:3,n,TRUE), type=sample(1:3,n,TRUE),
value=runif(n))> x.split <- split(1:nrow(x), list(x$id, x$type), drop=TRUE)
> x.split
$`3.1`
[1] 1 15 19
$`1.1`
[1] 2
$`2.3`
[1] 3 9
$`2.1`
[1] 4 7 14
$`3.2`
[1] 5 10 17
$`1.3`
[1] 6 8 18
$`2.2`
[1] 11 20
$`1.2`
[1] 12 16
$`3.3`
[1] 13
> # calculate the number of values in the partition and their mean
> lapply(x.split, function(z) c(length(z),mean(x$value[z])))
$`3.1`
[1] 3.0000000 0.3120459
$`1.1`
[1] 1.0000000 0.5642638
$`2.3`
[1] 2.000000 0.384835
$`2.1`
[1] 3.0000000 0.7608086
$`3.2`
[1] 3.0000000 0.6184314
$`1.3`
[1] 3.0000000 0.4614186
$`2.2`
[1] 2.0000000 0.1475843
$`1.2`
[1] 2.0000000 0.2459645
$`3.3`
[1] 1.00000 0.92523
You should be able to extend this approach to your data.
On 8/10/06, Max Manfrin <mmanfrin@ulb.ac.be>
wrote:>
> Dear all,
> here is the result of R.Version():
>
> > R.Version()
> $platform
> [1] "powerpc-apple-darwin8.6.0"
>
> $arch
> [1] "powerpc"
>
> $os
> [1] "darwin8.6.0"
>
> $system
> [1] "powerpc, darwin8.6.0"
>
> $status
> [1] ""
>
> $major
> [1] "2"
>
> $minor
> [1] "3.1"
>
> $year
> [1] "2006"
>
> $month
> [1] "06"
>
> $day
> [1] "01"
>
> $`svn rev`
> [1] "38247"
>
> $language
> [1] "R"
>
> $version.string
> [1] "Version 2.3.1 (2006-06-01)"
>
> >
>
> I have the following code to produce boxplots of some experimental
> data, but it's really slow (I'm a newbie of R and the quality of my
> code is not really high-level!). Could you give me some guidelines
> (or examples) on how to remove those nested for loops (maybe using
> the "apply" function) so to speed-up the computation?
>
> ---BEGIN CODE---
>
>
optimal_values<-read.table("optimal_values_80.txt",header=TRUE)
> resPIR2OPT<-read.table("parallel_independent_2-
> opt_80_100.txt",header=TRUE)
>
resSEQ2OPT<-read.table("sequential_2-opt_80_800.txt",header=TRUE)
>
resSEQ22OPT<-read.table("sequential2_2-opt_80_100.txt",header=TRUE)
>
resFC1x102OPT<-read.table("fc.1.x.10_2-opt_80_100.txt",header=TRUE)
>
resFC26102OPT<-read.table("fc.2.6.10_2-opt_80_100.txt",header=TRUE)
>
resFC27102OPT<-read.table("fc.2.7.10_2-opt_80_100.txt",header=TRUE)
>
resFC28102OPT<-read.table("fc.2.8.10_2-opt_80_100.txt",header=TRUE)
>
resFC29102OPT<-read.table("fc.2.9.10_2-opt_80_100.txt",header=TRUE)
>
resFC36102OPT<-read.table("fc.3.6.10_2-opt_80_100.txt",header=TRUE)
>
resFC37102OPT<-read.table("fc.3.7.10_2-opt_80_100.txt",header=TRUE)
>
resFC38102OPT<-read.table("fc.3.8.10_2-opt_80_100.txt",header=TRUE)
>
resFC39102OPT<-read.table("fc.3.9.10_2-opt_80_100.txt",header=TRUE)
>
resHC1x102OPT<-read.table("hc.1.x.10_2-opt_80_100.txt",header=TRUE)
>
resHC26102OPT<-read.table("hc.2.6.10_2-opt_80_100.txt",header=TRUE)
>
resHC27102OPT<-read.table("hc.2.7.10_2-opt_80_100.txt",header=TRUE)
>
resHC28102OPT<-read.table("hc.2.8.10_2-opt_80_100.txt",header=TRUE)
>
resHC29102OPT<-read.table("hc.2.9.10_2-opt_80_100.txt",header=TRUE)
>
resHC36102OPT<-read.table("hc.3.6.10_2-opt_80_100.txt",header=TRUE)
>
resHC37102OPT<-read.table("hc.3.7.10_2-opt_80_100.txt",header=TRUE)
>
resHC38102OPT<-read.table("hc.3.8.10_2-opt_80_100.txt",header=TRUE)
>
resHC39102OPT<-read.table("hc.3.9.10_2-opt_80_100.txt",header=TRUE)
>
resRW1x102OPT<-read.table("rw.1.x.10_2-opt_80_100.txt",header=TRUE)
>
resRW26102OPT<-read.table("rw.2.6.10_2-opt_80_100.txt",header=TRUE)
>
resRW27102OPT<-read.table("rw.2.7.10_2-opt_80_100.txt",header=TRUE)
>
resRW28102OPT<-read.table("rw.2.8.10_2-opt_80_100.txt",header=TRUE)
>
resRW29102OPT<-read.table("rw.2.9.10_2-opt_80_100.txt",header=TRUE)
>
resRW36102OPT<-read.table("rw.3.6.10_2-opt_80_100.txt",header=TRUE)
>
resRW37102OPT<-read.table("rw.3.7.10_2-opt_80_100.txt",header=TRUE)
>
resRW38102OPT<-read.table("rw.3.8.10_2-opt_80_100.txt",header=TRUE)
>
resRW39102OPT<-read.table("rw.3.9.10_2-opt_80_100.txt",header=TRUE)
>
resUR1x102OPT<-read.table("ur.1.x.10_2-opt_80_100.txt",header=TRUE)
>
resUR26102OPT<-read.table("ur.2.6.10_2-opt_80_100.txt",header=TRUE)
>
resUR27102OPT<-read.table("ur.2.7.10_2-opt_80_100.txt",header=TRUE)
>
resUR28102OPT<-read.table("ur.2.8.10_2-opt_80_100.txt",header=TRUE)
>
resUR29102OPT<-read.table("ur.2.9.10_2-opt_80_100.txt",header=TRUE)
>
resUR36102OPT<-read.table("ur.3.6.10_2-opt_80_100.txt",header=TRUE)
>
resUR37102OPT<-read.table("ur.3.7.10_2-opt_80_100.txt",header=TRUE)
>
resUR38102OPT<-read.table("ur.3.8.10_2-opt_80_100.txt",header=TRUE)
>
resUR39102OPT<-read.table("ur.3.9.10_2-opt_80_100.txt",header=TRUE)
>
> res<-rbind
> (resFC1x102OPT,resFC26102OPT,resFC27102OPT,resFC28102OPT,resFC29102OPT,r
> esFC36102OPT,resFC37102OPT,resFC38102OPT,resFC39102OPT,resRW1x102OPT,res
> RW26102OPT,resRW27102OPT,resRW28102OPT,resRW29102OPT,resRW36102OPT,resRW
> 37102OPT,resRW38102OPT,resRW39102OPT,resHC1x102OPT,resUR1x102OPT,resUR26
> 102OPT,resUR27102OPT,resUR28102OPT,resUR29102OPT,resUR36102OPT,resUR3710
> 2OPT,resUR38102OPT,resUR39102OPT,resPIR2OPT,resSEQ2OPT,resSEQ22OPT)
>
>
> attach(res)
> lalgo<-levels(idalgo)
> linstance<-levels(instance)
> ltry<-unique(try)
> lcpu<-unique(cpu_id)
>
> for (i in (1:length(linstance)))
> {
> current_instance<-linstance[i]
>
> bestalgo<-list()
> for (j in (1:length(ltry)))
> {
> current_try<-ltry[j]
>
> for (k in (1:length(lalgo)))
> {
> current_algo<-lalgo[k]
>
> res2<-res[res$instance==current_instance &
> res$try==current_try &
> res$idalgo==current_algo,]
> # res2 contains for a given instance, a given try, and a given algo,
> all results
> res3<-res2[res2$best==min(res2$best),]
> res4<-res3[res3$time==min(res3$time),]
> if (nrow(res4)>1)
> {
> res4<-res4[1,]
> }
> if (nrow(res4)==1)
> {
>
> res4$best<-(res4$best*100/optimal_values[optimal_values
> $instance==linstance[i],]$optimum)-100
> print(res4)
> bestalgo<-rbind(bestalgo,res4)
> }
> }
> }
>
epsfile=paste(linstance[i],"_100_lim.eps",sep="")
> postscript(file=epsfile,onefile=TRUE,horizontal=TRUE)
> l<-split(bestalgo$best,list(bestalgo$idalgo))
> par(mar=c(5,5,5,3),cex.axis=0.7,las=2,mgp=c(4, 1, 0))
> title_plot=paste("100 iterations - instance
",linstance[i],sep="")
> boxplot(l,xlab="",ylab="% distance from best known
> solution",names=c
> (levels(bestalgo$idalgo)),main=title_plot,ylim=c(0,0.5))
>
> dev.off()
>
epsfile=paste(linstance[i],"_100_nolim.eps",sep="")
> postscript(file=epsfile,onefile=TRUE,horizontal=TRUE)
> l<-split(bestalgo$best,list(bestalgo$idalgo))
> par(mar=c(5,5,5,3),cex.axis=0.7,las=2,mgp=c(4, 1, 0))
> title_plot=paste("100 iterations - instance
",linstance[i],sep="")
> boxplot(l,xlab="",ylab="% distance from best known
> solution",names=c
> (levels(bestalgo$idalgo)),main=title_plot)
>
> dev.off()
>
> }
> detach(res)
>
> ---END CODE ---
>
>
> This is the output of the command str(res):
>
> > str(res)
> `data.frame': 230200 obs. of 11 variables:
> $ idalgo : Factor w/ 31 levels "FC.1.x.10-2opt",..: 1 1 1 1 1 1
1 1
> 1 1 ...
> $ topo : Factor w/ 7 levels
"FC","RW","HC",..: 1 1 1 1 1 1 1 1 1
> 1 ...
> $ schema : Factor w/ 12 levels "1.x.10","2.6.10",..:
1 1 1 1 1 1 1
> 1 1 1 ...
> $ ls : int 2 2 2 2 2 2 2 2 2 2 ...
> $ type : Factor w/ 2 levels "Par","Seq": 1 1 1 1 1
1 1 1 1 1 ...
> $ cpu_id : int 0 0 0 0 0 0 0 0 0 0 ...
> $ instance : Factor w/ 2 levels "lipa80a","tai80a": 1 1
1 1 1 1 1 1 1
> 1 ...
> $ try : int 1 1 1 1 1 1 1 1 1 1 ...
> $ best : int 255434 255321 255296 255224 255181 255030 254985
> 254961 254927 254897 ...
> $ time : num 0.09 0.09 0.09 0.18 0.27 0.46 1 1.37 1.42 1.66 ...
> $ iteration: int 1 1 1 2 3 5 11 17 18 20 ...
> >
>
> Hoping that somebody could help me, accept my best regards.
> ----
> Max MANFRIN
> http://iridia.ulb.ac.be/~mmanfrin/
>
> ______________________________________________
> R-help@stat.math.ethz.ch mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide
> http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>
--
Jim Holtman
Cincinnati, OH
+1 513 646 9390
What is the problem you are trying to solve?
[[alternative HTML version deleted]]