o.heil at dkfz.de
2010-Feb-03 12:55 UTC
[Rd] mclapply on a set not divisible by number of cores (PR#14205)
Full_Name: Oliver Heil
Version: 2.10.0
OS: debian squeeze
Submission from: (NULL) (193.174.58.251)

When running mclapply on a list of 618 strings using 10 cores, every 10th entry of the result, starting with the 6th, is wrong. Our machine has 16 cores.

You can reproduce the error using the data provided here: <http://www.dkfz.de/gpcf/tmp_535434fsfd/> together with the following code (run with R --vanilla):

# foreach probeid(618 Probeids) get the data points from the
# dataframes control and group
# calculate mean, standard deviation and detection p value for group and control
# calculate the p value, that mean of control and mean of group are different
#
# The result is a list (length 618) of 7 tuples
#
# Have a look at x_sd_p.test[[6]], x_sd_p.test[[16]], ...
# It works fine using lapply or doing the function "by
# hand" for example with factor=probeids[6]
#
load("df.control.R")
load("df.group.R")
load("negative_bead.R")
load("probeids.R")
library("multicore")
x_sd_p.test=mclapply(probeids,function(factor){
  idxg=which(df.group$Factor %in% factor);
  mg=NA;sdg=NA;pg=1.0;
  if(length(idxg)>0){
    lg=df.group$x[idxg];
    mg=mean(lg,,TRUE);
    sdg=sd(lg,TRUE);
    t=wilcox.test(lg,negative_bead,alternative="g",exact=TRUE);
    pg=t$p.value;
  }
  idxc=which(df.control$Factor %in% factor);
  mc=NA;sdc=NA;pc=1.0
  if(length(idxc)>0){
    lc=df.control$x[idxc];
    mc=mean(lc,,TRUE);
    sdc=sd(lc,TRUE);
    t=wilcox.test(lc,negative_bead,alternative="g",exact=TRUE);
    pc=t$p.value;
  }
  p=1.0;
  if(length(idxg)>0&&length(idxc)>0){
    t=wilcox.test(lg,lc,alternative="t",exact=TRUE);
    p=t$p.value;
  }
  c(mg,sdg,pg,mc,sdc,pc,p);
},mc.cores=10)
l=lapply(x_sd_p.test,function(x){length(x)})

> sessionInfo()
R version 2.10.0 (2009-10-26)
x86_64-pc-linux-gnu

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
 [5] LC_MONETARY=C              LC_MESSAGES=en_US.UTF-8
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C
 [9] LC_ADDRESS=C               LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages:
[1] stats graphics grDevices utils datasets methods base

other attached packages:
[1] multicore_0.1-3

loaded via a namespace (and not attached):
[1] tools_2.10.0

> version
               _
platform       x86_64-pc-linux-gnu
arch           x86_64
os             linux-gnu
system         x86_64, linux-gnu
status
major          2
minor          10.0
year           2009
month          10
day            26
svn rev        50208
language       R
version.string R version 2.10.0 (2009-10-26)