WATSON Mick
2012-Dec-17 22:01 UTC
[R] Code works standalone, yet same code fails when part of package
Hi

I'm missing something here but I cannot figure out what. What I can see is that the same code works when I load it via source(...) yet fails when I execute it after loading the package I have built (which includes the code).

Below is a transcript of my R session. First I load the code from a file, using source(). Then I execute it fine. Then I remove the function object, I load the package, and execute the same code from the package - and I get an error.

Please help!

Mick

> library(Rsamtools)
> source("viRome/R/read.bam.R")
> read.bam
function(bamfile=NULL, chr=NULL, start=1, end=1e07, what=c("qname", "flag",
"rname", "strand", "pos", "qwidth", "mapq", "cigar", "mrnm", "mpos",
"isize", "seq"), tag=c("NM"), removeN=TRUE) {

  which <- RangesList(chr = IRanges(start,end))
  names(which) <- chr
  param <- ScanBamParam(which = which, what = what, tag=tag)

  bam <- scanBam(bamfile, param=param)

  lst <- lapply(names(bam[[1]]), function(elt) {do.call(c, unname(lapply (bam, "[[", elt)))})
  names(lst) <- names(bam[[1]])

  # convert to data.frame
  df <- do.call("DataFrame", lst)
  df <- df[!is.na(df$qwidth),]

  df$seq <- as.character(df$seq)
  df$cigar <- as.character(df$cigar)

  df$rname <- rep(chr, nrow(df))

  if (removeN==TRUE) {
    ns <- grep("N", df$seq)
    if (length(ns) > 0) {
      df <- df[-ns,]
    }
  }

  return(as.data.frame(df))
}
> bam <- read.bam("../../example/SRR389185_vs_SINV_sorted.bam", chr="SINV")
> # works! remove the function
> rm(read.bam)
> # load the package
> library(viRome)
Loading required package: seqinr

Attaching package: ‘seqinr’

The following object(s) are masked from ‘package:Biostrings’:

    translate

Loading required package: plyr

Attaching package: ‘plyr’

The following object(s) are masked from ‘package:seqinr’:

    count

The following object(s) are masked from ‘package:IRanges’:

    compact, desc, rename

Loading required package: gsubfn
Loading required package: proto
Loading required namespace: tcltk
Loading Tcl/Tk interface ... done
Loading required package: seqLogo
Loading required package: grid
> read.bam
function (bamfile = NULL, chr = NULL, start = 1, end = 1e+07,
    what = c("qname", "flag", "rname", "strand", "pos", "qwidth",
        "mapq", "cigar", "mrnm", "mpos", "isize", "seq"), tag = c("NM"),
    removeN = TRUE)
{
    which <- RangesList(chr = IRanges(start, end))
    names(which) <- chr
    param <- ScanBamParam(which = which, what = what, tag = tag)
    bam <- scanBam(bamfile, param = param)
    lst <- lapply(names(bam[[1]]), function(elt) {
        do.call(c, unname(lapply(bam, "[[", elt)))
    })
    names(lst) <- names(bam[[1]])
    df <- do.call("DataFrame", lst)
    df <- df[!is.na(df$qwidth), ]
    df$seq <- as.character(df$seq)
    df$cigar <- as.character(df$cigar)
    df$rname <- rep(chr, nrow(df))
    if (removeN == TRUE) {
        ns <- grep("N", df$seq)
        if (length(ns) > 0) {
            df <- df[-ns, ]
        }
    }
    return(as.data.frame(df))
}
<environment: namespace:viRome>
> # same code now produces an error
> bam <- read.bam("../../example/SRR389185_vs_SINV_sorted.bam", chr="SINV")
Error in as.data.frame.default(df) :
  cannot coerce class 'structure("DataFrame", package = "IRanges")' into a data.frame

R version 2.15.2 (2012-10-26)
Platform: x86_64-redhat-linux-gnu (64-bit)

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8
 [7] LC_PAPER=C                 LC_NAME=C
 [9] LC_ADDRESS=C               LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods
[8] base

other attached packages:
 [1] viRome_0.2           seqLogo_1.24.0       gsubfn_0.6-5
 [4] proto_0.3-9.2        plyr_1.8             seqinr_3.0-7
 [7] Rsamtools_1.10.2     Biostrings_2.26.2    GenomicRanges_1.10.5
[10] IRanges_1.16.4       BiocGenerics_0.4.0

loaded via a namespace (and not attached):
[1] bitops_1.0-5    parallel_2.15.2 stats4_2.15.2   tcltk_2.15.2
[5] zlibbioc_1.4.0

-- 
The University of Edinburgh is a charitable body, registered in Scotland, with registration number SC005336.
Martin Morgan
2012-Dec-17 23:45 UTC
[R] Code works standalone, yet same code fails when part of package
Hi Mick --

On 12/17/2012 02:01 PM, WATSON Mick wrote:
> Hi
>
> I'm missing something here but I cannot figure out what. What I can see is that the same code works when I load it via source(...) yet fails when I execute it after loading the package I have built (which includes the code.
>
> Below is a transcript of my R session. First I load the code from a file, using source(). Then I execute it fine. Then I remove the function object, I load the package, and execute the same code from the package - and I get an error.
>
> Please help!
>
> Mick
>
>> library(Rsamtools)
>> source("viRome/R/read.bam.R")
>> read.bam
> function(bamfile=NULL, chr=NULL, start=1, end=1e07, what=c("qname", "flag",
> "rname", "strand", "pos", "qwidth", "mapq", "cigar", "mrnm", "mpos",
> "isize", "seq"), tag=c("NM"), removeN=TRUE) {
>
>   which <- RangesList(chr = IRanges(start,end))
>   names(which) <- chr
>   param <- ScanBamParam(which = which, what = what, tag=tag)
>
>   bam <- scanBam(bamfile, param=param)
>
>   lst <- lapply(names(bam[[1]]), function(elt) {do.call(c, unname(lapply
>
> (bam, "[[", elt)))})
>   names(lst) <- names(bam[[1]])
>
>   # convert to data.frame
>   df <- do.call("DataFrame", lst)
>   df <- df[!is.na(df$qwidth),]
>
>   df$seq <- as.character(df$seq)
>   df$cigar <- as.character(df$cigar)
>
>   df$rname <- rep(chr, nrow(df))
>
>   if (removeN==TRUE) {
>     ns <- grep("N", df$seq)
>     if (length(ns) > 0) {
>       df <- df[-ns,]
>     }
>   }
>
>   return(as.data.frame(df))
> }
>> bam <- read.bam("../../example/SRR389185_vs_SINV_sorted.bam", chr="SINV")
>> # works! remove the function
>> rm(read.bam)
>> # load the package
>> library(viRome)
> Loading required package: seqinr
>
> Attaching package: ‘seqinr’
>
> The following object(s) are masked from ‘package:Biostrings’:
>
>     translate
>
> Loading required package: plyr
>
> Attaching package: ‘plyr’
>
> The following object(s) are masked from ‘package:seqinr’:
>
>     count
>
> The following object(s) are masked from ‘package:IRanges’:
>
>     compact, desc, rename
>
> Loading required package: gsubfn
> Loading required package: proto
> Loading required namespace: tcltk
> Loading Tcl/Tk interface ... done
> Loading required package: seqLogo
> Loading required package: grid
>> read.bam
> function (bamfile = NULL, chr = NULL, start = 1, end = 1e+07,
>     what = c("qname", "flag", "rname", "strand", "pos", "qwidth",
>         "mapq", "cigar", "mrnm", "mpos", "isize", "seq"), tag = c("NM"),
>     removeN = TRUE)
> {
>     which <- RangesList(chr = IRanges(start, end))
>     names(which) <- chr
>     param <- ScanBamParam(which = which, what = what, tag = tag)
>     bam <- scanBam(bamfile, param = param)
>     lst <- lapply(names(bam[[1]]), function(elt) {
>         do.call(c, unname(lapply(bam, "[[", elt)))
>     })
>     names(lst) <- names(bam[[1]])
>     df <- do.call("DataFrame", lst)
>     df <- df[!is.na(df$qwidth), ]
>     df$seq <- as.character(df$seq)
>     df$cigar <- as.character(df$cigar)
>     df$rname <- rep(chr, nrow(df))
>     if (removeN == TRUE) {
>         ns <- grep("N", df$seq)
>         if (length(ns) > 0) {
>             df <- df[-ns, ]
>         }
>     }
>     return(as.data.frame(df))
> }
> <environment: namespace:viRome>
>> # same code now produces an error
>> bam <- read.bam("../../example/SRR389185_vs_SINV_sorted.bam", chr="SINV")
> Error in as.data.frame.default(df) :
>   cannot coerce class 'structure("DataFrame", package = "IRanges")' into a

Glad to see you using the Bioconductor GenomicRanges infrastructure; you might get excellent help on one of the Bioconductor mailing lists

  http://bioconductor.org/help/mailing-list/

But the issue here is that your package needs to have

  Imports: IRanges

in its DESCRIPTION file, and something like

  importMethodsFrom(IRanges, coerce)

in its NAMESPACE file. Otherwise, the method to coerce a DataFrame to a data.frame is not available to your package code.

Martin

>
> data.frame
>
> R version 2.15.2 (2012-10-26)
> Platform: x86_64-redhat-linux-gnu (64-bit)
>
> locale:
>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C
>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8
>  [7] LC_PAPER=C                 LC_NAME=C
>  [9] LC_ADDRESS=C               LC_TELEPHONE=C
> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
>
> attached base packages:
> [1] grid      stats     graphics  grDevices utils     datasets  methods
> [8] base
>
> other attached packages:
>  [1] viRome_0.2           seqLogo_1.24.0       gsubfn_0.6-5
>  [4] proto_0.3-9.2        plyr_1.8             seqinr_3.0-7
>  [7] Rsamtools_1.10.2     Biostrings_2.26.2    GenomicRanges_1.10.5
> [10] IRanges_1.16.4       BiocGenerics_0.4.0
>
> loaded via a namespace (and not attached):
> [1] bitops_1.0-5    parallel_2.15.2 stats4_2.15.2   tcltk_2.15.2
> [5] zlibbioc_1.4.0
>

-- 
Computational Biology / Fred Hutchinson Cancer Research Center
1100 Fairview Ave. N.
PO Box 19024 Seattle, WA 98109

Location: Arnold Building M1 B861
Phone: (206) 667-2793