stephen sefick
2008-Sep-25 00:47 UTC
[R] Splitting row names up and then adding up the columns associated with criteria from the parts of the site coding (help)
d <- c("upwd1201", "upwd0502", "upwd0702", "upwd1002", "upwd1102", "upwd0203", "upwd0503", "upwd0803", "upwd0104", "upwd0704", "upwd0804", "upwd1204", "upwd0805", "upwd1005", "upwd0106", "dnwd1201", "dnwd0502", "dnwd0702", "dnwd1002", "dnwd1102", "dnwd1202", "dnwd0103", "dnwd0203", "dnwd0303", "dnwd0403", "dnwd0503", "dnwd0803", "dnwd0104", "dnwd0704", "dnwd0804", "dnwd1204", "dnwd0805", "dnwd1005", "dnwd0106", "uppl0502", "uppl0702", "uppl1002", "uppl1102", "uppl0203", "uppl0503", "uppl0803", "uppl0104", "uppl0804", "uppl1204", "uppl0805", "uppl1005", "uppl0106", "dnpl0502", "dnpl0702", "dnpl1002", "dnpl1102", "dnpl1202", "dnpl0103", "dnpl0203", "dnpl0403", "dnpl0503", "dnpl0803", "dnpl0104", "dnpl0704", "dnpl0804", "dnpl1204", "dnpl0805", "dnpl1005", "dnpl0106", "uplp1201", "uplp0502", "uplp0702", "uplp1002", "uplp1102", "uplp0203", "uplp0503", "uplp0803", "uplp0104", "uplp0704", "uplp0804", "uplp1204", "uplp0805", "uplp1005", "uplp0106", "dnlp1201", "dnlp0502", "dnlp0702", "dnlp1002", "dnlp1102", "dnlp1202", "dnlp0103", "dnlp0203", "dnlp0303", "dnlp0403", "dnlp0503", "dnlp0803", "dnlp0104", "dnlp0704", "dnpt0804", "dnlp1204", "dnlp0805", "dnlp1005", "dnlp0106", "uprk1201", "uprk0502", "uprk0702", "uprk1002", "uprk1102", "uprk0203", "uprk0503", "uprk0803", "uprk0104", "uprk0704", "uprk0804", "uprk1204", "uprk0805", "uprk1005", "uprk0106", "dnrk0502", "dnrk0702", "dnrk1002", "dnrk1102", "dnrk1202", "dnrk0103", "dnrk0203", "dnrk0303", "dnrk0403", "dnrk0503", "dnrk0803", "dnrk0104", "dnrk0704", "dnrk0804", "dnrk1204", "dnrk0805", "dnrk1005", "dnrk0106", "uprt1201", "uprt0502", "uprt0702", "uprt1002", "uprt1102", "uprt0203", "uprt0503", "uprt0803", "uprt0104", "uprt0704", "uprt0804", "uprt1204", "uprt0805", "uprt1005", "uprt0106", "dnrt1201", "dnrt0502", "dnrt0702", "dnrt1002", "dnrt0403", "dnrt0803", "dnrt0104", "dnpt0704", "dnrt0804", "dnrt0805", "dnrt1005", "dnrt0106", "upsd1201", "upsd0502", "upsd0702", "upsd1002", "upsd1102", "upsd0203", "upsd0503", "upsd0803", 
"upsd0104", "upsd0704", "upsd0804", "upsd1204", "upsd0805", "upsd1005", "upsd0106", "dnsd1201", "dnsd0502", "dnsd0702", "dnsd1002", "dnsd1102", "dnsd1202", "dnsd0103", "dnsd0203", "dnsd0303", "dnsd0403", "dnsd0503", "dnsd0803", "dnsd0104", "dnsd0704", "dnsd0804", "dnsd1204", "dnsd0805", "dnsd1005", "dnsd0106") These are the row names of a data frame, and I would like to split each of them up into chunks of two letters, two letters, and then the four numbers. The problem that I am trying to solve is to add the numbers in the columns (unique counts of species) that correspond to a "whole sample": a whole sample consists of one of "1", all six of "2", for a particular date "3". 1: up = unrestored, dn (down) = restored. 2: habitat types: sd, rk, rt, pl, lp, wd. 3: date - self-explanatory. strsplit looks promising, but I don't know what a regular expression is or how to use it, for that matter. -- Stephen Sefick Research Scientist Southeastern Natural Sciences Academy Let's not spend our time and resources thinking about things that are so little or so large that all they really do for us is puff us up and make us feel like gods. We are mammals, and have not exhausted the annoying little problems of being mammals. -K. Mullis
Henrique Dallazuanna
2008-Sep-25 00:57 UTC
[R] Splitting row names up and then adding up the columns associated with criteria from the parts of the site coding (help)
Try this: library(gsubfn) do.call(rbind, strapply(d, "[a-z]{2}|[0-9]{4}")) On Wed, Sep 24, 2008 at 9:47 PM, stephen sefick <ssefick at gmail.com> wrote:> d <- c("upwd1201", "upwd0502", "upwd0702", "upwd1002", "upwd1102", > "upwd0203", "upwd0503", "upwd0803", "upwd0104", "upwd0704", "upwd0804", > "upwd1204", "upwd0805", "upwd1005", "upwd0106", "dnwd1201", "dnwd0502", > "dnwd0702", "dnwd1002", "dnwd1102", "dnwd1202", "dnwd0103", "dnwd0203", > "dnwd0303", "dnwd0403", "dnwd0503", "dnwd0803", "dnwd0104", "dnwd0704", > "dnwd0804", "dnwd1204", "dnwd0805", "dnwd1005", "dnwd0106", "uppl0502", > "uppl0702", "uppl1002", "uppl1102", "uppl0203", "uppl0503", "uppl0803", > "uppl0104", "uppl0804", "uppl1204", "uppl0805", "uppl1005", "uppl0106", > "dnpl0502", "dnpl0702", "dnpl1002", "dnpl1102", "dnpl1202", "dnpl0103", > "dnpl0203", "dnpl0403", "dnpl0503", "dnpl0803", "dnpl0104", "dnpl0704", > "dnpl0804", "dnpl1204", "dnpl0805", "dnpl1005", "dnpl0106", "uplp1201", > "uplp0502", "uplp0702", "uplp1002", "uplp1102", "uplp0203", "uplp0503", > "uplp0803", "uplp0104", "uplp0704", "uplp0804", "uplp1204", "uplp0805", > "uplp1005", "uplp0106", "dnlp1201", "dnlp0502", "dnlp0702", "dnlp1002", > "dnlp1102", "dnlp1202", "dnlp0103", "dnlp0203", "dnlp0303", "dnlp0403", > "dnlp0503", "dnlp0803", "dnlp0104", "dnlp0704", "dnpt0804", "dnlp1204", > "dnlp0805", "dnlp1005", "dnlp0106", "uprk1201", "uprk0502", "uprk0702", > "uprk1002", "uprk1102", "uprk0203", "uprk0503", "uprk0803", "uprk0104", > "uprk0704", "uprk0804", "uprk1204", "uprk0805", "uprk1005", "uprk0106", > "dnrk0502", "dnrk0702", "dnrk1002", "dnrk1102", "dnrk1202", "dnrk0103", > "dnrk0203", "dnrk0303", "dnrk0403", "dnrk0503", "dnrk0803", "dnrk0104", > "dnrk0704", "dnrk0804", "dnrk1204", "dnrk0805", "dnrk1005", "dnrk0106", > "uprt1201", "uprt0502", "uprt0702", "uprt1002", "uprt1102", "uprt0203", > "uprt0503", "uprt0803", "uprt0104", "uprt0704", "uprt0804", "uprt1204", > "uprt0805", "uprt1005", "uprt0106", "dnrt1201", "dnrt0502", "dnrt0702", > 
"dnrt1002", "dnrt0403", "dnrt0803", "dnrt0104", "dnpt0704", "dnrt0804", > "dnrt0805", "dnrt1005", "dnrt0106", "upsd1201", "upsd0502", "upsd0702", > "upsd1002", "upsd1102", "upsd0203", "upsd0503", "upsd0803", "upsd0104", > "upsd0704", "upsd0804", "upsd1204", "upsd0805", "upsd1005", "upsd0106", > "dnsd1201", "dnsd0502", "dnsd0702", "dnsd1002", "dnsd1102", "dnsd1202", > "dnsd0103", "dnsd0203", "dnsd0303", "dnsd0403", "dnsd0503", "dnsd0803", > "dnsd0104", "dnsd0704", "dnsd0804", "dnsd1204", "dnsd0805", "dnsd1005", > "dnsd0106") > > these are the rownames of a dataframe, and I would like to split them > up into chunks of two letters, two letter, and then the four numbers. > The problem that I am trying to solve is to add the numbers in the > columns (unique counts of species) that correspond to a "whole sample" > : > > a whole sample consists of one of "1" all six of "2" for a particular date "3" > > 1 > up= unrestored > down=restored > > 2 > habitat types > sd > rk > rt > pl > lp > wd > > 3 > date - self explanitory > > strsplit looks promising, but I don't know what a regular expression > is or how to use it for that matter > -- > Stephen Sefick > Research Scientist > Southeastern Natural Sciences Academy > > Let's not spend our time and resources thinking about things that are > so little or so large that all they really do for us is puff us up and > make us feel like gods. We are mammals, and have not exhausted the > annoying little problems of being mammals. > > -K. Mullis > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. >-- Henrique Dallazuanna Curitiba-Paraná-Brasil 25° 25' 40" S 49° 16' 22" O