#Hi R-users, #Suppose that I have a data.frame like this: y1 <- rnorm(10) + 6.8 y2 <- rnorm(10) + (1:10*1.7 + 1) y3 <- rnorm(10) + (1:10*6.7 + 3.7) y <- c(y1,y2,y3) x <- rep(1:3,10) f <- gl(2,15, labels=paste("lev", 1:2, sep="")) g <- seq(as.Date("2000/1/1"), by="day", length=30) DF <- data.frame(x=x,y=y, f=f, g=g) DF$g[DF$x == 1] <- NA DF$x[3:6] <- NA DF$wdays <- weekdays(DF$g) DF #For EDA purposes, I would like to calculate frequencies in each variable g <- lapply(DF, function(x) as.data.frame(table(x))) #After this, I would like to cbind these data.frames (in g) into a single data.frame (which I want to export to MS Excel) #do.call(cbind, g) does not seem to work because of the different number of rows in each data.frame. #The resulting data.frame should look like this (only two variables printed here): Rowid;x;Freq.x;y;Freq.y; # etc... 1;1;9;1.69151845313816;1; 2;2;9;5.03748767699799;1; 3;3;8;5.37387749444247;1; 4;Empty;Empty;6.83926626214299;1; 5;Empty;Empty;6.97484558968873;1; 6;Empty;Empty;7.11023821708323;1; 7;Empty;Empty;7.1348316549091;1; 8;Empty;Empty;7.16727166992407;1; 9;Empty;Empty;7.35983428577469;1; 10;Empty;Empty;7.7596470136235;1; 11;Empty;Empty;7.86369414967578;1; 12;Empty;Empty;7.97164674771006;1; 13;Empty;Empty;8.0787295301318;1; 14;Empty;Empty;8.14161030348166;1; 15;Empty;Empty;8.20134832959661;1; 16;Empty;Empty;10.1469115339016;1 17;Empty;Empty;12.7442067301746;1 18;Empty;Empty;14.0865167751202;1 19;Empty;Empty;15.8280312307450;1 20;Empty;Empty;16.0484499360756;1 21;Empty;Empty;17.0795222149999;1 22;Empty;Empty;18.1254057823357;1 23;Empty;Empty;22.7169729331525;1 24;Empty;Empty;30.7237748005358;1 25;Empty;Empty;37.2141271786934;1 26;Empty;Empty;44.4954633229803;1 27;Empty;Empty;50.2302409305761;1 28;Empty;Empty;57.8913405112114;1 29;Empty;Empty;64.849897477945;1 30;Empty;Empty;71.4205263353053;1 #Does anyone have an idea how to do this? #Thanks, #Lauri
This should do it for you by padding out the rows so they are the same length:> # use your 'g' and pad out the rows so they are the same length > str(g)List of 5 $ x :'data.frame': 3 obs. of 2 variables: ..$ x : Factor w/ 3 levels "1","2","3": 1 2 3 ..$ Freq: int [1:3] 9 9 8 $ y :'data.frame': 30 obs. of 2 variables: ..$ x : Factor w/ 30 levels "4.21178116845085",..: 1 2 3 4 5 6 7 8 9 10 ... ..$ Freq: int [1:30] 1 1 1 1 1 1 1 1 1 1 ... $ f :'data.frame': 2 obs. of 2 variables: ..$ x : Factor w/ 2 levels "lev1","lev2": 1 2 ..$ Freq: int [1:2] 15 15 $ g :'data.frame': 20 obs. of 2 variables: ..$ x : Factor w/ 20 levels "2000-01-02","2000-01-03",..: 1 2 3 4 5 6 7 8 9 10 ... ..$ Freq: int [1:20] 1 1 1 1 1 1 1 1 1 1 ... $ wdays:'data.frame': 7 obs. of 2 variables: ..$ x : Factor w/ 7 levels "Friday","Monday",..: 1 2 3 4 5 6 7 ..$ Freq: int [1:7] 2 3 3 4 3 2 3> # determine max nrows > max.rows <- max(sapply(g, nrow)) > g.new <- lapply(g, function(.x){+ if (nrow(.x) < max.rows) .x <- rbind(.x, matrix(NA, ncol=2, nrow=max.rows - nrow(.x), + dimnames=list(NULL, c('x', 'Freq')))) + .x + })> do.call('cbind', g.new)x.x x.Freq y.x y.Freq f.x f.Freq g.x g.Freq wdays.x wdays.Freq 1 1 9 4.21178116845085 1 lev1 15 2000-01-02 1 Friday 2 2 2 9 4.78984323641143 1 lev2 15 2000-01-03 1 Monday 3 3 3 8 5.4787594194582 1 <NA> NA 2000-01-05 1 Saturday 3 4 <NA> NA 5.5853001128225 1 <NA> NA 2000-01-06 1 Sunday 4 5 <NA> NA 5.96437138758995 1 <NA> NA 2000-01-08 1 Thursday 3 6 <NA> NA 5.97953161588198 1 <NA> NA 2000-01-09 1 Tuesday 2 7 <NA> NA 6.17354618925767 1 <NA> NA 2000-01-11 1 Wednesday 3 8 <NA> NA 6.49461161284364 1 <NA> NA 2000-01-12 1 <NA> NA 9 <NA> NA 6.98364332422208 1 <NA> NA 2000-01-14 1 <NA> NA 10 <NA> NA 7.12950777181536 1 <NA> NA 2000-01-15 1 <NA> NA 11 <NA> NA 7.28742905242849 1 <NA> NA 2000-01-17 1 <NA> NA 12 <NA> NA 7.3757813516535 1 <NA> NA 2000-01-18 1 <NA> NA 13 <NA> NA 7.53832470512922 1 <NA> NA 2000-01-20 1 <NA> NA 14 <NA> NA 8.39528080213779 1 <NA> NA 2000-01-21 1 <NA> 
NA 15 <NA> NA 10.6249309181431 1 <NA> NA 2000-01-23 1 <NA> NA 16 <NA> NA 11.1550663909848 1 <NA> NA 2000-01-24 1 <NA> NA 17 <NA> NA 11.3189773716082 1 <NA> NA 2000-01-26 1 <NA> NA 18 <NA> NA 12.8838097369011 1 <NA> NA 2000-01-27 1 <NA> NA 19 <NA> NA 15.5438362106853 1 <NA> NA 2000-01-29 1 <NA> NA 20 <NA> NA 17.1212211950981 1 <NA> NA 2000-01-30 1 <NA> NA 21 <NA> NA 17.8821363007311 1 <NA> NA <NA> NA <NA> NA 22 <NA> NA 18.5939013212175 1 <NA> NA <NA> NA <NA> NA On Dec 1, 2007 8:05 AM, Lauri Nikkinen <lauri.nikkinen at iki.fi> wrote:> #Hi R-users, > #Suppose that I have a data.frame like this: > > y1 <- rnorm(10) + 6.8 > y2 <- rnorm(10) + (1:10*1.7 + 1) > y3 <- rnorm(10) + (1:10*6.7 + 3.7) > y <- c(y1,y2,y3) > x <- rep(1:3,10) > f <- gl(2,15, labels=paste("lev", 1:2, sep="")) > g <- seq(as.Date("2000/1/1"), by="day", length=30) > DF <- data.frame(x=x,y=y, f=f, g=g) > DF$g[DF$x == 1] <- NA > DF$x[3:6] <- NA > DF$wdays <- weekdays(DF$g) > > DF > > #For EDA purposes, I would like to calculate frequences in each variable > g <- lapply(DF, function(x) as.data.frame(table(x))) > > #After this, I would like to cbind these data.frames (in g) into a > single data.frame (which to export to MS Excel) > #do.call(cbind, g) does not seem to work because of the different > number of rows in each data.frame. > #The resulting data.frame shoul look like this (only two variables > printed here): > > Rowid;x;Freq.x;y;Freq.y; # etc... 
> 1;1;9;1.69151845313816;1; > 2;2;9;5.03748767699799;1; > 3;3;8;5.37387749444247;1; > 4;Empty;Empty;6.83926626214299;1; > 5;Empty;Empty;6.97484558968873;1; > 6;Empty;Empty;7.11023821708323;1; > 7;Empty;Empty;7.1348316549091;1; > 8;Empty;Empty;7.16727166992407;1; > 9;Empty;Empty;7.35983428577469;1; > 10;Empty;Empty;7.7596470136235;1; > 11;Empty;Empty;7.86369414967578;1; > 12;Empty;Empty;7.97164674771006;1; > 13;Empty;Empty;8.0787295301318;1; > 14;Empty;Empty;8.14161030348166;1; > 15;Empty;Empty;8.20134832959661;1; > 16;Empty;Empty;10.1469115339016;1 > 17;Empty;Empty;12.7442067301746;1 > 18;Empty;Empty;14.0865167751202;1 > 19;Empty;Empty;15.8280312307450;1 > 20;Empty;Empty;16.0484499360756;1 > 21;Empty;Empty;17.0795222149999;1 > 22;Empty;Empty;18.1254057823357;1 > 23;Empty;Empty;22.7169729331525;1 > 24;Empty;Empty;30.7237748005358;1 > 25;Empty;Empty;37.2141271786934;1 > 26;Empty;Empty;44.4954633229803;1 > 27;Empty;Empty;50.2302409305761;1 > 28;Empty;Empty;57.8913405112114;1 > 29;Empty;Empty;64.849897477945;1 > 30;Empty;Empty;71.4205263353053;1 > > > #Anyone have an idea how to do this? > > #Thanks, > #Lauri > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. >-- Jim Holtman Cincinnati, OH +1 513 646 9390 What is the problem you are trying to solve?
Moshe Olshansky
2007-Dec-03 04:39 UTC
[R] How to cbind DF:s with differing number of rows?
Hi Lauri, I see two possibilities. Let's say that you have a <-c(1:5) b <- c(1:7) c <- c(1:4) l <- list(a,b,c) and you want to create an Excel file with column A (1) containing a (5 rows), column B (2) containing b and column C (3) containing c. One possibility would be to write each ROW of the output file separately (7 rows in your case), writing nothing (just \t ) when you reach the end of the particular item in the list. Another possibility would be to use the xlsReadWritePro package, which allows you to write each item (a,b,c) (or list element - l[[1]],l[[2]],... etc) to a given column of the output file. Regards, Moshe. --- Lauri Nikkinen <lauri.nikkinen at iki.fi> wrote: > #Hi R-users, > #Suppose that I have a data.frame like this: > > y1 <- rnorm(10) + 6.8 > y2 <- rnorm(10) + (1:10*1.7 + 1) > y3 <- rnorm(10) + (1:10*6.7 + 3.7) > y <- c(y1,y2,y3) > x <- rep(1:3,10) > f <- gl(2,15, labels=paste("lev", 1:2, sep="")) > g <- seq(as.Date("2000/1/1"), by="day", length=30) > DF <- data.frame(x=x,y=y, f=f, g=g) > DF$g[DF$x == 1] <- NA > DF$x[3:6] <- NA > DF$wdays <- weekdays(DF$g) > > DF > > #For EDA purposes, I would like to calculate > frequences in each variable > g <- lapply(DF, function(x) as.data.frame(table(x))) > > #After this, I would like to cbind these data.frames > (in g) into a > single data.frame (which to export to MS Excel) > #do.call(cbind, g) does not seem to work because of > the different > number of rows in each data.frame. > #The resulting data.frame shoul look like this (only > two variables > printed here): > > Rowid;x;Freq.x;y;Freq.y; # etc... 
> 1;1;9;1.69151845313816;1; > 2;2;9;5.03748767699799;1; > 3;3;8;5.37387749444247;1; > 4;Empty;Empty;6.83926626214299;1; > 5;Empty;Empty;6.97484558968873;1; > 6;Empty;Empty;7.11023821708323;1; > 7;Empty;Empty;7.1348316549091;1; > 8;Empty;Empty;7.16727166992407;1; > 9;Empty;Empty;7.35983428577469;1; > 10;Empty;Empty;7.7596470136235;1; > 11;Empty;Empty;7.86369414967578;1; > 12;Empty;Empty;7.97164674771006;1; > 13;Empty;Empty;8.0787295301318;1; > 14;Empty;Empty;8.14161030348166;1; > 15;Empty;Empty;8.20134832959661;1; > 16;Empty;Empty;10.1469115339016;1 > 17;Empty;Empty;12.7442067301746;1 > 18;Empty;Empty;14.0865167751202;1 > 19;Empty;Empty;15.8280312307450;1 > 20;Empty;Empty;16.0484499360756;1 > 21;Empty;Empty;17.0795222149999;1 > 22;Empty;Empty;18.1254057823357;1 > 23;Empty;Empty;22.7169729331525;1 > 24;Empty;Empty;30.7237748005358;1 > 25;Empty;Empty;37.2141271786934;1 > 26;Empty;Empty;44.4954633229803;1 > 27;Empty;Empty;50.2302409305761;1 > 28;Empty;Empty;57.8913405112114;1 > 29;Empty;Empty;64.849897477945;1 > 30;Empty;Empty;71.4205263353053;1 > > > #Anyone have an idea how to do this? > > #Thanks, > #Lauri > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide > http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, > reproducible code. >