Hi, I need a fast way to split a data.frame (and matrix) into a list of columns. For matrices, split(x, col(x)) works (which can then be done in C for speed-up, if necessary), but for a data.frame? split(iris, col(iris)) does not work as expected (?). The outcome should be lapply(seq_len(ncol(iris)), function(j) iris[,j]) and not require additional packages (if possible). Thanks & cheers, Marius PS: Below is the C code for matrices. Not sure how easy it would be to extend that to data.frames (?) SEXP col_split(SEXP x) { /* Setup */ int *dims = INTEGER(getAttrib(x, R_DimSymbol)); int n = dims[0], d = dims[1]; SEXP res = PROTECT(allocVector(VECSXP, d)); SEXP ref; int i = 0, j, k; /* Distinguish int/real matrices */ switch (TYPEOF(x)) { case INTSXP: for(j = 0; j < d; j++) { SET_VECTOR_ELT(res, j, allocVector(INTSXP, n)); int *e = INTEGER(VECTOR_ELT(res, j)); for(k = 0 ; k < n ; i++, k++) { e[k] = INTEGER(x)[i]; } } break; case REALSXP: for(j = 0; j < d; j++) { SET_VECTOR_ELT(res, j, allocVector(REALSXP, n)); double *e = REAL(VECTOR_ELT(res, j)); for(k = 0 ; k < n ; i++, k++) { e[k] = REAL(x)[i]; } } break; case LGLSXP: for(j = 0; j < d; j++) { SET_VECTOR_ELT(res, j, allocVector(LGLSXP, n)); int *e = LOGICAL(VECTOR_ELT(res, j)); for(k = 0 ; k < n ; i++, k++) { e[k] = LOGICAL(x)[i]; } } break; case STRSXP: for(j = 0; j < d; j++) { ref = allocVector(STRSXP, n); SET_VECTOR_ELT(res, j, ref); ref = VECTOR_ELT(res, j); for(k = 0 ; k < n ; i++, k++) { SET_STRING_ELT(ref, k, STRING_ELT(x, i)); } } break; default: error("Wrong type of 'x': %s", CHAR(type2str_nowarn(TYPEOF(x)))); } /* Return */ UNPROTECT(1); return(res); }
Need to re-read the "Introduction to R". Data frames ARE lists of columns. So to convert a matrix to a list of vectors use as.data.frame( m ) -- Sent from my phone. Please excuse my brevity. On August 28, 2016 11:14:20 PM PDT, Marius Hofert <marius.hofert at uwaterloo.ca> wrote:>Hi, > >I need a fast way to split a data.frame (and matrix) into a list of >columns. For matrices, split(x, col(x)) works (which can then be done >in C for speed-up, if necessary), but for a data.frame? split(iris, >col(iris)) does not work as expected (?). >The outcome should be lapply(seq_len(ncol(iris)), function(j) >iris[,j]) and not require additional packages (if possible). > >Thanks & cheers, >Marius > >PS: Below is the C code for matrices. Not sure how easy it would be to >extend that to data.frames (?) > >SEXP col_split(SEXP x) >{ > /* Setup */ > int *dims = INTEGER(getAttrib(x, R_DimSymbol)); > int n = dims[0], d = dims[1]; > SEXP res = PROTECT(allocVector(VECSXP, d)); > SEXP ref; > int i = 0, j, k; > > /* Distinguish int/real matrices */ > switch (TYPEOF(x)) { > case INTSXP: > for(j = 0; j < d; j++) { > SET_VECTOR_ELT(res, j, allocVector(INTSXP, n)); > int *e = INTEGER(VECTOR_ELT(res, j)); > for(k = 0 ; k < n ; i++, k++) { > e[k] = INTEGER(x)[i]; > } > } > break; > case REALSXP: > for(j = 0; j < d; j++) { > SET_VECTOR_ELT(res, j, allocVector(REALSXP, n)); > double *e = REAL(VECTOR_ELT(res, j)); > for(k = 0 ; k < n ; i++, k++) { > e[k] = REAL(x)[i]; > } > } > break; > case LGLSXP: > for(j = 0; j < d; j++) { > SET_VECTOR_ELT(res, j, allocVector(LGLSXP, n)); > int *e = LOGICAL(VECTOR_ELT(res, j)); > for(k = 0 ; k < n ; i++, k++) { > e[k] = LOGICAL(x)[i]; > } > } > break; > case STRSXP: > for(j = 0; j < d; j++) { >ref = allocVector(STRSXP, n); > SET_VECTOR_ELT(res, j, ref); > ref = VECTOR_ELT(res, j); > for(k = 0 ; k < n ; i++, k++) { > SET_STRING_ELT(ref, k, STRING_ELT(x, i)); > } > } > break; >default: error("Wrong type of 'x': %s", >CHAR(type2str_nowarn(TYPEOF(x)))); > } > > /* 
Return */ > UNPROTECT(1); > return(res); >} > >______________________________________________ >R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see >https://stat.ethz.ch/mailman/listinfo/r-help >PLEASE do read the posting guide >http://www.R-project.org/posting-guide.html >and provide commented, minimal, self-contained, reproducible code.
> On Aug 28, 2016, at 11:14 PM, Marius Hofert <marius.hofert at uwaterloo.ca> wrote: > > Hi, > > I need a fast way to split a data.frame (and matrix) into a list of > columns. This is a bit of a puzzle since data.frame objects are by definition "lists of columns". If you want a data.frame object (say its name is dat) to _only_ be a list of columns, then use `dat <- unclass(dat)`. The split.data.frame function splits by rows since that is the most desired and expected behavior and because the authors of S/R probably thought there was no point in making the split "by columns" when it already was. -- David. > For matrices, split(x, col(x)) works (which can then be done > in C for speed-up, if necessary), but for a data.frame? split(iris, > col(iris)) does not work as expected (?). > The outcome should be lapply(seq_len(ncol(iris)), function(j) > iris[,j]) and not require additional packages (if possible). > > Thanks & cheers, > Marius > > PS: Below is the C code for matrices. Not sure how easy it would be to > extend that to data.frames (?) 
> > SEXP col_split(SEXP x) > { > /* Setup */ > int *dims = INTEGER(getAttrib(x, R_DimSymbol)); > int n = dims[0], d = dims[1]; > SEXP res = PROTECT(allocVector(VECSXP, d)); > SEXP ref; > int i = 0, j, k; > > /* Distinguish int/real matrices */ > switch (TYPEOF(x)) { > case INTSXP: > for(j = 0; j < d; j++) { > SET_VECTOR_ELT(res, j, allocVector(INTSXP, n)); > int *e = INTEGER(VECTOR_ELT(res, j)); > for(k = 0 ; k < n ; i++, k++) { > e[k] = INTEGER(x)[i]; > } > } > break; > case REALSXP: > for(j = 0; j < d; j++) { > SET_VECTOR_ELT(res, j, allocVector(REALSXP, n)); > double *e = REAL(VECTOR_ELT(res, j)); > for(k = 0 ; k < n ; i++, k++) { > e[k] = REAL(x)[i]; > } > } > break; > case LGLSXP: > for(j = 0; j < d; j++) { > SET_VECTOR_ELT(res, j, allocVector(LGLSXP, n)); > int *e = LOGICAL(VECTOR_ELT(res, j)); > for(k = 0 ; k < n ; i++, k++) { > e[k] = LOGICAL(x)[i]; > } > } > break; > case STRSXP: > for(j = 0; j < d; j++) { > ref = allocVector(STRSXP, n); > SET_VECTOR_ELT(res, j, ref); > ref = VECTOR_ELT(res, j); > for(k = 0 ; k < n ; i++, k++) { > SET_STRING_ELT(ref, k, STRING_ELT(x, i)); > } > } > break; > default: error("Wrong type of 'x': %s", CHAR(type2str_nowarn(TYPEOF(x)))); > } > > /* Return */ > UNPROTECT(1); > return(res); > } > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.David Winsemius Alameda, CA, USA
^^??????),,, -----Original Message----- From: "David Winsemius" <dwinsemius at comcast.net> Sent: 29-08-2016 11:59 To: "Marius Hofert" <marius.hofert at uwaterloo.ca> Cc: "R-help" <r-help at r-project.org> Subject: Re: [R] How to split a data.frame into its columns? > On Aug 28, 2016, at 11:14 PM, Marius Hofert <marius.hofert at uwaterloo.ca> wrote: > > Hi, > > I need a fast way to split a data.frame (and matrix) into a list of > columns. This is a bit of a puzzle since data.frame objects are by definition "lists of columns". If you want a data.frame object (say its name is dat) to _only_ be a list of columns, then use `dat <- unclass(dat)`. The split.data.frame function splits by rows since that is the most desired and expected behavior and because the authors of S/R probably thought there was no point in making the split "by columns" when it already was. -- David. > For matrices, split(x, col(x)) works (which can then be done > in C for speed-up, if necessary), but for a data.frame? split(iris, > col(iris)) does not work as expected (?). > The outcome should be lapply(seq_len(ncol(iris)), function(j) > iris[,j]) and not require additional packages (if possible). > > Thanks & cheers, > Marius > > PS: Below is the C code for matrices. Not sure how easy it would be to > extend that to data.frames (?) 
> > SEXP col_split(SEXP x) > { > /* Setup */ > int *dims = INTEGER(getAttrib(x, R_DimSymbol)); > int n = dims[0], d = dims[1]; > SEXP res = PROTECT(allocVector(VECSXP, d)); > SEXP ref; > int i = 0, j, k; > > /* Distinguish int/real matrices */ > switch (TYPEOF(x)) { > case INTSXP: > for(j = 0; j < d; j++) { > SET_VECTOR_ELT(res, j, allocVector(INTSXP, n)); > int *e = INTEGER(VECTOR_ELT(res, j)); > for(k = 0 ; k < n ; i++, k++) { > e[k] = INTEGER(x)[i]; > } > } > break; > case REALSXP: > for(j = 0; j < d; j++) { > SET_VECTOR_ELT(res, j, allocVector(REALSXP, n)); > double *e = REAL(VECTOR_ELT(res, j)); > for(k = 0 ; k < n ; i++, k++) { > e[k] = REAL(x)[i]; > } > } > break; > case LGLSXP: > for(j = 0; j < d; j++) { > SET_VECTOR_ELT(res, j, allocVector(LGLSXP, n)); > int *e = LOGICAL(VECTOR_ELT(res, j)); > for(k = 0 ; k < n ; i++, k++) { > e[k] = LOGICAL(x)[i]; > } > } > break; > case STRSXP: > for(j = 0; j < d; j++) { > ref = allocVector(STRSXP, n); > SET_VECTOR_ELT(res, j, ref); > ref = VECTOR_ELT(res, j); > for(k = 0 ; k < n ; i++, k++) { > SET_STRING_ELT(ref, k, STRING_ELT(x, i)); > } > } > break; > default: error("Wrong type of 'x': %s", CHAR(type2str_nowarn(TYPEOF(x)))); > } > > /* Return */ > UNPROTECT(1); > return(res); > } > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.David Winsemius Alameda, CA, USA ______________________________________________ R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code. [[alternative HTML version deleted]]
Hi David and Jeff, Thanks for your quick help; unclass() was precisely what I was looking for. Cheers, M On Mon, Aug 29, 2016 at 10:39 AM, aditya pant <adityapant1 at gmail.com> wrote: > > ^^??????),,, > ________________________________ > From: David Winsemius > Sent: 29-08-2016 11:59 > To: Marius Hofert > Cc: R-help > Subject: Re: [R] How to split a data.frame into its columns? > > >> On Aug 28, 2016, at 11:14 PM, Marius Hofert <marius.hofert at uwaterloo.ca> >> wrote: >> >> Hi, >> >> I need a fast way to split a data.frame (and matrix) into a list of >> columns. > > This is a bit of a puzzle since data.frame objects are by definition "lists > of columns". > > If you want a data.frame object (say its name is dat) to _only_ be a list of > columns then > > dat <- unclass(dat) > > The split.data.frame function splits by rows since that is the most desired > and expected behavior and because the authors of S/R probably thought there > was no point in making the split "by columns" when it already was. > > -- > David. > >> For matrices, split(x, col(x)) works (which can then be done >> in C for speed-up, if necessary), but for a data.frame? split(iris, >> col(iris)) does not work as expected (?). >> The outcome should be lapply(seq_len(ncol(iris)), function(j) >> iris[,j]) and not require additional packages (if possible). >> >> Thanks & cheers, >> Marius >> >> PS: Below is the C code for matrices. Not sure how easy it would be to >> extend that to data.frames (?) 
>> >> SEXP col_split(SEXP x) >> { >> /* Setup */ >> int *dims = INTEGER(getAttrib(x, R_DimSymbol)); >> int n = dims[0], d = dims[1]; >> SEXP res = PROTECT(allocVector(VECSXP, d)); >> SEXP ref; >> int i = 0, j, k; >> >> /* Distinguish int/real matrices */ >> switch (TYPEOF(x)) { >> case INTSXP: >> for(j = 0; j < d; j++) { >> SET_VECTOR_ELT(res, j, allocVector(INTSXP, n)); >> int *e = INTEGER(VECTOR_ELT(res, j)); >> for(k = 0 ; k < n ; i++, k++) { >> e[k] = INTEGER(x)[i]; >> } >> } >> break; >> case REALSXP: >> for(j = 0; j < d; j++) { >> SET_VECTOR_ELT(res, j, allocVector(REALSXP, n)); >> double *e = REAL(VECTOR_ELT(res, j)); >> for(k = 0 ; k < n ; i++, k++) { >> e[k] = REAL(x)[i]; >> } >> } >> break; >> case LGLSXP: >> for(j = 0; j < d; j++) { >> SET_VECTOR_ELT(res, j, allocVector(LGLSXP, n)); >> int *e = LOGICAL(VECTOR_ELT(res, j)); >> for(k = 0 ; k < n ; i++, k++) { >> e[k] = LOGICAL(x)[i]; >> } >> } >> break; >> case STRSXP: >> for(j = 0; j < d; j++) { >> ref = allocVector(STRSXP, n); >> SET_VECTOR_ELT(res, j, ref); >> ref = VECTOR_ELT(res, j); >> for(k = 0 ; k < n ; i++, k++) { >> SET_STRING_ELT(ref, k, STRING_ELT(x, i)); >> } >> } >> break; >> default: error("Wrong type of 'x': %s", >> CHAR(type2str_nowarn(TYPEOF(x)))); >> } >> >> /* Return */ >> UNPROTECT(1); >> return(res); >> } >> >> ______________________________________________ >> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see >> https://stat.ethz.ch/mailman/listinfo/r-help >> PLEASE do read the posting guide >> http://www.R-project.org/posting-guide.html >> and provide commented, minimal, self-contained, reproducible code. > > David Winsemius > Alameda, CA, USA > > ______________________________________________ > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.