Hello List, My goal is to apply a user-defined function on several columns of a data frame. When testing the code on a reproducible example below, I get the following error message.> #now Write a new function using the above cut ()/quantile function to apply on different columns of the data frame > > CutQuintiles <- function(x) {+ cut (test1$x,quantile (test1$x, (0:5/5)),include.lowest=TRUE) + }> > #apply the CutQuintile () on every odd-numbered columns of the "test1" data frame > newcols <- sapply(test1 [, seq (1,6,2)], CutQuintiles)Error in cut.default(test1$x, quantile(test1$x, (0:5/5)), include.lowest = TRUE) : 'x' must be numeric I would appreciate receiving your advice. Thanks, Pradip ###### The reproducible example begins here test1 <- read.table (text"State,ObtMj_P,ObtMj_SE,ExpPrevMed_P,ExpPrevMed_SE,ParMon_P,ParMon_SE Alabama,49.60,1.37,80.00,0.91,12.10,0.68 Alaska,55.00,1.41,81.80,1.08,12.40,0.90 Arizona,52.50,1.56,79.60,1.20,15.80,1.08 Arkansas,50.50,1.22,78.00,0.78,12.80,0.72 California,51.10,0.65,80.50,0.53,13.00,0.41 Colorado,55.10,1.26,81.70,1.03,12.10,0.72 Connecticut,56.30,1.28,85.00,0.93,14.60,0.77 Delaware,53.60,1.30,79.50,1.04,14.70,0.97 District of Columbia,53.50,1.22,76.20,1.03,14.30,1.13 Florida,52.70,0.67,78.90,0.52,14.10,0.45 Georgia,52.50,1.15,79.30,1.02,15.90,0.98 Hawaii,49.40,1.33,83.80,1.12,16.00,1.06 Idaho,48.30,1.23,82.40,0.99,11.90,0.74 Illinois,52.70,0.63,81.00,0.46,13.60,0.40 Indiana,49.60,1.16,80.90,0.91,12.60,0.82 Iowa,46.30,1.37,82.10,1.01,13.60,0.87 Kansas,44.30,1.43,79.20,0.98,12.90,0.79 Kentucky,52.90,1.37,78.70,1.05,14.60,0.98 Louisiana,49.70,1.23,76.80,1.06,14.50,0.76 Maine,55.60,1.44,82.90,0.93,16.70,0.83 Maryland,53.90,1.46,83.60,0.95,14.00,0.80 Massachusetts,55.40,1.41,81.00,1.15,14.70,0.80 Michigan,52.40,0.62,80.50,0.47,15.00,0.43 Minnesota,51.50,1.20,84.40,0.87,14.40,0.86 Mississippi,43.20,1.14,76.60,0.91,12.30,0.78 Missouri,48.70,1.20,80.30,0.90,13.70,0.12 Montana,56.40,1.16,83.70,0.95,12.10,0.68 
Nebraska,45.70,1.51,83.40,0.95,12.40,0.90 Nevada,54.20,1.17,80.60,1.07,15.80,1.08 New Hampshire,56.10,1.30,83.30,0.93,12.80,0.72 New Jersey,53.20,1.45,83.70,0.95,13.00,0.41 New Mexico,57.60,1.34,78.90,1.03,12.10,0.72 New York,53.70,0.67,82.60,0.48,14.60,0.77 North Carolina,52.20,1.26,81.90,0.84,14.70,0.97 North Dakota,48.60,1.34,84.20,0.88,14.30,1.13 Ohio,50.90,0.61,82.70,0.49,14.10,0.45 Oklahoma,47.20,1.42,78.80,1.33,15.90,0.98 Oregon,54.00,1.35,80.60,1.14,16.00,1.06 Pennsylvania,53.00,0.63,79.90,0.47,11.90,0.74 Rhode Island,57.20,1.20,79.50,1.02,13.60,0.40 South Carolina,50.50,1.21,79.50,0.95,12.60,0.82 South Dakota,43.40,1.30,81.70,1.05,13.60,0.87 Tennessee,48.90,1.35,78.40,1.35,12.90,0.79 Texas,48.70,0.62,79.00,0.48,14.60,0.98 Utah,42.00,1.49,85.00,0.93,14.50,0.76 Vermont,58.70,1.24,83.70,0.84,16.70,0.83 Virginia,51.80,1.18,82.00,1.04,14.00,0.80 Washington,53.50,1.39,84.10,0.96,14.70,0.80 West Virginia,52.80,1.07,79.80,0.93,15.00,0.43 Wisconsin,49.90,1.50,83.50,1.02,14.40,0.86 Wyoming,49.20,1.29,82.00,0.85,12.30,0.78 ", sep=",", row.names='State', header=TRUE, as.is=TRUE) # Verify if The following function ctagorizes the "obtmj_p" values into one of the 5 equal sized groups- works fine. cut (test1$obtmj_p,quantile (test1$obtmj_p, (0:5/5)),include.lowest=TRUE) #now Write a new function using the above cut ()/quantile function to apply on different columns of the data frame CutQuintiles <- function(x) { cut (test1$x,quantile (test1$x, (0:5/5)),include.lowest=TRUE) } #apply the CutQuintile () on every odd-numbered columns of the "test1" data frame newcols <- sapply(test1 [, seq (1,6,2)], CutQuintiles) # name 3 new columns based on the odd-numbered columns names(newcols) <- paste (names(test1 [, seq (1,6,2)]), "_cat") ###### Pradip K. 
Muhuri, PhD Statistician Substance Abuse & Mental Health Services Administration The Center for Behavioral Health Statistics and Quality Division of Population Surveys 1 Choke Cherry Road, Room 2-1071 Rockville, MD 20857 Tel: 240-276-1070 Fax: 240-276-1260 e-mail: Pradip.Muhuri@samhsa.hhs.gov<mailto:Pradip.Muhuri@samhsa.hhs.gov> The Center for Behavioral Health Statistics and Quality wants your feedback. Please click on the following link to complete a brief customer survey: http://cbhsqsurvey.samhsa.gov<http://cbhsqsurvey.samhsa.gov/> [[alternative HTML version deleted]]
On Jan 8, 2013, at 9:11 AM, Muhuri, Pradip (SAMHSA/CBHSQ) wrote: > Hello List, > > My goal is to apply a user-defined function on several columns of a > data frame. When testing the code on a reproducible example below, I > get the following error message. > >> #now Write a new function using the above cut ()/quantile function >> to apply on different columns of the data frame >> >> CutQuintiles <- function(x) { > + cut (test1$x,quantile (test1$x, (0:5/5)),include.lowest=TRUE) > + } >> >> #apply the CutQuintile () on every odd-numbered columns of the >> "test1" data frame >> newcols <- sapply(test1 [, seq (1,6,2)], CutQuintiles) > Error in cut.default(test1$x, quantile(test1$x, (0:5/5)), > include.lowest = TRUE) : > 'x' must be numeric > > I would appreciate receiving your advice. > Take the "test1$" out of that function's code. You are reaching outside the function when you should only be working on the "x" object that gets passed into it. > Thanks, > > Pradip > > ###### The reproducible example begins here > > test1 <- read.table (text> "State,ObtMj_P,ObtMj_SE,ExpPrevMed_P,ExpPrevMed_SE,ParMon_P,ParMon_SE > Alabama,49.60,1.37,80.00,0.91,12.10,0.68 > Alaska,55.00,1.41,81.80,1.08,12.40,0.90 > Arizona,52.50,1.56,79.60,1.20,15.80,1.08 > Arkansas,50.50,1.22,78.00,0.78,12.80,0.72 > California,51.10,0.65,80.50,0.53,13.00,0.41 > Colorado,55.10,1.26,81.70,1.03,12.10,0.72 > Connecticut,56.30,1.28,85.00,0.93,14.60,0.77 > Delaware,53.60,1.30,79.50,1.04,14.70,0.97 > District of Columbia,53.50,1.22,76.20,1.03,14.30,1.13 > Florida,52.70,0.67,78.90,0.52,14.10,0.45 > Georgia,52.50,1.15,79.30,1.02,15.90,0.98 > Hawaii,49.40,1.33,83.80,1.12,16.00,1.06 > Idaho,48.30,1.23,82.40,0.99,11.90,0.74 > Illinois,52.70,0.63,81.00,0.46,13.60,0.40 > Indiana,49.60,1.16,80.90,0.91,12.60,0.82 > Iowa,46.30,1.37,82.10,1.01,13.60,0.87 > Kansas,44.30,1.43,79.20,0.98,12.90,0.79 > Kentucky,52.90,1.37,78.70,1.05,14.60,0.98 > Louisiana,49.70,1.23,76.80,1.06,14.50,0.76 > Maine,55.60,1.44,82.90,0.93,16.70,0.83 > 
Maryland,53.90,1.46,83.60,0.95,14.00,0.80 > Massachusetts,55.40,1.41,81.00,1.15,14.70,0.80 > Michigan,52.40,0.62,80.50,0.47,15.00,0.43 > Minnesota,51.50,1.20,84.40,0.87,14.40,0.86 > Mississippi,43.20,1.14,76.60,0.91,12.30,0.78 > Missouri,48.70,1.20,80.30,0.90,13.70,0.12 > Montana,56.40,1.16,83.70,0.95,12.10,0.68 > Nebraska,45.70,1.51,83.40,0.95,12.40,0.90 > Nevada,54.20,1.17,80.60,1.07,15.80,1.08 > New Hampshire,56.10,1.30,83.30,0.93,12.80,0.72 > New Jersey,53.20,1.45,83.70,0.95,13.00,0.41 > New Mexico,57.60,1.34,78.90,1.03,12.10,0.72 > New York,53.70,0.67,82.60,0.48,14.60,0.77 > North Carolina,52.20,1.26,81.90,0.84,14.70,0.97 > North Dakota,48.60,1.34,84.20,0.88,14.30,1.13 > Ohio,50.90,0.61,82.70,0.49,14.10,0.45 > Oklahoma,47.20,1.42,78.80,1.33,15.90,0.98 > Oregon,54.00,1.35,80.60,1.14,16.00,1.06 > Pennsylvania,53.00,0.63,79.90,0.47,11.90,0.74 > Rhode Island,57.20,1.20,79.50,1.02,13.60,0.40 > South Carolina,50.50,1.21,79.50,0.95,12.60,0.82 > South Dakota,43.40,1.30,81.70,1.05,13.60,0.87 > Tennessee,48.90,1.35,78.40,1.35,12.90,0.79 > Texas,48.70,0.62,79.00,0.48,14.60,0.98 > Utah,42.00,1.49,85.00,0.93,14.50,0.76 > Vermont,58.70,1.24,83.70,0.84,16.70,0.83 > Virginia,51.80,1.18,82.00,1.04,14.00,0.80 > Washington,53.50,1.39,84.10,0.96,14.70,0.80 > West Virginia,52.80,1.07,79.80,0.93,15.00,0.43 > Wisconsin,49.90,1.50,83.50,1.02,14.40,0.86 > Wyoming,49.20,1.29,82.00,0.85,12.30,0.78 > ", sep=",", row.names='State', header=TRUE, as.is=TRUE) > > > # Verify if The following function ctagorizes the "obtmj_p" values > into one of the 5 equal sized groups- works fine. 
> > cut (test1$obtmj_p,quantile (test1$obtmj_p, > (0:5/5)),include.lowest=TRUE) > > > #now Write a new function using the above cut ()/quantile function > to apply on different columns of the data frame > > CutQuintiles <- function(x) { > cut (test1$x,quantile (test1$x, (0:5/5)),include.lowest=TRUE) > } > > #apply the CutQuintile () on every odd-numbered columns of the > "test1" data frame > newcols <- sapply(test1 [, seq (1,6,2)], CutQuintiles) > > # name 3 new columns based on the odd-numbered columns > names(newcols) <- paste (names(test1 [, seq (1,6,2)]), "_cat") > > ###### > Pradip K. Muhuri, PhD > Statistician > Substance Abuse & Mental Health Services Administration > The Center for Behavioral Health Statistics and Quality > Division of Population Surveys > 1 Choke Cherry Road, Room 2-1071 > Rockville, MD 20857 > > Tel: 240-276-1070 > Fax: 240-276-1260 > e-mail: Pradip.Muhuri at samhsa.hhs.gov<mailto:Pradip.Muhuri at samhsa.hhs.gov > > > > The Center for Behavioral Health Statistics and Quality your > feedback. Please click on the following link to complete a brief > customer survey: http://cbhsqsurvey.samhsa.gov<http://cbhsqsurvey.samhsa.gov/ > > > > > [[alternative HTML version deleted]] > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.David Winsemius, MD Alameda, CA, USA
Hello List, Last time, Arun's following solution worked to create 3 new columns (1,3,5). Now how would I tweak this function to create corresponding (additional) columns (7,8,9) of mode factor (levels = 1,2,3,4,5)? Thanks for your continued support. Pradip ####### cut and paste from the reproducible example CutQuintiles <- function( x) { cut (x,quantile (x, (0:5/5)),include.lowest=TRUE) } #apply the CutQuintile () on every odd-numbered columns of the "test1" data frame test1$newcols <- sapply(test1 [, seq (1,6,2)], CutQuintiles) # name 3 new columns based on the odd-numbered columns names(test1$newcols) <- paste (names(test1 [, seq (1,6,2)]), "_cat") ###### Reproducible Example test1 <- read.table (text"State,ObtMj_P,ObtMj_SE,ExpPrevMed_P,ExpPrevMed_SE,ParMon_P,ParMon_SE Alabama,49.60,1.37,80.00,0.91,12.10,0.68 Alaska,55.00,1.41,81.80,1.08,12.40,0.90 Arizona,52.50,1.56,79.60,1.20,15.80,1.08 Arkansas,50.50,1.22,78.00,0.78,12.80,0.72 California,51.10,0.65,80.50,0.53,13.00,0.41 Colorado,55.10,1.26,81.70,1.03,12.10,0.72 Connecticut,56.30,1.28,85.00,0.93,14.60,0.77 Delaware,53.60,1.30,79.50,1.04,14.70,0.97 District of Columbia,53.50,1.22,76.20,1.03,14.30,1.13 Florida,52.70,0.67,78.90,0.52,14.10,0.45 Georgia,52.50,1.15,79.30,1.02,15.90,0.98 Hawaii,49.40,1.33,83.80,1.12,16.00,1.06 Idaho,48.30,1.23,82.40,0.99,11.90,0.74 Illinois,52.70,0.63,81.00,0.46,13.60,0.40 Indiana,49.60,1.16,80.90,0.91,12.60,0.82 Iowa,46.30,1.37,82.10,1.01,13.60,0.87 Kansas,44.30,1.43,79.20,0.98,12.90,0.79 Kentucky,52.90,1.37,78.70,1.05,14.60,0.98 Louisiana,49.70,1.23,76.80,1.06,14.50,0.76 Maine,55.60,1.44,82.90,0.93,16.70,0.83 Maryland,53.90,1.46,83.60,0.95,14.00,0.80 Massachusetts,55.40,1.41,81.00,1.15,14.70,0.80 Michigan,52.40,0.62,80.50,0.47,15.00,0.43 Minnesota,51.50,1.20,84.40,0.87,14.40,0.86 Mississippi,43.20,1.14,76.60,0.91,12.30,0.78 Missouri,48.70,1.20,80.30,0.90,13.70,0.12 Montana,56.40,1.16,83.70,0.95,12.10,0.68 Nebraska,45.70,1.51,83.40,0.95,12.40,0.90 
Nevada,54.20,1.17,80.60,1.07,15.80,1.08 New Hampshire,56.10,1.30,83.30,0.93,12.80,0.72 New Jersey,53.20,1.45,83.70,0.95,13.00,0.41 New Mexico,57.60,1.34,78.90,1.03,12.10,0.72 New York,53.70,0.67,82.60,0.48,14.60,0.77 North Carolina,52.20,1.26,81.90,0.84,14.70,0.97 North Dakota,48.60,1.34,84.20,0.88,14.30,1.13 Ohio,50.90,0.61,82.70,0.49,14.10,0.45 Oklahoma,47.20,1.42,78.80,1.33,15.90,0.98 Oregon,54.00,1.35,80.60,1.14,16.00,1.06 Pennsylvania,53.00,0.63,79.90,0.47,11.90,0.74 Rhode Island,57.20,1.20,79.50,1.02,13.60,0.40 South Carolina,50.50,1.21,79.50,0.95,12.60,0.82 South Dakota,43.40,1.30,81.70,1.05,13.60,0.87 Tennessee,48.90,1.35,78.40,1.35,12.90,0.79 Texas,48.70,0.62,79.00,0.48,14.60,0.98 Utah,42.00,1.49,85.00,0.93,14.50,0.76 Vermont,58.70,1.24,83.70,0.84,16.70,0.83 Virginia,51.80,1.18,82.00,1.04,14.00,0.80 Washington,53.50,1.39,84.10,0.96,14.70,0.80 West Virginia,52.80,1.07,79.80,0.93,15.00,0.43 Wisconsin,49.90,1.50,83.50,1.02,14.40,0.86 Wyoming,49.20,1.29,82.00,0.85,12.30,0.78 ", sep=",", row.names='State', header=TRUE, as.is=TRUE) # change names () to lower case names (test1) <- tolower (names (test1)) #Write a cut/quantile function to apply on different columns of the data frame CutQuintiles <- function( x) { cut (x,quantile (x, (0:5/5)),include.lowest=TRUE) } #apply the CutQuintile () on every odd-numbered columns of the "test1" data frame test1$newcols <- sapply(test1 [, seq (1,6,2)], CutQuintiles) # name 3 new columns based on the odd-numbered columns names(test1$newcols) <- paste (names(test1 [, seq (1,6,2)]), "_cat") dim (test1) options (width=100) test1 [[alternative HTML version deleted]]