Dear R experts, I got help to build a loop but there is a bug inside it that causes one part of the mechanism to fail. It should grow once, but if keep growing on rows where $ti_all is not NA. Here is a wall of code that very crudely demonstrates the problem, there is a couple of dim() outputs at the end where you can see how it the second time around keeps adds (2) rows, but this does not happen to row 2, I'm aware this is part of another function, but I can't figure it out for that. The first thing that happen is correct, that 7 rows are added. Any help or guidance would be appreciated. Thanks, Eric lookup <- structure(list(c_name = c(1L, 2L, 4L, 5L, 6L, 7L), t_name structure(1:6, .Label = c("Bob", "Julian", "Mitt", "Ricky", "Tom", "Victor"), class = "factor")), .Names = c("c_name", "t_name"), class "data.frame", row.names = c("1", "2", "3", "4", "5", "6")) mydata <- structure(list(id = c(1L, 1L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 6L, 7L, 7L, 7L, 7L, 8L, 9L), time c("intake_arm_1", "v_001_arm_1", "intake_arm_1", "intake_arm_1", "intake_arm_1", "v_001_arm_1", "v_002_arm_1", "v_003_arm_1", "v_004_arm_1", "v_005_arm_1", "v_006_arm_1", "v_007_arm_1", "intake_arm_1", "v_001_arm_1", "intake_arm_1", "intake_arm_1", "v_011_arm_1", "v_012_arm_1", "v_013_arm_1", "intake_arm_1", "intake_arm_1"), dat_all = c(NA, NA, NA, NA, NA, NA, NA, "2012-09-23", "2012-09-23", "2012-09-02", "2012-09-10", "2012-09-23", NA, NA, NA, NA, "2012-09-23", "2012-09-23", "2012-09-23", NA, NA), ti_all = c(NA, NA, NA, NA, NA, NA, NA, 6L, 44L, 33L, NA, 22L, NA, NA, NA, NA, 65L, NA, 10L, NA, NA), ty_all = c(NA, NA, NA, NA, NA, NA, NA, "out_", "out_", "cma_", NA, "cma_", NA, NA, NA, NA, "out_", "out_", "out_", NA, NA), out_c = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), cma_c = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), c_n = c(NA, 1L, NA, NA, NA, NA, NA, 7L, 4L, 7L, NA, 1L, NA, 2L, NA, NA, 7L, 7L, 7L, NA, NA), t_name = c("Tom", NA, "Ricky", "Ricky", "Victor", NA, NA, NA, NA, NA, NA, NA, "Julian", NA, "Julian", "Bob", NA, NA, NA, "Mitt", "Mitt")), .Names = c("id", "time", "dat_all", "ti_all", "ty_all", "out_c", "cma_c", "c_n", "t_name"), class = "data.frame", row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21")) if(require(plyr)){ print("plyr is loaded correctly") } else { print("trying to install plyr") install.packages('plyr') if(require(plyr)){ print("plyr installed and loaded") } else { stop("could not install plyr") } } newrows <- ddply(mydata, .(id), function(subdata) { subdata_ty = subdata[!is.na(subdata$ty_all), ] if (NROW(subdata) == 1) { r = subdata[1, ] c("v_001_arm_1", NA, NA, NA, NA, NA, lookup$c_name[lookup$t_name == r$t_name], NA) } else if (NROW(subdata_ty) > 0) { numbers = sapply(strsplit(subdata$time, "_"), function(l) ifelse(l[1] != "intake", as.numeric(l[2]), 0)) newname = paste(c("v", sprintf("%03d", max(numbers) + 1), "arm", "1"), collapse="_") r1 = subdata[1, ] new_c_n = lookup$c_name[lookup$t_name == r1$t_name] new_out_c = sum(subdata$ty_all == "out_" & !is.na(subdata$ti_all)) new_cma_c = sum(subdata$ty_all == "cma_" & !is.na(subdata$ti_all)) new_out_c = ifelse(new_out_c == 0, NA, new_out_c) new_cma_c = ifelse(new_cma_c == 0, NA, new_cma_c) return(c(newname, NA, NA, NA, new_out_c, new_cma_c, new_c_n, NA)) } }) # recombine and sort colnames(newrows) = colnames(mydata) newdata = rbind(mydata, newrows) newdata = newdata[order(newdata$id), ] mydata2 <- newdata newrows2 <- ddply(mydata2, .(id), function(subdata) { subdata_ty = subdata[!is.na(subdata$ty_all), ] if (NROW(subdata) == 1) { r = subdata[1, ] c("v_001_arm_1", NA, NA, NA, NA, NA, lookup$c_name[lookup$t_name == r$t_name], NA) } else if (NROW(subdata_ty) > 0) { numbers = sapply(strsplit(subdata$time, "_"), function(l) ifelse(l[1] != "intake", as.numeric(l[2]), 0)) newname = paste(c("v", sprintf("%03d", max(numbers) + 1), "arm", "1"), collapse="_") r1 = subdata[1, ] new_c_n = lookup$c_name[lookup$t_name == r1$t_name] new_out_c = sum(subdata$ty_all == "out_" & !is.na(subdata$ti_all)) new_cma_c = sum(subdata$ty_all == "cma_" & !is.na(subdata$ti_all)) new_out_c = ifelse(new_out_c == 0, NA, new_out_c) new_cma_c = ifelse(new_cma_c == 0, NA, new_cma_c) return(c(newname, NA, NA, NA, new_out_c, new_cma_c, new_c_n, NA)) } }) # recombine and sort colnames(newrows2) = colnames(mydata2) newdata2 = rbind(mydata2, newrows2) newdata2 = newdata2[order(newdata2$id), ] mydata3 <- newdata2 newrows2 <- ddply(mydata3, .(id), function(subdata) { subdata_ty = subdata[!is.na(subdata$ty_all), ] if (NROW(subdata) == 1) { r = subdata[1, ] c("v_001_arm_1", NA, NA, NA, NA, NA, lookup$c_name[lookup$t_name == r$t_name], NA) } else if (NROW(subdata_ty) > 0) { numbers = sapply(strsplit(subdata$time, "_"), function(l) ifelse(l[1] != "intake", as.numeric(l[2]), 0)) newname = paste(c("v", sprintf("%03d", max(numbers) + 1), "arm", "1"), collapse="_") r1 = subdata[1, ] new_c_n = lookup$c_name[lookup$t_name == r1$t_name] new_out_c = sum(subdata$ty_all == "out_" & !is.na(subdata$ti_all)) new_cma_c = sum(subdata$ty_all == "cma_" & !is.na(subdata$ti_all)) new_out_c = ifelse(new_out_c == 0, NA, new_out_c) new_cma_c = ifelse(new_cma_c == 0, NA, new_cma_c) return(c(newname, NA, NA, NA, new_out_c, new_cma_c, new_c_n, NA)) } }) # recombine and sort colnames(newrows2) = colnames(mydata3) newdata3 = rbind(mydata3, newrows2) newdata3 = newdata3[order(newdata3$id), ] identical(newdata3, newdata2) identical(newdata2, newdata) dim(mydata) dim(newdata) dim(newdata2) dim(newdata3)