Kevin Egan
2020-Sep-23 14:32 UTC
[R] Issues with lapply and for loop Compared to Running Function
Hello, I'd like to apologise, as I understand this is a significant amount of code, but I am struggling to understand why my code produces an error when it runs. I have been able to obtain results for the lists of matrices named xdot and ydot, but am struggling with zdot: I keep getting the error

    Error in seq.default(sbeta) : 'from' must be a finite number

when trying to run either a for loop or lapply over the list of matrices. However, when I run the bootstrap function on one of the matrices directly, it provides results. Is there a way to fix this issue? I'm confused as to why it works when called on a single matrix but not when run through lapply or a for loop. The full code is below.
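To narrow it down, I assume I could wrap the call in tryCatch so that a single failing matrix does not stop the whole loop, and then look at which elements errored. This is only a sketch; systems, zdot and bootstrap_function are the objects defined in the full code that follows.

## Hypothetical debugging sketch: run the bootstrap on each zdot matrix,
## catching errors so one failing element does not abort the loop.
zdot.try <- lapply(seq_along(zdot), function(i) {
  tryCatch(
    bootstrap_function(zdot[[i]], alpha = 0.05,
                       polynomial.orders = systems[[1]]$degree.theta),
    error = function(e) paste("element", i, "failed:", conditionMessage(e))
  )
})
## Elements that came back as a character string are the ones that errored
which(vapply(zdot.try, is.character, logical(1)))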
set.seed(123)    # seed for reproducibility
library(boot)
library(np)      # used for b.star
library(dplyr)   # used for centering and standardizing
library(deSolve) # ODE
library(lars)

# linear 3D system
Linear3D_derivative <- function(n, eta, polyorder){
  # n = number of time points (rounded to nearest integer)
  # eta = amount of noise to be added
  # polyorder = degree of polynomial
  n <- round(n, 0)
  times.3D <- seq(0, (n - 1)*0.01, by = 0.01)
  A.linear3D <- matrix(c(-0.1, -2, 0, 2, -0.1, 0, 0, 0, -0.3), 3, 3)
  state.linear3D <- c(X = 2, Y = 0, Z = 1)
  linear3D <- function(t, A, b) {
    with(as.list(c(A, b)), {
      dX <- A %*% b
      list(c(dX))
    })
  }
  out.linear3D <- ode(y = state.linear3D, times = times.3D,
                      func = linear3D, parms = A.linear3D)
  out.linear3D <- out.linear3D[,-1]
  out.linear3D.sorted <- data.frame(out.linear3D)[-1]
  # Rearrange for Theta matrix: X0.0.1 = x, X0.1.0 = y, X1.0.0 = z
  out.linear3D.sorted <- out.linear3D[,c(3,2,1)]
  # Polynomial expansion
  expanded.theta <- polym(as.matrix(out.linear3D.sorted), degree = polyorder, raw = TRUE)
  # Order by degree using as.numeric_version
  # numeric_version allows converting the variable names and expanding without limit
  ordered.results <- order(attr(expanded.theta, "degree"),
                           as.numeric_version(colnames(expanded.theta)))
  # Sort Theta matrix
  sorted.theta <- expanded.theta[,ordered.results]
  sorted.theta <- data.frame(sorted.theta)
  # Change variable names
  s <- strsplit(substring(colnames(sorted.theta), 2), "\\.")
  colnames(sorted.theta) <- sapply(s, function(x){
    vec <- c("x", "y", "z")[seq_along(x)]
    x <- as.integer(x)
    y <- rep(vec, rev(x))
    paste(y, collapse = "")
  })
  # Add ones column to Theta matrix
  sorted.theta <- data.frame(1, sorted.theta)
  # That lost the attributes, so put them back
  attr(sorted.theta, "degree") <- c(0, attr(expanded.theta, "degree")[ordered.results])
  sorted.theta <- sorted.theta[,order(attr(sorted.theta, "degree"), colnames(sorted.theta))]
  # That lost the attributes again, so put them back
  attr(sorted.theta, "degree") <- c(0, attr(expanded.theta, "degree")[ordered.results])
  # Create derivative matrix
  dx <- matrix(NA, nrow = nrow(out.linear3D.sorted), ncol = ncol(out.linear3D.sorted))
  for (i in 1:nrow(out.linear3D.sorted)){
    # linear3D returns a list with one element; to assign to dx you have to
    # extract the list element using [[1]]
    dx[i,] <- linear3D(0, A.linear3D, out.linear3D[i,])[[1]]
  }
  # Add noise
  length <- nrow(dx) * ncol(dx)
  dx <- dx + eta*matrix(rnorm(length, mean = 0, sd = 1), nrow(dx))
  # Derivative variables
  xdot <- dx[,1]
  ydot <- dx[,2]
  zdot <- dx[,3]
  # Combine matrices
  xdot.df <- data.frame(cbind(xdot, sorted.theta))
  ydot.df <- data.frame(cbind(ydot, sorted.theta))
  zdot.df <- data.frame(cbind(zdot, sorted.theta))
  # Center y; X will be standardized in the modelling function
  y.xdot <- xdot.df %>% dplyr::select(xdot) %>% scale(center = TRUE, scale = FALSE) %>% as.matrix()
  X.xdot <- xdot.df %>% dplyr::select(-xdot) %>% as.matrix()
  xdot.matrix <- as.matrix(cbind(y.xdot, X.xdot))
  colnames(xdot.matrix)[which(colnames(xdot.matrix) == "X1")] <- "1"
  y.ydot <- ydot.df %>% dplyr::select(ydot) %>% scale(center = TRUE, scale = FALSE) %>% as.matrix()
  X.ydot <- ydot.df %>% dplyr::select(-ydot) %>% as.matrix()
  ydot.matrix <- as.matrix(cbind(y.ydot, X.ydot))
  colnames(ydot.matrix)[which(colnames(ydot.matrix) == "X1")] <- "1"
  y.zdot <- zdot.df %>% dplyr::select(zdot) %>% scale(center = TRUE, scale = FALSE) %>% as.matrix()
  X.zdot <- zdot.df %>% dplyr::select(-zdot) %>% as.matrix()
  zdot.matrix <- as.matrix(cbind(y.zdot, X.zdot))
  colnames(zdot.matrix)[which(colnames(zdot.matrix) == "X1")] <- "1"
  return(list(xdot.matrix = xdot.matrix, ydot.matrix = ydot.matrix,
              zdot.matrix = zdot.matrix, col.names = colnames(X.xdot),
              degree.theta = attr(sorted.theta, "degree")))
}

# lars alasso step BIC with OLS weights
lars_alasso_OLSweights_fn <- function(data, index){
  # index is the bootstrap sample index
  x <- data[index, -1]
  y <- data[index, 1]
  m <- ncol(x)
  n <- nrow(x)
  x <- as.matrix(x)
  # standardize variables like lars does
  one <- rep(1, n)
  meanx <- drop(one %*% x)/n
  xc <- scale(x, meanx, FALSE) # first subtract the means
  xc[,1] <- 1
  normx <- sqrt(drop(one %*% (xc^2)))
  names(normx) <- NULL
  xs <- scale(xc, FALSE, normx)
  xs[,1] <- 1
  # Perform OLS for the weights vector
  out.ls <- lm(y ~ xs)               # OLS fit on standardized data
  beta.ols <- out.ls$coeff[2:(m+1)]  # OLS coefficients except the intercept
  # Some coefficients may be NA, so make them 0
  beta.ols[is.na(beta.ols)] <- 0
  w <- abs(beta.ols)
  # Scale x using the weights vector
  xs2 <- scale(xs, center = FALSE, scale = 1/w)
  xs2[,1] <- 1
  object <- lars(xs2, y, type = "lasso", normalize = FALSE)
  # BIC; see Tibshirani, On the degrees of freedom
  bic <- ((log(n)/(n))*object$df) + (as.vector(object$RSS)/(n))
  step.bic <- which.min(bic)
  # bic <- log(n)*object$df + n*log(as.vector(object$RSS)/n)
  coeff <- as.vector(coef(object, s = step.bic, mode = "step"))
  # coeff <- predict.lars(object, xs2, s = step.bic, type = "coef", mode = "step")$coefficients
  # Get back to the right scale by multiplying by the weights vector
  coeff <- coeff*w/normx
  coeff[is.na(coeff)] <- 0
  coef <- as.vector(coeff)
  return(coef)
}
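For reference, the BIC step selection used in these model-fitting functions works as I expect on a small made-up toy example (random data and invented names, not part of my pipeline):

## Toy illustration of the step selection only; X.toy and y.toy are invented.
library(lars)
X.toy <- matrix(rnorm(100 * 5), 100, 5)
y.toy <- X.toy[, 1] - 2 * X.toy[, 2] + rnorm(100)
fit.toy <- lars(X.toy, y.toy, type = "lasso", normalize = FALSE)
n.toy <- nrow(X.toy)
bic.toy <- ((log(n.toy)/n.toy) * fit.toy$df) + (as.vector(fit.toy$RSS)/n.toy)
coef(fit.toy, s = which.min(bic.toy), mode = "step")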
# lars alasso step BIC with OLS weights plus OLS post selection
lars_alassoOLS_OLSweights_fn <- function(data, index){
  # index is the bootstrap sample index
  x <- data[index, -1]
  y <- data[index, 1]
  m <- ncol(x)
  n <- nrow(x)
  x <- as.matrix(x)
  # standardize variables like lars does
  one <- rep(1, n)
  meanx <- drop(one %*% x)/n
  xc <- scale(x, meanx, FALSE) # first subtract the means
  xc[,1] <- 1
  normx <- sqrt(drop(one %*% (xc^2)))
  names(normx) <- NULL
  xs <- scale(xc, FALSE, normx)
  xs[,1] <- 1
  # Perform OLS for the weights vector
  out.ls <- lm(y ~ xs)               # OLS fit on standardized data
  beta.ols <- out.ls$coeff[2:(m+1)]  # OLS coefficients except the intercept
  # Some coefficients may be NA, so make them 0
  beta.ols[is.na(beta.ols)] <- 0
  w <- abs(beta.ols)
  # Scale xs using the weights vector
  xs2 <- scale(xs, center = FALSE, scale = 1/w)
  xs2[,1] <- 1
  object <- lars(xs2, y, type = "lasso", normalize = FALSE)
  # BIC; see Tibshirani, On the degrees of freedom
  bic <- ((log(n)/(n))*object$df) + (as.vector(object$RSS)/(n))
  # bic <- log(n)*object$df + n*log(as.vector(object$RSS)/n)
  step.bic <- which.min(bic)
  coeff <- as.vector(coef(object, s = step.bic, mode = "step"))
  # coeff <- predict.lars(object, xs2, s = step.bic, type = "coef", mode = "step")$coefficients
  # Get back to the right scale by multiplying by the weights vector
  coeff <- coeff*w/normx
  coeff[is.na(coeff)] <- 0
  coef_nonzero <- as.vector(coeff) != 0
  if (all(coef_nonzero == FALSE)) {
    ls_coef <- coeff
  } else {
    # OLS refit on the selected variables
    ls.obj <- glm(y ~ x[, coef_nonzero, drop = FALSE])
    ls_coef <- (ls.obj$coefficients)[-1]
  }
  vect_coef <- rep(0, length(coef_nonzero))
  vect_coef[coef_nonzero] <- ls_coef
  return(vect_coef)
}

### Non-parametric bootstrap
# Determine block length using the b.star function
bootstrap_function <- function(data, alpha, polynomial.orders){
  # polynomial.orders = attr(expanded.theta, "degree")[ordered.results], i.e. degree of the variables
  bstar <- b.star(data[,1], round = TRUE) # block length for the derivative variable
  blocklength <- bstar[,2]                # select block length of circular block result
  # Run tsboot on the original data frame using the block length of the derivative variable
  alasso.boot.ts <- tsboot(data, lars_alasso_OLSweights_fn, R = 1000,
                           sim = "fixed", l = blocklength)
  alasso.boot.t0 <- alasso.boot.ts$t0 # the estimator from the original data set
  alasso.boot.t <- alasso.boot.ts$t   # matrix of coefficients from bootstrap samples
  # Determine number of variables with polynomial degree less than or equal to
  # the largest non-zero column polynomial degree
  alasso.boot.t.nonzero <- apply(alasso.boot.t, 2, function(c) sum(c != 0))
  alasso.boot.t.nz.max <- max(which(alasso.boot.t.nonzero != 0)) # add 1 for ones column
  new.theta.order <- sum(polynomial.orders <= polynomial.orders[alasso.boot.t.nz.max])
  # Rerun bootstrap with the truncated matrix; add 1 to include the derivative variable
  post.boot.matrix <- data[, 0:new.theta.order + 1]
  alassoOLS.boot.ts <- tsboot(post.boot.matrix, lars_alassoOLS_OLSweights_fn, R = 1000,
                              sim = "fixed", l = blocklength)
  alassoOLS.boot.t0 <- alassoOLS.boot.ts$t0 # the estimator from the truncated data set
  alassoOLS.boot.t <- alassoOLS.boot.ts$t
  variables <- length(alassoOLS.boot.t0)                # number of variables considered
  coef.nonzero <- length(which(alassoOLS.boot.t0 != 0)) # variables selected
  q <- (alpha*coef.nonzero)/variables # q = alpha
  B <- alassoOLS.boot.ts$R            # number of bootstraps
  q1 <- (B*q)/2                       # lower bound
  q2 <- B - q1 + 1                    # upper bound
  # Sort and determine the value of the lower and upper bound
  bound.percentile <- apply(alassoOLS.boot.t, 2,
                            function(u){ CI = sort(u)[c(round(q1, 0), round(q2, 0))] })
  # Check whether the point estimates are within the confidence intervals
  within.ci.check <- (alassoOLS.boot.t0 >= bound.percentile[1,] &
                        alassoOLS.boot.t0 <= bound.percentile[2,])
  iterative.df.colnames <- colnames(post.boot.matrix)
  iterative.theta.colnames <- iterative.df.colnames[-c(1)]
  return(list(ci = bound.percentile, point.estimates = alassoOLS.boot.t0,
              within.ci.check = within.ci.check,
              point.estimate.colnames = iterative.theta.colnames))
}

s <- seq(4.77, 5, by = 0.01)
k <- 10^s
systems <- lapply(k, eta = 0, polyorder = 3, Linear3D_derivative)
xdot <- lapply(systems, `[[`, 1)
ydot <- lapply(systems, `[[`, 2)
zdot <- lapply(systems, `[[`, 3)
zdot.results <- lapply(zdot, alpha = 0.05,
                       polynomial.orders = systems[[1]]$degree.theta, bootstrap_function)

I've also tried to use mclapply to speed up the process (see the sketch below). Thanks
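P.S. For the mclapply attempt mentioned above, I assume the call would be essentially the following (parallel package; the mc.cores value is arbitrary and must stay at 1 on Windows):

## Hypothetical parallel version of the final zdot loop; it should be a
## drop-in replacement for the lapply call above.
library(parallel)
zdot.results <- mclapply(zdot, bootstrap_function,
                         alpha = 0.05,
                         polynomial.orders = systems[[1]]$degree.theta,
                         mc.cores = 2)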