Hello, I am trying to run a coxph model but get an error Error in model.frame.default(formula = Surv(time, status) ~ selectedVarnames, : variable lengths differ (found for 'selectedVarnames') Of note the dataset is generated as part of using the glmnet for Lasso regularization. Glmnet takes in as input a matrix where all categorical variables have been converted to binary factors with dummies. As a result I have had to do some data manipulations that are probably the origin of the error. Original dataframe -> transform categorical to binary -> transform to matrix -> run glment -> get best features -> selected features + status + time -> transform to dataframe -> run coxph The coxph code (entire code below): glmnet.cox <- coxph(Surv(time,status) ~ selectedVarnames, data=reformat_dataSet, init=selectedBeta,iter=200) Here is a description of the data (I can't attach the real thing as it is protected health data): selectedVar: 394X81 Double Matrix reformat_dataSet: 394 obs. 83 varaibles (dataframe including 59 predicting variables, time and status) selectedBeta: numeric [81] selectedVarnames: "FABM0+FABM1+FABM2+FABM4+FABM4EOS+SEXF+SEXM+Age_at_Dx+Performance_Status0+Pe rformance_Status2+Performance_Status3+Performance_Status4+AHD_typeabnormal counts_MDS+AHD_typeALL_MDS+AHD_typecytopenia+AHD_typelow PLT+AHD_typelow white count+AHD_typeMPD+AHD_typepancytopenia+PRIOR_MALNO+PRIOR_MALYES+PRIOR_CHEMON O+PRIOR_XRTNO+PRIOR_XRTYES+cyto_grpUNFAVORABLE+Cyto5+Cyto7+Cyto8+Cyto10+Cyto 11+Cyto12+ITDPOS+D835POS+ABS_BLST+BM_PROM+PB_MONO+PB_PROM+HGB+PLT+ALBUMIN+CR EATININE+CD13+CD34+CD19+Fresh_or__CryoCryo+AKT1_2_3_pT308+ARC+ATF3+BECN1+BIR C2+CAV1+CDKN2A+CTNNB1+FOXO3_S318_321+GAB2+GRP78+IGFBP2+INPP5D+JMJD6+JUN_pS73 +KIT+LYN+MAP2K1_2_pS217_221+NRP1+PIK3CA+PLAC1+PRKCA+PRKCD_pT507+PTEN_pS380T3 82T383+RAC1_2_3+RPS6KB1+SMAD1+SMAD2_pS245+SMAD5+SMAD5_pS463+STAT3+STMN1+TP53 +TRIM62+VASP+XPO1" dataset: 394 obs. 270 variables predict_matrix: 394X392 double matrix THANK YOU!!! The entire code (without the data though): library("survival") library("pec") library ("glmnet") library ("peperr") library ("Hmisc") cIndexCoxglmnet <- list() for (i in 1:50){ train <- sample(1:nrow(dataset), nrow(dataset), replace = TRUE) ## random sampling with replacement trainSet <-dataset [train,] testSet<-dataset [-train,] cat ("\n","ITERATION:",i,"\n") #creat Y (survival matrix) for glmnet surv_obj <- Surv(trainSet$time,trainSet$status) ## tranform categorical variables into binary variables with dummy for trainSet predict_matrix <- model.matrix(~ ., data=trainSet, contrasts.arg = lapply (trainSet[,sapply(trainSet, is.factor)], contrasts, contrasts=FALSE)) ## remove the statu/time variables from the predictor matrix (x) for glmnet predict_matrix <- subset (predict_matrix, select=c(-time,-status)) ## create a glmnet cox object using lasso regularization and cross validation glmnet.cv <- cv.glmnet (predict_matrix, surv_obj, family="cox") ## get the glmnet model on the full dataset glmnet.obj <- glmnet.cv$glmnet.fit # find lambda index for the models with least partial likelihood deviance (by cv.glmnet) optimal.lambda <- glmnet.cv$lambda.min # For a more parsimoneous model use lambda.1se lambda.index <- which(glmnet.obj$lambda==optimal.lambda) # take beta for optimal lambda optimal.beta <- glmnet.obj$beta[,lambda.index] # find non zero beta coef nonzero.coef <- abs(optimal.beta)>0 selectedBeta <- optimal.beta[nonzero.coef] # take only covariates for which beta is not zero selectedVar <- predict_matrix[,nonzero.coef] # create a dataframe for trainSet with time, status and selected variables in binary representation for evaluation in pec reformat_dataSet <- as.data.frame(cbind(surv_obj,selectedVar)) # names of selectedVars selectedVarnames<-paste(colnames(selectedVar),collapse="+") # create coxph object with pre-defined coefficients glmnet.cox <- coxph(Surv(time,status) ~ selectedVarnames, data=reformat_dataSet, init=selectedBeta,iter=200) ## create datasets based on the testSet fit for glmnet models for testing in pec function ## !!!encountered tech problems in variables selection for unknown reason so the code is somewhat redundant !!! #creat Y (survival matrix) for glmnet surv_obj_test <- Surv(testSet$time,testSet$status) ## tranform categorical variables into binary variables with dummy for testSet predict_matrix_test <- model.matrix(~ ., data=testSet, contrasts.arg = lapply (testSet[,sapply(testSet, is.factor)], contrasts, contrasts=FALSE)) ## remove the statu/time variables from the predictor matrix (x) for glmnet predict_matrix_test <- subset (predict_matrix_test, select=c(-time,-status)) ## remove the statu/time variables from the predictor matrix (x) for glmnet selectedVar_test <- predict_matrix_test [,nonzero.coef] # create a dataframe for trainSet with time, status and selected variables in binary representation for evaluation in pec reformat_testSet <- as.data.frame(cbind(surv_obj_test,selectedVar_test)) refrmat_matrix <- cbind(surv_obj_test,selectedVar_test) ## compute c-index (Harrell's) for cox-glmnet models if (is.null(glmnet.cox)){ cIndexCoxglmnet <- c(cIndexCoxglmnet,NULL) }else{ cIndexCoxglmnet <- c(cIndexCoxglmnet, rcorr.cens(predict(glmnet.cox, reformat_testSet),Surv(reformat_testSet$time,reformat_testSet$status))[1]) } } [[alternative HTML version deleted]]