I am trying to implement a simple r-svm example using the iris data (only two of the classes are taken and data is within the code). I am running into some errors. I am not an expert on svm's. If any one has used it, I would appreciate their help. I am appending the code below. Thanks../Murli ####################################################### ### R-code for R-SVM ### use leave-one-out / Nfold or bootstrape to permute data for external CV ### build SVM model and use mean-balanced weight to sort genes on training set ### and recursive elimination of least important genes ### author: Dr. Xin Lu, Research Scientist ### Biostatistics Department, Harvard School of Public Health library(e1071) ## read in SVM formated data in filename ## the format following the defination of SVMTorch ## the first line contains 2 integer: nSample nFeature+1 ## followed by a matrix, each row for one sample, with the last column being +/1 1 for class label ReadSVMdata <- function(filename) { dd <- read.table( filename, header=F, skip=1) x <- as.matrix( dd[, 1:(ncol(dd)-1)] ) y <- factor( dd[, ncol(dd)] ) ret <- list(x=x, y=y) } ## create a decreasing ladder for recursive feature elimination CreatLadder <- function( Ntotal, pRatio=0.75, Nmin=5 ) { x <- vector() x[1] <- Ntotal for( i in 1:100 ) { pp <- round(x[i] * pRatio) if( pp == x[i] ) { pp <- pp-1 } if( pp >= Nmin ) { x[i+1] <- pp } else { break } } x } ## R-SVM core code ## input: ## x: row matrix of data ## y: class label: 1 / -1 for 2 classes ## CVtype: ## integer: N fold CV ## "LOO": leave-one-out CV ## "bootstrape": bootstrape CV ## CVnum: number of CVs ## LOO: defined as sample size ## Nfold and bootstrape: user defined, default as sample size ## output: a named list ## Error: a vector of CV error on each level ## SelFreq: a matrix for the frequency of each gene being selected in each level ## with each column corresponds to a level of selection ## and each row for a gene ## The top important gene in each level are those high-freqent ones RSVM <- function(x, y, ladder, CVtype, CVnum=0 ) { ## check if y is binary response Ytype <- names(table(y)) if( length(Ytype) != 2) { print("ERROR!! RSVM can only deal with 2-class problem") return(0) } ## class mean m1 <- apply(x[ which(y==Ytype[1]), ], 2, mean) m2 <- apply(x[ which(y==Ytype[2]), ], 2, mean) md <- m1-m2 yy <- vector( length=length(y)) yy[which(y==Ytype[1])] <- 1 yy[which(y==Ytype[2])] <- -1 y <- yy ## check ladder if( min(diff(ladder)) >= 0 ) { print("ERROR!! ladder must be monotonously decreasing") return(0); } if( ladder[1] != ncol(x) ) { ladder <- c(ncol(x), ladder) } nSample <- nrow(x) nGene <- ncol(x) SampInd <- seq(1, nSample) if( CVtype == "LOO" ) { CVnum <- nSample } else { if( CVnum == 0 ) { CVnum <- nSample } } ## vector for test error and number of tests ErrVec <- vector( length=length(ladder)) names(ErrVec) <- paste("Lev_", ladder, sep="") nTests <- 0 SelFreq <- matrix( 0, nrow=nGene, ncol=length(ladder)) colnames(SelFreq) <- paste("Lev_", ladder, sep="") ## for each CV for( i in 1:CVnum ) { ## split data if( CVtype == "LOO" ) { TestInd <- i TrainInd <- SampInd[ -TestInd] } else { if( CVtype == "bootstrape" ) { TrainInd <- sample(SampInd, nSample, replace=T ) TestInd <- SampInd[ which(!(SampInd %in% TrainInd ))] } else { ## Nfold TrainInd <- sample(SampInd, nSample*(CVtype-1)/CVtype ) TestInd <- SampInd[ which(!(SampInd %in% TrainInd ))] } } nTests <- nTests + length(TestInd) ## in each level, train a SVM model and record test error xTrain <- x[TrainInd, ] yTrain <- y[TrainInd] xTest <- x[TestInd,] yTest <- y[TestInd] ## index of the genes used in the SelInd <- seq(1, nGene) for( gLevel in 1:length(ladder) ) { ## record the genes selected in this ladder SelFreq[SelInd, gLevel] <- SelFreq[SelInd, gLevel] +1 ## train SVM model and test error svmres <- svm(xTrain[, SelInd], yTrain, scale=F, type="C-classification", kernel="linear" ) if( CVtype == "LOO" ) { svmpred <- predict(svmres, matrix(xTest[SelInd], nrow=1) ) } else { svmpred <- predict(svmres, xTest[, SelInd] ) } ErrVec[gLevel] <- ErrVec[gLevel] + sum(svmpred != yTest ) ## weight vector W <- t(svmres$coefs*yTrain[svmres$index]) %*% svmres$SV * md[SelInd] rkW <- rank(W) if( gLevel < length(ladder) ) { SelInd <- SelInd[which(rkW > (ladder[gLevel] - ladder[gLevel+1]))] } } } ret <- list(ladder=ladder, Error=ErrVec/nTests, SelFreq=SelFreq) } SummaryRSVM <- function( RSVMres ) { ERInd <- max( which(RSVMres$Error == min(RSVMres$Error)) ) MinLevel <- RSVMres$ladder[ERInd] FreqVec <- RSVMres$SelFreq[, ERInd] SelInd <- which( rank(FreqVec) >= (ladder[1]-MinLevel) ) # print("MinCV error of", min(RSVMres$Error), "at", MinLevel, "genes" ) ret <- list( MinER=min(RSVMres$Error), MinLevel=MinLevel, SelInd=SelInd) } ########################################### #my code starts below #data<-ReadSVMdata("iris_r-svm.txt") #The data read from the file is given below. data<-structure(list(x = structure(c(5.1, 4.9, 4.7, 4.6, 5, 5.4, 4.6, 5, 4.4, 4.9, 5.4, 4.8, 4.8, 4.3, 5.8, 5.7, 5.4, 5.1, 5.7, 5.1, 5.4, 5.1, 4.6, 5.1, 4.8, 5, 5, 5.2, 5.2, 4.7, 4.8, 5.4, 5.2, 5.5, 4.9, 5, 5.5, 4.9, 4.4, 5.1, 5, 4.5, 4.4, 5, 5.1, 4.8, 5.1, 4.6, 5.3, 5, 7, 6.4, 6.9, 5.5, 6.5, 5.7, 6.3, 4.9, 6.6, 5.2, 5, 5.9, 6, 6.1, 5.6, 6.7, 5.6, 5.8, 6.2, 5.6, 5.9, 6.1, 6.3, 6.1, 6.4, 6.6, 6.8, 6.7, 6, 5.7, 5.5, 5.5, 5.8, 6, 5.4, 6, 6.7, 6.3, 5.6, 5.5, 5.5, 6.1, 5.8, 5, 5.6, 5.7, 5.7, 6.2, 5.1, 5.7, 3.5, 3, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.4, 3, 3, 4, 4.4, 3.9, 3.5, 3.8, 3.8, 3.4, 3.7, 3.6, 3.3, 3.4, 3, 3.4, 3.5, 3.4, 3.2, 3.1, 3.4, 4.1, 4.2, 3.1, 3.2, 3.5, 3.6, 3, 3.4, 3.5, 2.3, 3.2, 3.5, 3.8, 3, 3.8, 3.2, 3.7, 3.3, 3.2, 3.2, 3.1, 2.3, 2.8, 2.8, 3.3, 2.4, 2.9, 2.7, 2, 3, 2.2, 2.9, 2.9, 3.1, 3, 2.7, 2.2, 2.5, 3.2, 2.8, 2.5, 2.8, 2.9, 3, 2.8, 3, 2.9, 2.6, 2.4, 2.4, 2.7, 2.7, 3, 3.4, 3.1, 2.3, 3, 2.5, 2.6, 3, 2.6, 2.3, 2.7, 3, 2.9, 2.9, 2.5, 2.8, 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.6, 1.4, 1.1, 1.2, 1.5, 1.3, 1.4, 1.7, 1.5, 1.7, 1.5, 1, 1.7, 1.9, 1.6, 1.6, 1.5, 1.4, 1.6, 1.6, 1.5, 1.5, 1.4, 1.5, 1.2, 1.3, 1.4, 1.3, 1.5, 1.3, 1.3, 1.3, 1.6, 1.9, 1.4, 1.6, 1.4, 1.5, 1.4, 4.7, 4.5, 4.9, 4, 4.6, 4.5, 4.7, 3.3, 4.6, 3.9, 3.5, 4.2, 4, 4.7, 3.6, 4.4, 4.5, 4.1, 4.5, 3.9, 4.8, 4, 4.9, 4.7, 4.3, 4.4, 4.8, 5, 4.5, 3.5, 3.8, 3.7, 3.9, 5.1, 4.5, 4.5, 4.7, 4.4, 4.1, 4, 4.4, 4.6, 4, 3.3, 4.2, 4.2, 4.2, 4.3, 3, 4.1, 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.4, 0.4, 0.3, 0.3, 0.3, 0.2, 0.4, 0.2, 0.5, 0.2, 0.2, 0.4, 0.2, 0.2, 0.2, 0.2, 0.4, 0.1, 0.2, 0.2, 0.2, 0.2, 0.1, 0.2, 0.2, 0.3, 0.3, 0.2, 0.6, 0.4, 0.3, 0.2, 0.2, 0.2, 0.2, 1.4, 1.5, 1.5, 1.3, 1.5, 1.3, 1.6, 1, 1.3, 1.4, 1, 1.5, 1, 1.4, 1.3, 1.4, 1.5, 1, 1.5, 1.1, 1.8, 1.3, 1.5, 1.2, 1.3, 1.4, 1.4, 1.7, 1.5, 1, 1.1, 1, 1.2, 1.6, 1.5, 1.6, 1.5, 1.3, 1.3, 1.3, 1.2, 1.4, 1.2, 1, 1.3, 1.2, 1.3, 1.3, 1.1, 1.3), .Dim = c(100, 4), .Dimnames = list(c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100"), c("V1", "V2", "V3", "V4"))), y = structure(c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), .Label = c("-1", "1"), class = "factor")), .Names = c("x", "y")) len<-length(data$y) x<-data$x y<-data$y ladder<-CreatLadder(len) RSVM(x,y,ladder,"LOO")