I am working on a script that takes numeric performance indicators and runs them against a series of regressors (dummy regressors, i.e. yes/no flags coded as 0 and 1, e.g. "Was it Christmas this week?" 0 = no, 1 = yes). The script is as follows (written as a function):

-- Begin Script --

doEnv <- function(HOUR, ENVNAME, REPORTNAME) {
  library(RODBC)
  library(forecast)
  library(geneplotter)
  library(fUtilities)
  library(TSA)
  require(gplots)
  library(robfilter)

  SOURCEDATA <- paste("Q:/TEST/RSTATS/EPOC ", HOUR, " Metrics.xls", sep = "")
  REGRESSORS <- "Q:/TEST/RSTATS/eventswithholidays.xls"

  mypalette <- c()
  mypalette$background     <- "#FFFFFF"
  mypalette$chart          <- "#FFFFFF"
  mypalette$forecastRegion <- "#66CCFF"
  mypalette$confidence     <- "#FF9966"
  mypalette$limits         <- "#FF0000"
  mypalette$major          <- "#000000"
  mypalette$minor          <- "#cccccc"
  mypalette$actual         <- "#aaaaaa"
  mypalette$dp1 <- "#9900FF"
  mypalette$dp2 <- "#EEEE00"
  mypalette$dp3 <- "#CCFF00"
  mypalette$dp4 <- "#00CCFF"
  mypalette$dp5 <- "#FF00CC"

  # Raw data
  channel1 <- odbcConnectExcel(SOURCEDATA)
  sqlTables(channel1)
  sh1 <- sqlFetch(channel1, "Actuals$")
  close(channel1)

  channel2 <- odbcConnectExcel(REGRESSORS)
  sqlTables(channel2)
  sh2 <- sqlFetch(channel2, "data$")
  close(channel2)

  # Get raw data; data is now a time series
  tsSource <- ts(sh1[[ENVNAME]], start = c(2004, 1), freq = 52)

  # Prep out-of-sample test ranges
  modLength    <- length(sh1[[ENVNAME]])
  modMax       <- round((modLength / 3) * 2)
  modEndDate   <- time(tsSource)[modMax]
  modStartDate <- time(tsSource)[1]

  # RAW SUMMARY WITH OVERLAY OF OUT-OF-SAMPLE RANGES
  summary(tsSource)
  modelSource <- window(tsSource, modStartDate, end = modEndDate)
  verSource   <- window(tsSource, time(tsSource)[modMax + 1])

  pdf(paste("Q:/ReleaseMgmt/Environment Mgmt/Data/Current/Metrics/Mainframe/Test Environment Projections/RSTATS/images/",
            ENVNAME, "-", HOUR, "-", "Raw Metrics with Test Range.pdf", sep = ""),
      width = 9, height = 6.5)
  plot(tsSource, col = "grey", main = paste("Raw Data for", REPORTNAME),
       xlab = "Date", ylab = "MIPS Used")
  points(modelSource, col = "red", pch = 20)
  points(verSource, col = "blue", pch = 20)
  smartlegend(x = "left", y = "top", inset = 0,   # smartlegend parameters
              legend = c("Actual Data", "Data for Model Selection",
                         "Data for In Sample Verification"),
              fill = c(mypalette$actual, "red", "blue"),
              bg = mypalette$background)
  print("The red region is where we are going to develop the model from and the blue area is where we will evaluate the model (in-sample testing)")
  # OK, our ranges are confirmed; we'll get a better graph later

  # This Heavy Voodoo(TM) allows us to have a dynamic number of
  # dummy variables we can add/remove from the spreadsheet
  forecastDistance <- 52

  # Grab existing regressors (clipping off the first column)
  cReg  <- sh2[1:modLength, -1]
  mcReg <- sh2[1:modMax, -1]

  # Transform the on/offs into proper factors
  for(i in names(cReg))  cReg[[i]]  <- factor(cReg[[i]])
  for(i in names(mcReg)) mcReg[[i]] <- factor(mcReg[[i]])

  # Grab future regressors equal to the forecastDistance
  # (gotta double check if I need a +1 on the start point)
  fReg  <- sh2[length(tsSource):(length(tsSource) + forecastDistance), -1]
  mfReg <- sh2[(modMax + 1):modLength, -1]

  # Fix variable names
  names(cReg)  <- make.names(names(cReg))
  names(mcReg) <- make.names(names(mcReg))
  names(fReg)  <- make.names(names(fReg))
  names(mfReg) <- make.names(names(mfReg))

  #print("#####################")
  #print("This is the cReg data")
  #print("#####################")
  #print(summary(cReg))
  #print("######################")
  #print("This is the mcReg data")
  #print("######################")
  #print(summary(mcReg))
  #names(mcReg)

  for(i in names(fReg))  fReg[[i]]  <- factor(fReg[[i]])
  for(i in names(mfReg)) mfReg[[i]] <- factor(mfReg[[i]])
  # end heavy voodoo
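  # Quick sanity check on those row windows, using toy numbers rather than the
  # real sheet (say modLength = 12 and forecastDistance = 4, so modMax = 8):
  #   1:modMax                                  -> rows used for model selection
  #   (modMax + 1):modLength                    -> rows held back for in-sample verification
  #   modLength:(modLength + forecastDistance)  -> rows handed to the final forecast
  # Note the last window starts at modLength rather than modLength + 1, so it is
  # forecastDistance + 1 rows long -- that is the "+1 on the start point" I still
  # need to double check.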
  ########
  #
  # MODEL VERIFICATION FIRST!!!!!
  #
  # Basic look at the raw data
  hist(modelSource)
  plot(density(modelSource, na.rm = TRUE))
  plot(sort(modelSource), pch = ".")
  for(i in names(mcReg)) {
    pairs(modelSource ~ ., mcReg[[i]], main = paste("Model - MIPS vs", i))
  }

  # Build the lists to store our results
  linearModel <- list()
  residuals   <- list()
  arima_Fit   <- list()
  arima_AO    <- list()
  arima_IO    <- list()
  newcReg     <- list()
  newfReg     <- list()
  newmcReg    <- list()
  newmfReg    <- list()
  newFit      <- list()
  newForecast <- list()

  # The following won't work until mcReg contains the full variety of regressors
  linearModel[[1]] <- lm(modelSource ~ UNITBUILD + UNITDB + ITBUILD + ITDB +
                           UATBUILD + UATDB + HOGANCODE + RCF + ReleaseST1 +
                           ReleaseST2 + ReleaseBLA + Small.Bank.Acquisitions +
                           HLY.NewYear + HLY.MLK + HLY.PRES + HLY.MEMORIAL +
                           HLY.J4 + HLY.LABOR + HLY.COLUMBUS + HLY.VETS +
                           HLY.THANKS + HLY.XMAS + HLY.ELECT + HLY.PATRIOT +
                           EOM, mcReg)
  linearModel[[2]] <- step(linearModel[[1]], trace = 1)

  linearModel[[3]] <- lm(modelSource ~ UNITBUILD + UNITDB + ITBUILD + ITDB +
                           UATBUILD + UATDB + HOGANCODE + RCF + ReleaseST1 +
                           ReleaseST2 + ReleaseBLA + Small.Bank.Acquisitions +
                           HLY.NewYear + HLY.MLK + HLY.PRES + HLY.MEMORIAL +
                           HLY.J4 + HLY.LABOR + HLY.COLUMBUS + HLY.VETS +
                           HLY.THANKS + HLY.XMAS + HLY.ELECT + HLY.PATRIOT +
                           EOM - 1, mcReg)
  linearModel[[4]] <- step(linearModel[[3]], trace = 1)

  if(ENVNAME == "E081") {
    linearModel[[5]] <- lm(modelSource ~ UNITBUILD + UNITDB + HOGANCODE + RCF +
                             ReleaseST1 + Small.Bank.Acquisitions + HLY.NewYear +
                             HLY.MLK + HLY.PRES + HLY.MEMORIAL + HLY.J4 +
                             HLY.LABOR + HLY.COLUMBUS + HLY.VETS + HLY.THANKS +
                             HLY.XMAS + HLY.ELECT + HLY.PATRIOT + EOM, mcReg)
  }
  if(ENVNAME == "I000") {
    linearModel[[5]] <- lm(modelSource ~ ITBUILD + ITDB + HOGANCODE + RCF +
                             ReleaseST1 + Small.Bank.Acquisitions + HLY.NewYear +
                             HLY.MLK + HLY.PRES + HLY.MEMORIAL + HLY.J4 +
                             HLY.LABOR + HLY.COLUMBUS + HLY.VETS + HLY.THANKS +
                             HLY.XMAS + HLY.ELECT + HLY.PATRIOT + EOM, mcReg)
  }
  if(ENVNAME %in% c("U000", "U400")) {
    linearModel[[5]] <- lm(modelSource ~ UATBUILD + UATDB + HOGANCODE + RCF +
                             ReleaseST2 + ReleaseBLA + Small.Bank.Acquisitions +
                             HLY.NewYear + HLY.MLK + HLY.PRES + HLY.MEMORIAL +
                             HLY.J4 + HLY.LABOR + HLY.COLUMBUS + HLY.VETS +
                             HLY.THANKS + HLY.XMAS + HLY.ELECT + HLY.PATRIOT +
                             EOM, mcReg)
  }
  if(ENVNAME == "T000") {
    linearModel[[5]] <- lm(modelSource ~ UNITBUILD + UNITDB + ITBUILD + ITDB +
                             UATBUILD + UATDB + HOGANCODE + RCF + ReleaseST1 +
                             ReleaseST2 + ReleaseBLA + Small.Bank.Acquisitions +
                             HLY.NewYear + HLY.MLK + HLY.PRES + HLY.MEMORIAL +
                             HLY.J4 + HLY.LABOR + HLY.COLUMBUS + HLY.VETS +
                             HLY.THANKS + HLY.XMAS + HLY.ELECT + HLY.PATRIOT +
                             EOM, mcReg)
  }
  if(ENVNAME == "P000") {
    linearModel[[5]] <- lm(modelSource ~ Small.Bank.Acquisitions + HLY.NewYear +
                             HLY.MLK + HLY.PRES + HLY.MEMORIAL + HLY.J4 +
                             HLY.LABOR + HLY.COLUMBUS + HLY.VETS + HLY.THANKS +
                             HLY.XMAS + HLY.ELECT + HLY.PATRIOT + EOM, mcReg)
  }
  linearModel[[6]] <- step(linearModel[[5]], trace = 1)
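  # At this point linearModel holds six candidates:
  #   [[1]] full model with intercept       [[2]] step() selection from [[1]]
  #   [[3]] full model without intercept    [[4]] step() selection from [[3]]
  #   [[5]] hand-picked terms for ENVNAME   [[6]] step() selection from [[5]]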
  ##################
  ### FOR EACH MODEL
  ##################
  for(i in seq_along(linearModel)) {
    print(paste("For linear model", i))
    print(summary(linearModel[[i]]))

    residuals[[i]] <- residuals(linearModel[[i]])
    arima_Fit[[i]] <- auto.arima(residuals[[i]])

    # OK, what is left over after regression and ARIMA that cannot be
    # explained? Stupid outliers....
    # AOs can be added to cReg as a normal dummy variable, but these are AOs
    # from the model, not the original data. Is it better to handle AOs from
    # the original data?
    arima_AO[[i]] <- detectAO(arima_Fit[[i]])

    # What do I do with an innovative outlier? Transfer function or what?
    # auto.arima doesn't handle the IO=c(...) stuff.... Umm...
    # Transfer functions, etc. are a deficiency in the script at this point....
    arima_IO[[i]] <- detectIO(arima_Fit[[i]])

    # Get the (p,d,q) and (P,D,Q) into variables so we can re-feed them if
    # necessary; $arma is c(p, q, P, Q, period, d, D)
    arima_Fit[[i]]$order    <- arima_Fit[[i]]$arma[c(1, 6, 2)]
    arima_Fit[[i]]$seasonal <- arima_Fit[[i]]$arma[c(3, 7, 4)]

    # Keep only the regressor columns whose coefficients survived step();
    # the lm coefficient names for the 0/1 factors have a "1" appended
    newcReg[[i]]  <- cReg[match(names(linearModel[[i]]$coeff[-1]),  paste(names(cReg),  "1", sep = ""))]
    newfReg[[i]]  <- fReg[match(names(linearModel[[i]]$coeff[-1]),  paste(names(fReg),  "1", sep = ""))]
    newmcReg[[i]] <- mcReg[match(names(linearModel[[i]]$coeff[-1]), paste(names(mcReg), "1", sep = ""))]
    newmfReg[[i]] <- mfReg[match(names(linearModel[[i]]$coeff[-1]), paste(names(mfReg), "1", sep = ""))]

    newFit[[i]] <- Arima(modelSource,
                         order = arima_Fit[[i]]$order,
                         seasonal = list(order = arima_Fit[[i]]$seasonal),
                         xreg = newmcReg[[i]],
                         include.drift = FALSE)
    newForecast[[i]] <- predict(newFit[[i]],
                                n.ahead = modLength - modMax,
                                newxreg = newmfReg[[i]])
  }
  ####################
  # END FOR EACH MODEL
  ####################

  dev.off()
  return("AUTOBOT")
}

-- End Script --

The problem is in the line:

  newForecast[[i]] <- predict(newFit[[i]], n.ahead = modLength - modMax, newxreg = newmfReg[[i]])

which results in the error:

  Error in as.matrix(newxreg) %*% coefs[-(1:narma)] :
    requires numeric matrix/vector arguments
  In addition: Warning message:
  In if (class(fit) != "try-error") offset <- -2 * fit$loglik - length(x) * :
    the condition has length > 1 and only the first element will be used

I know that if I do not transform my 0/1 dummy regressors into factors (i.e. skip the two loops for(i in names(fReg)) fReg[[i]] <- factor(fReg[[i]]) and for(i in names(mfReg)) mfReg[[i]] <- factor(mfReg[[i]])), this message goes away, so it appears predict() is trying to convert the regressors with as.matrix(). My question boils down to: is there a solution, or a proper way to handle factors in prediction/forecasting? As I am having to learn this all on my own, I am at a loss as to how to extract the forecast if I am using factors. (I still have no clue how to handle a transfer function to account for policy changes... ufda, I wish I had time to go back to college...)
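For what it's worth, here is a tiny standalone example (completely made-up data and variable names, nothing to do with my real spreadsheets) that seems to reproduce the mechanism behind the error message: once a 0/1 dummy has been turned into a factor, as.matrix() on the data frame produces a character matrix, and %*% refuses character arguments, which looks like exactly what predict() is doing internally with newxreg:

  xmas <- data.frame(HLY.XMAS = c(0, 0, 1, 0))   # one 0/1 dummy, as it comes off the sheet
  fac  <- data.frame(lapply(xmas, factor))       # the same column after my factor() loop

  class(as.matrix(xmas)[1, 1])   # "numeric"   -> %*% is happy
  class(as.matrix(fac)[1, 1])    # "character" -> %*% is not

  as.matrix(xmas) %*% 2          # works
  try(as.matrix(fac) %*% 2)      # the same "requires numeric ... arguments" complaint

So should the dummies simply stay as numeric 0/1 columns for the Arima()/predict() step, or is there a proper way to hand factors to xreg/newxreg?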