cursethiscure
2012-Jul-19 16:15 UTC
[R] Change log(J) to log(J+1) to stop log(0) from occurring in harModel
I think the code is part of the RTAQ package but is not included in it, as I obtained it from https://r-forge.r-project.org/scm/viewvc.php/pkg/RTAQ/R/HAR_model.R?view=markup&root=blotter&sortby=author&pathrev=1028. It is not my code and I make no claim to other's good work, and apologize if I should even be posting it I am not sure, but in the transform function it allows to change the model to `log` or `sqrt`, but when then model is changed to log and the model used is either "HARRVJ" or "HARRVCJ" it will return the following error: x = harModel(dat, periods = c(1,5,22), periodsJ=c(1), RVest c("RCov","RBPCov"), + type="HARRVJ", h=22, transform="log") ; # Estimate .... [TRUNCATED] Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) : NA/NaN/Inf in foreign function call (arg 1) which is due to it taking the log(0), the actual model should take log(J + 1) in case of a 0 value for the J in the time series, but unfortunately I do not know how to rectify this. I was wondering if any one could tell me how how I can achieve this as I am very naive with R still. # Get the matrix for estimation of linear model maxp = max(periods,periodsJ); #max number of aggregation levels if(!is.null(leverage)){ maxp = max(maxp,leverage) } n = length(RM1); #Number of Days # Aggregate RV: RVmatrix1 = aggRV(RM1,periods); if( nest==2 ){ RVmatrix2 = aggRV(RM2,periods); } # In case a jumprobust estimator is supplied # Aggregate and subselect y: y = aggY(RM1,h,maxp); # Only keep useful parts: x1 = RVmatrix1[(maxp:(n-h)),]; if( nest==2 ){ x2 = RVmatrix2[(maxp:(n-h)),]; } # In case a jumprobust estimator is supplied # Jumps: if(type!="HARRV"){ # If model type is as such that you need jump component J = pmax( RM1 - RM2,0 ); # Jump contributions should be positive J = aggJ(J,periodsJ); } if( !is.null(leverage) ){ if( sum(data<0) == 0 ){ warning("You cannot use leverage variables in the model in case your input consists of Realized Measures") } # Get close-to-close returns e = apply.daily(data,sum); #Sum logreturns daily # Get the rmins: rmintemp = pmin(e,0); # Aggregate everything: rmin = aggRV(rmintemp,periods=leverage,type="Rmin"); # Select: rmin = rmin[(maxp:(n-h)),]; }else{ rmin = matrix(ncol=0,nrow=dim(x1)[1]) } ############################### # Estimate the model parameters, according to type of model : # First model type: traditional HAR-RV: if( type == "HARRV" ){ if(!is.null(transform)){ y = Ftransform(y); x1 = Ftransform(x1) } x1 = cbind(x1,rmin); model = estimhar(y=y,x=x1); model$transform = transform; model$h = h; model$type = "HARRV"; model$dates = alldates[(maxp+h):n]; class(model) = c("harModel","lm"); return( model ) } #End HAR-RV if cond if( type == "HARRVJ" ){ J = J[(maxp:(n-h)),]; x = cbind(x1,J); # bind jumps to RV data if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); } x = cbind(x,rmin); model = estimhar(y=y,x=x); model$transform = transform; model$h = h; model$type = "HARRVJ"; model$dates = alldates[(maxp+h):n]; class(model) = c("harModel","lm"); return( model ) }#End HAR-RV-J if cond if( type == "HARRVCJ" ){ # Are the jumps significant? if not set to zero: if( jumptest=="ABDJumptest" ){ TQ = apply.daily(data, TQfun); J = J[,1]; teststats = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ ); }else{ jtest = match.fun(jumptest); teststats = jtest(data,...) } Jindicators = teststats > qnorm(1-alpha); J[!Jindicators] = 0; # Get continuus components if necessary RV measures if necessary: Cmatrix = matrix( nrow = dim(RVmatrix1)[1], ncol = 1 ); Cmatrix[Jindicators,] = RVmatrix2[Jindicators,1]; #Fill with robust one in case of jump Cmatrix[(!Jindicators)] = RVmatrix1[(!Jindicators),1]; #Fill with non-robust one in case of no-jump # Aggregate again: Cmatrix <- aggRV(Cmatrix,periods,type="C"); Jmatrix <- aggJ(J,periodsJ); # subset again: Cmatrix <- Cmatrix[(maxp:(n-h)),]; Jmatrix <- Jmatrix[(maxp:(n-h)),]; x = cbind(Cmatrix,Jmatrix); # bind jumps to RV data if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); } x = cbind(x,rmin); model = estimhar( y=y, x=x ); model$transform = transform; model$h = h; model$type = "HARRVCJ"; model$dates = alldates[(maxp+h):n]; class(model) = c("harModel","lm"); return(model) } } #End function harModel ################################################################# estimhar = function(y, x){ #Potentially add stuff here colnames(y)="y"; output = lm( formula(y~x), data=cbind(y,x)); } # Help function to get nicely formatted formula's for print/summary methods.. getHarmodelformula = function(x){ modelnames = colnames(x$model$x); if(!is.null(x$transform)){ modelnames = paste(x$transform,"(",modelnames,")",sep=""); } #Added visual tingie for plotting transformed RV betas = paste("beta",(1:length(modelnames)),"",sep="") betas2 = paste(" + ",betas,"*") rightside = paste(betas2, modelnames,collapse=""); h = x$h; left = paste("RV",h,sep=""); if(!is.null(x$transform)){ left = paste(x$transform,"(",left,")",sep="" ) } modeldescription = paste(left,"= beta0",rightside); return(list(modeldescription,betas)) } aggRV <- function(RM1,periods,type="RV"){ n = length(RM1); nperiods = length(periods); RVmatrix1 = matrix(nrow=n,ncol=nperiods); for(i in 1:nperiods){ if(periods[i]==1){ RVmatrix1[,i] = RM1; }else{ RVmatrix1[(periods[i]:n),i] rollmean(x=RM1,k=periods[i],align="left") } } #end loop over periods for standard RV estimator colnames(RVmatrix1) = paste(type,periods,sep=""); return(RVmatrix1); } aggJ <- function( J, periodsJ ){ n = length(J); nperiods = length(periodsJ); JM = matrix(nrow=n,ncol=nperiods); for(i in 1:nperiods){ if(periodsJ[i]==1){ JM[,i] = J; }else{ JM[(periodsJ[i]:n),i] = rollmean( x=J, k=periodsJ[i], align="left") } } # End loop over periods for standard RV estimator colnames(JM) = paste("J",periodsJ,sep=""); return(JM) } aggY = function(RM1,h,maxp){ n = length(RM1); if( h == 1 ){ y = RM1[(maxp+1):n]; } if( h != 1 ){ y = matrix( nrow=length(RM1), ncol=1 ); colnames(y) = "y"; y[(h:n),] = rollmean(x=RM1,k=h,align="left"); y = matrix(y[((maxp+h):n),],ncol=1); y=as.data.frame(y) } return(y); } ######################################################################### # Print method for harmodel: print.harModel = function(x, digits = max(3, getOption("digits") - 3), ...){ formula = getHarmodelformula(x); modeldescription = formula[[1]]; betas formula[[2]]; cat("\nModel:\n", paste(modeldescription, sep = "\n", collapse = "\n"), "\n\n", sep = "") coefs = coef(x); names(coefs) = c("beta0",betas) if (length(coef(x))){ cat("Coefficients:\n") print.default(format(coefs, digits = digits), print.gap = 2,quote FALSE); cat("\n\n"); Rs = summary(x)[c("r.squared", "adj.r.squared")] zz = c(Rs$r.squared,Rs$adj.r.squared); names(zz) = c("r.squared","adj.r.squared") print.default((format(zz,digits=digits) ),print.gap = 2,quote=FALSE) } else cat("No coefficients\n") cat("\n") invisible(x) } summary.harModel = function(object, correlation = FALSE, symbolic.cor FALSE,...){ x=object; dd = summary.lm(x); formula = getHarmodelformula(x); modeldescription = formula[[1]]; betas formula[[2]]; dd$call = modeldescription; rownames(dd$coefficients) = c("beta0",betas); return(dd) } plot.harModel = function(x, which = c(1L:3L, 5L), caption = list("Residuals vs Fitted", "Normal Q-Q", "Scale-Location", "Cook's distance", "Residuals vs Leverage", expression("Cook's dist vs Leverage " * h[ii]/(1 - h[ii]))), panel = if (add.smooth) panel.smooth else points, sub.caption = NULL, main = "", ask = prod(par("mfcol")) < length(which) && dev.interactive(), ..., id.n = 3, labels.id = names(residuals(x)), cex.id = 0.75, qqline = TRUE, cook.levels = c(0.5, 1), add.smooth = getOption("add.smooth"), label.pos = c(4, 2), cex.caption = 1){ observed = x$model$y; fitted = x$fitted.values; dates = x$dates; dates = as.POSIXct(dates); observed = xts(observed, order.by=dates); fitted = xts(fitted, order.by=dates); type = x$type; g_range = range(fitted,observed) g_range[1] = 0.95*g_range[1]; g_range[2]= 1.05 * g_range[2]; #ind = seq(1,length(fitted),length.out=5); title = paste("Observed and forecasted RV based on HAR Model:",type); plot.zoo(observed,col="red",lwd=2,main=title, ylim=g_range,xlab="Time",ylab="Realized Volatility"); # axis(1,time(b)[ind], format(time(b)[ind],), las=2, cex.axis=0.8); not used anymore # axis(2); lines(fitted,col="blue",lwd=2); legend("topleft", c("Observed RV","Forecasted RV"), cex=1.1, col=c("red","blue"),lty=1, lwd=2, bty="n"); } I have tried changing if( type == "HARRVJ" ){ J = J[(maxp:(n-h)),]; *x = cbind(x1,J); * # bind jumps to RV data if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); } x = cbind(x,rmin); model = estimhar(y=y,x=x); model$transform = transform; model$h = h; model$type = "HARRVJ"; model$dates = alldates[(maxp+h):n]; class(model) = c("harModel","lm"); return( model ) }#End HAR-RV-J if cond to if( type == "HARRVJ" ){ J = J[(maxp:(n-h)),]; * x = cbind(x1,J+1);* # bind jumps to RV data if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); } x = cbind(x,rmin); model = estimhar(y=y,x=x); model$transform = transform; model$h = h; model$type = "HARRVJ"; model$dates = alldates[(maxp+h):n]; class(model) = c("harModel","lm"); return( model ) }#End HAR-RV-J if cond # and this if( type == "HARRVCJ" ){ # Are the jumps significant? if not set to zero: if( jumptest=="ABDJumptest" ){ TQ = apply.daily(data, TQfun); J = J[,1]; teststats = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ ); }else{ jtest = match.fun(jumptest); teststats = jtest(data,...) } Jindicators = teststats > qnorm(1-alpha); * J[!Jindicators] = 0;* # to if( type == "HARRVCJ" ){ # Are the jumps significant? if not set to zero: if( jumptest=="ABDJumptest" ){ TQ = apply.daily(data, TQfun); J = J[,1]; teststats = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ ); }else{ jtest = match.fun(jumptest); teststats = jtest(data,...) } Jindicators = teststats > qnorm(1-alpha); *J[!Jindicators] = 1;* but it returns to large of scores when the regressions are run. Note I would change it back to how it is for `transform=NULL` and `transform="sqrt" as they compute fine for this. I don't think the log tests can work without the change though. -- View this message in context: http://r.789695.n4.nabble.com/Change-log-J-to-log-J-1-to-stop-log-0-from-occurring-in-harModel-tp4637072.html Sent from the R help mailing list archive at Nabble.com.
Joshua Ulrich
2012-Jul-19 20:24 UTC
[R] Change log(J) to log(J+1) to stop log(0) from occurring in harModel
Same post on Stack Overflow (again): http://stackoverflow.com/q/11567745/271616 -- Joshua Ulrich | FOSS Trading: www.fosstrading.com On Thu, Jul 19, 2012 at 11:15 AM, cursethiscure <caolan.harvey6 at mail.dcu.ie> wrote:> I think the code is part of the RTAQ package but is not included in it, as I > obtained it from > https://r-forge.r-project.org/scm/viewvc.php/pkg/RTAQ/R/HAR_model.R?view=markup&root=blotter&sortby=author&pathrev=1028. > > It is not my code and I make no claim to other's good work, and apologize if > I should even be posting it I am not sure, but in the transform function it > allows to change the model to `log` or `sqrt`, but when then model is > changed to log and the model used is either "HARRVJ" or "HARRVCJ" it will > return the following error: > > x = harModel(dat, periods = c(1,5,22), periodsJ=c(1), RVest > c("RCov","RBPCov"), > + type="HARRVJ", h=22, transform="log") ; # Estimate .... > [TRUNCATED] > Error in lm.fit(x, y, offset = offset, singular.ok = singular.ok, ...) : > NA/NaN/Inf in foreign function call (arg 1) > > which is due to it taking the log(0), the actual model should take log(J + > 1) in case of a 0 value for the J in the time series, but unfortunately I do > not know how to rectify this. I was wondering if any one could tell me how > how I can achieve this as I am very naive with R still. > ><snip>> > I have tried changing > > if( type == "HARRVJ" ){ > J = J[(maxp:(n-h)),]; > *x = cbind(x1,J); * # bind jumps to RV data > if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); } > x = cbind(x,rmin); > model = estimhar(y=y,x=x); > model$transform = transform; model$h = h; model$type = "HARRVJ"; > model$dates = alldates[(maxp+h):n]; > class(model) = c("harModel","lm"); > return( model ) > }#End HAR-RV-J if cond > > to > > if( type == "HARRVJ" ){ > J = J[(maxp:(n-h)),]; > * x = cbind(x1,J+1);* # bind jumps to RV data > if(!is.null(transform)){ y = Ftransform(y); x = Ftransform(x); } > x = cbind(x,rmin); > model = estimhar(y=y,x=x); > model$transform = transform; model$h = h; model$type = "HARRVJ"; > model$dates = alldates[(maxp+h):n]; > class(model) = c("harModel","lm"); > return( model ) > }#End HAR-RV-J if cond > > # and this > > if( type == "HARRVCJ" ){ > # Are the jumps significant? if not set to zero: > if( jumptest=="ABDJumptest" ){ > TQ = apply.daily(data, TQfun); > J = J[,1]; > teststats = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ ); > }else{ jtest = match.fun(jumptest); teststats = jtest(data,...) } > Jindicators = teststats > qnorm(1-alpha); > * J[!Jindicators] = 0;* > > # to > > if( type == "HARRVCJ" ){ > # Are the jumps significant? if not set to zero: > if( jumptest=="ABDJumptest" ){ > TQ = apply.daily(data, TQfun); > J = J[,1]; > teststats = ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ ); > }else{ jtest = match.fun(jumptest); teststats = jtest(data,...) } > Jindicators = teststats > qnorm(1-alpha); > *J[!Jindicators] = 1;* > > but it returns to large of scores when the regressions are run. Note I would > change it back to how it is for `transform=NULL` and `transform="sqrt" as > they compute fine for this. > > I don't think the log tests can work without the change though. > > > -- > View this message in context: http://r.789695.n4.nabble.com/Change-log-J-to-log-J-1-to-stop-log-0-from-occurring-in-harModel-tp4637072.html > Sent from the R help mailing list archive at Nabble.com. > > ______________________________________________ > R-help at r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code.