j@de@shod@@ m@iii@g oii googiem@ii@com
2022-Jul-22 12:29 UTC
[R] mgcv: relative risk from GAM with distributed lag
I made a small error in the code below (not checking for NAs which are
introduced by the lag function). However, this doesn't solve the issue
I raised).
So here's the problem (with corrected code) again:
I'm not sure what to do with the 'time' variable. (I don't want
to
make predictions for specific points in time). I coded as follows
(full reproducible example at bottom of email), but get a warning and
error:
N <- 1000 # number of points for smooth to be predicted
# new temperatures and lags for prediction
pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm = T), length =
N)
pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N) ## IS IT CORRECT TO SET
UP LAG LIKE THIS?
# not sure if these covariates are required with type = "terms"
pred_humidity <- rep(median(dat$humidity, na.rm = T), N)
pred_rain <- rep(median(dat$rain, na.rm = T), N)
pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity pred_humidity,
rain = pred_rain)
predictions <- predict(mod, pd, type = "terms")
The predict line creates the following warning and error:
Warning in predict.gam(mod, pd, type = "terms") :
not all required variables have been supplied in newdata!
Error in model.frame.default(ff, data = newdata, na.action = na.act) :
object is not a matrix
For ease of reference, I've (re)included the full reproducible example:
library(mgcv)
set.seed(3) # make reproducible example
simdat <- gamSim(1,400)
g <- exp(simdat$f/5)
simdat$y <- rnbinom(g,size=3,mu=g) # negative binomial response var
simdat$time <- 1:400 # create time series
names(simdat) <- c("deaths", "temp",
"humidity", "rain", "x3", "f",
"f0", "f1", "f2", "f3",
"time")
# lag function based on Wood (book 2017, p.349 and gamair package
documentation p.54
# https://cran.rstudio.com/web/packages/gamair/gamair.pdf)
lagard <- function(x,n.lag=7) {
n <- length(x); X <- matrix(NA,n,n.lag)
for (i in 1:n.lag) X[i:n,i] <- x[i:n-i+1]
X
}
# set up lag, temp, rain and humidity as 7-column matrices
# to create lagged variables
dat <- list(lag=matrix(0:6,nrow(simda
t),7,byrow=TRUE),
deaths=simdat$deaths, time = simdat$time)
dat$temp <- lagard(simdat$temp)
dat$rain <- lagard(simdat$rain)
dat$humidity <- lagard(simdat$humidity)
mod <- gam(deaths~s(time, k=70) + te(temp, lag, k=c(12, 4)) +
te(humidity, lag, k=c(12, 4)) + te(rain, lag, k=c(12, 4)), data = dat,
family = nb, method = 'REML', select = TRUE)
# create prediction data
N <- 1000 # number of points for which to predict the smooths
pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm = T), length =
N)
pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N)
pred_humidity <- rep(median(dat$humidity), N)
pred_rain <- rep(median(dat$rain), N)
pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity pred_humidity,
rain = pred_rain)
# make predictions
predictions <- predict(mod, pd, type = "terms")
On Fri, 22 Jul 2022 at 12:47, jade.shodan at googlemail.com
<jade.shodan at googlemail.com> wrote:>
> Hi Simon,
>
> Thanks for the pointers! But I'm not sure what to do with the
'time'
> variable. (I don't want to make predictions for specific points in
> time). I coded as follows (full reproducible example at bottom of
> email), but get a warning and error:
>
>
> N <- 1000 # number of points for smooth to be predicted
> # new temperatures and lags for prediction
> pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm = T),
length = N)
> pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N) ## IS IT CORRECT TO SET
> UP LAG LIKE THIS?
>
> # not sure if these covariates are required with type = "terms"
> pred_humidity <- rep(median(dat$humidity), N)
> pred_rain <- rep(median(dat$rain), N)
> pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity >
pred_humidity, rain = pred_rain)
>
> predictions <- predict(mod, pd, type = "terms")
>
>
> The predict line creates the following warning and error:
>
> Warning in predict.gam(mod, pd, type = "terms") :
> not all required variables have been supplied in newdata!
>
> Error in model.frame.default(ff, data = newdata, na.action = na.act) :
> object is not a matrix
>
>
> For ease of reference, I've (re)included the full reproducible example:
>
> library(mgcv)
> set.seed(3) # make reproducible example
> simdat <- gamSim(1,400)
> g <- exp(simdat$f/5)
>
> simdat$y <- rnbinom(g,size=3,mu=g) # negative binomial response var
> simdat$time <- 1:400 # create time series
>
> names(simdat) <- c("deaths", "temp",
"humidity", "rain", "x3", "f",
> "f0", "f1", "f2", "f3",
"time")
>
> # lag function based on Wood (book 2017, p.349 and gamair package
> documentation p.54
> # https://cran.rstudio.com/web/packages/gamair/gamair.pdf)
> lagard <- function(x,n.lag=7) {
> n <- length(x); X <- matrix(NA,n,n.lag)
> for (i in 1:n.lag) X[i:n,i] <- x[i:n-i+1]
> X
> }
>
> # set up lag, temp, rain and humidity as 7-column matrices
> # to create lagged variables
> dat <- list(lag=matrix(0:6,nrow(simdat),7,byrow=TRUE),
> deaths=simdat$deaths, time = simdat$time)
> dat$temp <- lagard(simdat$temp)
> dat$rain <- lagard(simdat$rain)
> dat$humidity <- lagard(simdat$humidity)
>
> mod <- gam(deaths~s(time, k=70) + te(temp, lag, k=c(12, 4)) +
> te(humidity, lag, k=c(12, 4)) + te(rain, lag, k=c(12, 4)), data = dat,
> family = nb, method = 'REML', select = TRUE)
>
> # create prediction data
> N <- 1000 # number of points for which to predict the smooths
> pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm = T),
length = N)
> pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N)
> pred_humidity <- rep(median(dat$humidity), N)
> pred_rain <- rep(median(dat$rain), N)
> pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity >
pred_humidity, rain = pred_rain)
>
> # make predictions
> predictions <- predict(mod, pd, type = "terms")
>
>
> On Fri, 22 Jul 2022 at 09:54, Simon Wood <simon.wood at bath.edu>
wrote:
> >
> >
> > On 21/07/2022 15:19, jade.shodan--- via R-help wrote:
> > > Hello everyone (incl. Simon Wood?),
> > >
> > > I'm not sure that my original question (see below, including
> > > reproducible example) was as clear as it could have been. To
clarify,
> > > what I would to like to get is:
> > >
> > > 1) a perspective plot of temperature x lag x relative risk. I
know
> > > how to use plot.gam and vis.gam but don't know how to get
plots on the
> > > relative risk scale as opposed to "response" or
"link".
> >
> > - You are on the log scale so I think that all you need to do is to
use
> > 'predict.gam', with 'type = "terms"' to get
the predictions for the
> > te(temp, lag) term over the required grid of lags and temperatures.
> > Suppose the dataframe of prediction data is 'pd'. Now produce
pd0, which
> > is identical to pd, except that the temperatures are all set to the
> > reference temperature. Use predict.gam to predict te(temp,lag) from
pd0.
> > Now the exponential of the difference between the first and second
> > predictions is the required RR, which you can plot using
'persp',
> > 'contour', 'image' or whatever. If you need credible
intervals see pages
> > 341-343 of my 'GAMs: An intro with R' book (2nd ed).
> >
> > > 2) a plot of relative risk (accumulated across all lags) vs
> > > temperature, given a reference temperature. An example of such a
plot
> > > can be found in figure 2 (bottom) of this paper by Gasparrini et
al:
> > > https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.3940
> >
> > - I guess this only makes sense if you have the same temperature at
all
> > lags. So this time produce a data.frame with each desired temperature
> > repeated for each lag: 'pd1'. Again use
predict.gam(...,type="terms").
> > Then sum the predictions over lags for each temperature, subtract the
> > minimum, and take the exponential. Same as above for CIs.
> >
> > best,
> >
> > Simon
> >
> > > I've seen Simon Wood's response to a related issue here:
> > > https://stat.ethz.ch/pipermail/r-help/2012-May/314387.html
> > > However, I'm not sure how to apply this to time series data
with
> > > distributed lag, to get the above mentioned figures.
> > >
> > > Would be really grateful for suggestions!
> > >
> > > Jade
> > >
> > > On Tue, 19 Jul 2022 at 16:07, jade.shodan at googlemail.com
> > > <jade.shodan at googlemail.com> wrote:
> > >> Dear list members,
> > >>
> > >> Does anyone know how to obtain a relative risk/ risk ratio
from a GAM
> > >> with a distributed lag model implemented in mgcv? I have a
GAM
> > >> predicting daily deaths from time series data consisting of
daily
> > >> temperature, humidity and rainfall. The GAM includes a
distributed lag
> > >> model because deaths may occur over several days following a
high heat
> > >> day.
> > >>
> > >> What I'd like to do is compute (and plot) the relative
risk
> > >> (accumulated across all lags) for a given temperature vs the
> > >> temperature at which the risk is lowest, with corresponding
confidence
> > >> intervals. I am aware of the predict.gam function but am not
sure if
> > >> and how it should be used in this case. (Additionally,
I'd also like
> > >> to plot the relative risk for different lags separately).
> > >>
> > >> I apologise if this seems trivial to some. (Actually, I hope
it is,
> > >> because that might mean I get a solution!) I've been
looking for
> > >> examples on how to do this, but found nothing so far.
Suggestions
> > >> would be very much appreciated!
> > >>
> > >> Below is a reproducible example with the GAM:
> > >>
> > >> library(mgcv)
> > >> set.seed(3) # make reproducible example
> > >> simdat <- gamSim(1,400) # simulate data
> > >> g <- exp(simdat$f/5)
> > >> simdat$y <- rnbinom(g,size=3,mu=g) # negative binomial
response var
> > >> simdat$time <- 1:400 # create time series
> > >> names(simdat) <- c("deaths", "temp",
"humidity", "rain", "x3", "f",
> > >> "f0", "f1", "f2",
"f3", "time")
> > >>
> > >> # lag function based on Simon Wood (book 2017, p.349 and
gamair
> > >> package documentation p.54
> > >> # https://cran.rstudio.com/web/packages/gamair/gamair.pdf)
> > >> lagard <- function(x,n.lag=7) {
> > >> n <- length(x); X <- matrix(NA,n,n.lag)
> > >> for (i in 1:n.lag) X[i:n,i] <- x[i:n-i+1]
> > >> X
> > >> }
> > >>
> > >> # set up lag, temp, rain and humidity as 7-column matrices
> > >> # to create lagged variables - based on Simon Wood's
example
> > >> dat <- list(lag=matrix(0:6,nrow(simdat),7,byrow=TRUE),
> > >> deaths=simdat$deaths, time = simdat$time)
> > >> dat$temp <- lagard(simdat$temp)
> > >> dat$rain <- lagard(simdat$rain)
> > >> dat$humidity <- lagard(simdat$humidity)
> > >>
> > >> mod <- gam(deaths~s(time, k=70) + te(temp, lag, k=c(12,
4)) +
> > >> te(humidity, lag, k=c(12, 4)) + te(rain, lag, k=c(12, 4)),
data = dat,
> > >> family = nb, method = 'REML', select = TRUE)
> > >>
> > >> summary(mod)
> > >> plot(mod, scheme = 1)
> > >> plot(mod, scheme = 2)
> > >>
> > >> Thanks for any suggestions you may have,
> > >>
> > >> Jade
> > > ______________________________________________
> > > R-help at r-project.org mailing list -- To UNSUBSCRIBE and more,
see
> > > https://stat.ethz.ch/mailman/listinfo/r-help
> > > PLEASE do read the posting guide
http://www.R-project.org/posting-guide.html
> > > and provide commented, minimal, self-contained, reproducible
code.
> >
> > --
> > Simon Wood, School of Mathematics, University of Edinburgh,
> > https://www.maths.ed.ac.uk/~swood34/
> >
j@de@shod@@ m@iii@g oii googiem@ii@com
2022-Jul-23 13:54 UTC
[R] mgcv: relative risk from GAM with distributed lag
Hi Simon and all,
I've corrected some mistakes in setting up the prediction data frame
(sorry, very stressed and sleep deprived due to closing in deadlines),
but am having problems getting the perspective plot using persp().
I've calculated relative risk (RR) but am having trouble getting the
perspective (or contour) plot. persp() takes x and y vectors in
ascending order, and z as a matrix. I've seen the outer() function
being used to facilitate perspective plots, but every example I've
seen treats z as a function, to be evaluated at combinations for x and
y. In my case, I already have observations for z. Not sure if I need
to use outer(), and if so, how?
My code for making predictions, getting RR and trying to get the plot
is as follows (full reproducible example including model at the
bottom). Can someone tell me what I'm doing wrong?
########### CODE ########################
# create prediction data
N <- 1000 # number of points for which to predict the smooths
pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm = T),
length = N) # prediction data for temperature
pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N)
pred_rain <- seq(min(dat$rain, na.rm = T), max(dat$rain, na.rm = T), length =
N)
pred_humidity <- seq(min(dat$humidity, na.rm = T), max(dat$humidity,
na.rm = T), length = N)
pred_time <- seq(min(dat$time, na.rm = T), max(dat$time, na.rm = T), length =
N)
pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity pred_humidity,
rain = pred_rain, time = pred_time)
# create prediction data with reference temperature set to median temperature
# identical to pd but now with all temperatures set to median value of
temperature
pred_temp0 <- rep(median(dat$temp, na.rm=T), N)
pd0 <- data.frame(temp = pred_temp0, lag = pred_lag, humidity pred_humidity,
rain = pred_rain, time = pred_time)
# make predictions
predictions <- predict.gam(mod, pd, type = "terms")
predictions0 <- predict.gam(mod, pd0, type = "terms")
# calculate RR
diff <- predictions[,2] - predictions0[,2]
rr <- as.vector(exp(diff))
# convert rr to matrix required for z argument for persp()
rr_mat <- matrix(rr, nrow = N)
persp(pred_temp, pred_lag, rr_mat)
######### ERROR MESSAGE ##################################
The persp call results in the follow error:
Error in persp.default(pred_temp, pred_lag, rr_mat) :
increasing 'x' and 'y' values expected
I don't understand this because pred_temp and pred_lag ARE in ascending
order.
############################################################
FULL REPRODUCIBLE EXAMPLE
############################################################
library(mgcv)
set.seed(3) # make reproducible example
simdat <- gamSim(1,400)
g <- exp(simdat$f/5)
simdat$y <- rnbinom(g,size=3,mu=g) # negative binomial response var
simdat$time <- 1:400 # create time series
names(simdat) <- c("deaths", "temp",
"humidity", "rain", "x3", "f",
"f0", "f1", "f2", "f3",
"time")
# lag function based on Wood (book 2017, p.349 and gamair package
documentation p.54
# https://cran.rstudio.com/web/packages/gamair/gamair.pdf)
lagard <- function(x,n.lag=7) {
n <- length(x); X <- matrix(NA,n,n.lag)
for (i in 1:n.lag) X[i:n,i] <- x[i:n-i+1]
X
}
# set up lag, temp, rain and humidity as 7-column matrices
# to create lagged variables
dat <- list(lag=matrix(0:6,nrow(simdat),7,byrow=TRUE),
deaths=simdat$deaths, time = simdat$time)
dat$temp <- lagard(simdat$temp)
dat$rain <- lagard(simdat$rain)
dat$humidity <- lagard(simdat$humidity)
mod <- gam(deaths~s(time, k=70) + te(temp, lag, k=c(12, 4)) +
te(humidity, lag, k=c(12, 4)) + te(rain, lag, k=c(12, 4)), data = dat,
family = nb, method = 'REML', select = TRUE)
summary(mod)
plot(mod, scheme = 1) # perspective
plot(mod, scheme = 2) # contour
# create prediction data
N <- 1000 # number of points for which to predict the smooths
pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm = T), length =
N)
pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N)
pred_rain <- seq(min(dat$rain, na.rm = T), max(dat$rain, na.rm = T), length =
N)
pred_humidity <- seq(min(dat$humidity, na.rm = T), max(dat$humidity,
na.rm = T), length = N)
pred_time <- seq(min(dat$time, na.rm = T), max(dat$time, na.rm = T), length =
N)
pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity pred_humidity,
rain = pred_rain, time = pred_time)
# create prediction data with reference temperature set to median temperature
# identical to pd but now with all temperatures set to median value of
temperature
pred_temp0 <- rep(median(dat$temp, na.rm=T), N)
pd0 <- data.frame(temp = pred_temp0, lag = pred_lag, humidity pred_humidity,
rain = pred_rain, time = pred_time)
# make predictions
predictions <- predict.gam(mod, pd, type = "terms")
predictions0 <- predict.gam(mod, pd0, type = "terms")
# calculate RR
diff <- predictions[,2] - predictions0[,2]
rr <- as.vector(exp(diff))
# convert rr to matrix required for z argument for persp()
rr_mat <- matrix(rr, nrow = N)
persp(pred_temp, pred_lag, rr_mat)
On Fri, 22 Jul 2022 at 13:29, jade.shodan at googlemail.com
<jade.shodan at googlemail.com> wrote:>
> I made a small error in the code below (not checking for NAs which are
> introduced by the lag function). However, this doesn't solve the issue
> I raised).
>
> So here's the problem (with corrected code) again:
>
> I'm not sure what to do with the 'time' variable. (I don't
want to
> make predictions for specific points in time). I coded as follows
> (full reproducible example at bottom of email), but get a warning and
> error:
>
>
> N <- 1000 # number of points for smooth to be predicted
> # new temperatures and lags for prediction
> pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm = T),
length = N)
> pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N) ## IS IT CORRECT TO SET
> UP LAG LIKE THIS?
>
> # not sure if these covariates are required with type = "terms"
> pred_humidity <- rep(median(dat$humidity, na.rm = T), N)
> pred_rain <- rep(median(dat$rain, na.rm = T), N)
> pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity >
pred_humidity, rain = pred_rain)
>
> predictions <- predict(mod, pd, type = "terms")
>
>
> The predict line creates the following warning and error:
>
> Warning in predict.gam(mod, pd, type = "terms") :
> not all required variables have been supplied in newdata!
>
> Error in model.frame.default(ff, data = newdata, na.action = na.act) :
> object is not a matrix
>
>
> For ease of reference, I've (re)included the full reproducible example:
>
> library(mgcv)
> set.seed(3) # make reproducible example
> simdat <- gamSim(1,400)
> g <- exp(simdat$f/5)
>
> simdat$y <- rnbinom(g,size=3,mu=g) # negative binomial response var
> simdat$time <- 1:400 # create time series
>
> names(simdat) <- c("deaths", "temp",
"humidity", "rain", "x3", "f",
> "f0", "f1", "f2", "f3",
"time")
>
> # lag function based on Wood (book 2017, p.349 and gamair package
> documentation p.54
> # https://cran.rstudio.com/web/packages/gamair/gamair.pdf)
> lagard <- function(x,n.lag=7) {
> n <- length(x); X <- matrix(NA,n,n.lag)
> for (i in 1:n.lag) X[i:n,i] <- x[i:n-i+1]
> X
> }
>
> # set up lag, temp, rain and humidity as 7-column matrices
> # to create lagged variables
> dat <- list(lag=matrix(0:6,nrow(simda
> t),7,byrow=TRUE),
> deaths=simdat$deaths, time = simdat$time)
> dat$temp <- lagard(simdat$temp)
> dat$rain <- lagard(simdat$rain)
> dat$humidity <- lagard(simdat$humidity)
>
> mod <- gam(deaths~s(time, k=70) + te(temp, lag, k=c(12, 4)) +
> te(humidity, lag, k=c(12, 4)) + te(rain, lag, k=c(12, 4)), data = dat,
> family = nb, method = 'REML', select = TRUE)
>
> # create prediction data
> N <- 1000 # number of points for which to predict the smooths
> pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm = T),
length = N)
> pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N)
> pred_humidity <- rep(median(dat$humidity), N)
> pred_rain <- rep(median(dat$rain), N)
> pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity >
pred_humidity, rain = pred_rain)
>
> # make predictions
> predictions <- predict(mod, pd, type = "terms")
>
> On Fri, 22 Jul 2022 at 12:47, jade.shodan at googlemail.com
> <jade.shodan at googlemail.com> wrote:
> >
> > Hi Simon,
> >
> > Thanks for the pointers! But I'm not sure what to do with the
'time'
> > variable. (I don't want to make predictions for specific points in
> > time). I coded as follows (full reproducible example at bottom of
> > email), but get a warning and error:
> >
> >
> > N <- 1000 # number of points for smooth to be predicted
> > # new temperatures and lags for prediction
> > pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm =
T), length = N)
> > pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N) ## IS IT CORRECT TO
SET
> > UP LAG LIKE THIS?
> >
> > # not sure if these covariates are required with type =
"terms"
> > pred_humidity <- rep(median(dat$humidity), N)
> > pred_rain <- rep(median(dat$rain), N)
> > pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity >
> pred_humidity, rain = pred_rain)
> >
> > predictions <- predict(mod, pd, type = "terms")
> >
> >
> > The predict line creates the following warning and error:
> >
> > Warning in predict.gam(mod, pd, type = "terms") :
> > not all required variables have been supplied in newdata!
> >
> > Error in model.frame.default(ff, data = newdata, na.action = na.act) :
> > object is not a matrix
> >
> >
> > For ease of reference, I've (re)included the full reproducible
example:
> >
> > library(mgcv)
> > set.seed(3) # make reproducible example
> > simdat <- gamSim(1,400)
> > g <- exp(simdat$f/5)
> >
> > simdat$y <- rnbinom(g,size=3,mu=g) # negative binomial response
var
> > simdat$time <- 1:400 # create time series
> >
> > names(simdat) <- c("deaths", "temp",
"humidity", "rain", "x3", "f",
> > "f0", "f1", "f2", "f3",
"time")
> >
> > # lag function based on Wood (book 2017, p.349 and gamair package
> > documentation p.54
> > # https://cran.rstudio.com/web/packages/gamair/gamair.pdf)
> > lagard <- function(x,n.lag=7) {
> > n <- length(x); X <- matrix(NA,n,n.lag)
> > for (i in 1:n.lag) X[i:n,i] <- x[i:n-i+1]
> > X
> > }
> >
> > # set up lag, temp, rain and humidity as 7-column matrices
> > # to create lagged variables
> > dat <- list(lag=matrix(0:6,nrow(simdat),7,byrow=TRUE),
> > deaths=simdat$deaths, time = simdat$time)
> > dat$temp <- lagard(simdat$temp)
> > dat$rain <- lagard(simdat$rain)
> > dat$humidity <- lagard(simdat$humidity)
> >
> > mod <- gam(deaths~s(time, k=70) + te(temp, lag, k=c(12, 4)) +
> > te(humidity, lag, k=c(12, 4)) + te(rain, lag, k=c(12, 4)), data = dat,
> > family = nb, method = 'REML', select = TRUE)
> >
> > # create prediction data
> > N <- 1000 # number of points for which to predict the smooths
> > pred_temp <- seq(min(dat$temp, na.rm = T), max(dat$temp, na.rm =
T), length = N)
> > pred_lag <- rep(c(0, 1,2,3,4,5,6),each=N)
> > pred_humidity <- rep(median(dat$humidity), N)
> > pred_rain <- rep(median(dat$rain), N)
> > pd <- data.frame(temp = pred_temp, lag = pred_lag, humidity >
> pred_humidity, rain = pred_rain)
> >
> > # make predictions
> > predictions <- predict(mod, pd, type = "terms")
> >
> >
> > On Fri, 22 Jul 2022 at 09:54, Simon Wood <simon.wood at
bath.edu> wrote:
> > >
> > >
> > > On 21/07/2022 15:19, jade.shodan--- via R-help wrote:
> > > > Hello everyone (incl. Simon Wood?),
> > > >
> > > > I'm not sure that my original question (see below,
including
> > > > reproducible example) was as clear as it could have been. To
clarify,
> > > > what I would to like to get is:
> > > >
> > > > 1) a perspective plot of temperature x lag x relative risk.
I know
> > > > how to use plot.gam and vis.gam but don't know how to
get plots on the
> > > > relative risk scale as opposed to "response" or
"link".
> > >
> > > - You are on the log scale so I think that all you need to do is
to use
> > > 'predict.gam', with 'type = "terms"' to
get the predictions for the
> > > te(temp, lag) term over the required grid of lags and
temperatures.
> > > Suppose the dataframe of prediction data is 'pd'. Now
produce pd0, which
> > > is identical to pd, except that the temperatures are all set to
the
> > > reference temperature. Use predict.gam to predict te(temp,lag)
from pd0.
> > > Now the exponential of the difference between the first and
second
> > > predictions is the required RR, which you can plot using
'persp',
> > > 'contour', 'image' or whatever. If you need
credible intervals see pages
> > > 341-343 of my 'GAMs: An intro with R' book (2nd ed).
> > >
> > > > 2) a plot of relative risk (accumulated across all lags) vs
> > > > temperature, given a reference temperature. An example of
such a plot
> > > > can be found in figure 2 (bottom) of this paper by
Gasparrini et al:
> > > > https://onlinelibrary.wiley.com/doi/abs/10.1002/sim.3940
> > >
> > > - I guess this only makes sense if you have the same temperature
at all
> > > lags. So this time produce a data.frame with each desired
temperature
> > > repeated for each lag: 'pd1'. Again use
predict.gam(...,type="terms").
> > > Then sum the predictions over lags for each temperature, subtract
the
> > > minimum, and take the exponential. Same as above for CIs.
> > >
> > > best,
> > >
> > > Simon
> > >
> > > > I've seen Simon Wood's response to a related issue
here:
> > > > https://stat.ethz.ch/pipermail/r-help/2012-May/314387.html
> > > > However, I'm not sure how to apply this to time series
data with
> > > > distributed lag, to get the above mentioned figures.
> > > >
> > > > Would be really grateful for suggestions!
> > > >
> > > > Jade
> > > >
> > > > On Tue, 19 Jul 2022 at 16:07, jade.shodan at googlemail.com
> > > > <jade.shodan at googlemail.com> wrote:
> > > >> Dear list members,
> > > >>
> > > >> Does anyone know how to obtain a relative risk/ risk
ratio from a GAM
> > > >> with a distributed lag model implemented in mgcv? I have
a GAM
> > > >> predicting daily deaths from time series data consisting
of daily
> > > >> temperature, humidity and rainfall. The GAM includes a
distributed lag
> > > >> model because deaths may occur over several days
following a high heat
> > > >> day.
> > > >>
> > > >> What I'd like to do is compute (and plot) the
relative risk
> > > >> (accumulated across all lags) for a given temperature vs
the
> > > >> temperature at which the risk is lowest, with
corresponding confidence
> > > >> intervals. I am aware of the predict.gam function but am
not sure if
> > > >> and how it should be used in this case. (Additionally,
I'd also like
> > > >> to plot the relative risk for different lags
separately).
> > > >>
> > > >> I apologise if this seems trivial to some. (Actually, I
hope it is,
> > > >> because that might mean I get a solution!) I've been
looking for
> > > >> examples on how to do this, but found nothing so far.
Suggestions
> > > >> would be very much appreciated!
> > > >>
> > > >> Below is a reproducible example with the GAM:
> > > >>
> > > >> library(mgcv)
> > > >> set.seed(3) # make reproducible example
> > > >> simdat <- gamSim(1,400) # simulate data
> > > >> g <- exp(simdat$f/5)
> > > >> simdat$y <- rnbinom(g,size=3,mu=g) # negative
binomial response var
> > > >> simdat$time <- 1:400 # create time series
> > > >> names(simdat) <- c("deaths",
"temp", "humidity", "rain", "x3",
"f",
> > > >> "f0", "f1", "f2",
"f3", "time")
> > > >>
> > > >> # lag function based on Simon Wood (book 2017, p.349 and
gamair
> > > >> package documentation p.54
> > > >> #
https://cran.rstudio.com/web/packages/gamair/gamair.pdf)
> > > >> lagard <- function(x,n.lag=7) {
> > > >> n <- length(x); X <- matrix(NA,n,n.lag)
> > > >> for (i in 1:n.lag) X[i:n,i] <- x[i:n-i+1]
> > > >> X
> > > >> }
> > > >>
> > > >> # set up lag, temp, rain and humidity as 7-column
matrices
> > > >> # to create lagged variables - based on Simon Wood's
example
> > > >> dat <-
list(lag=matrix(0:6,nrow(simdat),7,byrow=TRUE),
> > > >> deaths=simdat$deaths, time = simdat$time)
> > > >> dat$temp <- lagard(simdat$temp)
> > > >> dat$rain <- lagard(simdat$rain)
> > > >> dat$humidity <- lagard(simdat$humidity)
> > > >>
> > > >> mod <- gam(deaths~s(time, k=70) + te(temp, lag,
k=c(12, 4)) +
> > > >> te(humidity, lag, k=c(12, 4)) + te(rain, lag, k=c(12,
4)), data = dat,
> > > >> family = nb, method = 'REML', select = TRUE)
> > > >>
> > > >> summary(mod)
> > > >> plot(mod, scheme = 1)
> > > >> plot(mod, scheme = 2)
> > > >>
> > > >> Thanks for any suggestions you may have,
> > > >>
> > > >> Jade
> > > > ______________________________________________
> > > > R-help at r-project.org mailing list -- To UNSUBSCRIBE and
more, see
> > > > https://stat.ethz.ch/mailman/listinfo/r-help
> > > > PLEASE do read the posting guide
http://www.R-project.org/posting-guide.html
> > > > and provide commented, minimal, self-contained, reproducible
code.
> > >
> > > --
> > > Simon Wood, School of Mathematics, University of Edinburgh,
> > > https://www.maths.ed.ac.uk/~swood34/
> > >