Fredrik Nilsson
2011-Jun-13 10:49 UTC
[R] How to formulate an (effect-modifying) interaction with matching variable in a conditional logistic regression?
Hi, I would like to see if a matching variable is an effect-modifier in a conditional logistic regression. Naturally, the matching variable can't enter directly in the model but as an interaction with terms that are in. However, I have problems in formulating the correct model the term that's already in the model is a factor. I am using treatment contrasts and the problem is that if I write: update(model, . ~ . + factorX:matchingvariableY) then I get one extra level for the level that is contained in the intercept which is conditioned away, and I get numerical problems. I show an example below using data from Hosmer and Lemeshow's Applied Logistic Regression (2nd ed. p.230 onwards) I can solve the problem by introducing new variables that contains the interaction I want, but I'd like to avoid this unwieldy solution. Can you suggest a better one? Thank you for your help! Fredrik Nilsson # Example # Code Sheet for the Variables in the 1-1 matched Low Birth Weight Study # Described in Section 7.3 Page 230 # # Name Description Codes/Values # pair Pair 1 - 56 # low Low Birth Weight 1 = BWT<=2500g, # 0 = BWT>2500g # age Age of Mother Years # lwt Weight of Mother at Pounds # Last Menstrual Period # race Race 1 = White # 2 = Black # 3 = Other # smoke Smoking Status 0 = No,1 = Yes # During Pregnancy # ptd History of Premature Labor 0 = None,1 = Yes # ht History of Hypertension 0 = No, 1 = Yes # ui Presence of Uterine 0 = No, 1 = Yes # Irritability pair<-rep(1:56, each=2) low<-rep(c(0,1), 56) age<-c(14,15,16,17,17,17,17,17,18,18,19,19,19,20,20,20,20,20, 20,20,20,21,21,21,21,21,22,22,23,23,23,23,23,24,24,24, 24,24,25,25,25,25,25,25,26,26,26,26,27,28,28,29,30,31,32,35) age<-rep(age,each=2) # actually the last one should be 34 not 35? age[112]<-34 lwt<-c(135,101,98,115,95,130,103,130,122,110,113,120,113,120,119,142, 100,148,90,110,150,91,115,102,235,112,120,150,103,125,169,120, 141,80,121,109,127,121,120,122,158,105,108,165,124,200,185,103, 160,100,115,130,95,130,158,130,130,97,128,187,119,120,115,110, 190,94,90,128,115,132,110,155,115,138,110,105,118,105,120,85, 155,115,125,92,140,89,241,105,113,117,168,96,133,154,160,190, 124,130,120,120,130,95,135,130,95,142,215,102,121,105,170,187) race<-c(0,2,1,2,2,2,2,2,0,0,1,0,1,1,2,1,0,2,0,1,2,0,2,0,0,0,2,0,2,2,2, 1,0,2,1,2,2,0,2,1,0,2,0,0,2,1,1,2,0,2,0,0,2,0,1,0,1,2,2,1,2,2, 2,0,0,2,0,1,0,2,2,0,2,0,2,1,0,2,2,2,0,2,1,0,0,2,1,2,0,0,1,2,2, 2,2,0,0,1,2,2,2,0,0,0,0,0,0,0,2,0,0,1) race<-as.factor(race);levels(race)<-c("white","black","other") smoke<-c(0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,0, 1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0, 1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1, 0,0,1,1,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1) smoke<-as.factor(smoke);levels(smoke)<-c("no","yes") ptd<-c(0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0, 1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1, 0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0, 0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0) ptd<-as.factor(ptd); levels(ptd)<-c("no","yes") ht<-c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,1) ht<-as.factor(ht);levels(ht)<-c("no","yes") ui<-c(0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1, 1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, 1,0,0,0,1,0,0,0,0,0,0,0,0) dataset<-data.frame(pair,low,age,lwt,race,smoke,ptd,ht,ui) library(survival) # model with all covariates mult.cl<-clogit(low~lwt+race+smoke+ptd+ht+ui+strata(pair),data=dataset) summary(mult.cl) # H&L drop race mult2.cl<-update(mult.cl,.~.-race) summary(mult2.cl) ###################### # check interactions # ###################### multi1.cl<-update(mult2.cl,.~.+age:lwt) summary(multi1.cl) anova(mult2.cl,multi1.cl) # then comes the interaction with smoke # no good! Here's the problem multi2.cl<-update(mult2.cl,.~.+age:smoke) summary(multi2.cl) anova(mult2.cl,multi2.cl) # has to define my own variable? dataset$ageNsmoke<-as.numeric(dataset$smoke=="yes")*dataset$age multi2a.cl<-update(mult2.cl,.~.+ageNsmoke) summary(multi2a.cl) anova(mult2.cl,multi2a.cl) # but suppose I had a model with race, then I'd have to # define two variables dataset$ageNblack<-as.numeric(dataset$race=="black")*dataset$age dataset$ageNother<-as.numeric(dataset$race=="other")*dataset$age multi3.cl<-update(mult.cl,.~.+race:age) summary(multi3.cl) anova(multi3.cl, mult.cl) multi3.cl<-update(mult.cl,.~.+ageNblack+ageNother) summary(multi3.cl) anova(multi3.cl, mult.cl) # even more unwieldy with factors as matching variables # I fake a second matching variable "type" with three levels dataset$type<-c(rep(1,36),rep(2,38),rep(3,38)) dataset$type<-as.factor(dataset$type) levels(dataset$type)<-c("type1","type2","type3") dataset$smokeNtype1<-as.numeric(dataset$smoke=="yes")*as.numeric(dataset$type=="type1") dataset$smokeNtype2<-as.numeric(dataset$smoke=="yes")*as.numeric(dataset$type=="type2") multi4.cl<-update(mult.cl,.~.+smokeNtype1+smokeNtype2)