dear sir/madam:
i have a problem which about the example(lrm).
how to get the function "L <- .4*(sex=='male') +
.045*(age-50) +(log(cholesterol - 10)-5.2)*(-2*(sex=='female') +
2*(sex=='male'))".
thank you
example(lrm)
n <- 1000 # define sample size
set.seed(17) # so can reproduce the results
age <- rnorm(n, 50, 10)
blood.pressure <- rnorm(n, 120, 15)
cholesterol <- rnorm(n, 200, 25)
sex <- factor(sample(c('female','male'), n,TRUE))
label(age) <- 'Age' # label is in Hmisc
label(cholesterol) <- 'Total Cholesterol'
label(blood.pressure) <- 'Systolic Blood Pressure'
label(sex) <- 'Sex'
units(cholesterol) <- 'mg/dl'
units(blood.pressure) <- 'mmHg'
ch <- cut2(cholesterol, g=40, levels.mean=TRUE) # use mean values in
intervals
table(ch)
f <- lrm(ch ~ age)
m <- Mean(f) # see help file for Mean.lrm
d <- data.frame(age=seq(0,90,by=10))
m(predict(f, d))
f <- ols(cholesterol ~ age)
predict(f, d)
---------------------need to interpretation-------------
L <- .4*(sex=='male') + .045*(age-50) +(log(cholesterol -
10)-5.2)*(-2*(sex=='female') + 2*(sex=='male'))
------------------------------end-------------------------
# Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)]
y <- ifelse(runif(n) < plogis(L), 1, 0)
cholesterol[1:3] <- NA # 3 missings, at random
ddist <- datadist(age, blood.pressure, cholesterol, sex)
options(datadist='ddist')
fit <- lrm(y ~ blood.pressure + sex * (age + rcs(cholesterol,4)),
x=TRUE, y=TRUE)
# x=TRUE, y=TRUE allows use of resid(), which.influence below
# could define d <- datadist(fit) after lrm(), but data distribution
# summary would not be stored with fit, so later uses of Predict
# or summary.rms would require access to the original dataset or
# d or specifying all variable values to summary, Predict, nomogram
anova(fit)
p <- Predict(fit, age, sex)
plot(p)
plot(Predict(fit, age=20:70, sex="male")) # need if datadist not used
print(cbind(resid(fit,"dfbetas"),
resid(fit,"dffits"))[1:20,])
which.influence(fit, .3)
# latex(fit) #print nice statement of fitted model
#
#Repeat this fit using penalized MLE, penalizing complex termslrm 67
#(for nonlinear or interaction effects)
#
fitp <- update(fit, penalty=list(simple=0,nonlinear=10), x=TRUE, y=TRUE)
effective.df(fitp)
# or lrm(y ~ ..., penalty=...)
#Get fits for a variety of penalties and assess predictive accuracy
#in a new data set. Program efficiently so that complex design
#matrices are only created once.
set.seed(201)
x1 <- rnorm(500)
x2 <- rnorm(500)
x3 <- sample(0:1,500,rep=TRUE)
L <- x1+abs(x2)+x3
y <- ifelse(runif(500)<=plogis(L), 1, 0)
new.data <- data.frame(x1,x2,x3,y)[301:500,]
#
for(penlty in seq(0,.15,by=.005)) {
if(penlty==0) {
f <- lrm(y ~ rcs(x1,4)+rcs(x2,6)*x3, subset=1:300, x=TRUE, y=TRUE)
# True model is linear in x1 and has no interaction
X <- f$x # saves time for future runs - don't have to use rcs etc.
Y <- f$y # this also deletes rows with NAs (if there were any)
penalty.matrix <- diag(diag(var(X)))
Xnew <- predict(f, new.data, type="x", incl.non.slopes=FALSE)
# expand design matrix for new data
Ynew <- new.data$y
} else f <- lrm.fit(X,Y, penalty.matrix=penlty*penalty.matrix)
#
cat("\nPenalty :",penlty,"\n")
pred.logit <- f$coef[1] + (Xnew %*% f$coef[-1])
pred <- plogis(pred.logit)
C.index <- somers2(pred, Ynew)["C"]
Brier <- mean((pred-Ynew)^2)
Deviance<- -2*sum( Ynew*log(pred) + (1-Ynew)*log(1-pred) )
cat("ROC area:",format(C.index)," Brier
score:",format(Brier),
" -2 Log L:",format(Deviance),"\n")
[[alternative HTML version deleted]]