Dear all,
I am trying to apply the logistic regression to determine the limit of
detection (LOD) of a molecular biology assay, the polymerase chain reaction
(PCR). The aim of the procedure is to identify the value (variable
"dilution") that determine a 95% probability of success, that is
"positive"/"total"=0.95. The procedure I have implemented
seemed to work
looking at the figure obtained from the sample set 1; however the figure
obtained from the sample set 2 shows that interpolation is not correct.
Admittedly, these are not the best sample sets to determine the LOD -there
are too many 100% successes - but the regression should still work.
Does anybody have some tip to solve this incongruence?
Many thanks,
Luigi Marongiu, MSc
----------------------------------------------------------------------------
----------------
### SAMPLE SET No. 1
### define data
labs<-c("dilution", "total", "positive")
p.1<-c(10, 20, 19)
p.2<-c(100, 20, 20)
p.3<-c(1000, 20, 20)
p.4<-c(10000, 20, 20)
p.5<-c(100000, 20, 20)
p.6<-c(1000000, 20, 20)
### create data frame from matrix
my.mat<-matrix(c(p.1, p.2, p.3, p.4, p.5, p.6), nrow=6, byrow=TRUE)
dimnames(my.mat)<-list(c(1:6),labs)
my.data<-as.data.frame(my.mat)
attach(my.data)
my.data
### define frequency data
Y<-cbind(positive, total-positive)
### fit model LOGIT
model<-glm(Y ~ dilution, family=binomial(link=logit), data=my.data)
### plot data and regression line
plot(dilution, positive/total, pch=16, ylim=c(0,1), log = "x",
xaxt="n", xlim=c(1, 1000000),
main="Positive amplification by PCR", ylab="Fraction
amplified",
xlab=expression(paste("Standard dilutions (c/",mu,"l)")))
### add x axis (William Dunlap, personal communication)
xlim <- par("usr")[1:2]
lab.x <- seq(ceiling(xlim[1]), floor(xlim[2]))
axis(side=1, at=10^lab.x, lab=as.expression(lapply(lab.x,
function(x)bquote(10^.(x)))))
lines(dilution, fitted(model), lty=1)
### set significance level and logit
# define significance
level<-0.95
# set logit value
logit.LOD<-log(level/(1-level))
### set function to determine LOD (Vito Ricci, personal communication)
LOD<-function(mod) as.vector((logit.LOD-coef(mod)[1])/coef(mod)[2])
### computation of the LOD value
x.LOD<-LOD(model)
### compute S.E.
# set variables
co<-model$coef
model.sum<-summary(model)
SE.co<-model.sum$coef[,2]
COV.co<-model.sum$cov.scaled[1,2]
# compute SE
SE.LOD<-abs(x.LOD) * sqrt( (SE.co[1]/co[1])^2 +
(SE.co[2]/co[2])^2 -
2*(COV.co/(co[1]*co[2])) )
log.SE.LOD<-log10(SE.LOD)
# define boundaries
bottom<-x.LOD-(1.96*log.SE.LOD)
ceiling<-x.LOD+(1.96*log.SE.LOD)
### REPORT
# mean value
x.LOD
# bottom
bottom
# ceiling
ceiling
### plot LOD
lines(c(0.1, x.LOD), c(0.95, 0.95), lty=2)
lines(c(x.LOD, x.LOD), c(0.95, 0), lty=2)
text(x.LOD, 0.02, labels = round(x.LOD, digits = 2), pos=4, offset 0.3, cex
= 0.7)
text(1, 0.93, label="95%", pos=4, offset = 0.3, cex = 0.7)
lines(c(bottom, ceiling), c(0,0), lty=1, lend="butt", lwd=0.7)
points(x.LOD, 0, pch=21, cex=0.8, bg="white")
detach(my.data)
############################################################################
###############################################################
############################################################################
###############################################################
### SAMPLE SET No. 2
### define data
labs<-c("dilution", "total", "positive")
p.1<-c(10, 10, 4)
p.2<-c(100, 10, 10)
p.3<-c(1000, 10, 10)
p.4<-c(10000, 10, 10)
p.5<-c(100000, 10, 10)
p.6<-c(1000000, 10, 10)
### create data frame from matrix
my.mat<-matrix(c(p.1, p.2, p.3, p.4, p.5, p.6), nrow=6, byrow=TRUE)
dimnames(my.mat)<-list(c(1:6),labs)
my.data<-as.data.frame(my.mat)
attach(my.data)
my.data
### define frequency data
Y<-cbind(positive, total-positive)
### fit model LOGIT
model<-glm(Y ~ dilution, family=binomial(link=logit), data=my.data)
### plot data and regression line
plot(dilution, positive/total, pch=16, ylim=c(0,1), log = "x",
xaxt="n", xlim=c(1, 1000000),
main="Positive amplification by PCR", ylab="Fraction
amplified",
xlab=expression(paste("Standard dilutions (c/",mu,"l)")))
### add x axis (William Dunlap, personal communication)
xlim <- par("usr")[1:2]
lab.x <- seq(ceiling(xlim[1]), floor(xlim[2]))
axis(side=1, at=10^lab.x, lab=as.expression(lapply(lab.x,
function(x)bquote(10^.(x)))))
lines(dilution, fitted(model), lty=1)
### set significance level and logit
# define significance
level<-0.95
# set logit value
logit.LOD<-log(level/(1-level))
### set function to determine LOD (Vito Ricci, personal communication)
LOD<-function(mod) as.vector((logit.LOD-coef(mod)[1])/coef(mod)[2])
### computation of the LOD value
x.LOD<-LOD(model)
### compute S.E.
# set variables
co<-model$coef
model.sum<-summary(model)
SE.co<-model.sum$coef[,2]
COV.co<-model.sum$cov.scaled[1,2]
# compute SE
SE.LOD<-abs(x.LOD) * sqrt( (SE.co[1]/co[1])^2 +
(SE.co[2]/co[2])^2 -
2*(COV.co/(co[1]*co[2])) )
log.SE.LOD<-log10(SE.LOD)
# define boundaries
bottom<-x.LOD-(1.96*log.SE.LOD)
ceiling<-x.LOD+(1.96*log.SE.LOD)
### REPORT
# mean value
x.LOD
# bottom
bottom
# ceiling
ceiling
### plot LOD
lines(c(0.1, x.LOD), c(0.95, 0.95), lty=2)
lines(c(x.LOD, x.LOD), c(0.95, 0), lty=2)
text(x.LOD, 0.02, labels = round(x.LOD, digits = 2), pos=4, offset 0.3, cex
= 0.7)
text(1, 0.93, label="95%", pos=4, offset = 0.3, cex = 0.7)
lines(c(bottom, ceiling), c(0,0), lty=1, lend="butt", lwd=0.7)
points(x.LOD, 0, pch=21, cex=0.8, bg="white")
detach(my.data)
############################################################################
###############################################################
[[alternative HTML version deleted]]
> set 1; however the figure obtained from the sample set 2 > shows that interpolation is not correct.I don't think the interpolation is incorrect; what is making it look incorrect is using a straight line to represent a logistic regression. Try adding the predicted values for the line to your plot: lines( dil <- 10^seq(-1, 6, 0.05), predict(model, newdata=data.frame(dilution=dil), type="response")) S Ellison ******************************************************************* This email and any attachments are confidential. Any use...{{dropped:8}}
Dear all,
I am trying to apply the logistic regression to determine the limit of
detection (LOD) of a molecular biology assay, the polymerase chain reaction
(PCR). The aim of the procedure is to identify the value (variable
"dilution") that determine a 95% probability of success, that is
"positive"/"total"=0.95. The procedure I have implemented
seemed to work
looking at the figure obtained from the sample set 1; however the figure
obtained from the sample set 2 shows that interpolation is not correct.
Admittedly, these are not the best sample sets to determine the LOD -there
are too many 100% successes - but the regression should still work.
Does anybody have some tip to solve this incongruence?
Many thanks,
Luigi Marongiu, MSc
----------------------------------------------------------------------------
----------------
### SAMPLE SET No. 1
### define data
labs<-c("dilution", "total", "positive")
p.1<-c(10, 20, 19)
p.2<-c(100, 20, 20)
p.3<-c(1000, 20, 20)
p.4<-c(10000, 20, 20)
p.5<-c(100000, 20, 20)
p.6<-c(1000000, 20, 20)
### create data frame from matrix
my.mat<-matrix(c(p.1, p.2, p.3, p.4, p.5, p.6), nrow=6, byrow=TRUE)
dimnames(my.mat)<-list(c(1:6),labs)
my.data<-as.data.frame(my.mat)
attach(my.data)
my.data
### define frequency data
Y<-cbind(positive, total-positive)
### fit model LOGIT
model<-glm(Y ~ dilution, family=binomial(link=logit), data=my.data)
### plot data and regression line
plot(dilution, positive/total, pch=16, ylim=c(0,1), log = "x",
xaxt="n", xlim=c(1, 1000000),
main="Positive amplification by PCR", ylab="Fraction
amplified",
xlab=expression(paste("Standard dilutions (c/",mu,"l)")))
### add x axis (William Dunlap, personal communication)
xlim <- par("usr")[1:2]
lab.x <- seq(ceiling(xlim[1]), floor(xlim[2]))
axis(side=1, at=10^lab.x, lab=as.expression(lapply(lab.x,
function(x)bquote(10^.(x)))))
lines(dilution, fitted(model), lty=1)
### set significance level and logit
# define significance
level<-0.95
# set logit value
logit.LOD<-log(level/(1-level))
### set function to determine LOD (Vito Ricci, personal communication)
LOD<-function(mod) as.vector((logit.LOD-coef(mod)[1])/coef(mod)[2])
### computation of the LOD value
x.LOD<-LOD(model)
### compute S.E.
# set variables
co<-model$coef
model.sum<-summary(model)
SE.co<-model.sum$coef[,2]
COV.co<-model.sum$cov.scaled[1,2]
# compute SE
SE.LOD<-abs(x.LOD) * sqrt( (SE.co[1]/co[1])^2 +
(SE.co[2]/co[2])^2 -
2*(COV.co/(co[1]*co[2])) )
log.SE.LOD<-log10(SE.LOD)
# define boundaries
bottom<-x.LOD-(1.96*log.SE.LOD)
ceiling<-x.LOD+(1.96*log.SE.LOD)
### REPORT
# mean value
x.LOD
# bottom
bottom
# ceiling
ceiling
### plot LOD
lines(c(0.1, x.LOD), c(0.95, 0.95), lty=2)
lines(c(x.LOD, x.LOD), c(0.95, 0), lty=2)
text(x.LOD, 0.02, labels = round(x.LOD, digits = 2), pos=4, offset 0.3, cex
= 0.7)
text(1, 0.93, label="95%", pos=4, offset = 0.3, cex = 0.7)
lines(c(bottom, ceiling), c(0,0), lty=1, lend="butt", lwd=0.7)
points(x.LOD, 0, pch=21, cex=0.8, bg="white")
detach(my.data)
############################################################################
###############################################################
############################################################################
###############################################################
### SAMPLE SET No. 2
### define data
labs<-c("dilution", "total", "positive")
p.1<-c(10, 10, 4)
p.2<-c(100, 10, 10)
p.3<-c(1000, 10, 10)
p.4<-c(10000, 10, 10)
p.5<-c(100000, 10, 10)
p.6<-c(1000000, 10, 10)
### create data frame from matrix
my.mat<-matrix(c(p.1, p.2, p.3, p.4, p.5, p.6), nrow=6, byrow=TRUE)
dimnames(my.mat)<-list(c(1:6),labs)
my.data<-as.data.frame(my.mat)
attach(my.data)
my.data
### define frequency data
Y<-cbind(positive, total-positive)
### fit model LOGIT
model<-glm(Y ~ dilution, family=binomial(link=logit), data=my.data)
### plot data and regression line
plot(dilution, positive/total, pch=16, ylim=c(0,1), log = "x",
xaxt="n", xlim=c(1, 1000000),
main="Positive amplification by PCR", ylab="Fraction
amplified",
xlab=expression(paste("Standard dilutions (c/",mu,"l)")))
### add x axis (William Dunlap, personal communication)
xlim <- par("usr")[1:2]
lab.x <- seq(ceiling(xlim[1]), floor(xlim[2]))
axis(side=1, at=10^lab.x, lab=as.expression(lapply(lab.x,
function(x)bquote(10^.(x)))))
lines(dilution, fitted(model), lty=1)
### set significance level and logit
# define significance
level<-0.95
# set logit value
logit.LOD<-log(level/(1-level))
### set function to determine LOD (Vito Ricci, personal communication)
LOD<-function(mod) as.vector((logit.LOD-coef(mod)[1])/coef(mod)[2])
### computation of the LOD value
x.LOD<-LOD(model)
### compute S.E.
# set variables
co<-model$coef
model.sum<-summary(model)
SE.co<-model.sum$coef[,2]
COV.co<-model.sum$cov.scaled[1,2]
# compute SE
SE.LOD<-abs(x.LOD) * sqrt( (SE.co[1]/co[1])^2 +
(SE.co[2]/co[2])^2 -
2*(COV.co/(co[1]*co[2])) )
log.SE.LOD<-log10(SE.LOD)
# define boundaries
bottom<-x.LOD-(1.96*log.SE.LOD)
ceiling<-x.LOD+(1.96*log.SE.LOD)
### REPORT
# mean value
x.LOD
# bottom
bottom
# ceiling
ceiling
### plot LOD
lines(c(0.1, x.LOD), c(0.95, 0.95), lty=2)
lines(c(x.LOD, x.LOD), c(0.95, 0), lty=2)
text(x.LOD, 0.02, labels = round(x.LOD, digits = 2), pos=4, offset 0.3, cex
= 0.7)
text(1, 0.93, label="95%", pos=4, offset = 0.3, cex = 0.7)
lines(c(bottom, ceiling), c(0,0), lty=1, lend="butt", lwd=0.7)
points(x.LOD, 0, pch=21, cex=0.8, bg="white")
detach(my.data)
############################################################################
###############################################################
[[alternative HTML version deleted]]