Dear all, I am trying to apply the logistic regression to determine the limit of detection (LOD) of a molecular biology assay, the polymerase chain reaction (PCR). The aim of the procedure is to identify the value (variable "dilution") that determine a 95% probability of success, that is "positive"/"total"=0.95. The procedure I have implemented seemed to work looking at the figure obtained from the sample set 1; however the figure obtained from the sample set 2 shows that interpolation is not correct. Admittedly, these are not the best sample sets to determine the LOD -there are too many 100% successes - but the regression should still work. Does anybody have some tip to solve this incongruence? Many thanks, Luigi Marongiu, MSc ---------------------------------------------------------------------------- ---------------- ### SAMPLE SET No. 1 ### define data labs<-c("dilution", "total", "positive") p.1<-c(10, 20, 19) p.2<-c(100, 20, 20) p.3<-c(1000, 20, 20) p.4<-c(10000, 20, 20) p.5<-c(100000, 20, 20) p.6<-c(1000000, 20, 20) ### create data frame from matrix my.mat<-matrix(c(p.1, p.2, p.3, p.4, p.5, p.6), nrow=6, byrow=TRUE) dimnames(my.mat)<-list(c(1:6),labs) my.data<-as.data.frame(my.mat) attach(my.data) my.data ### define frequency data Y<-cbind(positive, total-positive) ### fit model LOGIT model<-glm(Y ~ dilution, family=binomial(link=logit), data=my.data) ### plot data and regression line plot(dilution, positive/total, pch=16, ylim=c(0,1), log = "x", xaxt="n", xlim=c(1, 1000000), main="Positive amplification by PCR", ylab="Fraction amplified", xlab=expression(paste("Standard dilutions (c/",mu,"l)"))) ### add x axis (William Dunlap, personal communication) xlim <- par("usr")[1:2] lab.x <- seq(ceiling(xlim[1]), floor(xlim[2])) axis(side=1, at=10^lab.x, lab=as.expression(lapply(lab.x, function(x)bquote(10^.(x))))) lines(dilution, fitted(model), lty=1) ### set significance level and logit # define significance level<-0.95 # set logit value logit.LOD<-log(level/(1-level)) ### set function to determine LOD (Vito Ricci, personal communication) LOD<-function(mod) as.vector((logit.LOD-coef(mod)[1])/coef(mod)[2]) ### computation of the LOD value x.LOD<-LOD(model) ### compute S.E. # set variables co<-model$coef model.sum<-summary(model) SE.co<-model.sum$coef[,2] COV.co<-model.sum$cov.scaled[1,2] # compute SE SE.LOD<-abs(x.LOD) * sqrt( (SE.co[1]/co[1])^2 + (SE.co[2]/co[2])^2 - 2*(COV.co/(co[1]*co[2])) ) log.SE.LOD<-log10(SE.LOD) # define boundaries bottom<-x.LOD-(1.96*log.SE.LOD) ceiling<-x.LOD+(1.96*log.SE.LOD) ### REPORT # mean value x.LOD # bottom bottom # ceiling ceiling ### plot LOD lines(c(0.1, x.LOD), c(0.95, 0.95), lty=2) lines(c(x.LOD, x.LOD), c(0.95, 0), lty=2) text(x.LOD, 0.02, labels = round(x.LOD, digits = 2), pos=4, offset 0.3, cex = 0.7) text(1, 0.93, label="95%", pos=4, offset = 0.3, cex = 0.7) lines(c(bottom, ceiling), c(0,0), lty=1, lend="butt", lwd=0.7) points(x.LOD, 0, pch=21, cex=0.8, bg="white") detach(my.data) ############################################################################ ############################################################### ############################################################################ ############################################################### ### SAMPLE SET No. 2 ### define data labs<-c("dilution", "total", "positive") p.1<-c(10, 10, 4) p.2<-c(100, 10, 10) p.3<-c(1000, 10, 10) p.4<-c(10000, 10, 10) p.5<-c(100000, 10, 10) p.6<-c(1000000, 10, 10) ### create data frame from matrix my.mat<-matrix(c(p.1, p.2, p.3, p.4, p.5, p.6), nrow=6, byrow=TRUE) dimnames(my.mat)<-list(c(1:6),labs) my.data<-as.data.frame(my.mat) attach(my.data) my.data ### define frequency data Y<-cbind(positive, total-positive) ### fit model LOGIT model<-glm(Y ~ dilution, family=binomial(link=logit), data=my.data) ### plot data and regression line plot(dilution, positive/total, pch=16, ylim=c(0,1), log = "x", xaxt="n", xlim=c(1, 1000000), main="Positive amplification by PCR", ylab="Fraction amplified", xlab=expression(paste("Standard dilutions (c/",mu,"l)"))) ### add x axis (William Dunlap, personal communication) xlim <- par("usr")[1:2] lab.x <- seq(ceiling(xlim[1]), floor(xlim[2])) axis(side=1, at=10^lab.x, lab=as.expression(lapply(lab.x, function(x)bquote(10^.(x))))) lines(dilution, fitted(model), lty=1) ### set significance level and logit # define significance level<-0.95 # set logit value logit.LOD<-log(level/(1-level)) ### set function to determine LOD (Vito Ricci, personal communication) LOD<-function(mod) as.vector((logit.LOD-coef(mod)[1])/coef(mod)[2]) ### computation of the LOD value x.LOD<-LOD(model) ### compute S.E. # set variables co<-model$coef model.sum<-summary(model) SE.co<-model.sum$coef[,2] COV.co<-model.sum$cov.scaled[1,2] # compute SE SE.LOD<-abs(x.LOD) * sqrt( (SE.co[1]/co[1])^2 + (SE.co[2]/co[2])^2 - 2*(COV.co/(co[1]*co[2])) ) log.SE.LOD<-log10(SE.LOD) # define boundaries bottom<-x.LOD-(1.96*log.SE.LOD) ceiling<-x.LOD+(1.96*log.SE.LOD) ### REPORT # mean value x.LOD # bottom bottom # ceiling ceiling ### plot LOD lines(c(0.1, x.LOD), c(0.95, 0.95), lty=2) lines(c(x.LOD, x.LOD), c(0.95, 0), lty=2) text(x.LOD, 0.02, labels = round(x.LOD, digits = 2), pos=4, offset 0.3, cex = 0.7) text(1, 0.93, label="95%", pos=4, offset = 0.3, cex = 0.7) lines(c(bottom, ceiling), c(0,0), lty=1, lend="butt", lwd=0.7) points(x.LOD, 0, pch=21, cex=0.8, bg="white") detach(my.data) ############################################################################ ############################################################### [[alternative HTML version deleted]]
> set 1; however the figure obtained from the sample set 2 > shows that interpolation is not correct.I don't think the interpolation is incorrect; what is making it look incorrect is using a straight line to represent a logistic regression. Try adding the predicted values for the line to your plot: lines( dil <- 10^seq(-1, 6, 0.05), predict(model, newdata=data.frame(dilution=dil), type="response")) S Ellison ******************************************************************* This email and any attachments are confidential. Any use...{{dropped:8}}
Dear all, I am trying to apply the logistic regression to determine the limit of detection (LOD) of a molecular biology assay, the polymerase chain reaction (PCR). The aim of the procedure is to identify the value (variable "dilution") that determine a 95% probability of success, that is "positive"/"total"=0.95. The procedure I have implemented seemed to work looking at the figure obtained from the sample set 1; however the figure obtained from the sample set 2 shows that interpolation is not correct. Admittedly, these are not the best sample sets to determine the LOD -there are too many 100% successes - but the regression should still work. Does anybody have some tip to solve this incongruence? Many thanks, Luigi Marongiu, MSc ---------------------------------------------------------------------------- ---------------- ### SAMPLE SET No. 1 ### define data labs<-c("dilution", "total", "positive") p.1<-c(10, 20, 19) p.2<-c(100, 20, 20) p.3<-c(1000, 20, 20) p.4<-c(10000, 20, 20) p.5<-c(100000, 20, 20) p.6<-c(1000000, 20, 20) ### create data frame from matrix my.mat<-matrix(c(p.1, p.2, p.3, p.4, p.5, p.6), nrow=6, byrow=TRUE) dimnames(my.mat)<-list(c(1:6),labs) my.data<-as.data.frame(my.mat) attach(my.data) my.data ### define frequency data Y<-cbind(positive, total-positive) ### fit model LOGIT model<-glm(Y ~ dilution, family=binomial(link=logit), data=my.data) ### plot data and regression line plot(dilution, positive/total, pch=16, ylim=c(0,1), log = "x", xaxt="n", xlim=c(1, 1000000), main="Positive amplification by PCR", ylab="Fraction amplified", xlab=expression(paste("Standard dilutions (c/",mu,"l)"))) ### add x axis (William Dunlap, personal communication) xlim <- par("usr")[1:2] lab.x <- seq(ceiling(xlim[1]), floor(xlim[2])) axis(side=1, at=10^lab.x, lab=as.expression(lapply(lab.x, function(x)bquote(10^.(x))))) lines(dilution, fitted(model), lty=1) ### set significance level and logit # define significance level<-0.95 # set logit value logit.LOD<-log(level/(1-level)) ### set function to determine LOD (Vito Ricci, personal communication) LOD<-function(mod) as.vector((logit.LOD-coef(mod)[1])/coef(mod)[2]) ### computation of the LOD value x.LOD<-LOD(model) ### compute S.E. # set variables co<-model$coef model.sum<-summary(model) SE.co<-model.sum$coef[,2] COV.co<-model.sum$cov.scaled[1,2] # compute SE SE.LOD<-abs(x.LOD) * sqrt( (SE.co[1]/co[1])^2 + (SE.co[2]/co[2])^2 - 2*(COV.co/(co[1]*co[2])) ) log.SE.LOD<-log10(SE.LOD) # define boundaries bottom<-x.LOD-(1.96*log.SE.LOD) ceiling<-x.LOD+(1.96*log.SE.LOD) ### REPORT # mean value x.LOD # bottom bottom # ceiling ceiling ### plot LOD lines(c(0.1, x.LOD), c(0.95, 0.95), lty=2) lines(c(x.LOD, x.LOD), c(0.95, 0), lty=2) text(x.LOD, 0.02, labels = round(x.LOD, digits = 2), pos=4, offset 0.3, cex = 0.7) text(1, 0.93, label="95%", pos=4, offset = 0.3, cex = 0.7) lines(c(bottom, ceiling), c(0,0), lty=1, lend="butt", lwd=0.7) points(x.LOD, 0, pch=21, cex=0.8, bg="white") detach(my.data) ############################################################################ ############################################################### ############################################################################ ############################################################### ### SAMPLE SET No. 2 ### define data labs<-c("dilution", "total", "positive") p.1<-c(10, 10, 4) p.2<-c(100, 10, 10) p.3<-c(1000, 10, 10) p.4<-c(10000, 10, 10) p.5<-c(100000, 10, 10) p.6<-c(1000000, 10, 10) ### create data frame from matrix my.mat<-matrix(c(p.1, p.2, p.3, p.4, p.5, p.6), nrow=6, byrow=TRUE) dimnames(my.mat)<-list(c(1:6),labs) my.data<-as.data.frame(my.mat) attach(my.data) my.data ### define frequency data Y<-cbind(positive, total-positive) ### fit model LOGIT model<-glm(Y ~ dilution, family=binomial(link=logit), data=my.data) ### plot data and regression line plot(dilution, positive/total, pch=16, ylim=c(0,1), log = "x", xaxt="n", xlim=c(1, 1000000), main="Positive amplification by PCR", ylab="Fraction amplified", xlab=expression(paste("Standard dilutions (c/",mu,"l)"))) ### add x axis (William Dunlap, personal communication) xlim <- par("usr")[1:2] lab.x <- seq(ceiling(xlim[1]), floor(xlim[2])) axis(side=1, at=10^lab.x, lab=as.expression(lapply(lab.x, function(x)bquote(10^.(x))))) lines(dilution, fitted(model), lty=1) ### set significance level and logit # define significance level<-0.95 # set logit value logit.LOD<-log(level/(1-level)) ### set function to determine LOD (Vito Ricci, personal communication) LOD<-function(mod) as.vector((logit.LOD-coef(mod)[1])/coef(mod)[2]) ### computation of the LOD value x.LOD<-LOD(model) ### compute S.E. # set variables co<-model$coef model.sum<-summary(model) SE.co<-model.sum$coef[,2] COV.co<-model.sum$cov.scaled[1,2] # compute SE SE.LOD<-abs(x.LOD) * sqrt( (SE.co[1]/co[1])^2 + (SE.co[2]/co[2])^2 - 2*(COV.co/(co[1]*co[2])) ) log.SE.LOD<-log10(SE.LOD) # define boundaries bottom<-x.LOD-(1.96*log.SE.LOD) ceiling<-x.LOD+(1.96*log.SE.LOD) ### REPORT # mean value x.LOD # bottom bottom # ceiling ceiling ### plot LOD lines(c(0.1, x.LOD), c(0.95, 0.95), lty=2) lines(c(x.LOD, x.LOD), c(0.95, 0), lty=2) text(x.LOD, 0.02, labels = round(x.LOD, digits = 2), pos=4, offset 0.3, cex = 0.7) text(1, 0.93, label="95%", pos=4, offset = 0.3, cex = 0.7) lines(c(bottom, ceiling), c(0,0), lty=1, lend="butt", lwd=0.7) points(x.LOD, 0, pch=21, cex=0.8, bg="white") detach(my.data) ############################################################################ ############################################################### [[alternative HTML version deleted]]