Here is a drop-in replacement for the R incomplete beta function. src/math/pbeta.c It is a slightly modified version of the cephes library one from Netlib. In the few cases I tried it seems to give at least 14 digit agreement with the one in S-PLUS (its hard to get more). I'm not sure what performance is like. I'd like to know if it helps with some of the problems which have been reported for the t and F code in R. If it works It'll be in an official second patch for 0.49 later in the week (so just swap it temporarily with the old one). Ross - - s n i p - - h e r e - - /* * Incomplete Beta Integral * * Taken from Cephes Math Library, Release 2.3: March, 1995 * Copyright 1984, 1995 by Stephen L. Moshier * Changes for R : Copyright 1997 by Ross Ihaka */ #include <math.h> #include <float.h> #include <errno.h> static double incbcf(), incbd(), pseries(); static double big = 4.503599627370496e15; static double biginv = 2.22044604925031308085e-16; double pbeta(double xx, double aa, double bb) { double a, b, t, x, xc, w, y; int flag; if (aa <= 0 || bb <= 0) goto domerr; if (xx <= 0 || xx >= 1) { if (xx == 0) return 0; if (xx == 1) return 1; domerr: errno = EDOM; return 0; } flag = 0; if (bb * xx <= 1 && xx <= 0.95) { t = pseries(aa, bb, xx); goto done; } w = 1 - xx; /* Reverse a and b if x is greater than the mean. */ if (xx > (aa / (aa + bb))) { flag = 1; a = bb; b = aa; xc = xx; x = w; } else { a = aa; b = bb; xc = w; x = xx; } if (flag == 1 && (b * x) <= 1 && x <= 0.95) { t = pseries(a, b, x); goto done; } /* Choose expansion for better convergence. */ y = x * (a + b - 2) - (a - 1); if (y < 0) w = incbcf(a, b, x); else w = incbd(a, b, x) / xc; /* Multiply w by the factor a b _ _ _ x (1-x) | (a+b) / ( a | (a) | (b) ) . */ y = a * log(x); t = b * log(xc); /* Resort to logarithms. */ y += t + lgamma(a + b) - lgamma(a) - lgamma(b); y += log(w / a); t = exp(y); done: if (flag == 1) { if (t <= DBL_EPSILON) t = 1 - DBL_EPSILON; else t = 1 - t; } return t; } /* Continued fraction expansion #1 * for incomplete beta integral */ static double incbcf(double a, double b, double x) { double xk, pk, pkm1, pkm2, qk, qkm1, qkm2; double k1, k2, k3, k4, k5, k6, k7, k8; double r, t, ans, thresh; int n; k1 = a; k2 = a + b; k3 = a; k4 = a + 1; k5 = 1; k6 = b - 1; k7 = k4; k8 = a + 2; pkm2 = 0; qkm2 = 1; pkm1 = 1; qkm1 = 1; ans = 1; r = 1; n = 0; thresh = 3 * DBL_EPSILON; do { xk = -(x * k1 * k2) / (k3 * k4); pk = pkm1 + pkm2 * xk; qk = qkm1 + qkm2 * xk; pkm2 = pkm1; pkm1 = pk; qkm2 = qkm1; qkm1 = qk; xk = (x * k5 * k6) / (k7 * k8); pk = pkm1 + pkm2 * xk; qk = qkm1 + qkm2 * xk; pkm2 = pkm1; pkm1 = pk; qkm2 = qkm1; qkm1 = qk; if (qk != 0) r = pk / qk; if (r != 0) { t = fabs((ans - r) / r); ans = r; } else t = 1; if (t < thresh) goto cdone; k1 += 1; k2 += 1; k3 += 2; k4 += 2; k5 += 1; k6 -= 1; k7 += 2; k8 += 2; if ((fabs(qk) + fabs(pk)) > big) { pkm2 *= biginv; pkm1 *= biginv; qkm2 *= biginv; qkm1 *= biginv; } if ((fabs(qk) < biginv) || (fabs(pk) < biginv)) { pkm2 *= big; pkm1 *= big; qkm2 *= big; qkm1 *= big; } } while (++n < 300); cdone: return ans; } /* Continued fraction expansion #2 * for incomplete beta integral */ static double incbd(double a, double b, double x) { double xk, pk, pkm1, pkm2, qk, qkm1, qkm2; double k1, k2, k3, k4, k5, k6, k7, k8; double r, t, ans, z, thresh; int n; k1 = a; k2 = b - 1; k3 = a; k4 = a + 1; k5 = 1; k6 = a + b; k7 = a + 1; k8 = a + 2; pkm2 = 0; qkm2 = 1; pkm1 = 1; qkm1 = 1; z = x / (1 - x); ans = 1; r = 1; n = 0; thresh = 3 * DBL_EPSILON; do { xk = -(z * k1 * k2) / (k3 * k4); pk = pkm1 + pkm2 * xk; qk = qkm1 + qkm2 * xk; pkm2 = pkm1; pkm1 = pk; qkm2 = qkm1; qkm1 = qk; xk = (z * k5 * k6) / (k7 * k8); pk = pkm1 + pkm2 * xk; qk = qkm1 + qkm2 * xk; pkm2 = pkm1; pkm1 = pk; qkm2 = qkm1; qkm1 = qk; if (qk != 0) r = pk / qk; if (r != 0) { t = fabs((ans - r) / r); ans = r; } else t = 1; if (t < thresh) goto cdone; k1 += 1; k2 -= 1; k3 += 2; k4 += 2; k5 += 1; k6 += 1; k7 += 2; k8 += 2; if ((fabs(qk) + fabs(pk)) > big) { pkm2 *= biginv; pkm1 *= biginv; qkm2 *= biginv; qkm1 *= biginv; } if ((fabs(qk) < biginv) || (fabs(pk) < biginv)) { pkm2 *= big; pkm1 *= big; qkm2 *= big; qkm1 *= big; } } while (++n < 300); cdone: return ans; } /* Power series for incomplete beta integral. Use when b*x is small and x not too close to 1. */ static double pseries(double a, double b, double x) { double s, t, u, v, n, t1, z, ai; ai = 1 / a; u = (1 - b) * x; v = u / (a + 1); t1 = v; t = u; n = 2; s = 0; z = DBL_EPSILON * ai; while (fabs(v) > z) { u = (n - b) * x / n; t *= u; v = t / (a + n); s += v; n += 1; } s += t1; s += ai; u = a * log(x); t = lgamma(a + b) - lgamma(a) - lgamma(b) + u + log(s); s = exp(t); return s; } =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html Send "info", "help", or "[un]subscribe" (in the "body", not the subject !) To: r-devel-request@stat.math.ethz.ch =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
>>>>> Ross Ihaka writes:> Here is a drop-in replacement for the R incomplete beta function. > src/math/pbeta.c> It is a slightly modified version of the cephes library one from > Netlib. In the few cases I tried it seems to give at least 14 > digit agreement with the one in S-PLUS (its hard to get more).> I'm not sure what performance is like. I'd like to know if it > helps with some of the problems which have been reported for the > t and F code in R.Indeed, `qt(0.975, 3)' now also works on Debian Linux/GNU/ix86 when compiled with `-O2 -g'. Thanks! (Of course, this is the only example I tried thus far ...) -k =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html Send "info", "help", or "[un]subscribe" (in the "body", not the subject !) To: r-devel-request@stat.math.ethz.ch =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
Martin Maechler
1997-Apr-30 10:04 UTC
R-alpha: New pbeta.c : Still problems for BIG 'df' (and an S-bug)
I've only made a few experiments using qt(.) with the new pbeta.c: At least the problem of "BIG df" --> 'long/infinite' CPU remains even though it has been pushed slightly For 0.49 'plain', qt(.99, 10^k) already takes quite some time for k=5, and takes for ever for k=7 Try for(ee in 1:6) cat(10^ee,":", unix.time(qt(.99,df=10^ee))[1],"\n") For the new pbeta.c, the problem appears about two exponents later. qt(.99, df = 1e8) ## 'never' returns The following code shows you a bit of what is going on. BTW, S-plus seems to use a different approximation which remains fast even when df = BIG. However, S-plus results then become COMPLETELY RUBBISH! Here are my few tests: p <- c(.975, .995, .9995, 1 - 5e-6) df1 <- c(1:5,10^c(1:8,10,15,10*(2:10))) df0 <- df1[1:12] ## df0 works for R 0.49 -patch2- dig <- 8 for(df in df0) cat(formatC(df,w=6),":", formatC(unix.time(qq <- qt(p, df))[1],form='f'), "|", formatC(qq, form='f', dig=dig,wid=dig+6),"\n") cat(" _Inf_ : --- |", formatC(qnorm(p), form='f', dig=dig,wid=dig+6),"\n") ## R 0.49 with pbeta.c of 30 April 1997: df : CPU[s]| q .975 .995 .9995 .999995=1-5e-6 1 : 0.0100 | 12.70620474 63.65674116 636.61924876 63661.96977709 2 : 0.0200 | 4.30265273 9.92484320 31.59905458 316.22539430 3 : 0.0000 | 3.18244631 5.84090931 12.92397864 60.39682404 4 : 0.0000 | 2.77644511 4.60409487 8.61030158 27.77164766 5 : 0.0100 | 2.57058184 4.03214298 6.86882663 17.89686614 10 : 0.0100 | 2.22813885 3.16927267 4.58689386 8.15028656 100 : 0.0900 | 1.98397152 2.62589052 3.39049131 4.65424006 1000 : 0.0300 | 1.96233908 2.58075470 3.30028265 4.43992647 1e+04 : 0.0400 | 1.96020124 2.57632105 3.29149997 4.41943950 1e+05 : 0.1300 | 1.95998771 2.57587847 3.29062403 4.41739993 1e+06 : 0.2800 | 1.95996636 2.57583422 3.29053646 4.41719606 1e+07 : 0.3800 | 1.95996422 2.57582979 3.29052770 4.41717568 _Inf_ : --- | 1.95996398 2.57582930 3.29052672 4.41718074 ## S-plus (3.4): From df ~= 1e4 (p=1-5e-6) results are COMPLETELY wrong ! ## ----- You get quick results for even higher 'df' -- but they are wrong... 1 : 0.0100 | 12.70620474 63.65674116 636.61924876 63661.96770658 2 : 0.0000 | 4.30265273 9.92484320 31.59905458 316.22539430 3 : 0.0000 | 3.18244631 5.84090931 12.92397864 60.39682404 4 : 0.0100 | 2.77644511 4.60409487 8.61030158 27.77164766 5 : 0.0000 | 2.57058184 4.03214298 6.86882663 17.89686614 10 : 0.0000 | 2.22813885 3.16927267 4.58689386 8.15028656 100 : 0.0100 | 1.98397152 2.62589052 3.39049131 4.65424006 1000 : 0.0200 | 1.96233908 2.58075470 3.30028265 4.43992647 1e+04 : 0.0200 | 1.96020124 2.57632105 3.29966223 4.70204286 1e+05 : 0.0200 | 1.95998771 2.57587848 5.14482712 12.48628360 1e+06 : 0.0100 | 1.95996636 2.57583569 14.70818229 38.67299101 1e+07 : 0.0200 | 1.95996422 2.57600108 45.98857869 122.03496539 _Inf_ : --- | 1.95996398 2.57582930 3.29052673 4.41717341 =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html Send "info", "help", or "[un]subscribe" (in the "body", not the subject !) To: r-devel-request@stat.math.ethz.ch =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-