Here is a drop-in replacement for the R incomplete beta function.
src/math/pbeta.c
It is a slightly modified version of the cephes library one from
Netlib. In the few cases I tried it seems to give at least 14
digit agreement with the one in S-PLUS (its hard to get more).
I'm not sure what performance is like. I'd like to know if it
helps with some of the problems which have been reported for the
t and F code in R.
If it works It'll be in an official second patch for 0.49 later in
the week (so just swap it temporarily with the old one).
Ross
- - s n i p - - h e r e - -
/*
* Incomplete Beta Integral
*
* Taken from Cephes Math Library, Release 2.3: March, 1995
* Copyright 1984, 1995 by Stephen L. Moshier
* Changes for R : Copyright 1997 by Ross Ihaka
*/
#include <math.h>
#include <float.h>
#include <errno.h>
static double incbcf(), incbd(), pseries();
static double big = 4.503599627370496e15;
static double biginv = 2.22044604925031308085e-16;
double pbeta(double xx, double aa, double bb)
{
double a, b, t, x, xc, w, y;
int flag;
if (aa <= 0 || bb <= 0)
goto domerr;
if (xx <= 0 || xx >= 1) {
if (xx == 0)
return 0;
if (xx == 1)
return 1;
domerr:
errno = EDOM;
return 0;
}
flag = 0;
if (bb * xx <= 1 && xx <= 0.95) {
t = pseries(aa, bb, xx);
goto done;
}
w = 1 - xx;
/* Reverse a and b if x is greater than the mean. */
if (xx > (aa / (aa + bb))) {
flag = 1;
a = bb;
b = aa;
xc = xx;
x = w;
} else {
a = aa;
b = bb;
xc = w;
x = xx;
}
if (flag == 1 && (b * x) <= 1 && x <= 0.95) {
t = pseries(a, b, x);
goto done;
}
/* Choose expansion for better convergence. */
y = x * (a + b - 2) - (a - 1);
if (y < 0)
w = incbcf(a, b, x);
else
w = incbd(a, b, x) / xc;
/* Multiply w by the factor
a b _ _ _
x (1-x) | (a+b) / ( a | (a) | (b) ) . */
y = a * log(x);
t = b * log(xc);
/* Resort to logarithms. */
y += t + lgamma(a + b) - lgamma(a) - lgamma(b);
y += log(w / a);
t = exp(y);
done:
if (flag == 1) {
if (t <= DBL_EPSILON)
t = 1 - DBL_EPSILON;
else
t = 1 - t;
}
return t;
}
/* Continued fraction expansion #1
* for incomplete beta integral
*/
static double incbcf(double a, double b, double x)
{
double xk, pk, pkm1, pkm2, qk, qkm1, qkm2;
double k1, k2, k3, k4, k5, k6, k7, k8;
double r, t, ans, thresh;
int n;
k1 = a;
k2 = a + b;
k3 = a;
k4 = a + 1;
k5 = 1;
k6 = b - 1;
k7 = k4;
k8 = a + 2;
pkm2 = 0;
qkm2 = 1;
pkm1 = 1;
qkm1 = 1;
ans = 1;
r = 1;
n = 0;
thresh = 3 * DBL_EPSILON;
do {
xk = -(x * k1 * k2) / (k3 * k4);
pk = pkm1 + pkm2 * xk;
qk = qkm1 + qkm2 * xk;
pkm2 = pkm1;
pkm1 = pk;
qkm2 = qkm1;
qkm1 = qk;
xk = (x * k5 * k6) / (k7 * k8);
pk = pkm1 + pkm2 * xk;
qk = qkm1 + qkm2 * xk;
pkm2 = pkm1;
pkm1 = pk;
qkm2 = qkm1;
qkm1 = qk;
if (qk != 0)
r = pk / qk;
if (r != 0) {
t = fabs((ans - r) / r);
ans = r;
} else
t = 1;
if (t < thresh)
goto cdone;
k1 += 1;
k2 += 1;
k3 += 2;
k4 += 2;
k5 += 1;
k6 -= 1;
k7 += 2;
k8 += 2;
if ((fabs(qk) + fabs(pk)) > big) {
pkm2 *= biginv;
pkm1 *= biginv;
qkm2 *= biginv;
qkm1 *= biginv;
}
if ((fabs(qk) < biginv) || (fabs(pk) < biginv)) {
pkm2 *= big;
pkm1 *= big;
qkm2 *= big;
qkm1 *= big;
}
}
while (++n < 300);
cdone:
return ans;
}
/* Continued fraction expansion #2
* for incomplete beta integral
*/
static double incbd(double a, double b, double x)
{
double xk, pk, pkm1, pkm2, qk, qkm1, qkm2;
double k1, k2, k3, k4, k5, k6, k7, k8;
double r, t, ans, z, thresh;
int n;
k1 = a;
k2 = b - 1;
k3 = a;
k4 = a + 1;
k5 = 1;
k6 = a + b;
k7 = a + 1;
k8 = a + 2;
pkm2 = 0;
qkm2 = 1;
pkm1 = 1;
qkm1 = 1;
z = x / (1 - x);
ans = 1;
r = 1;
n = 0;
thresh = 3 * DBL_EPSILON;
do {
xk = -(z * k1 * k2) / (k3 * k4);
pk = pkm1 + pkm2 * xk;
qk = qkm1 + qkm2 * xk;
pkm2 = pkm1;
pkm1 = pk;
qkm2 = qkm1;
qkm1 = qk;
xk = (z * k5 * k6) / (k7 * k8);
pk = pkm1 + pkm2 * xk;
qk = qkm1 + qkm2 * xk;
pkm2 = pkm1;
pkm1 = pk;
qkm2 = qkm1;
qkm1 = qk;
if (qk != 0)
r = pk / qk;
if (r != 0) {
t = fabs((ans - r) / r);
ans = r;
} else
t = 1;
if (t < thresh)
goto cdone;
k1 += 1;
k2 -= 1;
k3 += 2;
k4 += 2;
k5 += 1;
k6 += 1;
k7 += 2;
k8 += 2;
if ((fabs(qk) + fabs(pk)) > big) {
pkm2 *= biginv;
pkm1 *= biginv;
qkm2 *= biginv;
qkm1 *= biginv;
}
if ((fabs(qk) < biginv) || (fabs(pk) < biginv)) {
pkm2 *= big;
pkm1 *= big;
qkm2 *= big;
qkm1 *= big;
}
}
while (++n < 300);
cdone:
return ans;
}
/* Power series for incomplete beta integral.
Use when b*x is small and x not too close to 1. */
static double pseries(double a, double b, double x)
{
double s, t, u, v, n, t1, z, ai;
ai = 1 / a;
u = (1 - b) * x;
v = u / (a + 1);
t1 = v;
t = u;
n = 2;
s = 0;
z = DBL_EPSILON * ai;
while (fabs(v) > z) {
u = (n - b) * x / n;
t *= u;
v = t / (a + n);
s += v;
n += 1;
}
s += t1;
s += ai;
u = a * log(x);
t = lgamma(a + b) - lgamma(a) - lgamma(b) + u + log(s);
s = exp(t);
return s;
}
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To:
r-devel-request@stat.math.ethz.ch
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
>>>>> Ross Ihaka writes:> Here is a drop-in replacement for the R incomplete beta function. > src/math/pbeta.c> It is a slightly modified version of the cephes library one from > Netlib. In the few cases I tried it seems to give at least 14 > digit agreement with the one in S-PLUS (its hard to get more).> I'm not sure what performance is like. I'd like to know if it > helps with some of the problems which have been reported for the > t and F code in R.Indeed, `qt(0.975, 3)' now also works on Debian Linux/GNU/ix86 when compiled with `-O2 -g'. Thanks! (Of course, this is the only example I tried thus far ...) -k =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html Send "info", "help", or "[un]subscribe" (in the "body", not the subject !) To: r-devel-request@stat.math.ethz.ch =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
Martin Maechler
1997-Apr-30 10:04 UTC
R-alpha: New pbeta.c : Still problems for BIG 'df' (and an S-bug)
I've only made a few experiments using qt(.) with the new pbeta.c:
At least the problem of "BIG df" --> 'long/infinite' CPU
remains even
though it has been pushed slightly
For 0.49 'plain',
qt(.99, 10^k) already takes quite some time for k=5, and
takes for ever for k=7
Try
for(ee in 1:6) cat(10^ee,":",
unix.time(qt(.99,df=10^ee))[1],"\n")
For the new pbeta.c, the problem appears about two exponents later.
qt(.99, df = 1e8) ## 'never' returns
The following code shows you a bit of what is going on.
BTW, S-plus seems to use a different approximation which remains fast even
when df = BIG.
However, S-plus results then become COMPLETELY RUBBISH!
Here are my few tests:
p <- c(.975, .995, .9995, 1 - 5e-6)
df1 <- c(1:5,10^c(1:8,10,15,10*(2:10)))
df0 <- df1[1:12] ## df0 works for R 0.49 -patch2-
dig <- 8
for(df in df0)
cat(formatC(df,w=6),":", formatC(unix.time(qq <- qt(p,
df))[1],form='f'), "|",
formatC(qq, form='f', dig=dig,wid=dig+6),"\n")
cat(" _Inf_ : --- |", formatC(qnorm(p), form='f',
dig=dig,wid=dig+6),"\n")
## R 0.49 with pbeta.c of 30 April 1997:
df : CPU[s]| q .975 .995 .9995 .999995=1-5e-6
1 : 0.0100 | 12.70620474 63.65674116 636.61924876 63661.96977709
2 : 0.0200 | 4.30265273 9.92484320 31.59905458 316.22539430
3 : 0.0000 | 3.18244631 5.84090931 12.92397864 60.39682404
4 : 0.0000 | 2.77644511 4.60409487 8.61030158 27.77164766
5 : 0.0100 | 2.57058184 4.03214298 6.86882663 17.89686614
10 : 0.0100 | 2.22813885 3.16927267 4.58689386 8.15028656
100 : 0.0900 | 1.98397152 2.62589052 3.39049131 4.65424006
1000 : 0.0300 | 1.96233908 2.58075470 3.30028265 4.43992647
1e+04 : 0.0400 | 1.96020124 2.57632105 3.29149997 4.41943950
1e+05 : 0.1300 | 1.95998771 2.57587847 3.29062403 4.41739993
1e+06 : 0.2800 | 1.95996636 2.57583422 3.29053646 4.41719606
1e+07 : 0.3800 | 1.95996422 2.57582979 3.29052770 4.41717568
_Inf_ : --- | 1.95996398 2.57582930 3.29052672 4.41718074
## S-plus (3.4): From df ~= 1e4 (p=1-5e-6) results are COMPLETELY wrong !
## ----- You get quick results for even higher 'df' -- but they are
wrong...
1 : 0.0100 | 12.70620474 63.65674116 636.61924876 63661.96770658
2 : 0.0000 | 4.30265273 9.92484320 31.59905458 316.22539430
3 : 0.0000 | 3.18244631 5.84090931 12.92397864 60.39682404
4 : 0.0100 | 2.77644511 4.60409487 8.61030158 27.77164766
5 : 0.0000 | 2.57058184 4.03214298 6.86882663 17.89686614
10 : 0.0000 | 2.22813885 3.16927267 4.58689386 8.15028656
100 : 0.0100 | 1.98397152 2.62589052 3.39049131 4.65424006
1000 : 0.0200 | 1.96233908 2.58075470 3.30028265 4.43992647
1e+04 : 0.0200 | 1.96020124 2.57632105 3.29966223 4.70204286
1e+05 : 0.0200 | 1.95998771 2.57587848 5.14482712 12.48628360
1e+06 : 0.0100 | 1.95996636 2.57583569 14.70818229 38.67299101
1e+07 : 0.0200 | 1.95996422 2.57600108 45.98857869 122.03496539
_Inf_ : --- | 1.95996398 2.57582930 3.29052673 4.41717341
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To:
r-devel-request@stat.math.ethz.ch
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-