thr3ads.net - R devel - R-alpha: New Incomplete Beta Function [Apr 1997]

If this information is useful, please help other people find it:
Share via:

Ross Ihaka

1997-Apr-30 04:56 UTC

R-alpha: New Incomplete Beta Function

Here is a drop-in replacement for the R incomplete beta function.

	src/math/pbeta.c

It is a slightly modified version of the cephes library one from
Netlib.  In the few cases I tried it seems to give at least 14
digit agreement with the one in S-PLUS (its hard to get more).

I'm not sure what performance is like.  I'd like to know if it
helps with some of the problems which have been reported for the
t and F code in R.

If it works It'll be in an official second patch for 0.49 later in
the week (so just swap it temporarily with the old one).

	Ross


- - s n i p - - h e r e - -

/*
 *    Incomplete Beta Integral
 *
 *    Taken from Cephes Math Library, Release 2.3:  March, 1995
 *    Copyright 1984, 1995 by Stephen L. Moshier
 *    Changes for R : Copyright 1997 by Ross Ihaka
 */

#include <math.h>
#include <float.h>
#include <errno.h>

static double incbcf(), incbd(), pseries();

static double big = 4.503599627370496e15;
static double biginv = 2.22044604925031308085e-16;


double pbeta(double xx, double aa, double bb)
{
	double a, b, t, x, xc, w, y;
	int flag;

	if (aa <= 0 || bb <= 0)
		goto domerr;

	if (xx <= 0 || xx >= 1) {
		if (xx == 0)
			return 0;
		if (xx == 1)
			return 1;
domerr:
		errno = EDOM;
		return 0;
	}
	flag = 0;
	if (bb * xx <= 1 && xx <= 0.95) {
		t = pseries(aa, bb, xx);
		goto done;
	}
	w = 1 - xx;

	/* Reverse a and b if x is greater than the mean. */

	if (xx > (aa / (aa + bb))) {
		flag = 1;
		a = bb;
		b = aa;
		xc = xx;
		x = w;
	} else {
		a = aa;
		b = bb;
		xc = w;
		x = xx;
	}

	if (flag == 1 && (b * x) <= 1 && x <= 0.95) {
		t = pseries(a, b, x);
		goto done;
	}

	/* Choose expansion for better convergence. */

	y = x * (a + b - 2) - (a - 1);
	if (y < 0)
		w = incbcf(a, b, x);
	else
		w = incbd(a, b, x) / xc;

	/* Multiply w by the factor
	     a      b   _             _     _
	    x  (1-x)   | (a+b) / ( a | (a) | (b) ) .   */

	y = a * log(x);
	t = b * log(xc);

	/* Resort to logarithms.  */

	y += t + lgamma(a + b) - lgamma(a) - lgamma(b);
	y += log(w / a);
	t = exp(y);

done:

	if (flag == 1) {
		if (t <= DBL_EPSILON)
			t = 1 - DBL_EPSILON;
		else
			t = 1 - t;
	}
	return t;
}


/* Continued fraction expansion #1
 * for incomplete beta integral
 */

static double incbcf(double a, double b, double x)
{
	double xk, pk, pkm1, pkm2, qk, qkm1, qkm2;
	double k1, k2, k3, k4, k5, k6, k7, k8;
	double r, t, ans, thresh;
	int n;

	k1 = a;
	k2 = a + b;
	k3 = a;
	k4 = a + 1;
	k5 = 1;
	k6 = b - 1;
	k7 = k4;
	k8 = a + 2;

	pkm2 = 0;
	qkm2 = 1;
	pkm1 = 1;
	qkm1 = 1;
	ans = 1;
	r = 1;
	n = 0;
	thresh = 3 * DBL_EPSILON;
	do {

		xk = -(x * k1 * k2) / (k3 * k4);
		pk = pkm1 + pkm2 * xk;
		qk = qkm1 + qkm2 * xk;
		pkm2 = pkm1;
		pkm1 = pk;
		qkm2 = qkm1;
		qkm1 = qk;

		xk = (x * k5 * k6) / (k7 * k8);
		pk = pkm1 + pkm2 * xk;
		qk = qkm1 + qkm2 * xk;
		pkm2 = pkm1;
		pkm1 = pk;
		qkm2 = qkm1;
		qkm1 = qk;

		if (qk != 0)
			r = pk / qk;
		if (r != 0) {
			t = fabs((ans - r) / r);
			ans = r;
		} else
			t = 1;

		if (t < thresh)
			goto cdone;

		k1 += 1;
		k2 += 1;
		k3 += 2;
		k4 += 2;
		k5 += 1;
		k6 -= 1;
		k7 += 2;
		k8 += 2;

		if ((fabs(qk) + fabs(pk)) > big) {
			pkm2 *= biginv;
			pkm1 *= biginv;
			qkm2 *= biginv;
			qkm1 *= biginv;
		}
		if ((fabs(qk) < biginv) || (fabs(pk) < biginv)) {
			pkm2 *= big;
			pkm1 *= big;
			qkm2 *= big;
			qkm1 *= big;
		}
	}
	while (++n < 300);

cdone:
	return ans;
}



/* Continued fraction expansion #2
 * for incomplete beta integral
 */

static double incbd(double a, double b, double x)
{
	double xk, pk, pkm1, pkm2, qk, qkm1, qkm2;
	double k1, k2, k3, k4, k5, k6, k7, k8;
	double r, t, ans, z, thresh;
	int n;

	k1 = a;
	k2 = b - 1;
	k3 = a;
	k4 = a + 1;
	k5 = 1;
	k6 = a + b;
	k7 = a + 1;
	k8 = a + 2;

	pkm2 = 0;
	qkm2 = 1;
	pkm1 = 1;
	qkm1 = 1;
	z = x / (1 - x);
	ans = 1;
	r = 1;
	n = 0;
	thresh = 3 * DBL_EPSILON;
	do {

		xk = -(z * k1 * k2) / (k3 * k4);
		pk = pkm1 + pkm2 * xk;
		qk = qkm1 + qkm2 * xk;
		pkm2 = pkm1;
		pkm1 = pk;
		qkm2 = qkm1;
		qkm1 = qk;

		xk = (z * k5 * k6) / (k7 * k8);
		pk = pkm1 + pkm2 * xk;
		qk = qkm1 + qkm2 * xk;
		pkm2 = pkm1;
		pkm1 = pk;
		qkm2 = qkm1;
		qkm1 = qk;

		if (qk != 0)
			r = pk / qk;
		if (r != 0) {
			t = fabs((ans - r) / r);
			ans = r;
		} else
			t = 1;

		if (t < thresh)
			goto cdone;

		k1 += 1;
		k2 -= 1;
		k3 += 2;
		k4 += 2;
		k5 += 1;
		k6 += 1;
		k7 += 2;
		k8 += 2;

		if ((fabs(qk) + fabs(pk)) > big) {
			pkm2 *= biginv;
			pkm1 *= biginv;
			qkm2 *= biginv;
			qkm1 *= biginv;
		}
		if ((fabs(qk) < biginv) || (fabs(pk) < biginv)) {
			pkm2 *= big;
			pkm1 *= big;
			qkm2 *= big;
			qkm1 *= big;
		}
	}
	while (++n < 300);
cdone:
	return ans;
}


/* Power series for incomplete beta integral.
   Use when b*x is small and x not too close to 1.  */

static double pseries(double a, double b, double x)
{
	double s, t, u, v, n, t1, z, ai;

	ai = 1 / a;
	u = (1 - b) * x;
	v = u / (a + 1);
	t1 = v;
	t = u;
	n = 2;
	s = 0;
	z = DBL_EPSILON * ai;
	while (fabs(v) > z) {
		u = (n - b) * x / n;
		t *= u;
		v = t / (a + n);
		s += v;
		n += 1;
	}
	s += t1;
	s += ai;

	u = a * log(x);
	t = lgamma(a + b) - lgamma(a) - lgamma(b) + u + log(s);
	s = exp(t);
	return s;
}
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To:
r-devel-request@stat.math.ethz.ch
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

Kurt Hornik

1997-Apr-30 05:52 UTC

head link

R-alpha: New Incomplete Beta Function

>>>>> Ross Ihaka writes:
> Here is a drop-in replacement for the R incomplete beta function.
> 	src/math/pbeta.c
> It is a slightly modified version of the cephes library one from
> Netlib.  In the few cases I tried it seems to give at least 14
> digit agreement with the one in S-PLUS (its hard to get more).
> I'm not sure what performance is like.  I'd like to know if it
> helps with some of the problems which have been reported for the
> t and F code in R.
Indeed, `qt(0.975, 3)' now also works on Debian Linux/GNU/ix86 when
compiled with `-O2 -g'.

Thanks!

(Of course, this is the only example I tried thus far ...)

-k
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To:
r-devel-request@stat.math.ethz.ch
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

Martin Maechler

1997-Apr-30 10:04 UTC

head link

R-alpha: New pbeta.c : Still problems for BIG 'df' (and an S-bug)

I've only made a few experiments using  qt(.)  with the new pbeta.c:

At least the problem of  "BIG df" --> 'long/infinite' CPU 
remains even
though it has been pushed slightly

For 0.49 'plain', 
	qt(.99, 10^k) already takes quite some time for k=5, and 
			takes for ever for k=7
Try
	for(ee in 1:6) cat(10^ee,":",
unix.time(qt(.99,df=10^ee))[1],"\n")

For the new  pbeta.c,  the problem appears about two exponents later.

 	qt(.99, df = 1e8)	## 'never' returns

The following code shows you a bit of what is going on.

BTW, S-plus seems to use a different approximation which remains fast even
when df = BIG.
However, S-plus results then become COMPLETELY RUBBISH!

Here are my few tests:

p <- c(.975, .995, .9995, 1 - 5e-6)
df1 <- c(1:5,10^c(1:8,10,15,10*(2:10)))
df0 <- df1[1:12] ## df0  works for  R 0.49 -patch2-

dig <- 8
for(df in df0)
  cat(formatC(df,w=6),":", formatC(unix.time(qq <- qt(p,
df))[1],form='f'), "|",
      formatC(qq, form='f', dig=dig,wid=dig+6),"\n")
cat(" _Inf_ :  ---   |", formatC(qnorm(p), form='f',
dig=dig,wid=dig+6),"\n")

## R 0.49  with  pbeta.c  of 30 April 1997:

   df  :  CPU[s]|     q  .975           .995         .9995       .999995=1-5e-6

     1 : 0.0100 |    12.70620474    63.65674116   636.61924876 63661.96977709
     2 : 0.0200 |     4.30265273     9.92484320    31.59905458   316.22539430
     3 : 0.0000 |     3.18244631     5.84090931    12.92397864    60.39682404
     4 : 0.0000 |     2.77644511     4.60409487     8.61030158    27.77164766
     5 : 0.0100 |     2.57058184     4.03214298     6.86882663    17.89686614
    10 : 0.0100 |     2.22813885     3.16927267     4.58689386     8.15028656
   100 : 0.0900 |     1.98397152     2.62589052     3.39049131     4.65424006
  1000 : 0.0300 |     1.96233908     2.58075470     3.30028265     4.43992647
 1e+04 : 0.0400 |     1.96020124     2.57632105     3.29149997     4.41943950
 1e+05 : 0.1300 |     1.95998771     2.57587847     3.29062403     4.41739993
 1e+06 : 0.2800 |     1.95996636     2.57583422     3.29053646     4.41719606
 1e+07 : 0.3800 |     1.95996422     2.57582979     3.29052770     4.41717568
 _Inf_ :  ---   |     1.95996398     2.57582930     3.29052672     4.41718074

## S-plus (3.4): From df ~= 1e4 (p=1-5e-6) results are COMPLETELY wrong !
## ----- You get quick results for even higher 'df' -- but they are
wrong...

     1 : 0.0100 |    12.70620474    63.65674116   636.61924876 63661.96770658
     2 : 0.0000 |     4.30265273     9.92484320    31.59905458   316.22539430
     3 : 0.0000 |     3.18244631     5.84090931    12.92397864    60.39682404
     4 : 0.0100 |     2.77644511     4.60409487     8.61030158    27.77164766
     5 : 0.0000 |     2.57058184     4.03214298     6.86882663    17.89686614
    10 : 0.0000 |     2.22813885     3.16927267     4.58689386     8.15028656
   100 : 0.0100 |     1.98397152     2.62589052     3.39049131     4.65424006
  1000 : 0.0200 |     1.96233908     2.58075470     3.30028265     4.43992647
 1e+04 : 0.0200 |     1.96020124     2.57632105     3.29966223     4.70204286
 1e+05 : 0.0200 |     1.95998771     2.57587848     5.14482712    12.48628360
 1e+06 : 0.0100 |     1.95996636     2.57583569    14.70818229    38.67299101
 1e+07 : 0.0200 |     1.95996422     2.57600108    45.98857869   122.03496539
 _Inf_ :  ---   |     1.95996398     2.57582930     3.29052673     4.41717341

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To:
r-devel-request@stat.math.ethz.ch
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-

Apparently Analagous Threads

Search for more seemingly similar threads

R devel - Apr 1997 - R-alpha: New Incomplete Beta Function

R-alpha: New Incomplete Beta Function

R-alpha: New Incomplete Beta Function

R-alpha: New pbeta.c : Still problems for BIG 'df' (and an S-bug)

Apparently Analagous Threads