I've been inspired to look at the R source code by some strange timing
results that I wrote about on my blog at radfordneal.wordpress.com
(see the posts on "Speeding up parentheses..." and "Two
surprising
things...".
I discovered that the strange speed advantage of curly brackets over
parentheses is partially explained by an inefficiency in the evalList
and evalListKeepMissing procedures in eval.c, in directory src/main,
which are on the critical path for many operations. These procedures
unnecessarily allocate an extra CONS node. I rewrote them to avoid
this, which seems to speed up a typical program by about 5% (assuming
it doesn't spend most of its time in things like matrix multiplies).
I think it would be well worthwhile to put this minor change into the
next R release. I'll be looking at some other places where R can also
be sped up, and expect that an average improvement of maybe 15% is
possible, with some programs probably speeding up by a factor of two.
For now, though, I'll just give the revised versions of evalList and
evalListKeepMissing, below.
Radford Neal
---------------------------------------------------------------------
/* Used in eval and applyMethod (object.c) for builtin primitives,
do_internal (names.c) for builtin .Internals
and in evalArgs.
'n' is the number of arguments already evaluated and hence not
passed to evalArgs and hence to here.
*/
SEXP attribute_hidden evalList(SEXP el, SEXP rho, SEXP call, int n)
{
SEXP head, tail, ev, h;
int mode; /* mode==0 is 0 args, mode==1 is 1 arg, mode==2 is >1 arg */
head = R_NilValue;
mode = 0;
while (el != R_NilValue) {
n++;
if (CAR(el) == R_DotsSymbol) {
/* If we have a ... symbol, we look to see what it is bound to.
* If its binding is Null (i.e. zero length)
* we just ignore it and return the cdr with all its expressions evaluated;
* if it is bound to a ... list of promises,
* we force all the promises and then splice
* the list of resulting values into the return value.
* Anything else bound to a ... symbol is an error
*/
h = findVar(CAR(el), rho);
if (TYPEOF(h) == DOTSXP || h == R_NilValue) {
while (h != R_NilValue) {
if (mode==1) {
PROTECT(head);
mode = 2;
}
ev = CONS(eval(CAR(h), rho), R_NilValue);
COPY_TAG(ev, h);
if (mode==0) {
head = ev;
mode = 1;
}
else {
SETCDR(tail, ev);
}
tail = ev;
h = CDR(h);
}
}
else if (h != R_MissingArg)
error(_("'...' used in an incorrect context"));
} else if (CAR(el) == R_MissingArg) {
/* It was an empty element: most likely get here from evalArgs
which may have been called on part of the args. */
errorcall(call, _("argument %d is empty"), n);
} else if (isSymbol(CAR(el)) && R_isMissing(CAR(el), rho)) {
/* It was missing */
errorcall(call, _("'%s' is missing"),
CHAR(PRINTNAME(CAR(el))));
} else {
if (mode==1) {
PROTECT(head);
mode = 2;
}
ev = CONS(eval(CAR(el), rho), R_NilValue);
COPY_TAG(ev, el);
if (mode==0) {
head = ev;
mode = 1;
}
else {
SETCDR(tail, ev);
}
tail = ev;
}
el = CDR(el);
}
if (mode==2) UNPROTECT(1);
return head;
} /* evalList() */
/* A slight variation of evaluating each expression in "el" in
"rho". */
/* used in evalArgs, arithmetic.c, seq.c */
SEXP attribute_hidden evalListKeepMissing(SEXP el, SEXP rho)
{
SEXP head, tail, ev, h;
int mode; /* mode==0 is 0 args, mode==1 is 1 arg, mode==2 is >1 arg */
head = R_NilValue;
mode = 0;
while (el != R_NilValue) {
/* If we have a ... symbol, we look to see what it is bound to.
* If its binding is Null (i.e. zero length)
* we just ignore it and return the cdr with all its expressions evaluated;
* if it is bound to a ... list of promises,
* we force all the promises and then splice
* the list of resulting values into the return value.
* Anything else bound to a ... symbol is an error
*/
if (CAR(el) == R_DotsSymbol) {
h = findVar(CAR(el), rho);
if (TYPEOF(h) == DOTSXP || h == R_NilValue) {
while (h != R_NilValue) {
if (mode==1) {
PROTECT(head);
mode = 2;
}
if (CAR(h) == R_MissingArg)
ev = CONS(R_MissingArg, R_NilValue);
else
ev = CONS(eval(CAR(h), rho), R_NilValue);
COPY_TAG(ev, h);
if (mode==0) {
head = ev;
mode = 1;
}
else {
SETCDR(tail, ev);
}
tail = ev;
h = CDR(h);
}
}
else if(h != R_MissingArg)
error(_("'...' used in an incorrect context"));
}
else {
if (mode==1) {
PROTECT(head);
mode = 2;
}
if (CAR(el) == R_MissingArg ||
(isSymbol(CAR(el)) && R_isMissing(CAR(el), rho)))
ev = CONS(R_MissingArg, R_NilValue);
else
ev = CONS(eval(CAR(el), rho), R_NilValue);
COPY_TAG(ev, el);
if (mode==0) {
head = ev;
mode = 1;
}
else {
SETCDR(tail, ev);
}
tail = ev;
}
el = CDR(el);
}
if (mode==2) UNPROTECT(1);
return head;
}
Regarding my suggesting speed improvement to evalList, Martin Morgan
has commented by email to me that at one point an object is left
unprotected when COPY_TAG is called, and has wondered whether that is
safe. I think it is safe, but the code can be changed to protect this
as well, which actually simplifies things, and could be more robust to
changes to the garbage collector. The cost is that sometimes there is
one more call of PROTECT and UNPROTECT, but with the speed improvement
to these that I just posted, this is a minor issue.
Martin has also pointed me to where you can get R sources via
subversion, but while I figure that out, and how to post up "diffs"
for changes, I'll put the revised evalList code below for anyone
interested...
Radford Neal
----------------------------------------------------------------------
/* Used in eval and applyMethod (object.c) for builtin primitives,
do_internal (names.c) for builtin .Internals
and in evalArgs.
'n' is the number of arguments already evaluated and hence not
passed to evalArgs and hence to here.
*/
SEXP attribute_hidden evalList(SEXP el, SEXP rho, SEXP call, int n)
{
SEXP head, tail, ev, h;
head = R_NilValue;
while (el != R_NilValue) {
n++;
if (CAR(el) == R_DotsSymbol) {
/* If we have a ... symbol, we look to see what it is bound to.
* If its binding is Null (i.e. zero length),
* we just ignore it and return the cdr with all its expressions
* evaluated.
* If it is bound to a ... list of promises,
* we force all the promises and then splice
* the list of resulting values into the return value.
* Anything else bound to a ... symbol is an error.
*/
h = findVar(CAR(el), rho);
if (TYPEOF(h) == DOTSXP || h == R_NilValue) {
while (h != R_NilValue) {
ev = CONS(eval(CAR(h), rho), R_NilValue);
if (head==R_NilValue)
PROTECT(head = ev);
else
SETCDR(tail, ev);
COPY_TAG(ev, h);
tail = ev;
h = CDR(h);
}
}
else if (h != R_MissingArg)
error(_("'...' used in an incorrect context"));
} else if (CAR(el) == R_MissingArg) {
/* It was an empty element: most likely get here from evalArgs
which may have been called on part of the args. */
errorcall(call, _("argument %d is empty"), n);
} else if (isSymbol(CAR(el)) && R_isMissing(CAR(el), rho)) {
/* It was missing */
errorcall(call, _("'%s' is missing"),
CHAR(PRINTNAME(CAR(el))));
} else {
ev = CONS(eval(CAR(el), rho), R_NilValue);
if (head==R_NilValue)
PROTECT(head = ev);
else
SETCDR(tail, ev);
COPY_TAG(ev, el);
tail = ev;
}
el = CDR(el);
}
if (head!=R_NilValue)
UNPROTECT(1);
return head;
} /* evalList() */
/* A slight variation of evaluating each expression in "el" in
"rho". */
/* used in evalArgs, arithmetic.c, seq.c */
SEXP attribute_hidden evalListKeepMissing(SEXP el, SEXP rho)
{
SEXP head, tail, ev, h;
head = R_NilValue;
while (el != R_NilValue) {
/* If we have a ... symbol, we look to see what it is bound to.
* If its binding is Null (i.e. zero length)
* we just ignore it and return the cdr with all its expressions evaluated;
* if it is bound to a ... list of promises,
* we force all the promises and then splice
* the list of resulting values into the return value.
* Anything else bound to a ... symbol is an error
*/
if (CAR(el) == R_DotsSymbol) {
h = findVar(CAR(el), rho);
if (TYPEOF(h) == DOTSXP || h == R_NilValue) {
while (h != R_NilValue) {
if (CAR(h) == R_MissingArg)
ev = CONS(R_MissingArg, R_NilValue);
else
ev = CONS(eval(CAR(h), rho), R_NilValue);
if (head==R_NilValue)
PROTECT(head = ev);
else
SETCDR(tail, ev);
COPY_TAG(ev, h);
tail = ev;
h = CDR(h);
}
}
else if(h != R_MissingArg)
error(_("'...' used in an incorrect context"));
}
else {
if (CAR(el) == R_MissingArg ||
(isSymbol(CAR(el)) && R_isMissing(CAR(el), rho)))
ev = CONS(R_MissingArg, R_NilValue);
else
ev = CONS(eval(CAR(el), rho), R_NilValue);
if (head==R_NilValue)
PROTECT(head = ev);
else
SETCDR(tail, ev);
COPY_TAG(ev, el);
tail = ev;
}
el = CDR(el);
}
if (head!=R_NilValue)
UNPROTECT(1);
return head;
}
Apparently Analagous Threads
- Mishandling missing "..." (PR#1247)
- Implementation of substring search in omegascript
- Problerm building R-1.7.0 on OpenBSD3.2/sparc64
- Speed improvement to PROTECT, UNPROTECT, etc.
- round() and signif() do not check argument names when a single argument is given