For the mailing list archive and for anyone else interested.
A few years ago we needed to automatically run a second AGI if the first AGI
failed, i.e. a "failsafe" setup -- mainly because I'm not a very
good programmer. 8-|
The code below is very similar to what we use in production. It will also be
available at http://pastebin.com/LBaLhdfJ for a while. "sm" stands for Switch
Manager (get your mind out of the gutter), the name of one of our internal
Asterisk projects.
/*
This script runs an AGI, if the AGI fails, a "circuit breaker" will
"trip".
While the breaker is tripped, this script skips the primary AGI and runs only
the failsafe AGI until the max calls or max seconds is reached.
Once max calls or max seconds elapse the breaker is reset on the next call and
everything goes back to normal.
GLOBAL VARIABLES
SM_NODE: system hostname
SM_AGI_BREAKER_MAX_CALLS: reset breaker after this many calls
SM_AGI_BREAKER_MAX_ELAPSED: reset breaker after this many seconds
SM_AGI_BREAKER_NOTIFY: e-mail address list
*/
// sm_agi(agi, agi_failsafe)
//   agi:          primary AGI command; every '^' in it is replaced with ','
//                 before the command is handed to AGI().
//   agi_failsafe: backup AGI command, in the same '^'-separated form.
// Breaker state is kept in three global variables named after the primary
// script: <breaker>, <breaker>_calls and <breaker>_timestamp.
macro sm_agi(agi,agi_failsafe) {
	// extract script name from primary agi for the global breaker variable name
	// (last '/'-separated field of agi, cut at the first '^')
	Set(LOCAL(breaker)=sm_agi_breaker-${CUT(CUT(agi,/,${FIELDQTY(agi,/)}),^,1)});
	Set(SM_AGI_STATUS=);

	if ("${GLOBAL(${breaker})}" == "tripped") {
		// globally lock breaker variable to prevent race condition;
		// if the lock is busy we simply skip the reset check for this call
		if (${TRYLOCK(${breaker})}) {
			Noop(AGI Breaker '${breaker}' is currently 'tripped' Checking elapsed time and elapsed calls.);
			if (${${breaker}_calls} > ${SM_AGI_BREAKER_MAX_CALLS} || ${MATH(${STRFTIME(,,%s)}-${${breaker}_timestamp},int)} > ${SM_AGI_BREAKER_MAX_ELAPSED}) {
				// notify before clearing, the message reports the current counters
				System(/bin/echo "AGI breaker '${breaker}' reset on '${SM_NODE}' after '${MATH(${STRFTIME(,,%s)}-${${breaker}_timestamp},int)}' seconds and '${${breaker}_calls}' calls. Normal call processing has resumed." | mail -s "NOTICE: ${SM_NODE} AGI Breaker '${breaker}' Reset" ${SM_AGI_BREAKER_NOTIFY});
				// clear the same three globals that are set when the breaker trips
				// (the timestamp variable is <breaker>_timestamp, not <breaker>_elapsed)
				Set(ARRAY(GLOBAL(${breaker}),GLOBAL(${breaker}_calls),GLOBAL(${breaker}_timestamp))=,,,);
			}
			Set(undef=${UNLOCK(${breaker})});
		}
	}

	if ("${GLOBAL(${breaker})}" != "tripped") {
		// run agi, replacing ^ with ,
		AGI(${REPLACE(agi,^,\,)});
		// the agi should set SM_AGI_STATUS to "FAIL" when it starts
		// and set it to "SUCCESS" just before the AGI exits. This is because
		// if the AGI fails it won't be able to set the SM_AGI_STATUS variable.
		if ("${SM_AGI_STATUS}" == "SUCCESS") {
			return;
		}
		// the agi did not report FAIL itself and Asterisk says it ran fine
		if ("${SM_AGI_STATUS}" != "FAIL" && "${AGISTATUS}" == "SUCCESS") {
			Set(SM_AGI_STATUS=SUCCESS);
			return;
		}
		// agi failed, trip the circuit breaker
		if (${TRYLOCK(${breaker})}) {
			Set(ARRAY(GLOBAL(${breaker}),GLOBAL(${breaker}_calls),GLOBAL(${breaker}_timestamp))=tripped,1,${STRFTIME(,,%s)});
			Set(undef=${UNLOCK(${breaker})});
		}
		System(/bin/echo "AGI '${agi}' failed on channel '${CHANNEL(name)}' with status '${AGISTATUS}' on '${SM_NODE}', tripping AGI breaker '${breaker}'. Failsafe mode enabled, AGI breaker will reset after '${SM_AGI_BREAKER_MAX_ELAPSED}' seconds or '${SM_AGI_BREAKER_MAX_CALLS}' calls" | mail -s "ERROR: ${SM_NODE} AGI Breaker '${breaker}' Tripped" ${SM_AGI_BREAKER_NOTIFY});
		Playback(custom/sm_agi_fail);
	}

	// try using the failsafe
	AGI(${REPLACE(agi_failsafe,^,\,)});
	if ("${AGISTATUS}" == "SUCCESS") {
		// count this call against the breaker and report success to the caller
		if (${LOCK(${breaker})}) {
			Set(ARRAY(GLOBAL(${breaker}_calls),SM_AGI_STATUS)=${MATH(${${breaker}_calls}+1,int)},SUCCESS);
			Set(undef=${UNLOCK(${breaker})});
		}
		return;
	}

	System(/bin/echo "Backup AGI '${agi_failsafe}' failed with status '${AGISTATUS}' on '${SM_NODE}'. This is a critical emergency. All calls on this node are failing. Drop whatever you are doing and deal with it." | mail -s "PANIC: ${SM_NODE} AGI FAILED" ${SM_AGI_BREAKER_NOTIFY});
	// both agi and failsafe agi failed. reset the agi breaker for the next call
	// and hope for the best.
	Set(ARRAY(GLOBAL(${breaker}),GLOBAL(${breaker}_calls),GLOBAL(${breaker}_timestamp))=,,,);
	return;
}