-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
For those game enough to try the results of my handiwork ;-), enclosed
is a patch against the files in /usr/src/sys/dev/ata for RELENG_6 (and
possibly others) with the following objectives:
1) the ata-raid driver currently leaks ata_composite and ata_request
structures into "neverland" in a mirrored configuration. This can be
observed using "sysctl -a | grep ^ata_" and noting the increasing
"in-use" count as time goes on. Eventually, this causes the kernel to
run out of memory. This is fixed by tracking the request counts on each
composite request.
2) another part of this patch is to ata-queue where a channel lock is
asserted in a (hopefully rare) rebuild process even if the dependencies
flag is set (we're waiting for a read). Moving the test for a dependency
outside of the lock saves waiting on it when nothing can be done. A
small nit with near negligible impact but, when you're waiting for a
rebuild ...
3) another part of this patch is to ata-raid where the choice of drive
from which to read favours one side of a mirror even when both drives
are near the block(s) we want. Because the mirror is on another channel
on the Highpoint controller, it performs (marginally :-() better when we
toggle between them.
As usual, this patch comes with no warranty ... it works for me. "If it
breaks your system, you own all the pieces".
I recommend you back up your system before testing,
Michael
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.0 (MingW32)
iD8DBQFDihFsiJykeV6HPMURAlHQAJ9UO/vD8rp/V3+zh89qBBOPGQ+cugCg5qGp
9N8FaffInZMuOIMSGICV70c=NZTT
-----END PGP SIGNATURE-----
-------------- next part --------------
*** /usr/src/sys/dev/ata/ata-all.h.orig Sun Nov 27 14:17:57 2005
--- /usr/src/sys/dev/ata/ata-all.h Sun Nov 27 14:22:05 2005
***************
*** 331,336 ****
--- 331,337 ----
u_int32_t wr_depend; /* write depends on subdisks
*/
u_int32_t wr_done; /* done write subdisks */
struct ata_request *request[32]; /* size must match maps above
*/
+ long count; /* count required of this composite */
caddr_t data_1;
caddr_t data_2;
};
*** /usr/src/sys/dev/ata/ata-queue.c.orig Sun Nov 27 14:17:57 2005
--- /usr/src/sys/dev/ata/ata-queue.c Sun Nov 27 14:40:43 2005
***************
*** 182,209 ****
}
/* check we are in the right state and has no dependencies */
! mtx_lock(&ch->state_mtx);
! if (ch->state == ATA_IDLE && !dependencies) {
! ATA_DEBUG_RQ(request, "starting");
! TAILQ_REMOVE(&ch->ata_queue, request, chain);
! ch->running = request;
! ch->state = ATA_ACTIVE;
!
! /* if we are the freezing point release it */
! if (ch->freezepoint == request)
! ch->freezepoint = NULL;
!
! if (ch->hw.begin_transaction(request) == ATA_OP_FINISHED) {
! ch->running = NULL;
! ch->state = ATA_IDLE;
! mtx_unlock(&ch->state_mtx);
! mtx_unlock(&ch->queue_mtx);
! ATA_LOCKING(dev, ATA_LF_UNLOCK);
! ata_finish(request);
! return;
}
}
- mtx_unlock(&ch->state_mtx);
}
}
mtx_unlock(&ch->queue_mtx);
--- 182,211 ----
}
/* check we are in the right state and has no dependencies */
! if (!dependencies) {
! mtx_lock(&ch->state_mtx);
! if (ch->state == ATA_IDLE) {
! ATA_DEBUG_RQ(request, "starting");
! TAILQ_REMOVE(&ch->ata_queue, request, chain);
! ch->running = request;
! ch->state = ATA_ACTIVE;
!
! /* if we are the freezing point release it */
! if (ch->freezepoint == request)
! ch->freezepoint = NULL;
!
! if (ch->hw.begin_transaction(request) == ATA_OP_FINISHED) {
! ch->running = NULL;
! ch->state = ATA_IDLE;
! mtx_unlock(&ch->state_mtx);
! mtx_unlock(&ch->queue_mtx);
! ATA_LOCKING(dev, ATA_LF_UNLOCK);
! ata_finish(request);
! return;
! }
}
+ mtx_unlock(&ch->state_mtx);
}
}
}
mtx_unlock(&ch->queue_mtx);
***************
*** 426,432 ****
composite->wr_done |= (1 << request->this);
if (composite->wr_depend &&
! (composite->rd_done &
composite->wr_depend)==composite->wr_depend &&
(composite->wr_needed & (~composite->wr_done))) {
index = ((composite->wr_needed & (~composite->wr_done))) - 1;
}
--- 428,434 ----
composite->wr_done |= (1 << request->this);
if (composite->wr_depend &&
! ((composite->rd_done & composite->wr_depend) ==
composite->wr_depend) &&
(composite->wr_needed & (~composite->wr_done))) {
index = ((composite->wr_needed & (~composite->wr_done))) - 1;
}
*** /usr/src/sys/dev/ata/ata-raid.c.orig Sun Nov 27 14:17:57 2005
--- /usr/src/sys/dev/ata/ata-raid.c Sun Nov 27 14:31:13 2005
***************
*** 44,49 ****
--- 44,50 ----
#include <sys/cons.h>
#include <sys/sema.h>
#include <sys/taskqueue.h>
+ #include <sys/libkern.h>
#include <vm/uma.h>
#include <machine/bus.h>
#include <sys/rman.h>
***************
*** 346,380 ****
if (rdp->status & AR_S_REBUILDING)
blk = ((lba / rdp->interleave) * rdp->width) * rdp->interleave +
(rdp->interleave * (drv % rdp->width)) +
! lba % rdp->interleave;;
if (bp->bio_cmd == BIO_READ) {
! int src_online ! (rdp->disks[drv].flags & AR_DF_ONLINE);
! int mir_online ! (rdp->disks[drv+rdp->width].flags &
AR_DF_ONLINE);
!
! /* if mirror gone or close to last access on source */
! if (!mir_online ||
! ((src_online) &&
! bp->bio_pblkno >! (rdp->disks[drv].last_lba - AR_PROXIMITY)
&&
! bp->bio_pblkno <! (rdp->disks[drv].last_lba +
AR_PROXIMITY))) {
rdp->toggle = 0;
}
! /* if source gone or close to last access on mirror */
! else if (!src_online ||
! ((mir_online) &&
! bp->bio_pblkno >!
(rdp->disks[drv+rdp->width].last_lba-AR_PROXIMITY) &&
! bp->bio_pblkno <!
(rdp->disks[drv+rdp->width].last_lba+AR_PROXIMITY))) {
drv += rdp->width;
rdp->toggle = 1;
}
! /* not close to any previous access, toggle */
! else {
if (rdp->toggle)
rdp->toggle = 0;
else {
--- 347,386 ----
if (rdp->status & AR_S_REBUILDING)
blk = ((lba / rdp->interleave) * rdp->width) * rdp->interleave +
(rdp->interleave * (drv % rdp->width)) +
! lba % rdp->interleave;
if (bp->bio_cmd == BIO_READ) {
! int decided = 0;
!
! /* if mirror gone, use drv */
! if ((rdp->disks[drv+rdp->width].flags & AR_DF_ONLINE) == 0) {
rdp->toggle = 0;
+ decided = 1;
+ } else
+ /* if src is gone, use mirror */
+ if ((rdp->disks[drv].flags & AR_DF_ONLINE) == 0) {
+ drv += rdp->width;
+ rdp->toggle = 1;
+ decided = 1;
}
!
! /* both appear to be online, find out if we're already
close */
! if (!decided) {
! /* if drv is near and mirror is not, use drv */
! if (abs(bp->bio_pblkno - rdp->disks[drv].last_lba)
<= AR_PROXIMITY &&
! abs(bp->bio_pblkno - rdp->disks[drv +
rdp->width].last_lba) > AR_PROXIMITY) {
! rdp->toggle = 0;
! decided = 1;
! /* if mirror is near and drv is not, use mirror */
! } else if (abs(bp->bio_pblkno - rdp->disks[drv +
rdp->width].last_lba) <= AR_PROXIMITY &&
! abs(bp->bio_pblkno - rdp->disks[drv].last_lba)
> AR_PROXIMITY) {
drv += rdp->width;
rdp->toggle = 1;
+ decided = 1;
}
! }
! /* neither is near or both are, just toggle */
! if(!decided) {
if (rdp->toggle)
rdp->toggle = 0;
else {
***************
*** 410,415 ****
--- 416,422 ----
mtx_init(&composite->lock,
"ATA PseudoRAID rebuild lock",
NULL, MTX_DEF);
+ composite->count = request->bytecount;
composite->rd_needed |= (1 << drv);
composite->wr_depend |= (1 << drv);
composite->wr_needed |= (1 << this);
***************
*** 468,473 ****
--- 475,481 ----
mtx_init(&composite->lock,
"ATA PseudoRAID mirror lock",
NULL, MTX_DEF);
+ composite->count = request->bytecount;
composite->wr_needed |= (1 << drv);
composite->wr_needed |= (1 << this);
composite->request[drv] = request;
***************
*** 607,613 ****
/* good data, update how far we've gotten */
else {
bp->bio_resid -= request->donecount;
! if (bp->bio_resid == 0) {
if (composite->wr_done & (1 << mirror))
finished = 1;
}
--- 615,622 ----
/* good data, update how far we've gotten */
else {
bp->bio_resid -= request->donecount;
! composite->count -= request->donecount;
! if (composite->count == 0) {
if (composite->wr_done & (1 << mirror))
finished = 1;
}
***************
*** 621,627 ****
printf("DOH! rebuild failed\n"); /* XXX SOS */
rdp->rebuild_lba = blk;
}
! if (bp->bio_resid == 0)
finished = 1;
}
}
--- 630,636 ----
printf("DOH! rebuild failed\n"); /* XXX SOS */
rdp->rebuild_lba = blk;
}
! if (composite->count == 0)
finished = 1;
}
}
***************
*** 658,667 ****
}
bp->bio_resid - composite->request[mirror]->donecount;
}
! else
bp->bio_resid -= request->donecount;
! if (bp->bio_resid == 0)
finished = 1;
}
mtx_unlock(&composite->lock);
--- 667,680 ----
}
bp->bio_resid - composite->request[mirror]->donecount;
+ composite->count -+ composite->request[mirror]->donecount;
}
! else {
bp->bio_resid -= request->donecount;
! composite->count -= request->donecount;
! }
! if (composite->count == 0)
finished = 1;
}
mtx_unlock(&composite->lock);
***************
*** 723,729 ****
rdp->status &= ~AR_S_REBUILDING;
ata_raid_config_changed(rdp, 1);
}
! biodone(bp);
}
if (composite) {
--- 736,744 ----
rdp->status &= ~AR_S_REBUILDING;
ata_raid_config_changed(rdp, 1);
}
! /* cover the case of a series of composites which are only partly complete */
! if (bp->bio_resid == 0)
! biodone(bp);
}
if (composite) {