This diff is a partial MFC (picking parts out of -current) that makes aio_return() return the error status of a completed AIO request (as it does on other OSes and in 7.x). The man pages for 6.x and for other OSes indicate that aio_return() should return the same results as a returning read() or write(), including setting errno on error. In 6.x this does not happen; in 7.0 it does. The included test program can show the result when using gnop(8) to simulate I/O errors. BTW, the test program could be used as a starting point for sample code showing how to use kqueue and aio together. If people agree this is worth fixing, it would be nice to get it into 6.3. -------------- next part -------------- Index: vfs_aio.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_aio.c,v retrieving revision 1.195.2.4 diff -d -u -r1.195.2.4 vfs_aio.c --- vfs_aio.c 9 Sep 2006 01:30:11 -0000 1.195.2.4 +++ vfs_aio.c 29 Nov 2007 19:26:12 -0000 @@ -1529,6 +1529,7 @@ struct aiocblist *cb, *ncb; struct aiocb *ujob; struct kaioinfo *ki; + int status, error; ujob = uap->aiocbp; jobref = fuword(&ujob->_aiocb_private.kernelinfo); @@ -1542,14 +1543,6 @@ TAILQ_FOREACH(cb, &ki->kaio_jobdone, plist) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { - if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { - p->p_stats->p_ru.ru_oublock += - cb->outputcharge; - cb->outputcharge = 0; - } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { - p->p_stats->p_ru.ru_inblock += cb->inputcharge; - cb->inputcharge = 0; - } goto done; } } @@ -1565,15 +1558,33 @@ done: PROC_UNLOCK(p); if (cb != NULL) { - if (ujob == cb->uuaiocb) { - td->td_retval[0] = - cb->uaiocb._aiocb_private.status; - } else - td->td_retval[0] = EFAULT; - aio_free_entry(cb); - return (0); + status = cb->uaiocb._aiocb_private.status; + error = cb->uaiocb._aiocb_private.error; + if (ujob != cb->uuaiocb) { + /* check for a mismatch. is it possible? 
*/ + /* (It's not in 7.x) */ + error = EFAULT; + } else { + if (error == 0) { + td->td_retval[0] = status; + } + if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { + p->p_stats->p_ru.ru_oublock ++ cb->outputcharge; + cb->outputcharge = 0; + } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { + p->p_stats->p_ru.ru_inblock += cb->inputcharge; + cb->inputcharge = 0; + } + suword(&ujob->_aiocb_private.error, error); + suword(&ujob->_aiocb_private.status, status); + aio_free_entry(cb); + } + } else { + /* no such aiocb known */ + error = EINVAL; } - return (EINVAL); + return (error); } /* -------------- next part -------------- #include <stdio.h> #include <stdlib.h> #include <errno.h> #include <strings.h> #include <signal.h> #include <fcntl.h> #include <sys/param.h> #include <stddef.h> #include <sys/aio.h> #include <sys/types.h> #include <sys/event.h> #include <sys/time.h> #define BUFSIZE 512 #define TMOUT_SEC 5 #define TMOUT_NSEC 0 main() { int fd; int ret; struct aiocb my_aiocb; int kq; if ((kq = kqueue()) == -1) err(1, "kqueue"); fd = open("/dev/mfid0s1d.nop", O_RDONLY); if (fd < 0) perror("open"); /* Zero out the aiocb structure (recommended) */ bzero((char *)&my_aiocb, sizeof(struct aiocb)); /* Allocate a data buffer for the aiocb request */ my_aiocb.aio_buf = malloc(BUFSIZE + 1); if (!my_aiocb.aio_buf) perror("malloc"); /* Initialize the necessary fields in the aiocb */ my_aiocb.aio_fildes = fd; my_aiocb.aio_nbytes = BUFSIZE; my_aiocb.aio_offset = (512 * (100LL + 10)); my_aiocb.aio_sigevent.sigev_notify = SIGEV_KEVENT; my_aiocb.aio_sigevent.sigev_notify_kqueue = kq; /* udata for the created kqueue */ #if __FreeBSD_version > 700000 my_aiocb.aio_sigevent.sigev_value.sival_ptr = NULL; #else my_aiocb.aio_sigevent.sigev_value.sigval_ptr = NULL; #endif ret = aio_read(&my_aiocb); if (ret < 0) perror("aio_read"); #define USE_KQUEUE 1 #if USE_KQUEUE while (1) { int i; int n; int nchanges = 0; int nevents = 1; struct kevent ch[4]; struct kevent ev[4]; struct timespec timeout = 
{TMOUT_SEC, TMOUT_NSEC}; n = kevent(kq, ch, nchanges, ev, nevents, &timeout); if (n <= 0) { perror("kevent"); exit(1); } printf("%d events\n", n); for (i = 0; i < n; i++) { printf("event = 0x%x, %hd, %hx, 0x%x, 0x%x, 0x%x\n", ev[i].ident, ev[i].filter, ev[i].flags, ev[i].fflags, ev[i].data, ev[i].udata); if (ev[i].flags & (EV_ERROR | EV_EOF)) { if (ev[i].flags & EV_EOF) { printf("EV_EOF set\n"); } if (ev[i].flags & EV_ERROR) { printf("EV_ERROR set\n"); } /* error */ printf("errno from ev_data is %d\n", (int)ev[i].data); break; } else { printf("kevent returned valid data ready\n"); #if 0 if (ev[i].filter == EVFILT_READ) readable_fd(evi.ident); if (ev[i].filter == EVFILT_WRITE) writeable_fd(evi.ident); #endif } } if ((ret = aio_error(&my_aiocb)) != EINPROGRESS) { printf("aio_error returned %d\n", ret); } if ((ret = aio_return(&my_aiocb)) > 0) { printf("aio_return returned %d\n", ret); printf("aio_return said we have got data\n"); /* got ret bytes on the read */ } else { /* read failed, consult errno */ printf("aio_return returned %d\n", ret); perror("aio_return"); } break; } #else /* ! 
USE_KQUEUE */ while (aio_error(&my_aiocb) == EINPROGRESS) { printf("aio_error returned %d\n", ret); sleep (1); { if ((ret = aio_return(&my_aiocb)) > 0) { printf("aio_return returned %d\n", ret); printf("got data\n"); /* got ret bytes on the read */ } else { /* read failed, consult errno */ printf("aio_return returned %d\n", ret); perror("aio_return"); } #endif } #if 0 struct kevent { uintptr_t ident; /* identifier for this event */ short filter; /* filter for event */ u_short flags; /* action flags for kqueue */ u_int fflags; /* filter flag value */ intptr_t data; /* filter data value */ void *udata; /* opaque user data identifier */ }; struct __aiocb_private { long status; long error; void *kernelinfo; }; typedef struct aiocb { int aio_fildes; /* File descriptor */ off_t aio_offset; /* File offset for I/O */ volatile void *aio_buf;/* I/O buffer in process space */ size_t aio_nbytes; /* Number of bytes for I/O */ char __spare__ [sizeof(int) * 2 + sizeof(void *)]; /* osigevent. */ int aio_lio_opcode; /* LIO opcode */ int aio_reqprio; /* Request priority -- ignored */ struct __aiocb_private _aiocb_private; struct sigevent aio_sigevent; /* Signal to deliver */ } aiocb_t; #endif