From: Chris Mason

AIO poll support

I've attached the patch; it's not quite as obvious as the pipe code, but
not too bad.  I'm not sure if I'm using struct kiocb->private the way it
was intended, but I don't see any other code touching it, so it should be
ok.

On Mon 2004-02-23 at 14:05, Suparna Bhattacharya wrote:

> I was wondering if a particular fop->poll routine, could possibly
> invoke __pollwait for more than one wait queue (I don't know if such
> a case even exists). That kind of a thing would work OK with the existing
> poll logic, but not in our case, because we'd end up queueing the same
> wait queue on two queues which would be a problem.

Oh, I see what you mean.  I looked at a few of the poll_wait callers, and
it seems safe, but there are too many for a full audit right now.

The attached patch fixes the page allocation problem and adds a check to
make sure we don't abuse current->io_wait.  An oops is better than random
corruption at least.

I ran it through my basic test and pipetest; the pipetest results are
below.  The pipetest epoll usage needs updating, so I can only compare
against regular poll.

./pipetest --aio-poll 10000 1 5
using 10000 pipe pairs, 1 message threads, 5 generations, 12 bufsize
Ok! Mode aio-poll: 5 passes in 0.000073 seconds
passes_per_sec: 68493.15

coffee:/usr/src/aio # ./pipetest 10000 1 5
using 10000 pipe pairs, 1 message threads, 5 generations, 12 bufsize
Ok! Mode poll: 5 passes in 0.083066 seconds
passes_per_sec: 60.19

Here are some optimizations.  aio-poll-3 avoids wake_up when it can use
finish_wait instead, and adds a fast path to aio-poll for when data is
already available.
--- akpm-linux-osdl-ogasawara/fs/aio.c | 17 +++ akpm-linux-osdl-ogasawara/fs/select.c | 104 +++++++++++++++++++++- akpm-linux-osdl-ogasawara/include/linux/aio.h | 1 akpm-linux-osdl-ogasawara/include/linux/aio_abi.h | 2 4 files changed, 122 insertions(+), 2 deletions(-) diff -puN fs/aio.c~aio-poll fs/aio.c --- akpm-linux-osdl/fs/aio.c~aio-poll 2004-08-18 15:36:26.753519864 -0700 +++ akpm-linux-osdl-ogasawara/fs/aio.c 2004-08-18 15:36:26.782514728 -0700 @@ -1396,6 +1396,16 @@ static ssize_t aio_fsync(struct kiocb *i } /* + * Retry method for aio_poll (also used for first time submit) + * Responsible for updating iocb state as retries progress + */ +static ssize_t aio_poll(struct kiocb *iocb) +{ + unsigned events = (unsigned)(iocb->ki_buf); + return generic_aio_poll(iocb, events); +} + +/* * aio_setup_iocb: * Performs the initial checks and aio retry method * setup for the kiocb at the time of io submission. @@ -1440,6 +1450,13 @@ ssize_t aio_setup_iocb(struct kiocb *kio if (file->f_op->aio_fsync) kiocb->ki_retry = aio_fsync; break; + case IOCB_CMD_POLL: + ret = -EINVAL; + if (file->f_op->poll) { + memset(kiocb->private, 0, sizeof(kiocb->private)); + kiocb->ki_retry = aio_poll; + } + break; default: dprintk("EINVAL: io_submit: no operation provided\n"); ret = -EINVAL; diff -puN fs/select.c~aio-poll fs/select.c --- akpm-linux-osdl/fs/select.c~aio-poll 2004-08-18 15:36:26.759518802 -0700 +++ akpm-linux-osdl-ogasawara/fs/select.c 2004-08-18 15:36:26.784514374 -0700 @@ -21,6 +21,7 @@ #include /* for STICKY_TIMEOUTS */ #include #include +#include #include @@ -39,6 +40,12 @@ struct poll_table_page { struct poll_table_entry entries[0]; }; +struct aio_poll_table { + int init; + struct poll_wqueues wq; + struct poll_table_page table; +}; + #define POLL_TABLE_FULL(table) \ ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) @@ -109,12 +116,34 @@ void __pollwait(struct file *filp, wait_ /* Add a new entry */ { struct poll_table_entry * entry = table->entry; 
+ wait_queue_t *wait; + wait_queue_t *aio_wait = current->io_wait; + + if (aio_wait) { + /* for aio, there can only be one wait_address. + * we might be adding it again via a retry call + * if so, just return. + * if not, bad things are happening + */ + if (table->entry != table->entries) { + if (table->entries[0].wait_address != wait_address) + BUG(); + return; + } + } + table->entry = entry+1; get_file(filp); entry->filp = filp; entry->wait_address = wait_address; init_waitqueue_entry(&entry->wait, current); - add_wait_queue(wait_address,&entry->wait); + + /* if we're in aioland, use current->io_wait */ + if (aio_wait) + wait = aio_wait; + else + wait = &entry->wait; + add_wait_queue(wait_address,wait); } } @@ -533,3 +562,76 @@ out_fds: poll_freewait(&table); return err; } + +static void aio_poll_freewait(struct aio_poll_table *ap, struct kiocb *iocb) +{ + struct poll_table_page * p = ap->wq.table; + if (p) { + struct poll_table_entry * entry = p->entry; + if (entry > p->entries) { + /* + * there is only one entry for aio polls + */ + entry = p->entries; + if (iocb) + finish_wait(entry->wait_address,&iocb->ki_wait); + else + wake_up(entry->wait_address); + fput(entry->filp); + } + } + ap->init = 0; +} + +static int +aio_poll_cancel(struct kiocb *iocb, struct io_event *evt) +{ + struct aio_poll_table *aio_table; + aio_table = (struct aio_poll_table *)iocb->private; + + evt->obj = (u64)(unsigned long)iocb-> ki_obj.user; + evt->data = iocb->ki_user_data; + evt->res = iocb->ki_nbytes - iocb->ki_left; + if (evt->res == 0) + evt->res = -EINTR; + evt->res2 = 0; + if (aio_table->init) + aio_poll_freewait(aio_table, NULL); + aio_put_req(iocb); + return 0; +} + +ssize_t generic_aio_poll(struct kiocb *iocb, unsigned events) +{ + struct aio_poll_table *aio_table; + unsigned mask; + struct file *file = iocb->ki_filp; + aio_table = (struct aio_poll_table *)iocb->private; + + /* fast path */ + mask = file->f_op->poll(file, NULL); + mask &= events | POLLERR | POLLHUP; + if 
(mask) + return mask; + + if ((sizeof(*aio_table) + sizeof(struct poll_table_entry)) > + sizeof(iocb->private)) + BUG(); + + if (!aio_table->init) { + aio_table->init = 1; + poll_initwait(&aio_table->wq); + aio_table->wq.table = &aio_table->table; + aio_table->table.next = NULL; + aio_table->table.entry = aio_table->table.entries; + } + iocb->ki_cancel = aio_poll_cancel; + + mask = file->f_op->poll(file, &aio_table->wq.pt); + mask &= events | POLLERR | POLLHUP; + if (mask) { + aio_poll_freewait(aio_table, iocb); + return mask; + } + return -EIOCBRETRY; +} diff -puN include/linux/aio_abi.h~aio-poll include/linux/aio_abi.h --- akpm-linux-osdl/include/linux/aio_abi.h~aio-poll 2004-08-18 15:36:26.764517916 -0700 +++ akpm-linux-osdl-ogasawara/include/linux/aio_abi.h 2004-08-18 15:36:26.785514196 -0700 @@ -38,8 +38,8 @@ enum { IOCB_CMD_FDSYNC = 3, /* These two are experimental. * IOCB_CMD_PREADX = 4, - * IOCB_CMD_POLL = 5, */ + IOCB_CMD_POLL = 5, IOCB_CMD_NOOP = 6, }; diff -puN include/linux/aio.h~aio-poll include/linux/aio.h --- akpm-linux-osdl/include/linux/aio.h~aio-poll 2004-08-18 15:36:26.770516853 -0700 +++ akpm-linux-osdl-ogasawara/include/linux/aio.h 2004-08-18 15:36:26.787513842 -0700 @@ -203,4 +203,5 @@ static inline struct kiocb *list_kiocb(s extern atomic_t aio_nr; extern unsigned aio_max_nr; +extern ssize_t generic_aio_poll(struct kiocb *, unsigned); #endif /* __LINUX__AIO_H */ _