1
2
3
4
5
6
7
8
9
10
11#include <linux/kernel.h>
12#include <linux/errno.h>
13#include <linux/time.h>
14#include <linux/aio_abi.h>
15
16
17
18#include <linux/sched.h>
19#include <linux/fs.h>
20#include <linux/file.h>
21#include <linux/mm.h>
22#include <linux/mman.h>
23#include <linux/vmalloc.h>
24#include <linux/iobuf.h>
25#include <linux/slab.h>
26#include <linux/timer.h>
27#include <linux/brlock.h>
28#include <linux/aio.h>
29#include <linux/smp_lock.h>
30#include <linux/compiler.h>
31#include <linux/brlock.h>
32#include <linux/module.h>
33#include <linux/tqueue.h>
34#include <linux/highmem.h>
35
36#include <asm/kmap_types.h>
37#include <asm/uaccess.h>
38
/*
 * dprintk() is the file's debug trace hook.  It is an alias for printk()
 * when DEBUG is greater than 1; otherwise it expands to an empty statement
 * and its arguments are discarded by the preprocessor.
 */
#if DEBUG > 1
#define dprintk printk
#else
#define dprintk(x...) do { ; } while (0)
#endif
44
45
/*
 * aio_nr accumulates the max_reqs of every live context (see ioctx_alloc()
 * and __put_ioctx()); aio_max_nr is the ceiling it is compared against.
 */
atomic_t aio_nr = ATOMIC_INIT(0);
unsigned aio_max_nr = 0x10000;

/* Slab caches for struct kiocb and struct kioctx, created in aio_setup(). */
static kmem_cache_t *kiocb_cachep;
static kmem_cache_t *kioctx_cachep;

/*
 * Task-queue entry scheduled by __aio_put_req(); its routine drains
 * fput_head.
 */
static void aio_fput_routine(void *);
static struct tq_struct fput_tqueue = {
	.routine = aio_fput_routine,
};

/* List of requests queued for aio_fput_routine(); guarded by fput_lock. */
static spinlock_t fput_lock = SPIN_LOCK_UNLOCKED;
LIST_HEAD(fput_head);
61
62
63
64
65
66static int __init aio_setup(void)
67{
68 kiocb_cachep = kmem_cache_create("kiocb", sizeof(struct kiocb),
69 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
70 if (!kiocb_cachep)
71 panic("unable to create kiocb cache\n");
72
73 kioctx_cachep = kmem_cache_create("kioctx", sizeof(struct kioctx),
74 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
75 if (!kioctx_cachep)
76 panic("unable to create kioctx cache");
77
78 printk(KERN_NOTICE "aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
79
80 return 0;
81}
82
83static void aio_free_ring(struct kioctx *ctx)
84{
85 struct aio_ring_info *info = &ctx->ring_info;
86 long i;
87
88 for (i=0; i<info->nr_pages; i++)
89 put_page(info->ring_pages[i]);
90
91 if (info->mmap_size) {
92 down_write(&ctx->mm->mmap_sem);
93 do_munmap(ctx->mm, info->mmap_base, info->mmap_size);
94 up_write(&ctx->mm->mmap_sem);
95 }
96
97 if (info->ring_pages && info->ring_pages != info->internal_pages)
98 kfree(info->ring_pages);
99 info->ring_pages = NULL;
100 info->nr = 0;
101}
102
103static int aio_setup_ring(struct kioctx *ctx)
104{
105 struct aio_ring *ring;
106 struct aio_ring_info *info = &ctx->ring_info;
107 unsigned nr_events = ctx->max_reqs;
108 unsigned long size;
109 int nr_pages;
110
111
112 nr_events += 2;
113
114 size = sizeof(struct aio_ring);
115 size += sizeof(struct io_event) * nr_events;
116 nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT;
117
118 if (nr_pages < 0)
119 return -EINVAL;
120
121 info->nr_pages = nr_pages;
122
123 nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
124
125 info->nr = 0;
126 info->ring_pages = info->internal_pages;
127 if (nr_pages > AIO_RING_PAGES) {
128 info->ring_pages = kmalloc(sizeof(struct page *) * nr_pages, GFP_KERNEL);
129 if (!info->ring_pages)
130 return -ENOMEM;
131 memset(info->ring_pages, 0, sizeof(struct page *) * nr_pages);
132 }
133
134 info->mmap_size = nr_pages * PAGE_SIZE;
135 dprintk("attempting mmap of %lu bytes\n", info->mmap_size);
136 down_write(&ctx->mm->mmap_sem);
137 info->mmap_base = do_mmap(NULL, 0, info->mmap_size,
138 PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE,
139 0);
140 if (IS_ERR((void *)info->mmap_base)) {
141 up_write(&ctx->mm->mmap_sem);
142 printk("mmap err: %ld\n", -info->mmap_base);
143 info->mmap_size = 0;
144 aio_free_ring(ctx);
145 return -EAGAIN;
146 }
147
148 dprintk("mmap address: 0x%08lx\n", info->mmap_base);
149 info->nr_pages = get_user_pages(current, ctx->mm,
150 info->mmap_base, info->mmap_size,
151 1, 0, info->ring_pages, NULL);
152 up_write(&ctx->mm->mmap_sem);
153
154 if (unlikely(info->nr_pages != nr_pages)) {
155 aio_free_ring(ctx);
156 return -EAGAIN;
157 }
158
159 ctx->user_id = info->mmap_base;
160
161 info->nr = nr_events;
162
163 ring = kmap_atomic(info->ring_pages[0], KM_USER0);
164 ring->nr = nr_events;
165 ring->id = ctx->user_id;
166 ring->head = ring->tail = 0;
167 ring->magic = AIO_RING_MAGIC;
168 ring->compat_features = AIO_RING_COMPAT_FEATURES;
169 ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
170 ring->header_length = sizeof(struct aio_ring);
171 kunmap_atomic(ring, KM_USER0);
172
173 return 0;
174}
175
176
177
178
179static inline struct io_event *aio_ring_event(struct aio_ring_info *info, int nr, enum km_type km)
180{
181 struct io_event *events;
182#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event))
183#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
184
185 if (nr < AIO_EVENTS_FIRST_PAGE) {
186 struct aio_ring *ring;
187 ring = kmap_atomic(info->ring_pages[0], km);
188 return &ring->io_events[nr];
189 }
190 nr -= AIO_EVENTS_FIRST_PAGE;
191
192 events = kmap_atomic(info->ring_pages[1 + nr / AIO_EVENTS_PER_PAGE], km);
193
194 return events + (nr % AIO_EVENTS_PER_PAGE);
195}
196
197static inline void put_aio_ring_event(struct io_event *event, enum km_type km)
198{
199 void *p = (void *)((unsigned long)event & PAGE_MASK);
200 kunmap_atomic(p, km);
201}
202
203
204
205
206static struct kioctx *ioctx_alloc(unsigned nr_events)
207{
208 struct mm_struct *mm;
209 struct kioctx *ctx;
210
211
212 if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
213 (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
214 pr_debug("ENOMEM: nr_events too high\n");
215 return ERR_PTR(-EINVAL);
216 }
217
218 if (nr_events > aio_max_nr)
219 return ERR_PTR(-EAGAIN);
220
221 ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL);
222 if (!ctx)
223 return ERR_PTR(-ENOMEM);
224
225 memset(ctx, 0, sizeof(*ctx));
226 ctx->max_reqs = nr_events;
227 mm = ctx->mm = current->mm;
228 atomic_inc(&mm->mm_count);
229
230 atomic_set(&ctx->users, 1);
231 spin_lock_init(&ctx->ctx_lock);
232 spin_lock_init(&ctx->ring_info.ring_lock);
233 init_waitqueue_head(&ctx->wait);
234
235 INIT_LIST_HEAD(&ctx->active_reqs);
236
237 if (aio_setup_ring(ctx) < 0)
238 goto out_freectx;
239
240
241 atomic_add(ctx->max_reqs, &aio_nr);
242 if (unlikely(atomic_read(&aio_nr) > aio_max_nr))
243 goto out_cleanup;
244 write_lock(&mm->ioctx_list_lock);
245 ctx->next = mm->ioctx_list;
246 mm->ioctx_list = ctx;
247 write_unlock(&mm->ioctx_list_lock);
248
249 dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
250 ctx, ctx->user_id, current->mm, ctx->ring_info.ring->nr);
251 return ctx;
252
253out_cleanup:
254 atomic_sub(ctx->max_reqs, &aio_nr);
255 ctx->max_reqs = 0;
256 __put_ioctx(ctx);
257 return ERR_PTR(-EAGAIN);
258
259out_freectx:
260 kmem_cache_free(kioctx_cachep, ctx);
261 ctx = ERR_PTR(-ENOMEM);
262
263 dprintk("aio: error allocating ioctx %p\n", ctx);
264 return ctx;
265}
266
267
268
269
270
271
272static void aio_cancel_all(struct kioctx *ctx)
273{
274 int (*cancel)(struct kiocb *, struct io_event *);
275 struct io_event res;
276 spin_lock_irq(&ctx->ctx_lock);
277 ctx->dead = 1;
278 while (!list_empty(&ctx->active_reqs)) {
279 struct list_head *pos = ctx->active_reqs.next;
280 struct kiocb *iocb = list_kiocb(pos);
281 list_del_init(&iocb->ki_list);
282 cancel = iocb->ki_cancel;
283 if (cancel)
284 iocb->ki_users++;
285 spin_unlock_irq(&ctx->ctx_lock);
286 if (cancel)
287 cancel(iocb, &res);
288 spin_lock_irq(&ctx->ctx_lock);
289 }
290 spin_unlock_irq(&ctx->ctx_lock);
291}
292
293void wait_for_all_aios(struct kioctx *ctx)
294{
295 struct task_struct *tsk = current;
296 DECLARE_WAITQUEUE(wait, tsk);
297
298 if (!ctx->reqs_active)
299 return;
300
301 add_wait_queue(&ctx->wait, &wait);
302 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
303 while (ctx->reqs_active) {
304 printk("ctx->reqs_active = %d\n", ctx->reqs_active);
305 schedule();
306 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
307 }
308 __set_task_state(tsk, TASK_RUNNING);
309 remove_wait_queue(&ctx->wait, &wait);
310}
311
312
313
314
315ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
316{
317 while (iocb->ki_users) {
318 set_current_state(TASK_UNINTERRUPTIBLE);
319 if (!iocb->ki_users)
320 break;
321 schedule();
322 }
323 __set_current_state(TASK_RUNNING);
324 return iocb->ki_user_data;
325}
326
327
328
329
330
331
332
333
334void exit_aio(struct mm_struct *mm)
335{
336 struct kioctx *ctx = mm->ioctx_list;
337 mm->ioctx_list = NULL;
338 while (ctx) {
339 struct kioctx *next = ctx->next;
340 ctx->next = NULL;
341 aio_cancel_all(ctx);
342
343 wait_for_all_aios(ctx);
344
345 if (1 != atomic_read(&ctx->users))
346 printk(KERN_DEBUG
347 "exit_aio:ioctx still alive: %d %d %d\n",
348 atomic_read(&ctx->users), ctx->dead,
349 ctx->reqs_active);
350 put_ioctx(ctx);
351 ctx = next;
352 }
353}
354
355
356
357
358
359void __put_ioctx(struct kioctx *ctx)
360{
361 unsigned nr_events = ctx->max_reqs;
362
363 if (unlikely(ctx->reqs_active))
364 BUG();
365
366 aio_free_ring(ctx);
367 mmdrop(ctx->mm);
368 ctx->mm = NULL;
369 pr_debug("__put_ioctx: freeing %p\n", ctx);
370 kmem_cache_free(kioctx_cachep, ctx);
371
372 atomic_sub(nr_events, &aio_nr);
373}
374
375
376
377
378
379
380static struct kiocb *FASTCALL(__aio_get_req(struct kioctx *ctx));
381static struct kiocb *__aio_get_req(struct kioctx *ctx)
382{
383 struct kiocb *req = NULL;
384 struct aio_ring *ring;
385
386 req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
387 if (unlikely(!req))
388 return NULL;
389
390
391
392
393 spin_lock_irq(&ctx->ctx_lock);
394 ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
395 if (likely(ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring))) {
396 list_add(&req->ki_list, &ctx->active_reqs);
397 get_ioctx(ctx);
398 ctx->reqs_active++;
399 req->ki_user_obj = NULL;
400 req->ki_ctx = ctx;
401 req->ki_users = 1;
402 } else
403 kmem_cache_free(kiocb_cachep, req);
404 kunmap_atomic(ring, KM_USER0);
405 spin_unlock_irq(&ctx->ctx_lock);
406
407 return req;
408}
409
410static inline struct kiocb *aio_get_req(struct kioctx *ctx)
411{
412 struct kiocb *req;
413
414
415
416
417 req = __aio_get_req(ctx);
418 if (unlikely(NULL == req)) {
419 aio_fput_routine(NULL);
420 req = __aio_get_req(ctx);
421 }
422 return req;
423}
424
425static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
426{
427 req->ki_ctx = NULL;
428 req->ki_filp = NULL;
429 req->ki_user_obj = NULL;
430 kmem_cache_free(kiocb_cachep, req);
431 ctx->reqs_active--;
432
433 if (unlikely(!ctx->reqs_active && ctx->dead))
434 wake_up(&ctx->wait);
435}
436
437static void aio_fput_routine(void *data)
438{
439 spin_lock_irq(&fput_lock);
440 while (likely(!list_empty(&fput_head))) {
441 struct kiocb *req = list_kiocb(fput_head.next);
442 struct kioctx *ctx = req->ki_ctx;
443
444 list_del(&req->ki_list);
445 spin_unlock_irq(&fput_lock);
446
447
448 __fput(req->ki_filp);
449
450
451 spin_lock_irq(&ctx->ctx_lock);
452 really_put_req(ctx, req);
453 spin_unlock_irq(&ctx->ctx_lock);
454
455 put_ioctx(ctx);
456 spin_lock_irq(&fput_lock);
457 }
458 spin_unlock_irq(&fput_lock);
459}
460
461
462
463
464static inline int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
465{
466 dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n",
467 req, atomic_read(&req->ki_filp->f_count));
468
469 req->ki_users --;
470 if (unlikely(req->ki_users < 0))
471 BUG();
472 if (likely(req->ki_users))
473 return 0;
474 list_del(&req->ki_list);
475 req->ki_cancel = NULL;
476
477
478
479
480 if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) {
481 get_ioctx(ctx);
482 spin_lock(&fput_lock);
483 list_add(&req->ki_list, &fput_head);
484 spin_unlock(&fput_lock);
485 schedule_task(&fput_tqueue);
486 } else
487 really_put_req(ctx, req);
488 return 1;
489}
490
491
492
493
494
495int aio_put_req(struct kiocb *req)
496{
497 struct kioctx *ctx = req->ki_ctx;
498 int ret;
499 spin_lock_irq(&ctx->ctx_lock);
500 ret = __aio_put_req(ctx, req);
501 spin_unlock_irq(&ctx->ctx_lock);
502 if (ret)
503 put_ioctx(ctx);
504 return ret;
505}
506
507
508
509
510static inline struct kioctx *lookup_ioctx(unsigned long ctx_id)
511{
512 struct kioctx *ioctx;
513 struct mm_struct *mm;
514
515 mm = current->mm;
516 read_lock(&mm->ioctx_list_lock);
517 for (ioctx = mm->ioctx_list; ioctx; ioctx = ioctx->next)
518 if (likely(ioctx->user_id == ctx_id && !ioctx->dead)) {
519 get_ioctx(ioctx);
520 break;
521 }
522 read_unlock(&mm->ioctx_list_lock);
523
524 return ioctx;
525}
526
527
528
529
530
531
532int aio_complete(struct kiocb *iocb, long res, long res2)
533{
534 struct kioctx *ctx = iocb->ki_ctx;
535 struct aio_ring_info *info;
536 struct aio_ring *ring;
537 struct io_event *event;
538 unsigned long flags;
539 unsigned long tail;
540 int ret;
541
542
543
544
545
546
547
548 if (ctx == &ctx->mm->default_kioctx) {
549 int ret;
550
551 iocb->ki_user_data = res;
552 if (iocb->ki_users == 1) {
553 iocb->ki_users = 0;
554 return 1;
555 }
556 spin_lock_irq(&ctx->ctx_lock);
557 iocb->ki_users--;
558 ret = (0 == iocb->ki_users);
559 spin_unlock_irq(&ctx->ctx_lock);
560 return 0;
561 }
562
563 info = &ctx->ring_info;
564
565
566
567
568
569
570
571 spin_lock_irqsave(&ctx->ctx_lock, flags);
572
573 ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
574
575 tail = info->tail;
576 event = aio_ring_event(info, tail, KM_IRQ0);
577 tail = (tail + 1) % info->nr;
578
579 event->obj = (u64)(unsigned long)iocb->ki_user_obj;
580 event->data = iocb->ki_user_data;
581 event->res = res;
582 event->res2 = res2;
583
584 dprintk("aio_complete: %p[%lu]: %p: %p %Lx %lx %lx\n",
585 ctx, tail, iocb, iocb->ki_user_obj, iocb->ki_user_data,
586 res, res2);
587
588
589
590
591 barrier();
592
593 info->tail = tail;
594 ring->tail = tail;
595
596 wmb();
597 put_aio_ring_event(event, KM_IRQ0);
598 kunmap_atomic(ring, KM_IRQ1);
599
600 pr_debug("added to ring %p at [%lu]\n", iocb, tail);
601
602
603 ret = __aio_put_req(ctx, iocb);
604
605 spin_unlock_irqrestore(&ctx->ctx_lock, flags);
606
607 if (waitqueue_active(&ctx->wait))
608 wake_up(&ctx->wait);
609
610 if (ret)
611 put_ioctx(ctx);
612
613 return ret;
614}
615
616
617
618
619
620
621
622static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
623{
624 struct aio_ring_info *info = &ioctx->ring_info;
625 struct aio_ring *ring;
626 unsigned long head;
627 int ret = 0;
628
629 ring = kmap_atomic(info->ring_pages[0], KM_USER0);
630 dprintk("in aio_read_evt h%lu t%lu m%lu\n",
631 (unsigned long)ring->head, (unsigned long)ring->tail,
632 (unsigned long)ring->nr);
633 barrier();
634 if (ring->head == ring->tail)
635 goto out;
636
637 spin_lock(&info->ring_lock);
638
639 head = ring->head % info->nr;
640 if (head != ring->tail) {
641 struct io_event *evp = aio_ring_event(info, head, KM_USER1);
642 *ent = *evp;
643 head = (head + 1) % info->nr;
644 barrier();
645 ring->head = head;
646 ret = 1;
647 put_aio_ring_event(evp, KM_USER1);
648 }
649 spin_unlock(&info->ring_lock);
650
651out:
652 kunmap_atomic(ring, KM_USER0);
653 dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
654 (unsigned long)ring->head, (unsigned long)ring->tail);
655 return ret;
656}
657
/* State for one timed wait: a kernel timer plus the task it wakes. */
struct timeout {
	struct timer_list timer;	/* timer whose handler is timeout_func() */
	int timed_out;			/* set to 1 once the wait has expired */
	struct task_struct *p;		/* task woken by timeout_func() */
};
663
664static void timeout_func(unsigned long data)
665{
666 struct timeout *to = (struct timeout *)data;
667
668 to->timed_out = 1;
669 wake_up_process(to->p);
670}
671
672static inline void init_timeout(struct timeout *to)
673{
674 init_timer(&to->timer);
675 to->timer.data = (unsigned long)to;
676 to->timer.function = timeout_func;
677 to->timed_out = 0;
678 to->p = current;
679}
680
681static inline void set_timeout(long start_jiffies, struct timeout *to,
682 const struct timespec *ts)
683{
684 unsigned long how_long;
685
686 if (ts->tv_sec < 0 || (!ts->tv_sec && !ts->tv_nsec)) {
687 to->timed_out = 1;
688 return;
689 }
690
691 how_long = ts->tv_sec * HZ;
692#define HZ_NS (1000000000 / HZ)
693 how_long += (ts->tv_nsec + HZ_NS - 1) / HZ_NS;
694
695 to->timer.expires = jiffies + how_long;
696 add_timer(&to->timer);
697}
698
699static inline void clear_timeout(struct timeout *to)
700{
701 del_timer_sync(&to->timer);
702}
703
704static int read_events(struct kioctx *ctx,
705 long min_nr, long nr,
706 struct io_event *event,
707 struct timespec *timeout)
708{
709 long start_jiffies = jiffies;
710 struct task_struct *tsk = current;
711 DECLARE_WAITQUEUE(wait, tsk);
712 int ret;
713 int i = 0;
714 struct io_event ent;
715 struct timeout to;
716
717
718
719
720 memset(&ent, 0, sizeof(ent));
721 ret = 0;
722
723 while (likely(i < nr)) {
724 ret = aio_read_evt(ctx, &ent);
725 if (unlikely(ret <= 0))
726 break;
727
728 dprintk("read event: %Lx %Lx %Lx %Lx\n",
729 ent.data, ent.obj, ent.res, ent.res2);
730
731
732 ret = -EFAULT;
733 if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
734 dprintk("aio: lost an event due to EFAULT.\n");
735 break;
736 }
737 ret = 0;
738
739
740 event ++;
741 i ++;
742 }
743
744 if (min_nr <= i)
745 return i;
746 if (ret)
747 return ret;
748
749
750
751 if (timeout) {
752 struct timespec ts;
753 ret = -EFAULT;
754 if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
755 goto out;
756
757 init_timeout(&to);
758 set_timeout(start_jiffies, &to, &ts);
759 }
760
761 while (likely(i < nr)) {
762 add_wait_queue_exclusive(&ctx->wait, &wait);
763 do {
764 set_task_state(tsk, TASK_INTERRUPTIBLE);
765
766 ret = aio_read_evt(ctx, &ent);
767 if (ret)
768 break;
769 if (min_nr <= i)
770 break;
771 ret = 0;
772 if (to.timed_out)
773 break;
774 schedule();
775 if (signal_pending(tsk)) {
776 ret = -EINTR;
777 break;
778 }
779
780 } while (1) ;
781
782 set_task_state(tsk, TASK_RUNNING);
783 remove_wait_queue(&ctx->wait, &wait);
784
785 if (unlikely(ret <= 0))
786 break;
787
788 ret = -EFAULT;
789 if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
790 dprintk("aio: lost an event due to EFAULT.\n");
791 break;
792 }
793
794
795 event ++;
796 i ++;
797 }
798
799 if (timeout)
800 clear_timeout(&to);
801out:
802 return i ? i : ret;
803}
804
805
806
807
808static void io_destroy(struct kioctx *ioctx)
809{
810 struct mm_struct *mm = current->mm;
811 struct kioctx **tmp;
812 int was_dead;
813
814
815 write_lock(&mm->ioctx_list_lock);
816 was_dead = ioctx->dead;
817 ioctx->dead = 1;
818 for (tmp = &mm->ioctx_list; *tmp && *tmp != ioctx;
819 tmp = &(*tmp)->next)
820 ;
821 if (*tmp)
822 *tmp = ioctx->next;
823 write_unlock(&mm->ioctx_list_lock);
824
825 dprintk("aio_release(%p)\n", ioctx);
826 if (likely(!was_dead))
827 put_ioctx(ioctx);
828
829 aio_cancel_all(ioctx);
830 wait_for_all_aios(ioctx);
831 put_ioctx(ioctx);
832}
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t *ctxp)
848{
849 struct kioctx *ioctx = NULL;
850 unsigned long ctx;
851 long ret;
852
853 ret = get_user(ctx, ctxp);
854 if (unlikely(ret))
855 goto out;
856
857 ret = -EINVAL;
858 if (unlikely(ctx || !nr_events || (int)nr_events < 0)) {
859 pr_debug("EINVAL: io_setup: ctx or nr_events > max\n");
860 goto out;
861 }
862
863 ioctx = ioctx_alloc(nr_events);
864 ret = PTR_ERR(ioctx);
865 if (!IS_ERR(ioctx)) {
866 ret = put_user(ioctx->user_id, ctxp);
867 if (!ret)
868 return 0;
869 io_destroy(ioctx);
870 }
871
872out:
873 return ret;
874}
875
876
877
878
879
880
881
882asmlinkage long sys_io_destroy(aio_context_t ctx)
883{
884 struct kioctx *ioctx = lookup_ioctx(ctx);
885 if (likely(NULL != ioctx)) {
886 io_destroy(ioctx);
887 return 0;
888 }
889 pr_debug("EINVAL: io_destroy: invalid context id\n");
890 return -EINVAL;
891}
892
893static int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
894 struct iocb *iocb));
895static int io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
896 struct iocb *iocb)
897{
898 struct kiocb *req;
899 struct file *file;
900 ssize_t ret;
901 char *buf;
902
903
904 if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2 ||
905 iocb->aio_reserved3)) {
906 pr_debug("EINVAL: io_submit: reserve field set\n");
907 return -EINVAL;
908 }
909
910
911 if (unlikely(
912 (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
913 (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
914 ((ssize_t)iocb->aio_nbytes < 0)
915 )) {
916 pr_debug("EINVAL: io_submit: overflow check\n");
917 return -EINVAL;
918 }
919
920 file = fget(iocb->aio_fildes);
921 if (unlikely(!file))
922 return -EBADF;
923
924 req = aio_get_req(ctx);
925 if (unlikely(!req)) {
926 fput(file);
927 return -EAGAIN;
928 }
929
930 req->ki_filp = file;
931 iocb->aio_key = req->ki_key;
932 ret = put_user(iocb->aio_key, &user_iocb->aio_key);
933 if (unlikely(ret)) {
934 dprintk("EFAULT: aio_key\n");
935 goto out_put_req;
936 }
937
938 req->ki_user_obj = user_iocb;
939 req->ki_user_data = iocb->aio_data;
940
941 buf = (char *)(unsigned long)iocb->aio_buf;
942
943 switch (iocb->aio_lio_opcode) {
944 case IOCB_CMD_PREAD:
945 ret = -EBADF;
946 if (unlikely(!(file->f_mode & FMODE_READ)))
947 goto out_put_req;
948 ret = -EFAULT;
949 if (unlikely(!access_ok(VERIFY_WRITE, buf, iocb->aio_nbytes)))
950 goto out_put_req;
951 ret = -EINVAL;
952 if (file->f_op->aio_read)
953 ret = file->f_op->aio_read(req, buf,
954 iocb->aio_nbytes, iocb->aio_offset);
955 break;
956 case IOCB_CMD_PWRITE:
957 ret = -EBADF;
958 if (unlikely(!(file->f_mode & FMODE_WRITE)))
959 goto out_put_req;
960 ret = -EFAULT;
961 if (unlikely(!access_ok(VERIFY_READ, buf, iocb->aio_nbytes)))
962 goto out_put_req;
963 ret = -EINVAL;
964 if (file->f_op->aio_write)
965 ret = file->f_op->aio_write(req, buf,
966 iocb->aio_nbytes, iocb->aio_offset);
967 break;
968 case IOCB_CMD_FDSYNC:
969 ret = -EINVAL;
970 if (file->f_op->aio_fsync)
971 ret = file->f_op->aio_fsync(req, 1);
972 break;
973 case IOCB_CMD_FSYNC:
974 ret = -EINVAL;
975 if (file->f_op->aio_fsync)
976 ret = file->f_op->aio_fsync(req, 0);
977 break;
978 default:
979 dprintk("EINVAL: io_submit: no operation provided\n");
980 ret = -EINVAL;
981 }
982
983 if (likely(EIOCBQUEUED == ret))
984 return 0;
985 if (ret >= 0) {
986 aio_complete(req, ret, 0);
987 return 0;
988 }
989
990out_put_req:
991 aio_put_req(req);
992 return ret;
993}
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007asmlinkage long sys_io_submit(aio_context_t ctx_id, long nr,
1008 struct iocb **iocbpp)
1009{
1010 struct kioctx *ctx;
1011 long ret = 0;
1012 int i;
1013
1014 if (unlikely(nr < 0))
1015 return -EINVAL;
1016
1017 if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
1018 return -EFAULT;
1019
1020 ctx = lookup_ioctx(ctx_id);
1021 if (unlikely(!ctx)) {
1022 pr_debug("EINVAL: io_submit: invalid context id\n");
1023 return -EINVAL;
1024 }
1025
1026 for (i=0; i<nr; i++) {
1027 struct iocb *user_iocb, tmp;
1028
1029 if (unlikely(__get_user(user_iocb, iocbpp + i))) {
1030 ret = -EFAULT;
1031 break;
1032 }
1033
1034 if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
1035 ret = -EFAULT;
1036 break;
1037 }
1038
1039 ret = io_submit_one(ctx, user_iocb, &tmp);
1040 if (ret)
1041 break;
1042 }
1043
1044 put_ioctx(ctx);
1045 return i ? i : ret;
1046}
1047
1048
1049
1050
1051
1052struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb *iocb, u32 key)
1053{
1054 struct list_head *pos;
1055
1056 list_for_each(pos, &ctx->active_reqs) {
1057 struct kiocb *kiocb = list_kiocb(pos);
1058 if (kiocb->ki_user_obj == iocb && kiocb->ki_key == key)
1059 return kiocb;
1060 }
1061 return NULL;
1062}
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb *iocb,
1075 struct io_event *result)
1076{
1077 int (*cancel)(struct kiocb *iocb, struct io_event *res);
1078 struct kioctx *ctx;
1079 struct kiocb *kiocb;
1080 u32 key;
1081 int ret;
1082
1083 ret = get_user(key, &iocb->aio_key);
1084 if (unlikely(ret))
1085 return -EFAULT;
1086
1087 ctx = lookup_ioctx(ctx_id);
1088 if (unlikely(!ctx))
1089 return -EINVAL;
1090
1091 spin_lock_irq(&ctx->ctx_lock);
1092 ret = -EAGAIN;
1093 kiocb = lookup_kiocb(ctx, iocb, key);
1094 if (kiocb && kiocb->ki_cancel) {
1095 cancel = kiocb->ki_cancel;
1096 kiocb->ki_users ++;
1097 } else
1098 cancel = NULL;
1099 spin_unlock_irq(&ctx->ctx_lock);
1100
1101 if (NULL != cancel) {
1102 struct io_event tmp;
1103 printk("calling cancel\n");
1104 ret = cancel(kiocb, &tmp);
1105 if (!ret) {
1106
1107
1108
1109 if (copy_to_user(result, &tmp, sizeof(tmp)))
1110 ret = -EFAULT;
1111 }
1112 } else
1113 printk(KERN_DEBUG "iocb has no cancel operation\n");
1114
1115 put_ioctx(ctx);
1116
1117 return ret;
1118}
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132asmlinkage long sys_io_getevents(aio_context_t ctx_id,
1133 long min_nr,
1134 long nr,
1135 struct io_event *events,
1136 struct timespec *timeout)
1137{
1138 struct kioctx *ioctx = lookup_ioctx(ctx_id);
1139 long ret = -EINVAL;
1140
1141 if (unlikely(min_nr > nr || min_nr < 0 || nr < 0))
1142 return ret;
1143
1144 if (likely(NULL != ioctx)) {
1145 ret = read_events(ioctx, min_nr, nr, events, timeout);
1146 put_ioctx(ioctx);
1147 }
1148
1149 return ret;
1150}
1151
/* Register aio_setup() as an initcall. */
__initcall(aio_setup);

/* Symbols exported from this file. */
EXPORT_SYMBOL(aio_complete);
EXPORT_SYMBOL(aio_put_req);
1156