linux/arch/um/os-Linux/aio.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2004 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
   3 * Licensed under the GPL
   4 */
   5
   6#include <unistd.h>
   7#include <sched.h>
   8#include <signal.h>
   9#include <errno.h>
  10#include <sys/time.h>
  11#include <asm/unistd.h>
  12#include "aio.h"
  13#include "init.h"
  14#include "kern_util.h"
  15#include "os.h"
  16
  17struct aio_thread_req {
  18        enum aio_type type;
  19        int io_fd;
  20        unsigned long long offset;
  21        char *buf;
  22        int len;
  23        struct aio_context *aio;
  24};
  25
  26#if defined(HAVE_AIO_ABI)
  27#include <linux/aio_abi.h>
  28
  29/*
  30 * If we have the headers, we are going to build with AIO enabled.
  31 * If we don't have aio in libc, we define the necessary stubs here.
  32 */
  33
  34#if !defined(HAVE_AIO_LIBC)
  35
  /*
   * Stand-in for the libc io_setup() wrapper: invokes the io_setup system
   * call directly and hands back whatever syscall() returned.
   */
  static long io_setup(int n, aio_context_t *ctxp)
  {
          long ret;

          ret = syscall(__NR_io_setup, n, ctxp);
          return ret;
  }
  40
  /*
   * Stand-in for the libc io_submit() wrapper: invokes the io_submit system
   * call directly and hands back whatever syscall() returned.
   */
  static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
  {
          long ret;

          ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
          return ret;
  }
  45
  /*
   * Stand-in for the libc io_getevents() wrapper: invokes the io_getevents
   * system call directly and hands back whatever syscall() returned.
   */
  static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
                           struct io_event *events, struct timespec *timeout)
  {
          long ret;

          ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
          return ret;
  }
  51
  52#endif
  53
  54/*
  55 * The AIO_MMAP cases force the mmapped page into memory here
  56 * rather than in whatever place first touches the data.  I used
  57 * to do this by touching the page, but that's delicate because
  58 * gcc is prone to optimizing that away.  So, what's done here
  59 * is we read from the descriptor from which the page was
  60 * mapped.  The caller is required to pass an offset which is
  61 * inside the page that was mapped.  Thus, when the read
  62 * returns, we know that the page is in the page cache, and
  63 * that it now backs the mmapped area.
  64 */
  65
  66static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
  67                  int len, unsigned long long offset, struct aio_context *aio)
  68{
  69        struct iocb *iocbp = & ((struct iocb) {
  70                                    .aio_data       = (unsigned long) aio,
  71                                    .aio_fildes     = fd,
  72                                    .aio_buf        = (unsigned long) buf,
  73                                    .aio_nbytes     = len,
  74                                    .aio_offset     = offset
  75                             });
  76        char c;
  77
  78        switch (type) {
  79        case AIO_READ:
  80                iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
  81                break;
  82        case AIO_WRITE:
  83                iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
  84                break;
  85        case AIO_MMAP:
  86                iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
  87                iocbp->aio_buf = (unsigned long) &c;
  88                iocbp->aio_nbytes = sizeof(c);
  89                break;
  90        default:
  91                printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
  92                return -EINVAL;
  93        }
  94
  95        return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
  96}
  97
  /*
   * Host AIO context.  init_aio_26 fills it in through io_setup(); after
   * that it is only read.
   */
  static aio_context_t ctx;
 100
 101static int aio_thread(void *arg)
 102{
 103        struct aio_thread_reply reply;
 104        struct io_event event;
 105        int err, n, reply_fd;
 106
 107        signal(SIGWINCH, SIG_IGN);
 108
 109        while (1) {
 110                n = io_getevents(ctx, 1, 1, &event, NULL);
 111                if (n < 0) {
 112                        if (errno == EINTR)
 113                                continue;
 114                        printk(UM_KERN_ERR "aio_thread - io_getevents failed, "
 115                               "errno = %d\n", errno);
 116                }
 117                else {
 118                        reply = ((struct aio_thread_reply)
 119                                { .data = (void *) (long) event.data,
 120                                                .err    = event.res });
 121                        reply_fd = ((struct aio_context *) reply.data)->reply_fd;
 122                        err = write(reply_fd, &reply, sizeof(reply));
 123                        if (err != sizeof(reply))
 124                                printk(UM_KERN_ERR "aio_thread - write failed, "
 125                                       "fd = %d, err = %d\n", reply_fd, errno);
 126                }
 127        }
 128        return 0;
 129}
 130
 131#endif
 132
 133static int do_not_aio(struct aio_thread_req *req)
 134{
 135        char c;
 136        unsigned long long actual;
 137        int n;
 138
 139        actual = lseek64(req->io_fd, req->offset, SEEK_SET);
 140        if (actual != req->offset)
 141                return -errno;
 142
 143        switch (req->type) {
 144        case AIO_READ:
 145                n = read(req->io_fd, req->buf, req->len);
 146                break;
 147        case AIO_WRITE:
 148                n = write(req->io_fd, req->buf, req->len);
 149                break;
 150        case AIO_MMAP:
 151                n = read(req->io_fd, &c, sizeof(c));
 152                break;
 153        default:
 154                printk(UM_KERN_ERR "do_not_aio - bad request type : %d\n",
 155                       req->type);
 156                return -EINVAL;
 157        }
 158
 159        if (n < 0)
 160                return -errno;
 161        return 0;
 162}
 163
 /*
  * Helper-thread state.  All of it is set up from the initcalls and left
  * alone afterwards.
  */
 static int aio_req_fd_r = -1;           /* not_aio_thread reads requests here */
 static int aio_req_fd_w = -1;           /* submit_aio_24 writes requests here */
 static int aio_pid = -1;                /* helper thread pid, -1 if none */
 static unsigned long aio_stack;         /* helper thread stack, freed on exit */
 169
 170static int not_aio_thread(void *arg)
 171{
 172        struct aio_thread_req req;
 173        struct aio_thread_reply reply;
 174        int err;
 175
 176        signal(SIGWINCH, SIG_IGN);
 177        while (1) {
 178                err = read(aio_req_fd_r, &req, sizeof(req));
 179                if (err != sizeof(req)) {
 180                        if (err < 0)
 181                                printk(UM_KERN_ERR "not_aio_thread - "
 182                                       "read failed, fd = %d, err = %d\n",
 183                                       aio_req_fd_r,
 184                                       errno);
 185                        else {
 186                                printk(UM_KERN_ERR "not_aio_thread - short "
 187                                       "read, fd = %d, length = %d\n",
 188                                       aio_req_fd_r, err);
 189                        }
 190                        continue;
 191                }
 192                err = do_not_aio(&req);
 193                reply = ((struct aio_thread_reply) { .data      = req.aio,
 194                                                     .err       = err });
 195                err = write(req.aio->reply_fd, &reply, sizeof(reply));
 196                if (err != sizeof(reply))
 197                        printk(UM_KERN_ERR "not_aio_thread - write failed, "
 198                               "fd = %d, err = %d\n", req.aio->reply_fd, errno);
 199        }
 200
 201        return 0;
 202}
 203
 204static int init_aio_24(void)
 205{
 206        int fds[2], err;
 207
 208        err = os_pipe(fds, 1, 1);
 209        if (err)
 210                goto out;
 211
 212        aio_req_fd_w = fds[0];
 213        aio_req_fd_r = fds[1];
 214
 215        err = os_set_fd_block(aio_req_fd_w, 0);
 216        if (err)
 217                goto out_close_pipe;
 218
 219        err = run_helper_thread(not_aio_thread, NULL,
 220                                CLONE_FILES | CLONE_VM, &aio_stack);
 221        if (err < 0)
 222                goto out_close_pipe;
 223
 224        aio_pid = err;
 225        goto out;
 226
 227out_close_pipe:
 228        close(fds[0]);
 229        close(fds[1]);
 230        aio_req_fd_w = -1;
 231        aio_req_fd_r = -1;
 232out:
 233#ifndef HAVE_AIO_ABI
 234        printk(UM_KERN_INFO "/usr/include/linux/aio_abi.h not present during "
 235               "build\n");
 236#endif
 237        printk(UM_KERN_INFO "2.6 host AIO support not used - falling back to "
 238               "I/O thread\n");
 239        return 0;
 240}
 241
 242#ifdef HAVE_AIO_ABI
 243#define DEFAULT_24_AIO 0
 244static int init_aio_26(void)
 245{
 246        int err;
 247
 248        if (io_setup(256, &ctx)) {
 249                err = -errno;
 250                printk(UM_KERN_ERR "aio_thread failed to initialize context, "
 251                       "err = %d\n", errno);
 252                return err;
 253        }
 254
 255        err = run_helper_thread(aio_thread, NULL,
 256                                CLONE_FILES | CLONE_VM, &aio_stack);
 257        if (err < 0)
 258                return err;
 259
 260        aio_pid = err;
 261
 262        printk(UM_KERN_INFO "Using 2.6 host AIO\n");
 263        return 0;
 264}
 265
 266static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
 267                         unsigned long long offset, struct aio_context *aio)
 268{
 269        struct aio_thread_reply reply;
 270        int err;
 271
 272        err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
 273        if (err) {
 274                reply = ((struct aio_thread_reply) { .data = aio,
 275                                         .err  = err });
 276                err = write(aio->reply_fd, &reply, sizeof(reply));
 277                if (err != sizeof(reply)) {
 278                        err = -errno;
 279                        printk(UM_KERN_ERR "submit_aio_26 - write failed, "
 280                               "fd = %d, err = %d\n", aio->reply_fd, -err);
 281                }
 282                else err = 0;
 283        }
 284
 285        return err;
 286}
 287
 288#else
 289#define DEFAULT_24_AIO 1
 /*
  * Built without the host AIO headers: there is nothing to initialize, so
  * report that the facility is not implemented.
  */
 static int init_aio_26(void)
 {
         const int not_supported = -ENOSYS;

         return not_supported;
 }
 294
 295static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
 296                         unsigned long long offset, struct aio_context *aio)
 297{
 298        return -ENOSYS;
 299}
 300#endif
 301
 302/* Initialized in an initcall and unchanged thereafter */
 303static int aio_24 = DEFAULT_24_AIO;
 304
 305static int __init set_aio_24(char *name, int *add)
 306{
 307        aio_24 = 1;
 308        return 0;
 309}
 310
 311__uml_setup("aio=2.4", set_aio_24,
 312"aio=2.4\n"
 313"    This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
 314"    available.  2.4 AIO is a single thread that handles one request at a\n"
 315"    time, synchronously.  2.6 AIO is a thread which uses the 2.6 AIO \n"
 316"    interface to handle an arbitrary number of pending requests.  2.6 AIO \n"
 317"    is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
 318"    /usr/include/linux/aio_abi.h not available.  Many distributions don't\n"
 319"    include aio_abi.h, so you will need to copy it from a kernel tree to\n"
 320"    your /usr/include/linux in order to build an AIO-capable UML\n\n"
 321);
 322
 323static int init_aio(void)
 324{
 325        int err;
 326
 327        if (!aio_24) {
 328                err = init_aio_26();
 329                if (err && (errno == ENOSYS)) {
 330                        printk(UM_KERN_INFO "2.6 AIO not supported on the "
 331                               "host - reverting to 2.4 AIO\n");
 332                        aio_24 = 1;
 333                }
 334                else return err;
 335        }
 336
 337        if (aio_24)
 338                return init_aio_24();
 339
 340        return 0;
 341}
 342
 /*
  * init_aio is registered as an __initcall while exit_aio is a
  * __uml_exitcall.  The asymmetry is deliberate.  init_aio has to be called
  * when the kernel is running because it calls run_helper, which needs
  * get_free_page.  exit_aio cannot be an __exitcall because the generic
  * kernel does not run __exitcalls on shutdown, and can't, since many of
  * them break when called outside of module unloading.
  */
 __initcall(init_aio);
 351
 352static void exit_aio(void)
 353{
 354        if (aio_pid != -1) {
 355                os_kill_process(aio_pid, 1);
 356                free_stack(aio_stack, 0);
 357        }
 358}
 359
 360__uml_exitcall(exit_aio);
 361
 362static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
 363                         unsigned long long offset, struct aio_context *aio)
 364{
 365        struct aio_thread_req req = { .type             = type,
 366                                      .io_fd            = io_fd,
 367                                      .offset           = offset,
 368                                      .buf              = buf,
 369                                      .len              = len,
 370                                      .aio              = aio,
 371        };
 372        int err;
 373
 374        err = write(aio_req_fd_w, &req, sizeof(req));
 375        if (err == sizeof(req))
 376                err = 0;
 377        else err = -errno;
 378
 379        return err;
 380}
 381
 382int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
 383               unsigned long long offset, int reply_fd,
 384               struct aio_context *aio)
 385{
 386        aio->reply_fd = reply_fd;
 387        if (aio_24)
 388                return submit_aio_24(type, io_fd, buf, len, offset, aio);
 389        else
 390                return submit_aio_26(type, io_fd, buf, len, offset, aio);
 391}
 392
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.