linux/drivers/block/aoe/aoedev.c
/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoedev.c
 * AoE device utility functions; maintains device list.
 */

#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/netdevice.h>
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
#include <linux/kdev_t.h>
#include <linux/moduleparam.h>
#include <linux/string.h>
#include "aoe.h"

static void freetgt(struct aoedev *d, struct aoetgt *t);
static void skbpoolfree(struct aoedev *d);

static int aoe_dyndevs = 1;
module_param(aoe_dyndevs, int, 0644);
MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
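/* Usage note: as a module parameter this can be set at load time,
 * e.g. "modprobe aoe aoe_dyndevs=0" to select the legacy static
 * e<shelf>.<slot> minor mapping, and since the mode above is 0644 it
 * can also be changed later through
 * /sys/module/aoe/parameters/aoe_dyndevs.
 */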

static struct aoedev *devlist;
static DEFINE_SPINLOCK(devlist_lock);

/* Because some systems will have one, many, or no
 *   - partitions,
 *   - slots per shelf,
 *   - or shelves,
 * we need some flexibility in the way the minor numbers
 * are allocated.  So they are dynamic.
 */
#define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)

static DEFINE_SPINLOCK(used_minors_lock);
static DECLARE_BITMAP(used_minors, N_DEVS);
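
/* A worked example, assuming the usual AOE_PARTITIONS of 16 (from
 * aoe.h) and MINORBITS of 20: N_DEVS is (1<<20)/16 == 65536, and the
 * device owning bit n of used_minors owns system minors
 * n*AOE_PARTITIONS through n*AOE_PARTITIONS + 15, one per partition.
 */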

static int
minor_get_dyn(ulong *sysminor)
{
	ulong flags;
	ulong n;
	int error = 0;

	spin_lock_irqsave(&used_minors_lock, flags);
	n = find_first_zero_bit(used_minors, N_DEVS);
	if (n < N_DEVS)
		set_bit(n, used_minors);
	else
		error = -1;
	spin_unlock_irqrestore(&used_minors_lock, flags);

	*sysminor = n * AOE_PARTITIONS;
	return error;
}

static int
minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
{
	ulong flags;
	ulong n;
	int error = 0;
	enum {
		/* for backwards compatibility when !aoe_dyndevs,
		 * a static number of supported slots per shelf */
		NPERSHELF = 16,
	};

	if (aoemin >= NPERSHELF) {
		pr_err("aoe: %s %d slots per shelf\n",
			"static minor device numbers support only",
			NPERSHELF);
		error = -1;
		goto out;
	}

	n = aoemaj * NPERSHELF + aoemin;
	if (n >= N_DEVS) {
		pr_err("aoe: %s with e%ld.%d\n",
			"cannot use static minor device numbers",
			aoemaj, aoemin);
		error = -1;
		goto out;
	}

	spin_lock_irqsave(&used_minors_lock, flags);
	if (test_bit(n, used_minors)) {
		pr_err("aoe: %s %lu\n",
			"existing device already has static minor number",
			n);
		error = -1;
	} else
		set_bit(n, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
	*sysminor = n * AOE_PARTITIONS;
out:
	return error;
}
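
/* Example of the static mapping above (an illustration, not code from
 * the driver): device e1.2 gets n = 1*16 + 2 == 18, so with
 * AOE_PARTITIONS == 16 its whole-disk system minor is 18*16 == 288.
 */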

static int
minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
{
	if (aoe_dyndevs)
		return minor_get_dyn(sysminor);
	else
		return minor_get_static(sysminor, aoemaj, aoemin);
}

static void
minor_free(ulong minor)
{
	ulong flags;

	minor /= AOE_PARTITIONS;
	BUG_ON(minor >= N_DEVS);

	spin_lock_irqsave(&used_minors_lock, flags);
	BUG_ON(!test_bit(minor, used_minors));
	clear_bit(minor, used_minors);
	spin_unlock_irqrestore(&used_minors_lock, flags);
}
/*
 * Users who grab a pointer to the device with aoedev_by_aoeaddr
 * automatically get a reference count and are responsible for
 * performing an aoedev_put.  With the addition of async
 * kthread processing I'm no longer confident that we can
 * guarantee consistency in the face of device flushes.
 *
 * For the time being, we only bother to add extra references for
 * frames sitting on the iocq.  When the kthreads finish processing
 * these frames, they will aoedev_put the device.
 */
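
/* An illustrative sketch of the get/put discipline described above
 * (not a call site copied from this driver):
 *
 *	d = aoedev_by_aoeaddr(maj, min, 0);
 *	if (d) {			// NULL if absent or being killed
 *		...use the device...
 *		aoedev_put(d);		// drop the reference taken above
 *	}
 */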

void
aoedev_put(struct aoedev *d)
{
	ulong flags;

	spin_lock_irqsave(&devlist_lock, flags);
	d->ref--;
	spin_unlock_irqrestore(&devlist_lock, flags);
}

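/* The dummy timer does no work of its own; it simply re-arms itself
 * once a second until DEVFL_TKILL indicates the device is going away.
 */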
static void
dummy_timer(struct timer_list *t)
{
	struct aoedev *d;

	d = from_timer(d, t, timer);
	if (d->flags & DEVFL_TKILL)
		return;
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
}

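/* Fail the in-process request, if any: error the buffer being filled,
 * error every bio still chained on d->ip.nxbio, and complete the
 * request once no bios remain outstanding.
 */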
static void
aoe_failip(struct aoedev *d)
{
	struct request *rq;
	struct aoe_req *req;
	struct bio *bio;

	aoe_failbuf(d, d->ip.buf);
	rq = d->ip.rq;
	if (rq == NULL)
		return;

	req = blk_mq_rq_to_pdu(rq);
	while ((bio = d->ip.nxbio)) {
		bio->bi_status = BLK_STS_IOERR;
		d->ip.nxbio = bio->bi_next;
		req->nr_bios--;
	}

	if (!req->nr_bios)
		aoe_end_request(d, rq, 0);
}

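/* Unlink one frame from the list it sits on, fail its buffer (if it
 * still has one), and return the frame to its target's free list.
 */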
static void
downdev_frame(struct list_head *pos)
{
	struct frame *f;

	f = list_entry(pos, struct frame, head);
	list_del(pos);
	if (f->buf) {
		f->buf->nframesout--;
		aoe_failbuf(f->t->d, f->buf);
	}
	aoe_freetframe(f);
}

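/* Take the device down: fail all active and to-be-retransmitted
 * frames, reset each target's congestion window, fail the in-process
 * request, and force any I/O still pending in the block layer to
 * complete (in error, since DEVFL_UP is now clear).
 */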
void
aoedev_downdev(struct aoedev *d)
{
	struct aoetgt *t, **tt, **te;
	struct list_head *head, *pos, *nx;
	int i;

	d->flags &= ~DEVFL_UP;

	/* clean out active and to-be-retransmitted buffers */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head)
			downdev_frame(pos);
	}
	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head)
		downdev_frame(pos);

	/* reset window dressings */
	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && (t = *tt); tt++) {
		aoecmd_wreset(t);
		t->nout = 0;
	}

	/* clean out the in-process request (if any) */
	aoe_failip(d);

	/* fast fail all pending I/O */
	if (d->blkq) {
		/* UP is cleared; freeze+quiesce to ensure all are errored */
		blk_mq_freeze_queue(d->blkq);
		blk_mq_quiesce_queue(d->blkq);
		blk_mq_unquiesce_queue(d->blkq);
		blk_mq_unfreeze_queue(d->blkq);
	}

	if (d->gd)
		set_capacity(d->gd, 0);
}

/* return whether the user asked for this particular
 * device to be flushed
 */
static int
user_req(char *s, size_t slen, struct aoedev *d)
{
	const char *p;
	size_t lim;

	if (!d->gd)
		return 0;
	p = kbasename(d->gd->disk_name);
	lim = sizeof(d->gd->disk_name);
	lim -= p - d->gd->disk_name;
	if (slen < lim)
		lim = slen;

	return !strncmp(s, p, lim);
}

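/* Free the resources of a device already marked DEVFL_TKILL.  Only
 * the caller that wins the race to set DEVFL_FREEING does the work;
 * DEVFL_FREED is set at the end so that pass three of flush() can
 * unlink and kfree the aoedev itself.
 */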
static void
freedev(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int freeing = 0;
	unsigned long flags;

	spin_lock_irqsave(&d->lock, flags);
	if (d->flags & DEVFL_TKILL
	&& !(d->flags & DEVFL_FREEING)) {
		d->flags |= DEVFL_FREEING;
		freeing = 1;
	}
	spin_unlock_irqrestore(&d->lock, flags);
	if (!freeing)
		return;

	del_timer_sync(&d->timer);
	if (d->gd) {
		aoedisk_rm_debugfs(d);
		del_gendisk(d->gd);
		blk_cleanup_disk(d->gd);
		blk_mq_free_tag_set(&d->tag_set);
	}
	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		freetgt(d, *t);

	mempool_destroy(d->bufpool);
	skbpoolfree(d);
	minor_free(d->sysminor);

	spin_lock_irqsave(&d->lock, flags);
	d->flags |= DEVFL_FREED;
	spin_unlock_irqrestore(&d->lock, flags);
}

enum flush_parms {
	NOT_EXITING = 0,
	EXITING = 1,
};

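/* Tear down devices in three passes: pass one calls aoedev_downdev
 * and marks devices DEVFL_TKILL, pass two calls freedev for the
 * marked devices, and pass three unlinks and kfrees anything marked
 * DEVFL_FREED.  The first two passes may sleep, so they drop
 * devlist_lock and restart the scan after acting on a device.
 */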
static int
flush(const char __user *str, size_t cnt, int exiting)
{
	ulong flags;
	struct aoedev *d, **dd;
	char buf[16];
	int all = 0;
	int specified = 0;	/* flush a specific device */
	unsigned int skipflags;

	skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;

	if (!exiting && cnt >= 3) {
		if (cnt > sizeof buf)
			cnt = sizeof buf;
		if (copy_from_user(buf, str, cnt))
			return -EFAULT;
		all = !strncmp(buf, "all", 3);
		if (!all)
			specified = 1;
	}

	flush_scheduled_work();
	/* pass one: do aoedev_downdev, which might sleep */
restart1:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL)
			goto cont;

		if (exiting) {
			/* unconditionally take each device down */
		} else if (specified) {
			if (!user_req(buf, cnt, d))
				goto cont;
		} else if ((!all && (d->flags & DEVFL_UP))
		|| d->flags & skipflags
		|| d->nopen
		|| d->ref)
			goto cont;

		spin_unlock(&d->lock);
		spin_unlock_irqrestore(&devlist_lock, flags);
		aoedev_downdev(d);
		d->flags |= DEVFL_TKILL;
		goto restart1;
cont:
		spin_unlock(&d->lock);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	/* pass two: call freedev, which might sleep,
	 * for aoedevs marked with DEVFL_TKILL
	 */
restart2:
	spin_lock_irqsave(&devlist_lock, flags);
	for (d = devlist; d; d = d->next) {
		spin_lock(&d->lock);
		if (d->flags & DEVFL_TKILL
		&& !(d->flags & DEVFL_FREEING)) {
			spin_unlock(&d->lock);
			spin_unlock_irqrestore(&devlist_lock, flags);
			freedev(d);
			goto restart2;
		}
		spin_unlock(&d->lock);
	}

	/* pass three: remove aoedevs marked with DEVFL_FREED */
	for (dd = &devlist, d = *dd; d; d = *dd) {
		struct aoedev *doomed = NULL;

		spin_lock(&d->lock);
		if (d->flags & DEVFL_FREED) {
			*dd = d->next;
			doomed = d;
		} else {
			dd = &d->next;
		}
		spin_unlock(&d->lock);
		if (doomed)
			kfree(doomed->targets);
		kfree(doomed);
	}
	spin_unlock_irqrestore(&devlist_lock, flags);

	return 0;
}

int
aoedev_flush(const char __user *str, size_t cnt)
{
	return flush(str, cnt, NOT_EXITING);
}

/* This has been confirmed to occur once with Tms=3*1000 due to the
 * driver changing link and not processing its transmit ring.  Even
 * "solving" the problem by returning an error is hard enough that
 * I'm still punting on it.  With Sms=250 and Tms=30*1000 below, we
 * poll the skb's dataref up to 120 times over 30 seconds before
 * giving up and leaking the skb.
 */
static void
skbfree(struct sk_buff *skb)
{
	enum { Sms = 250, Tms = 30 * 1000};
	int i = Tms / Sms;

	if (skb == NULL)
		return;
	while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
		msleep(Sms);
	if (i < 0) {
		printk(KERN_ERR
			"aoe: %s holds ref: %s\n",
			skb->dev ? skb->dev->name : "netif",
			"cannot free skb -- memory leaked.");
		return;
	}
	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	dev_kfree_skb(skb);
}

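/* Free every skb remaining in the device's emergency skb pool,
 * waiting (in skbfree above) for any that the network layer still
 * references.
 */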
static void
skbpoolfree(struct aoedev *d)
{
	struct sk_buff *skb, *tmp;

	skb_queue_walk_safe(&d->skbpool, skb, tmp)
		skbfree(skb);

	__skb_queue_head_init(&d->skbpool);
}

/* Find the device with the given AoE major/minor address, taking a
 * reference on it; if it is absent and do_alloc is set, allocate and
 * initialize a new one.  Returns NULL on allocation failure or when
 * the device is being killed.
 */
struct aoedev *
aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
{
	struct aoedev *d;
	int i;
	ulong flags;
	ulong sysminor = 0;

	spin_lock_irqsave(&devlist_lock, flags);

	for (d = devlist; d; d = d->next)
		if (d->aoemajor == maj && d->aoeminor == min) {
			spin_lock(&d->lock);
			if (d->flags & DEVFL_TKILL) {
				spin_unlock(&d->lock);
				d = NULL;
				goto out;
			}
			d->ref++;
			spin_unlock(&d->lock);
			break;
		}
	if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
		goto out;
	d = kcalloc(1, sizeof *d, GFP_ATOMIC);
	if (!d) {
		minor_free(sysminor);	/* don't leak the reserved minor */
		goto out;
	}
	d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
	if (!d->targets) {
		kfree(d);
		d = NULL;
		minor_free(sysminor);	/* don't leak the reserved minor */
		goto out;
	}
	d->ntargets = NTARGETS;
	INIT_WORK(&d->work, aoecmd_sleepwork);
	spin_lock_init(&d->lock);
	INIT_LIST_HEAD(&d->rq_list);
	skb_queue_head_init(&d->skbpool);
	timer_setup(&d->timer, dummy_timer, 0);
	d->timer.expires = jiffies + HZ;
	add_timer(&d->timer);
	d->bufpool = NULL;	/* defer to aoeblk_gdalloc */
	d->tgt = d->targets;
	d->ref = 1;
	for (i = 0; i < NFACTIVE; i++)
		INIT_LIST_HEAD(&d->factive[i]);
	INIT_LIST_HEAD(&d->rexmitq);
	d->sysminor = sysminor;
	d->aoemajor = maj;
	d->aoeminor = min;
	d->rttavg = RTTAVG_INIT;
	d->rttdev = RTTDEV_INIT;
	d->next = devlist;
	devlist = d;
 out:
	spin_unlock_irqrestore(&devlist_lock, flags);
	return d;
}

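/* Release a target: drop the references held on its network
 * interfaces and free every frame (and associated skb) on the
 * target's free list.
 */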
static void
freetgt(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct list_head *pos, *nx, *head;
	struct aoeif *ifp;

	for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
		if (!ifp->nd)
			break;
		dev_put(ifp->nd);
	}

	head = &t->ffree;
	list_for_each_safe(pos, nx, head) {
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		skbfree(f->skb);
		kfree(f);
	}
	kfree(t);
}

void
aoedev_exit(void)
{
	flush_scheduled_work();
	flush(NULL, 0, EXITING);
}

int __init
aoedev_init(void)
{
	return 0;
}