linux/drivers/block/aoe/aoecmd.c
<<
>>
Prefs
   1/* Copyright (c) 2004 Coraid, Inc.  See COPYING for GPL terms. */
   2/*
   3 * aoecmd.c
   4 * Filesystem request handling methods
   5 */
   6
   7#include <linux/hdreg.h>
   8#include <linux/blkdev.h>
   9#include <linux/skbuff.h>
  10#include <linux/netdevice.h>
  11#include "aoe.h"
  12
  13#define TIMERTICK (HZ / 10)
  14#define MINTIMER (2 * TIMERTICK)
  15#define MAXTIMER (HZ << 1)
  16#define MAXWAIT (60 * 3)        /* After MAXWAIT seconds, give up and fail dev */
  17
  18static struct sk_buff *
  19new_skb(struct net_device *if_dev, ulong len)
  20{
  21        struct sk_buff *skb;
  22
  23        skb = alloc_skb(len, GFP_ATOMIC);
  24        if (skb) {
  25                skb->nh.raw = skb->mac.raw = skb->data;
  26                skb->dev = if_dev;
  27                skb->protocol = __constant_htons(ETH_P_AOE);
  28                skb->priority = 0;
  29                skb_put(skb, len);
  30                skb->next = skb->prev = NULL;
  31
  32                /* tell the network layer not to perform IP checksums
  33                 * or to get the NIC to do it
  34                 */
  35                skb->ip_summed = CHECKSUM_NONE;
  36        }
  37        return skb;
  38}
  39
  40static struct sk_buff *
  41skb_prepare(struct aoedev *d, struct frame *f)
  42{
  43        struct sk_buff *skb;
  44        char *p;
  45
  46        skb = new_skb(d->ifp, f->ndata + f->writedatalen);
  47        if (!skb) {
  48                printk(KERN_INFO "aoe: skb_prepare: failure to allocate skb\n");
  49                return NULL;
  50        }
  51
  52        p = skb->mac.raw;
  53        memcpy(p, f->data, f->ndata);
  54
  55        if (f->writedatalen) {
  56                p += sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
  57                memcpy(p, f->bufaddr, f->writedatalen);
  58        }
  59
  60        return skb;
  61}
  62
  63static struct frame *
  64getframe(struct aoedev *d, int tag)
  65{
  66        struct frame *f, *e;
  67
  68        f = d->frames;
  69        e = f + d->nframes;
  70        for (; f<e; f++)
  71                if (f->tag == tag)
  72                        return f;
  73        return NULL;
  74}
  75
  76/*
  77 * Leave the top bit clear so we have tagspace for userland.
  78 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
  79 * This driver reserves tag -1 to mean "unused frame."
  80 */
  81static int
  82newtag(struct aoedev *d)
  83{
  84        register ulong n;
  85
  86        n = jiffies & 0xffff;
  87        return n |= (++d->lasttag & 0x7fff) << 16;
  88}
  89
  90static int
  91aoehdr_atainit(struct aoedev *d, struct aoe_hdr *h)
  92{
  93        u16 type = __constant_cpu_to_be16(ETH_P_AOE);
  94        u16 aoemajor = __cpu_to_be16(d->aoemajor);
  95        u32 host_tag = newtag(d);
  96        u32 tag = __cpu_to_be32(host_tag);
  97
  98        memcpy(h->src, d->ifp->dev_addr, sizeof h->src);
  99        memcpy(h->dst, d->addr, sizeof h->dst);
 100        memcpy(h->type, &type, sizeof type);
 101        h->verfl = AOE_HVER;
 102        memcpy(h->major, &aoemajor, sizeof aoemajor);
 103        h->minor = d->aoeminor;
 104        h->cmd = AOECMD_ATA;
 105        memcpy(h->tag, &tag, sizeof tag);
 106
 107        return host_tag;
 108}
 109
 110static void
 111aoecmd_ata_rw(struct aoedev *d, struct frame *f)
 112{
 113        struct aoe_hdr *h;
 114        struct aoe_atahdr *ah;
 115        struct buf *buf;
 116        struct sk_buff *skb;
 117        ulong bcnt;
 118        register sector_t sector;
 119        char writebit, extbit;
 120
 121        writebit = 0x10;
 122        extbit = 0x4;
 123
 124        buf = d->inprocess;
 125
 126        sector = buf->sector;
 127        bcnt = buf->bv_resid;
 128        if (bcnt > MAXATADATA)
 129                bcnt = MAXATADATA;
 130
 131        /* initialize the headers & frame */
 132        h = (struct aoe_hdr *) f->data;
 133        ah = (struct aoe_atahdr *) (h+1);
 134        f->ndata = sizeof *h + sizeof *ah;
 135        memset(h, 0, f->ndata);
 136        f->tag = aoehdr_atainit(d, h);
 137        f->waited = 0;
 138        f->buf = buf;
 139        f->bufaddr = buf->bufaddr;
 140
 141        /* set up ata header */
 142        ah->scnt = bcnt >> 9;
 143        ah->lba0 = sector;
 144        ah->lba1 = sector >>= 8;
 145        ah->lba2 = sector >>= 8;
 146        ah->lba3 = sector >>= 8;
 147        if (d->flags & DEVFL_EXT) {
 148                ah->aflags |= AOEAFL_EXT;
 149                ah->lba4 = sector >>= 8;
 150                ah->lba5 = sector >>= 8;
 151        } else {
 152                extbit = 0;
 153                ah->lba3 &= 0x0f;
 154                ah->lba3 |= 0xe0;       /* LBA bit + obsolete 0xa0 */
 155        }
 156
 157        if (bio_data_dir(buf->bio) == WRITE) {
 158                ah->aflags |= AOEAFL_WRITE;
 159                f->writedatalen = bcnt;
 160        } else {
 161                writebit = 0;
 162                f->writedatalen = 0;
 163        }
 164
 165        ah->cmdstat = WIN_READ | writebit | extbit;
 166
 167        /* mark all tracking fields and load out */
 168        buf->nframesout += 1;
 169        buf->bufaddr += bcnt;
 170        buf->bv_resid -= bcnt;
 171/* printk(KERN_INFO "aoe: bv_resid=%ld\n", buf->bv_resid); */
 172        buf->resid -= bcnt;
 173        buf->sector += bcnt >> 9;
 174        if (buf->resid == 0) {
 175                d->inprocess = NULL;
 176        } else if (buf->bv_resid == 0) {
 177                buf->bv++;
 178                buf->bv_resid = buf->bv->bv_len;
 179                buf->bufaddr = page_address(buf->bv->bv_page) + buf->bv->bv_offset;
 180        }
 181
 182        skb = skb_prepare(d, f);
 183        if (skb) {
 184                skb->next = d->skblist;
 185                d->skblist = skb;
 186        }
 187}
 188
 189/* enters with d->lock held */
 190void
 191aoecmd_work(struct aoedev *d)
 192{
 193        struct frame *f;
 194        struct buf *buf;
 195loop:
 196        f = getframe(d, FREETAG);
 197        if (f == NULL)
 198                return;
 199        if (d->inprocess == NULL) {
 200                if (list_empty(&d->bufq))
 201                        return;
 202                buf = container_of(d->bufq.next, struct buf, bufs);
 203                list_del(d->bufq.next);
 204/*printk(KERN_INFO "aoecmd_work: bi_size=%ld\n", buf->bio->bi_size); */
 205                d->inprocess = buf;
 206        }
 207        aoecmd_ata_rw(d, f);
 208        goto loop;
 209}
 210
 211static void
 212rexmit(struct aoedev *d, struct frame *f)
 213{
 214        struct sk_buff *skb;
 215        struct aoe_hdr *h;
 216        char buf[128];
 217        u32 n;
 218        u32 net_tag;
 219
 220        n = newtag(d);
 221
 222        snprintf(buf, sizeof buf,
 223                "%15s e%ld.%ld oldtag=%08x@%08lx newtag=%08x\n",
 224                "retransmit",
 225                d->aoemajor, d->aoeminor, f->tag, jiffies, n);
 226        aoechr_error(buf);
 227
 228        h = (struct aoe_hdr *) f->data;
 229        f->tag = n;
 230        net_tag = __cpu_to_be32(n);
 231        memcpy(h->tag, &net_tag, sizeof net_tag);
 232
 233        skb = skb_prepare(d, f);
 234        if (skb) {
 235                skb->next = d->skblist;
 236                d->skblist = skb;
 237        }
 238}
 239
 240static int
 241tsince(int tag)
 242{
 243        int n;
 244
 245        n = jiffies & 0xffff;
 246        n -= tag & 0xffff;
 247        if (n < 0)
 248                n += 1<<16;
 249        return n;
 250}
 251
 252static void
 253rexmit_timer(ulong vp)
 254{
 255        struct aoedev *d;
 256        struct frame *f, *e;
 257        struct sk_buff *sl;
 258        register long timeout;
 259        ulong flags, n;
 260
 261        d = (struct aoedev *) vp;
 262        sl = NULL;
 263
 264        /* timeout is always ~150% of the moving average */
 265        timeout = d->rttavg;
 266        timeout += timeout >> 1;
 267
 268        spin_lock_irqsave(&d->lock, flags);
 269
 270        if (d->flags & DEVFL_TKILL) {
 271tdie:           spin_unlock_irqrestore(&d->lock, flags);
 272                return;
 273        }
 274        f = d->frames;
 275        e = f + d->nframes;
 276        for (; f<e; f++) {
 277                if (f->tag != FREETAG && tsince(f->tag) >= timeout) {
 278                        n = f->waited += timeout;
 279                        n /= HZ;
 280                        if (n > MAXWAIT) { /* waited too long.  device failure. */
 281                                aoedev_downdev(d);
 282                                goto tdie;
 283                        }
 284                        rexmit(d, f);
 285                }
 286        }
 287
 288        sl = d->skblist;
 289        d->skblist = NULL;
 290        if (sl) {
 291                n = d->rttavg <<= 1;
 292                if (n > MAXTIMER)
 293                        d->rttavg = MAXTIMER;
 294        }
 295
 296        d->timer.expires = jiffies + TIMERTICK;
 297        add_timer(&d->timer);
 298
 299        spin_unlock_irqrestore(&d->lock, flags);
 300
 301        aoenet_xmit(sl);
 302}
 303
 304static void
 305ataid_complete(struct aoedev *d, unsigned char *id)
 306{
 307        u64 ssize;
 308        u16 n;
 309
 310        /* word 83: command set supported */
 311        n = __le16_to_cpu(*((u16 *) &id[83<<1]));
 312
 313        /* word 86: command set/feature enabled */
 314        n |= __le16_to_cpu(*((u16 *) &id[86<<1]));
 315
 316        if (n & (1<<10)) {      /* bit 10: LBA 48 */
 317                d->flags |= DEVFL_EXT;
 318
 319                /* word 100: number lba48 sectors */
 320                ssize = __le64_to_cpu(*((u64 *) &id[100<<1]));
 321
 322                /* set as in ide-disk.c:init_idedisk_capacity */
 323                d->geo.cylinders = ssize;
 324                d->geo.cylinders /= (255 * 63);
 325                d->geo.heads = 255;
 326                d->geo.sectors = 63;
 327        } else {
 328                d->flags &= ~DEVFL_EXT;
 329
 330                /* number lba28 sectors */
 331                ssize = __le32_to_cpu(*((u32 *) &id[60<<1]));
 332
 333                /* NOTE: obsolete in ATA 6 */
 334                d->geo.cylinders = __le16_to_cpu(*((u16 *) &id[54<<1]));
 335                d->geo.heads = __le16_to_cpu(*((u16 *) &id[55<<1]));
 336                d->geo.sectors = __le16_to_cpu(*((u16 *) &id[56<<1]));
 337        }
 338        d->ssize = ssize;
 339        d->geo.start = 0;
 340        if (d->gd != NULL) {
 341                d->gd->capacity = ssize;
 342                d->flags |= DEVFL_UP;
 343                return;
 344        }
 345        if (d->flags & DEVFL_WORKON) {
 346                printk(KERN_INFO "aoe: ataid_complete: can't schedule work, it's already on!  "
 347                        "(This really shouldn't happen).\n");
 348                return;
 349        }
 350        INIT_WORK(&d->work, aoeblk_gdalloc, d);
 351        schedule_work(&d->work);
 352        d->flags |= DEVFL_WORKON;
 353}
 354
 355static void
 356calc_rttavg(struct aoedev *d, int rtt)
 357{
 358        register long n;
 359
 360        n = rtt;
 361        if (n < MINTIMER)
 362                n = MINTIMER;
 363        else if (n > MAXTIMER)
 364                n = MAXTIMER;
 365
 366        /* g == .25; cf. Congestion Avoidance and Control, Jacobson & Karels; 1988 */
 367        n -= d->rttavg;
 368        d->rttavg += n >> 2;
 369}
 370
 371void
 372aoecmd_ata_rsp(struct sk_buff *skb)
 373{
 374        struct aoedev *d;
 375        struct aoe_hdr *hin;
 376        struct aoe_atahdr *ahin, *ahout;
 377        struct frame *f;
 378        struct buf *buf;
 379        struct sk_buff *sl;
 380        register long n;
 381        ulong flags;
 382        char ebuf[128];
 383        
 384        hin = (struct aoe_hdr *) skb->mac.raw;
 385        d = aoedev_bymac(hin->src);
 386        if (d == NULL) {
 387                snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
 388                        "for unknown device %d.%d\n",
 389                         __be16_to_cpu(*((u16 *) hin->major)),
 390                        hin->minor);
 391                aoechr_error(ebuf);
 392                return;
 393        }
 394
 395        spin_lock_irqsave(&d->lock, flags);
 396
 397        f = getframe(d, __be32_to_cpu(*((u32 *) hin->tag)));
 398        if (f == NULL) {
 399                spin_unlock_irqrestore(&d->lock, flags);
 400                snprintf(ebuf, sizeof ebuf,
 401                        "%15s e%d.%d    tag=%08x@%08lx\n",
 402                        "unexpected rsp",
 403                        __be16_to_cpu(*((u16 *) hin->major)),
 404                        hin->minor,
 405                        __be32_to_cpu(*((u32 *) hin->tag)),
 406                        jiffies);
 407                aoechr_error(ebuf);
 408                return;
 409        }
 410
 411        calc_rttavg(d, tsince(f->tag));
 412
 413        ahin = (struct aoe_atahdr *) (hin+1);
 414        ahout = (struct aoe_atahdr *) (f->data + sizeof(struct aoe_hdr));
 415        buf = f->buf;
 416
 417        if (ahin->cmdstat & 0xa9) {     /* these bits cleared on success */
 418                printk(KERN_CRIT "aoe: aoecmd_ata_rsp: ata error cmd=%2.2Xh "
 419                        "stat=%2.2Xh\n", ahout->cmdstat, ahin->cmdstat);
 420                if (buf)
 421                        buf->flags |= BUFFL_FAIL;
 422        } else {
 423                switch (ahout->cmdstat) {
 424                case WIN_READ:
 425                case WIN_READ_EXT:
 426                        n = ahout->scnt << 9;
 427                        if (skb->len - sizeof *hin - sizeof *ahin < n) {
 428                                printk(KERN_CRIT "aoe: aoecmd_ata_rsp: runt "
 429                                        "ata data size in read.  skb->len=%d\n",
 430                                        skb->len);
 431                                /* fail frame f?  just returning will rexmit. */
 432                                spin_unlock_irqrestore(&d->lock, flags);
 433                                return;
 434                        }
 435                        memcpy(f->bufaddr, ahin+1, n);
 436                case WIN_WRITE:
 437                case WIN_WRITE_EXT:
 438                        break;
 439                case WIN_IDENTIFY:
 440                        if (skb->len - sizeof *hin - sizeof *ahin < 512) {
 441                                printk(KERN_INFO "aoe: aoecmd_ata_rsp: runt data size "
 442                                        "in ataid.  skb->len=%d\n", skb->len);
 443                                spin_unlock_irqrestore(&d->lock, flags);
 444                                return;
 445                        }
 446                        ataid_complete(d, (char *) (ahin+1));
 447                        /* d->flags |= DEVFL_WC_UPDATE; */
 448                        break;
 449                default:
 450                        printk(KERN_INFO "aoe: aoecmd_ata_rsp: unrecognized "
 451                               "outbound ata command %2.2Xh for %d.%d\n", 
 452                               ahout->cmdstat,
 453                               __be16_to_cpu(*((u16 *) hin->major)),
 454                               hin->minor);
 455                }
 456        }
 457
 458        if (buf) {
 459                buf->nframesout -= 1;
 460                if (buf->nframesout == 0 && buf->resid == 0) {
 461                        n = !(buf->flags & BUFFL_FAIL);
 462                        bio_endio(buf->bio, buf->bio->bi_size, 0);
 463                        mempool_free(buf, d->bufpool);
 464                }
 465        }
 466
 467        f->buf = NULL;
 468        f->tag = FREETAG;
 469
 470        aoecmd_work(d);
 471
 472        sl = d->skblist;
 473        d->skblist = NULL;
 474
 475        spin_unlock_irqrestore(&d->lock, flags);
 476
 477        aoenet_xmit(sl);
 478}
 479
 480void
 481aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
 482{
 483        struct aoe_hdr *h;
 484        struct aoe_cfghdr *ch;
 485        struct sk_buff *skb, *sl;
 486        struct net_device *ifp;
 487        u16 aoe_type = __constant_cpu_to_be16(ETH_P_AOE);
 488        u16 net_aoemajor = __cpu_to_be16(aoemajor);
 489
 490        sl = NULL;
 491
 492        read_lock(&dev_base_lock);
 493        for (ifp = dev_base; ifp; dev_put(ifp), ifp = ifp->next) {
 494                dev_hold(ifp);
 495                if (!is_aoe_netif(ifp))
 496                        continue;
 497
 498                skb = new_skb(ifp, sizeof *h + sizeof *ch);
 499                if (skb == NULL) {
 500                        printk(KERN_INFO "aoe: aoecmd_cfg: skb alloc failure\n");
 501                        continue;
 502                }
 503                h = (struct aoe_hdr *) skb->mac.raw;
 504                memset(h, 0, sizeof *h + sizeof *ch);
 505
 506                memset(h->dst, 0xff, sizeof h->dst);
 507                memcpy(h->src, ifp->dev_addr, sizeof h->src);
 508                memcpy(h->type, &aoe_type, sizeof aoe_type);
 509                h->verfl = AOE_HVER;
 510                memcpy(h->major, &net_aoemajor, sizeof net_aoemajor);
 511                h->minor = aoeminor;
 512                h->cmd = AOECMD_CFG;
 513
 514                skb->next = sl;
 515                sl = skb;
 516        }
 517        read_unlock(&dev_base_lock);
 518
 519        aoenet_xmit(sl);
 520}
 521 
 522/*
 523 * Since we only call this in one place (and it only prepares one frame)
 524 * we just return the skb.  Usually we'd chain it up to the d->skblist.
 525 */
 526static struct sk_buff *
 527aoecmd_ata_id(struct aoedev *d)
 528{
 529        struct aoe_hdr *h;
 530        struct aoe_atahdr *ah;
 531        struct frame *f;
 532        struct sk_buff *skb;
 533
 534        f = getframe(d, FREETAG);
 535        if (f == NULL) {
 536                printk(KERN_CRIT "aoe: aoecmd_ata_id: can't get a frame.  "
 537                        "This shouldn't happen.\n");
 538                return NULL;
 539        }
 540
 541        /* initialize the headers & frame */
 542        h = (struct aoe_hdr *) f->data;
 543        ah = (struct aoe_atahdr *) (h+1);
 544        f->ndata = sizeof *h + sizeof *ah;
 545        memset(h, 0, f->ndata);
 546        f->tag = aoehdr_atainit(d, h);
 547        f->waited = 0;
 548        f->writedatalen = 0;
 549
 550        /* this message initializes the device, so we reset the rttavg */
 551        d->rttavg = MAXTIMER;
 552
 553        /* set up ata header */
 554        ah->scnt = 1;
 555        ah->cmdstat = WIN_IDENTIFY;
 556        ah->lba3 = 0xa0;
 557
 558        skb = skb_prepare(d, f);
 559
 560        /* we now want to start the rexmit tracking */
 561        d->flags &= ~DEVFL_TKILL;
 562        d->timer.data = (ulong) d;
 563        d->timer.function = rexmit_timer;
 564        d->timer.expires = jiffies + TIMERTICK;
 565        add_timer(&d->timer);
 566
 567        return skb;
 568}
 569 
 570void
 571aoecmd_cfg_rsp(struct sk_buff *skb)
 572{
 573        struct aoedev *d;
 574        struct aoe_hdr *h;
 575        struct aoe_cfghdr *ch;
 576        ulong flags, bufcnt, sysminor, aoemajor;
 577        struct sk_buff *sl;
 578        enum { MAXFRAMES = 8, MAXSYSMINOR = 255 };
 579
 580        h = (struct aoe_hdr *) skb->mac.raw;
 581        ch = (struct aoe_cfghdr *) (h+1);
 582
 583        /*
 584         * Enough people have their dip switches set backwards to
 585         * warrant a loud message for this special case.
 586         */
 587        aoemajor = __be16_to_cpu(*((u16 *) h->major));
 588        if (aoemajor == 0xfff) {
 589                printk(KERN_CRIT "aoe: aoecmd_cfg_rsp: Warning: shelf "
 590                        "address is all ones.  Check shelf dip switches\n");
 591                return;
 592        }
 593
 594        sysminor = SYSMINOR(aoemajor, h->minor);
 595        if (sysminor > MAXSYSMINOR) {
 596                printk(KERN_INFO "aoe: aoecmd_cfg_rsp: sysminor %ld too "
 597                        "large\n", sysminor);
 598                return;
 599        }
 600
 601        bufcnt = __be16_to_cpu(*((u16 *) ch->bufcnt));
 602        if (bufcnt > MAXFRAMES) /* keep it reasonable */
 603                bufcnt = MAXFRAMES;
 604
 605        d = aoedev_set(sysminor, h->src, skb->dev, bufcnt);
 606        if (d == NULL) {
 607                printk(KERN_INFO "aoe: aoecmd_cfg_rsp: device set failure\n");
 608                return;
 609        }
 610
 611        spin_lock_irqsave(&d->lock, flags);
 612
 613        if (d->flags & (DEVFL_UP | DEVFL_CLOSEWAIT)) {
 614                spin_unlock_irqrestore(&d->lock, flags);
 615                return;
 616        }
 617
 618        d->fw_ver = __be16_to_cpu(*((u16 *) ch->fwver));
 619
 620        /* we get here only if the device is new */
 621        sl = aoecmd_ata_id(d);
 622
 623        spin_unlock_irqrestore(&d->lock, flags);
 624
 625        aoenet_xmit(sl);
 626}
 627
 628
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.