linux/drivers/infiniband/hw/ipath/ipath_file_ops.c
<<
>>
Prefs
   1/*
   2 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
   3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
   4 *
   5 * This software is available to you under a choice of one of two
   6 * licenses.  You may choose to be licensed under the terms of the GNU
   7 * General Public License (GPL) Version 2, available from the file
   8 * COPYING in the main directory of this source tree, or the
   9 * OpenIB.org BSD license below:
  10 *
  11 *     Redistribution and use in source and binary forms, with or
  12 *     without modification, are permitted provided that the following
  13 *     conditions are met:
  14 *
  15 *      - Redistributions of source code must retain the above
  16 *        copyright notice, this list of conditions and the following
  17 *        disclaimer.
  18 *
  19 *      - Redistributions in binary form must reproduce the above
  20 *        copyright notice, this list of conditions and the following
  21 *        disclaimer in the documentation and/or other materials
  22 *        provided with the distribution.
  23 *
  24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  31 * SOFTWARE.
  32 */
  33
  34#include <linux/pci.h>
  35#include <linux/poll.h>
  36#include <linux/cdev.h>
  37#include <linux/swap.h>
  38#include <linux/vmalloc.h>
  39#include <asm/pgtable.h>
  40
  41#include "ipath_kernel.h"
  42#include "ipath_layer.h"
  43#include "ipath_common.h"
  44
  45static int ipath_open(struct inode *, struct file *);
  46static int ipath_close(struct inode *, struct file *);
  47static ssize_t ipath_write(struct file *, const char __user *, size_t,
  48                           loff_t *);
  49static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
  50static int ipath_mmap(struct file *, struct vm_area_struct *);
  51
  52static struct file_operations ipath_file_ops = {
  53        .owner = THIS_MODULE,
  54        .write = ipath_write,
  55        .open = ipath_open,
  56        .release = ipath_close,
  57        .poll = ipath_poll,
  58        .mmap = ipath_mmap
  59};
  60
  61static int ipath_get_base_info(struct ipath_portdata *pd,
  62                               void __user *ubase, size_t ubase_size)
  63{
  64        int ret = 0;
  65        struct ipath_base_info *kinfo = NULL;
  66        struct ipath_devdata *dd = pd->port_dd;
  67
  68        if (ubase_size < sizeof(*kinfo)) {
  69                ipath_cdbg(PROC,
  70                           "Base size %lu, need %lu (version mismatch?)\n",
  71                           (unsigned long) ubase_size,
  72                           (unsigned long) sizeof(*kinfo));
  73                ret = -EINVAL;
  74                goto bail;
  75        }
  76
  77        kinfo = kzalloc(sizeof(*kinfo), GFP_KERNEL);
  78        if (kinfo == NULL) {
  79                ret = -ENOMEM;
  80                goto bail;
  81        }
  82
  83        ret = dd->ipath_f_get_base_info(pd, kinfo);
  84        if (ret < 0)
  85                goto bail;
  86
  87        kinfo->spi_rcvhdr_cnt = dd->ipath_rcvhdrcnt;
  88        kinfo->spi_rcvhdrent_size = dd->ipath_rcvhdrentsize;
  89        kinfo->spi_tidegrcnt = dd->ipath_rcvegrcnt;
  90        kinfo->spi_rcv_egrbufsize = dd->ipath_rcvegrbufsize;
  91        /*
  92         * have to mmap whole thing
  93         */
  94        kinfo->spi_rcv_egrbuftotlen =
  95                pd->port_rcvegrbuf_chunks * pd->port_rcvegrbuf_size;
  96        kinfo->spi_rcv_egrperchunk = pd->port_rcvegrbufs_perchunk;
  97        kinfo->spi_rcv_egrchunksize = kinfo->spi_rcv_egrbuftotlen /
  98                pd->port_rcvegrbuf_chunks;
  99        kinfo->spi_tidcnt = dd->ipath_rcvtidcnt;
 100        /*
 101         * for this use, may be ipath_cfgports summed over all chips that
 102         * are are configured and present
 103         */
 104        kinfo->spi_nports = dd->ipath_cfgports;
 105        /* unit (chip/board) our port is on */
 106        kinfo->spi_unit = dd->ipath_unit;
 107        /* for now, only a single page */
 108        kinfo->spi_tid_maxsize = PAGE_SIZE;
 109
 110        /*
 111         * Doing this per port, and based on the skip value, etc.  This has
 112         * to be the actual buffer size, since the protocol code treats it
 113         * as an array.
 114         *
 115         * These have to be set to user addresses in the user code via mmap.
 116         * These values are used on return to user code for the mmap target
 117         * addresses only.  For 32 bit, same 44 bit address problem, so use
 118         * the physical address, not virtual.  Before 2.6.11, using the
 119         * page_address() macro worked, but in 2.6.11, even that returns the
 120         * full 64 bit address (upper bits all 1's).  So far, using the
 121         * physical addresses (or chip offsets, for chip mapping) works, but
 122         * no doubt some future kernel release will chang that, and we'll be
 123         * on to yet another method of dealing with this
 124         */
 125        kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
 126        kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys;
 127        kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
 128        kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
 129        kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
 130                (void *) dd->ipath_statusp -
 131                (void *) dd->ipath_pioavailregs_dma;
 132        kinfo->spi_piobufbase = (u64) pd->port_piobufs;
 133        kinfo->__spi_uregbase =
 134                dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
 135
 136        kinfo->spi_pioindex = dd->ipath_pbufsport * (pd->port_port - 1);
 137        kinfo->spi_piocnt = dd->ipath_pbufsport;
 138        kinfo->spi_pioalign = dd->ipath_palign;
 139
 140        kinfo->spi_qpair = IPATH_KD_QP;
 141        kinfo->spi_piosize = dd->ipath_ibmaxlen;
 142        kinfo->spi_mtu = dd->ipath_ibmaxlen;    /* maxlen, not ibmtu */
 143        kinfo->spi_port = pd->port_port;
 144        kinfo->spi_sw_version = IPATH_KERN_SWVERSION;
 145        kinfo->spi_hw_version = dd->ipath_revision;
 146
 147        if (copy_to_user(ubase, kinfo, sizeof(*kinfo)))
 148                ret = -EFAULT;
 149
 150bail:
 151        kfree(kinfo);
 152        return ret;
 153}
 154
 155/**
 156 * ipath_tid_update - update a port TID
 157 * @pd: the port
 158 * @ti: the TID information
 159 *
 160 * The new implementation as of Oct 2004 is that the driver assigns
 161 * the tid and returns it to the caller.   To make it easier to
 162 * catch bugs, and to reduce search time, we keep a cursor for
 163 * each port, walking the shadow tid array to find one that's not
 164 * in use.
 165 *
 166 * For now, if we can't allocate the full list, we fail, although
 167 * in the long run, we'll allocate as many as we can, and the
 168 * caller will deal with that by trying the remaining pages later.
 169 * That means that when we fail, we have to mark the tids as not in
 170 * use again, in our shadow copy.
 171 *
 172 * It's up to the caller to free the tids when they are done.
 173 * We'll unlock the pages as they free them.
 174 *
 175 * Also, right now we are locking one page at a time, but since
 176 * the intended use of this routine is for a single group of
 177 * virtually contiguous pages, that should change to improve
 178 * performance.
 179 */
 180static int ipath_tid_update(struct ipath_portdata *pd,
 181                            const struct ipath_tid_info *ti)
 182{
 183        int ret = 0, ntids;
 184        u32 tid, porttid, cnt, i, tidcnt;
 185        u16 *tidlist;
 186        struct ipath_devdata *dd = pd->port_dd;
 187        u64 physaddr;
 188        unsigned long vaddr;
 189        u64 __iomem *tidbase;
 190        unsigned long tidmap[8];
 191        struct page **pagep = NULL;
 192
 193        if (!dd->ipath_pageshadow) {
 194                ret = -ENOMEM;
 195                goto done;
 196        }
 197
 198        cnt = ti->tidcnt;
 199        if (!cnt) {
 200                ipath_dbg("After copyin, tidcnt 0, tidlist %llx\n",
 201                          (unsigned long long) ti->tidlist);
 202                /*
 203                 * Should we treat as success?  likely a bug
 204                 */
 205                ret = -EFAULT;
 206                goto done;
 207        }
 208        tidcnt = dd->ipath_rcvtidcnt;
 209        if (cnt >= tidcnt) {
 210                /* make sure it all fits in port_tid_pg_list */
 211                dev_info(&dd->pcidev->dev, "Process tried to allocate %u "
 212                         "TIDs, only trying max (%u)\n", cnt, tidcnt);
 213                cnt = tidcnt;
 214        }
 215        pagep = (struct page **)pd->port_tid_pg_list;
 216        tidlist = (u16 *) (&pagep[cnt]);
 217
 218        memset(tidmap, 0, sizeof(tidmap));
 219        tid = pd->port_tidcursor;
 220        /* before decrement; chip actual # */
 221        porttid = pd->port_port * tidcnt;
 222        ntids = tidcnt;
 223        tidbase = (u64 __iomem *) (((char __iomem *) dd->ipath_kregbase) +
 224                                   dd->ipath_rcvtidbase +
 225                                   porttid * sizeof(*tidbase));
 226
 227        ipath_cdbg(VERBOSE, "Port%u %u tids, cursor %u, tidbase %p\n",
 228                   pd->port_port, cnt, tid, tidbase);
 229
 230        /* virtual address of first page in transfer */
 231        vaddr = ti->tidvaddr;
 232        if (!access_ok(VERIFY_WRITE, (void __user *) vaddr,
 233                       cnt * PAGE_SIZE)) {
 234                ipath_dbg("Fail vaddr %p, %u pages, !access_ok\n",
 235                          (void *)vaddr, cnt);
 236                ret = -EFAULT;
 237                goto done;
 238        }
 239        ret = ipath_get_user_pages(vaddr, cnt, pagep);
 240        if (ret) {
 241                if (ret == -EBUSY) {
 242                        ipath_dbg("Failed to lock addr %p, %u pages "
 243                                  "(already locked)\n",
 244                                  (void *) vaddr, cnt);
 245                        /*
 246                         * for now, continue, and see what happens but with
 247                         * the new implementation, this should never happen,
 248                         * unless perhaps the user has mpin'ed the pages
 249                         * themselves (something we need to test)
 250                         */
 251                        ret = 0;
 252                } else {
 253                        dev_info(&dd->pcidev->dev,
 254                                 "Failed to lock addr %p, %u pages: "
 255                                 "errno %d\n", (void *) vaddr, cnt, -ret);
 256                        goto done;
 257                }
 258        }
 259        for (i = 0; i < cnt; i++, vaddr += PAGE_SIZE) {
 260                for (; ntids--; tid++) {
 261                        if (tid == tidcnt)
 262                                tid = 0;
 263                        if (!dd->ipath_pageshadow[porttid + tid])
 264                                break;
 265                }
 266                if (ntids < 0) {
 267                        /*
 268                         * oops, wrapped all the way through their TIDs,
 269                         * and didn't have enough free; see comments at
 270                         * start of routine
 271                         */
 272                        ipath_dbg("Not enough free TIDs for %u pages "
 273                                  "(index %d), failing\n", cnt, i);
 274                        i--;    /* last tidlist[i] not filled in */
 275                        ret = -ENOMEM;
 276                        break;
 277                }
 278                tidlist[i] = tid;
 279                ipath_cdbg(VERBOSE, "Updating idx %u to TID %u, "
 280                           "vaddr %lx\n", i, tid, vaddr);
 281                /* we "know" system pages and TID pages are same size */
 282                dd->ipath_pageshadow[porttid + tid] = pagep[i];
 283                /*
 284                 * don't need atomic or it's overhead
 285                 */
 286                __set_bit(tid, tidmap);
 287                physaddr = page_to_phys(pagep[i]);
 288                ipath_stats.sps_pagelocks++;
 289                ipath_cdbg(VERBOSE,
 290                           "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
 291                           tid, vaddr, (unsigned long long) physaddr,
 292                           pagep[i]);
 293                dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr);
 294                /*
 295                 * don't check this tid in ipath_portshadow, since we
 296                 * just filled it in; start with the next one.
 297                 */
 298                tid++;
 299        }
 300
 301        if (ret) {
 302                u32 limit;
 303        cleanup:
 304                /* jump here if copy out of updated info failed... */
 305                ipath_dbg("After failure (ret=%d), undo %d of %d entries\n",
 306                          -ret, i, cnt);
 307                /* same code that's in ipath_free_tid() */
 308                limit = sizeof(tidmap) * BITS_PER_BYTE;
 309                if (limit > tidcnt)
 310                        /* just in case size changes in future */
 311                        limit = tidcnt;
 312                tid = find_first_bit((const unsigned long *)tidmap, limit);
 313                for (; tid < limit; tid++) {
 314                        if (!test_bit(tid, tidmap))
 315                                continue;
 316                        if (dd->ipath_pageshadow[porttid + tid]) {
 317                                ipath_cdbg(VERBOSE, "Freeing TID %u\n",
 318                                           tid);
 319                                dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
 320                                                    dd->ipath_tidinvalid);
 321                                dd->ipath_pageshadow[porttid + tid] = NULL;
 322                                ipath_stats.sps_pageunlocks++;
 323                        }
 324                }
 325                ipath_release_user_pages(pagep, cnt);
 326        } else {
 327                /*
 328                 * Copy the updated array, with ipath_tid's filled in, back
 329                 * to user.  Since we did the copy in already, this "should
 330                 * never fail" If it does, we have to clean up...
 331                 */
 332                if (copy_to_user((void __user *)
 333                                 (unsigned long) ti->tidlist,
 334                                 tidlist, cnt * sizeof(*tidlist))) {
 335                        ret = -EFAULT;
 336                        goto cleanup;
 337                }
 338                if (copy_to_user((void __user *) (unsigned long) ti->tidmap,
 339                                 tidmap, sizeof tidmap)) {
 340                        ret = -EFAULT;
 341                        goto cleanup;
 342                }
 343                if (tid == tidcnt)
 344                        tid = 0;
 345                pd->port_tidcursor = tid;
 346        }
 347
 348done:
 349        if (ret)
 350                ipath_dbg("Failed to map %u TID pages, failing with %d\n",
 351                          ti->tidcnt, -ret);
 352        return ret;
 353}
 354
 355/**
 356 * ipath_tid_free - free a port TID
 357 * @pd: the port
 358 * @ti: the TID info
 359 *
 360 * right now we are unlocking one page at a time, but since
 361 * the intended use of this routine is for a single group of
 362 * virtually contiguous pages, that should change to improve
 363 * performance.  We check that the TID is in range for this port
 364 * but otherwise don't check validity; if user has an error and
 365 * frees the wrong tid, it's only their own data that can thereby
 366 * be corrupted.  We do check that the TID was in use, for sanity
 367 * We always use our idea of the saved address, not the address that
 368 * they pass in to us.
 369 */
 370
 371static int ipath_tid_free(struct ipath_portdata *pd,
 372                          const struct ipath_tid_info *ti)
 373{
 374        int ret = 0;
 375        u32 tid, porttid, cnt, limit, tidcnt;
 376        struct ipath_devdata *dd = pd->port_dd;
 377        u64 __iomem *tidbase;
 378        unsigned long tidmap[8];
 379
 380        if (!dd->ipath_pageshadow) {
 381                ret = -ENOMEM;
 382                goto done;
 383        }
 384
 385        if (copy_from_user(tidmap, (void __user *)(unsigned long)ti->tidmap,
 386                           sizeof tidmap)) {
 387                ret = -EFAULT;
 388                goto done;
 389        }
 390
 391        porttid = pd->port_port * dd->ipath_rcvtidcnt;
 392        tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
 393                                   dd->ipath_rcvtidbase +
 394                                   porttid * sizeof(*tidbase));
 395
 396        tidcnt = dd->ipath_rcvtidcnt;
 397        limit = sizeof(tidmap) * BITS_PER_BYTE;
 398        if (limit > tidcnt)
 399                /* just in case size changes in future */
 400                limit = tidcnt;
 401        tid = find_first_bit(tidmap, limit);
 402        ipath_cdbg(VERBOSE, "Port%u free %u tids; first bit (max=%d) "
 403                   "set is %d, porttid %u\n", pd->port_port, ti->tidcnt,
 404                   limit, tid, porttid);
 405        for (cnt = 0; tid < limit; tid++) {
 406                /*
 407                 * small optimization; if we detect a run of 3 or so without
 408                 * any set, use find_first_bit again.  That's mainly to
 409                 * accelerate the case where we wrapped, so we have some at
 410                 * the beginning, and some at the end, and a big gap
 411                 * in the middle.
 412                 */
 413                if (!test_bit(tid, tidmap))
 414                        continue;
 415                cnt++;
 416                if (dd->ipath_pageshadow[porttid + tid]) {
 417                        ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
 418                                   pd->port_pid, tid);
 419                        dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
 420                                            dd->ipath_tidinvalid);
 421                        ipath_release_user_pages(
 422                                &dd->ipath_pageshadow[porttid + tid], 1);
 423                        dd->ipath_pageshadow[porttid + tid] = NULL;
 424                        ipath_stats.sps_pageunlocks++;
 425                } else
 426                        ipath_dbg("Unused tid %u, ignoring\n", tid);
 427        }
 428        if (cnt != ti->tidcnt)
 429                ipath_dbg("passed in tidcnt %d, only %d bits set in map\n",
 430                          ti->tidcnt, cnt);
 431done:
 432        if (ret)
 433                ipath_dbg("Failed to unmap %u TID pages, failing with %d\n",
 434                          ti->tidcnt, -ret);
 435        return ret;
 436}
 437
 438/**
 439 * ipath_set_part_key - set a partition key
 440 * @pd: the port
 441 * @key: the key
 442 *
 443 * We can have up to 4 active at a time (other than the default, which is
 444 * always allowed).  This is somewhat tricky, since multiple ports may set
 445 * the same key, so we reference count them, and clean up at exit.  All 4
 446 * partition keys are packed into a single infinipath register.  It's an
 447 * error for a process to set the same pkey multiple times.  We provide no
 448 * mechanism to de-allocate a pkey at this time, we may eventually need to
 449 * do that.  I've used the atomic operations, and no locking, and only make
 450 * a single pass through what's available.  This should be more than
 451 * adequate for some time. I'll think about spinlocks or the like if and as
 452 * it's necessary.
 453 */
 454static int ipath_set_part_key(struct ipath_portdata *pd, u16 key)
 455{
 456        struct ipath_devdata *dd = pd->port_dd;
 457        int i, any = 0, pidx = -1;
 458        u16 lkey = key & 0x7FFF;
 459        int ret;
 460
 461        if (lkey == (IPATH_DEFAULT_P_KEY & 0x7FFF)) {
 462                /* nothing to do; this key always valid */
 463                ret = 0;
 464                goto bail;
 465        }
 466
 467        ipath_cdbg(VERBOSE, "p%u try to set pkey %hx, current keys "
 468                   "%hx:%x %hx:%x %hx:%x %hx:%x\n",
 469                   pd->port_port, key, dd->ipath_pkeys[0],
 470                   atomic_read(&dd->ipath_pkeyrefs[0]), dd->ipath_pkeys[1],
 471                   atomic_read(&dd->ipath_pkeyrefs[1]), dd->ipath_pkeys[2],
 472                   atomic_read(&dd->ipath_pkeyrefs[2]), dd->ipath_pkeys[3],
 473                   atomic_read(&dd->ipath_pkeyrefs[3]));
 474
 475        if (!lkey) {
 476                ipath_cdbg(PROC, "p%u tries to set key 0, not allowed\n",
 477                           pd->port_port);
 478                ret = -EINVAL;
 479                goto bail;
 480        }
 481
 482        /*
 483         * Set the full membership bit, because it has to be
 484         * set in the register or the packet, and it seems
 485         * cleaner to set in the register than to force all
 486         * callers to set it. (see bug 4331)
 487         */
 488        key |= 0x8000;
 489
 490        for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
 491                if (!pd->port_pkeys[i] && pidx == -1)
 492                        pidx = i;
 493                if (pd->port_pkeys[i] == key) {
 494                        ipath_cdbg(VERBOSE, "p%u tries to set same pkey "
 495                                   "(%x) more than once\n",
 496                                   pd->port_port, key);
 497                        ret = -EEXIST;
 498                        goto bail;
 499                }
 500        }
 501        if (pidx == -1) {
 502                ipath_dbg("All pkeys for port %u already in use, "
 503                          "can't set %x\n", pd->port_port, key);
 504                ret = -EBUSY;
 505                goto bail;
 506        }
 507        for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
 508                if (!dd->ipath_pkeys[i]) {
 509                        any++;
 510                        continue;
 511                }
 512                if (dd->ipath_pkeys[i] == key) {
 513                        atomic_t *pkrefs = &dd->ipath_pkeyrefs[i];
 514
 515                        if (atomic_inc_return(pkrefs) > 1) {
 516                                pd->port_pkeys[pidx] = key;
 517                                ipath_cdbg(VERBOSE, "p%u set key %x "
 518                                           "matches #%d, count now %d\n",
 519                                           pd->port_port, key, i,
 520                                           atomic_read(pkrefs));
 521                                ret = 0;
 522                                goto bail;
 523                        } else {
 524                                /*
 525                                 * lost race, decrement count, catch below
 526                                 */
 527                                atomic_dec(pkrefs);
 528                                ipath_cdbg(VERBOSE, "Lost race, count was "
 529                                           "0, after dec, it's %d\n",
 530                                           atomic_read(pkrefs));
 531                                any++;
 532                        }
 533                }
 534                if ((dd->ipath_pkeys[i] & 0x7FFF) == lkey) {
 535                        /*
 536                         * It makes no sense to have both the limited and
 537                         * full membership PKEY set at the same time since
 538                         * the unlimited one will disable the limited one.
 539                         */
 540                        ret = -EEXIST;
 541                        goto bail;
 542                }
 543        }
 544        if (!any) {
 545                ipath_dbg("port %u, all pkeys already in use, "
 546                          "can't set %x\n", pd->port_port, key);
 547                ret = -EBUSY;
 548                goto bail;
 549        }
 550        for (any = i = 0; i < ARRAY_SIZE(dd->ipath_pkeys); i++) {
 551                if (!dd->ipath_pkeys[i] &&
 552                    atomic_inc_return(&dd->ipath_pkeyrefs[i]) == 1) {
 553                        u64 pkey;
 554
 555                        /* for ipathstats, etc. */
 556                        ipath_stats.sps_pkeys[i] = lkey;
 557                        pd->port_pkeys[pidx] = dd->ipath_pkeys[i] = key;
 558                        pkey =
 559                                (u64) dd->ipath_pkeys[0] |
 560                                ((u64) dd->ipath_pkeys[1] << 16) |
 561                                ((u64) dd->ipath_pkeys[2] << 32) |
 562                                ((u64) dd->ipath_pkeys[3] << 48);
 563                        ipath_cdbg(PROC, "p%u set key %x in #%d, "
 564                                   "portidx %d, new pkey reg %llx\n",
 565                                   pd->port_port, key, i, pidx,
 566                                   (unsigned long long) pkey);
 567                        ipath_write_kreg(
 568                                dd, dd->ipath_kregs->kr_partitionkey, pkey);
 569
 570                        ret = 0;
 571                        goto bail;
 572                }
 573        }
 574        ipath_dbg("port %u, all pkeys already in use 2nd pass, "
 575                  "can't set %x\n", pd->port_port, key);
 576        ret = -EBUSY;
 577
 578bail:
 579        return ret;
 580}
 581
 582/**
 583 * ipath_manage_rcvq - manage a port's receive queue
 584 * @pd: the port
 585 * @start_stop: action to carry out
 586 *
 587 * start_stop == 0 disables receive on the port, for use in queue
 588 * overflow conditions.  start_stop==1 re-enables, to be used to
 589 * re-init the software copy of the head register
 590 */
 591static int ipath_manage_rcvq(struct ipath_portdata *pd, int start_stop)
 592{
 593        struct ipath_devdata *dd = pd->port_dd;
 594        u64 tval;
 595
 596        ipath_cdbg(PROC, "%sabling rcv for unit %u port %u\n",
 597                   start_stop ? "en" : "dis", dd->ipath_unit,
 598                   pd->port_port);
 599        /* atomically clear receive enable port. */
 600        if (start_stop) {
 601                /*
 602                 * On enable, force in-memory copy of the tail register to
 603                 * 0, so that protocol code doesn't have to worry about
 604                 * whether or not the chip has yet updated the in-memory
 605                 * copy or not on return from the system call. The chip
 606                 * always resets it's tail register back to 0 on a
 607                 * transition from disabled to enabled.  This could cause a
 608                 * problem if software was broken, and did the enable w/o
 609                 * the disable, but eventually the in-memory copy will be
 610                 * updated and correct itself, even in the face of software
 611                 * bugs.
 612                 */
 613                *pd->port_rcvhdrtail_kvaddr = 0;
 614                set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
 615                        &dd->ipath_rcvctrl);
 616        } else
 617                clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
 618                          &dd->ipath_rcvctrl);
 619        ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 620                         dd->ipath_rcvctrl);
 621        /* now be sure chip saw it before we return */
 622        tval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 623        if (start_stop) {
 624                /*
 625                 * And try to be sure that tail reg update has happened too.
 626                 * This should in theory interlock with the RXE changes to
 627                 * the tail register.  Don't assign it to the tail register
 628                 * in memory copy, since we could overwrite an update by the
 629                 * chip if we did.
 630                 */
 631                tval = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
 632        }
 633        /* always; new head should be equal to new tail; see above */
 634        return 0;
 635}
 636
 637static void ipath_clean_part_key(struct ipath_portdata *pd,
 638                                 struct ipath_devdata *dd)
 639{
 640        int i, j, pchanged = 0;
 641        u64 oldpkey;
 642
 643        /* for debugging only */
 644        oldpkey = (u64) dd->ipath_pkeys[0] |
 645                ((u64) dd->ipath_pkeys[1] << 16) |
 646                ((u64) dd->ipath_pkeys[2] << 32) |
 647                ((u64) dd->ipath_pkeys[3] << 48);
 648
 649        for (i = 0; i < ARRAY_SIZE(pd->port_pkeys); i++) {
 650                if (!pd->port_pkeys[i])
 651                        continue;
 652                ipath_cdbg(VERBOSE, "look for key[%d] %hx in pkeys\n", i,
 653                           pd->port_pkeys[i]);
 654                for (j = 0; j < ARRAY_SIZE(dd->ipath_pkeys); j++) {
 655                        /* check for match independent of the global bit */
 656                        if ((dd->ipath_pkeys[j] & 0x7fff) !=
 657                            (pd->port_pkeys[i] & 0x7fff))
 658                                continue;
 659                        if (atomic_dec_and_test(&dd->ipath_pkeyrefs[j])) {
 660                                ipath_cdbg(VERBOSE, "p%u clear key "
 661                                           "%x matches #%d\n",
 662                                           pd->port_port,
 663                                           pd->port_pkeys[i], j);
 664                                ipath_stats.sps_pkeys[j] =
 665                                        dd->ipath_pkeys[j] = 0;
 666                                pchanged++;
 667                        }
 668                        else ipath_cdbg(
 669                                VERBOSE, "p%u key %x matches #%d, "
 670                                "but ref still %d\n", pd->port_port,
 671                                pd->port_pkeys[i], j,
 672                                atomic_read(&dd->ipath_pkeyrefs[j]));
 673                        break;
 674                }
 675                pd->port_pkeys[i] = 0;
 676        }
 677        if (pchanged) {
 678                u64 pkey = (u64) dd->ipath_pkeys[0] |
 679                        ((u64) dd->ipath_pkeys[1] << 16) |
 680                        ((u64) dd->ipath_pkeys[2] << 32) |
 681                        ((u64) dd->ipath_pkeys[3] << 48);
 682                ipath_cdbg(VERBOSE, "p%u old pkey reg %llx, "
 683                           "new pkey reg %llx\n", pd->port_port,
 684                           (unsigned long long) oldpkey,
 685                           (unsigned long long) pkey);
 686                ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
 687                                 pkey);
 688        }
 689}
 690
 691/**
 692 * ipath_create_user_egr - allocate eager TID buffers
 693 * @pd: the port to allocate TID buffers for
 694 *
 695 * This routine is now quite different for user and kernel, because
 696 * the kernel uses skb's, for the accelerated network performance
 697 * This is the user port version
 698 *
 699 * Allocate the eager TID buffers and program them into infinipath
 700 * They are no longer completely contiguous, we do multiple allocation
 701 * calls.
 702 */
 703static int ipath_create_user_egr(struct ipath_portdata *pd)
 704{
 705        struct ipath_devdata *dd = pd->port_dd;
 706        unsigned e, egrcnt, alloced, egrperchunk, chunk, egrsize, egroff;
 707        size_t size;
 708        int ret;
 709        gfp_t gfp_flags;
 710
 711        /*
 712         * GFP_USER, but without GFP_FS, so buffer cache can be
 713         * coalesced (we hope); otherwise, even at order 4,
 714         * heavy filesystem activity makes these fail, and we can
 715         * use compound pages.
 716         */
 717        gfp_flags = __GFP_WAIT | __GFP_IO | __GFP_COMP;
 718
 719        egrcnt = dd->ipath_rcvegrcnt;
 720        /* TID number offset for this port */
 721        egroff = pd->port_port * egrcnt;
 722        egrsize = dd->ipath_rcvegrbufsize;
 723        ipath_cdbg(VERBOSE, "Allocating %d egr buffers, at egrtid "
 724                   "offset %x, egrsize %u\n", egrcnt, egroff, egrsize);
 725
 726        /*
 727         * to avoid wasting a lot of memory, we allocate 32KB chunks of
 728         * physically contiguous memory, advance through it until used up
 729         * and then allocate more.  Of course, we need memory to store those
 730         * extra pointers, now.  Started out with 256KB, but under heavy
 731         * memory pressure (creating large files and then copying them over
 732         * NFS while doing lots of MPI jobs), we hit some allocation
 733         * failures, even though we can sleep...  (2.6.10) Still get
 734         * failures at 64K.  32K is the lowest we can go without wasting
 735         * additional memory.
 736         */
 737        size = 0x8000;
 738        alloced = ALIGN(egrsize * egrcnt, size);
 739        egrperchunk = size / egrsize;
 740        chunk = (egrcnt + egrperchunk - 1) / egrperchunk;
 741        pd->port_rcvegrbuf_chunks = chunk;
 742        pd->port_rcvegrbufs_perchunk = egrperchunk;
 743        pd->port_rcvegrbuf_size = size;
 744        pd->port_rcvegrbuf = vmalloc(chunk * sizeof(pd->port_rcvegrbuf[0]));
 745        if (!pd->port_rcvegrbuf) {
 746                ret = -ENOMEM;
 747                goto bail;
 748        }
 749        pd->port_rcvegrbuf_phys =
 750                vmalloc(chunk * sizeof(pd->port_rcvegrbuf_phys[0]));
 751        if (!pd->port_rcvegrbuf_phys) {
 752                ret = -ENOMEM;
 753                goto bail_rcvegrbuf;
 754        }
 755        for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
 756
 757                pd->port_rcvegrbuf[e] = dma_alloc_coherent(
 758                        &dd->pcidev->dev, size, &pd->port_rcvegrbuf_phys[e],
 759                        gfp_flags);
 760
 761                if (!pd->port_rcvegrbuf[e]) {
 762                        ret = -ENOMEM;
 763                        goto bail_rcvegrbuf_phys;
 764                }
 765        }
 766
 767        pd->port_rcvegr_phys = pd->port_rcvegrbuf_phys[0];
 768
 769        for (e = chunk = 0; chunk < pd->port_rcvegrbuf_chunks; chunk++) {
 770                dma_addr_t pa = pd->port_rcvegrbuf_phys[chunk];
 771                unsigned i;
 772
 773                for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) {
 774                        dd->ipath_f_put_tid(dd, e + egroff +
 775                                            (u64 __iomem *)
 776                                            ((char __iomem *)
 777                                             dd->ipath_kregbase +
 778                                             dd->ipath_rcvegrbase), 0, pa);
 779                        pa += egrsize;
 780                }
 781                cond_resched(); /* don't hog the cpu */
 782        }
 783
 784        ret = 0;
 785        goto bail;
 786
 787bail_rcvegrbuf_phys:
 788        for (e = 0; e < pd->port_rcvegrbuf_chunks &&
 789                pd->port_rcvegrbuf[e]; e++) {
 790                dma_free_coherent(&dd->pcidev->dev, size,
 791                                  pd->port_rcvegrbuf[e],
 792                                  pd->port_rcvegrbuf_phys[e]);
 793
 794        }
 795        vfree(pd->port_rcvegrbuf_phys);
 796        pd->port_rcvegrbuf_phys = NULL;
 797bail_rcvegrbuf:
 798        vfree(pd->port_rcvegrbuf);
 799        pd->port_rcvegrbuf = NULL;
 800bail:
 801        return ret;
 802}
 803
 804static int ipath_do_user_init(struct ipath_portdata *pd,
 805                              const struct ipath_user_info *uinfo)
 806{
 807        int ret = 0;
 808        struct ipath_devdata *dd = pd->port_dd;
 809        u32 head32;
 810
 811        /* for now, if major version is different, bail */
 812        if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
 813                dev_info(&dd->pcidev->dev,
 814                         "User major version %d not same as driver "
 815                         "major %d\n", uinfo->spu_userversion >> 16,
 816                         IPATH_USER_SWMAJOR);
 817                ret = -ENODEV;
 818                goto done;
 819        }
 820
 821        if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR)
 822                ipath_dbg("User minor version %d not same as driver "
 823                          "minor %d\n", uinfo->spu_userversion & 0xffff,
 824                          IPATH_USER_SWMINOR);
 825
 826        if (uinfo->spu_rcvhdrsize) {
 827                ret = ipath_setrcvhdrsize(dd, uinfo->spu_rcvhdrsize);
 828                if (ret)
 829                        goto done;
 830        }
 831
 832        /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
 833
 834        /* for right now, kernel piobufs are at end, so port 1 is at 0 */
 835        pd->port_piobufs = dd->ipath_piobufbase +
 836                dd->ipath_pbufsport * (pd->port_port -
 837                                       1) * dd->ipath_palign;
 838        ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
 839                   pd->port_port, pd->port_piobufs);
 840
 841        /*
 842         * Now allocate the rcvhdr Q and eager TIDs; skip the TID
 843         * array for time being.  If pd->port_port > chip-supported,
 844         * we need to do extra stuff here to handle by handling overflow
 845         * through port 0, someday
 846         */
 847        ret = ipath_create_rcvhdrq(dd, pd);
 848        if (!ret)
 849                ret = ipath_create_user_egr(pd);
 850        if (ret)
 851                goto done;
 852
 853        /*
 854         * set the eager head register for this port to the current values
 855         * of the tail pointers, since we don't know if they were
 856         * updated on last use of the port.
 857         */
 858        head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
 859        ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
 860        dd->ipath_lastegrheads[pd->port_port] = -1;
 861        dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
 862        ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
 863                pd->port_port, head32);
 864        pd->port_tidcursor = 0; /* start at beginning after open */
 865        /*
 866         * now enable the port; the tail registers will be written to memory
 867         * by the chip as soon as it sees the write to
 868         * dd->ipath_kregs->kr_rcvctrl.  The update only happens on
 869         * transition from 0 to 1, so clear it first, then set it as part of
 870         * enabling the port.  This will (very briefly) affect any other
 871         * open ports, but it shouldn't be long enough to be an issue.
 872         * We explictly set the in-memory copy to 0 beforehand, so we don't
 873         * have to wait to be sure the DMA update has happened.
 874         */
 875        *pd->port_rcvhdrtail_kvaddr = 0ULL;
 876        set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
 877                &dd->ipath_rcvctrl);
 878        ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 879                         dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
 880        ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 881                         dd->ipath_rcvctrl);
 882done:
 883        return ret;
 884}
 885
 886
 887/* common code for the mappings on dma_alloc_coherent mem */
 888static int ipath_mmap_mem(struct vm_area_struct *vma,
 889                             struct ipath_portdata *pd, unsigned len,
 890                             int write_ok, dma_addr_t addr, char *what)
 891{
 892        struct ipath_devdata *dd = pd->port_dd;
 893        unsigned pfn = (unsigned long)addr >> PAGE_SHIFT;
 894        int ret;
 895
 896        if ((vma->vm_end - vma->vm_start) > len) {
 897                dev_info(&dd->pcidev->dev,
 898                         "FAIL on %s: len %lx > %x\n", what,
 899                         vma->vm_end - vma->vm_start, len);
 900                ret = -EFAULT;
 901                goto bail;
 902        }
 903
 904        if (!write_ok) {
 905                if (vma->vm_flags & VM_WRITE) {
 906                        dev_info(&dd->pcidev->dev,
 907                                 "%s must be mapped readonly\n", what);
 908                        ret = -EPERM;
 909                        goto bail;
 910                }
 911
 912                /* don't allow them to later change with mprotect */
 913                vma->vm_flags &= ~VM_MAYWRITE;
 914        }
 915
 916        ret = remap_pfn_range(vma, vma->vm_start, pfn,
 917                              len, vma->vm_page_prot);
 918        if (ret)
 919                dev_info(&dd->pcidev->dev,
 920                         "%s port%u mmap of %lx, %x bytes r%c failed: %d\n",
 921                         what, pd->port_port, (unsigned long)addr, len,
 922                         write_ok?'w':'o', ret);
 923        else
 924                ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n",
 925                        what, pd->port_port, (unsigned long)addr, len,
 926                         write_ok?'w':'o');
 927bail:
 928        return ret;
 929}
 930
 931static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
 932                     u64 ureg)
 933{
 934        unsigned long phys;
 935        int ret;
 936
 937        /*
 938         * This is real hardware, so use io_remap.  This is the mechanism
 939         * for the user process to update the head registers for their port
 940         * in the chip.
 941         */
 942        if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
 943                dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
 944                         "%lx > PAGE\n", vma->vm_end - vma->vm_start);
 945                ret = -EFAULT;
 946        } else {
 947                phys = dd->ipath_physaddr + ureg;
 948                vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 949
 950                vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
 951                ret = io_remap_pfn_range(vma, vma->vm_start,
 952                                         phys >> PAGE_SHIFT,
 953                                         vma->vm_end - vma->vm_start,
 954                                         vma->vm_page_prot);
 955        }
 956        return ret;
 957}
 958
 959static int mmap_piobufs(struct vm_area_struct *vma,
 960                        struct ipath_devdata *dd,
 961                        struct ipath_portdata *pd)
 962{
 963        unsigned long phys;
 964        int ret;
 965
 966        /*
 967         * When we map the PIO buffers in the chip, we want to map them as
 968         * writeonly, no read possible.   This prevents access to previous
 969         * process data, and catches users who might try to read the i/o
 970         * space due to a bug.
 971         */
 972        if ((vma->vm_end - vma->vm_start) >
 973            (dd->ipath_pbufsport * dd->ipath_palign)) {
 974                dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
 975                         "reqlen %lx > PAGE\n",
 976                         vma->vm_end - vma->vm_start);
 977                ret = -EFAULT;
 978                goto bail;
 979        }
 980
 981        phys = dd->ipath_physaddr + pd->port_piobufs;
 982
 983        /*
 984         * Don't mark this as non-cached, or we don't get the
 985         * write combining behavior we want on the PIO buffers!
 986         */
 987
 988        if (vma->vm_flags & VM_READ) {
 989                dev_info(&dd->pcidev->dev,
 990                         "Can't map piobufs as readable (flags=%lx)\n",
 991                         vma->vm_flags);
 992                ret = -EPERM;
 993                goto bail;
 994        }
 995
 996        /* don't allow them to later change to readable with mprotect */
 997        vma->vm_flags &= ~VM_MAYREAD;
 998        vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
 999
1000        ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
1001                                 vma->vm_end - vma->vm_start,
1002                                 vma->vm_page_prot);
1003bail:
1004        return ret;
1005}
1006
1007static int mmap_rcvegrbufs(struct vm_area_struct *vma,
1008                           struct ipath_portdata *pd)
1009{
1010        struct ipath_devdata *dd = pd->port_dd;
1011        unsigned long start, size;
1012        size_t total_size, i;
1013        dma_addr_t *phys;
1014        int ret;
1015
1016        size = pd->port_rcvegrbuf_size;
1017        total_size = pd->port_rcvegrbuf_chunks * size;
1018        if ((vma->vm_end - vma->vm_start) > total_size) {
1019                dev_info(&dd->pcidev->dev, "FAIL on egr bufs: "
1020                         "reqlen %lx > actual %lx\n",
1021                         vma->vm_end - vma->vm_start,
1022                         (unsigned long) total_size);
1023                ret = -EFAULT;
1024                goto bail;
1025        }
1026
1027        if (vma->vm_flags & VM_WRITE) {
1028                dev_info(&dd->pcidev->dev, "Can't map eager buffers as "
1029                         "writable (flags=%lx)\n", vma->vm_flags);
1030                ret = -EPERM;
1031                goto bail;
1032        }
1033        /* don't allow them to later change to writeable with mprotect */
1034        vma->vm_flags &= ~VM_MAYWRITE;
1035
1036        start = vma->vm_start;
1037        phys = pd->port_rcvegrbuf_phys;
1038
1039        for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
1040                ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
1041                                      size, vma->vm_page_prot);
1042                if (ret < 0)
1043                        goto bail;
1044        }
1045        ret = 0;
1046
1047bail:
1048        return ret;
1049}
1050
1051/**
1052 * ipath_mmap - mmap various structures into user space
1053 * @fp: the file pointer
1054 * @vma: the VM area
1055 *
1056 * We use this to have a shared buffer between the kernel and the user code
1057 * for the rcvhdr queue, egr buffers, and the per-port user regs and pio
1058 * buffers in the chip.  We have the open and close entries so we can bump
1059 * the ref count and keep the driver from being unloaded while still mapped.
1060 */
1061static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1062{
1063        struct ipath_portdata *pd;
1064        struct ipath_devdata *dd;
1065        u64 pgaddr, ureg;
1066        int ret;
1067
1068        pd = port_fp(fp);
1069        dd = pd->port_dd;
1070
1071        /*
1072         * This is the ipath_do_user_init() code, mapping the shared buffers
1073         * into the user process. The address referred to by vm_pgoff is the
1074         * virtual, not physical, address; we only do one mmap for each
1075         * space mapped.
1076         */
1077        pgaddr = vma->vm_pgoff << PAGE_SHIFT;
1078
1079        /*
1080         * Must fit in 40 bits for our hardware; some checked elsewhere,
1081         * but we'll be paranoid.  Check for 0 is mostly in case one of the
1082         * allocations failed, but user called mmap anyway.   We want to catch
1083         * that before it can match.
1084         */
1085        if (!pgaddr || pgaddr >= (1ULL<<40))  {
1086                ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n",
1087                        (unsigned long long)pgaddr, vma->vm_start, vma->vm_end);
1088                return -EINVAL;
1089        }
1090
1091        /* just the offset of the port user registers, not physical addr */
1092        ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1093
1094        ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
1095                   (unsigned long long) pgaddr, vma->vm_start,
1096                   vma->vm_end - vma->vm_start);
1097
1098        if (vma->vm_start & (PAGE_SIZE-1)) {
1099                ipath_dev_err(dd,
1100                        "vm_start not aligned: %lx, end=%lx phys %lx\n",
1101                        vma->vm_start, vma->vm_end, (unsigned long)pgaddr);
1102                ret = -EINVAL;
1103        }
1104        else if (pgaddr == ureg)
1105                ret = mmap_ureg(vma, dd, ureg);
1106        else if (pgaddr == pd->port_piobufs)
1107                ret = mmap_piobufs(vma, dd, pd);
1108        else if (pgaddr == (u64) pd->port_rcvegr_phys)
1109                ret = mmap_rcvegrbufs(vma, pd);
1110        else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
1111                /*
1112                 * The rcvhdrq itself; readonly except on HT-400 (so have
1113                 * to allow writable mapping), multiple pages, contiguous
1114                 * from an i/o perspective.
1115                 */
1116                unsigned total_size =
1117                        ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize
1118                           * sizeof(u32), PAGE_SIZE);
1119                ret = ipath_mmap_mem(vma, pd, total_size, 1,
1120                                     pd->port_rcvhdrq_phys,
1121                                     "rcvhdrq");
1122        }
1123        else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys)
1124                /* in-memory copy of rcvhdrq tail register */
1125                ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1126                                     pd->port_rcvhdrqtailaddr_phys,
1127                                     "rcvhdrq tail");
1128        else if (pgaddr == dd->ipath_pioavailregs_phys)
1129                /* in-memory copy of pioavail registers */
1130                ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1131                                     dd->ipath_pioavailregs_phys,
1132                                     "pioavail registers");
1133        else
1134                ret = -EINVAL;
1135
1136        vma->vm_private_data = NULL;
1137
1138        if (ret < 0)
1139                dev_info(&dd->pcidev->dev,
1140                         "Failure %d on addr %lx, off %lx\n",
1141                         -ret, vma->vm_start, vma->vm_pgoff);
1142
1143        return ret;
1144}
1145
1146static unsigned int ipath_poll(struct file *fp,
1147                               struct poll_table_struct *pt)
1148{
1149        struct ipath_portdata *pd;
1150        u32 head, tail;
1151        int bit;
1152        struct ipath_devdata *dd;
1153
1154        pd = port_fp(fp);
1155        dd = pd->port_dd;
1156
1157        bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
1158        set_bit(bit, &dd->ipath_rcvctrl);
1159
1160        /*
1161         * Before blocking, make sure that head is still == tail,
1162         * reading from the chip, so we can be sure the interrupt
1163         * enable has made it to the chip.  If not equal, disable
1164         * interrupt again and return immediately.  This avoids races,
1165         * and the overhead of the chip read doesn't matter much at
1166         * this point, since we are waiting for something anyway.
1167         */
1168
1169        ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1170                         dd->ipath_rcvctrl);
1171
1172        head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
1173        tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
1174
1175        if (tail == head) {
1176                set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
1177                if(dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
1178                        (void)ipath_write_ureg(dd, ur_rcvhdrhead,
1179                                               dd->ipath_rhdrhead_intr_off
1180                                               | head, pd->port_port);
1181                poll_wait(fp, &pd->port_wait, pt);
1182
1183                if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
1184                        /* timed out, no packets received */
1185                        clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
1186                        pd->port_rcvwait_to++;
1187                }
1188        }
1189        else {
1190                /* it's already happened; don't do wait_event overhead */
1191                pd->port_rcvnowait++;
1192        }
1193
1194        clear_bit(bit, &dd->ipath_rcvctrl);
1195        ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
1196                         dd->ipath_rcvctrl);
1197
1198        return 0;
1199}
1200
1201static int try_alloc_port(struct ipath_devdata *dd, int port,
1202                          struct file *fp)
1203{
1204        int ret;
1205
1206        if (!dd->ipath_pd[port]) {
1207                void *p, *ptmp;
1208
1209                p = kzalloc(sizeof(struct ipath_portdata), GFP_KERNEL);
1210
1211                /*
1212                 * Allocate memory for use in ipath_tid_update() just once
1213                 * at open, not per call.  Reduces cost of expected send
1214                 * setup.
1215                 */
1216                ptmp = kmalloc(dd->ipath_rcvtidcnt * sizeof(u16) +
1217                               dd->ipath_rcvtidcnt * sizeof(struct page **),
1218                               GFP_KERNEL);
1219                if (!p || !ptmp) {
1220                        ipath_dev_err(dd, "Unable to allocate portdata "
1221                                      "memory, failing open\n");
1222                        ret = -ENOMEM;
1223                        kfree(p);
1224                        kfree(ptmp);
1225                        goto bail;
1226                }
1227                dd->ipath_pd[port] = p;
1228                dd->ipath_pd[port]->port_port = port;
1229                dd->ipath_pd[port]->port_dd = dd;
1230                dd->ipath_pd[port]->port_tid_pg_list = ptmp;
1231                init_waitqueue_head(&dd->ipath_pd[port]->port_wait);
1232        }
1233        if (!dd->ipath_pd[port]->port_cnt) {
1234                dd->ipath_pd[port]->port_cnt = 1;
1235                fp->private_data = (void *) dd->ipath_pd[port];
1236                ipath_cdbg(PROC, "%s[%u] opened unit:port %u:%u\n",
1237                           current->comm, current->pid, dd->ipath_unit,
1238                           port);
1239                dd->ipath_pd[port]->port_pid = current->pid;
1240                strncpy(dd->ipath_pd[port]->port_comm, current->comm,
1241                        sizeof(dd->ipath_pd[port]->port_comm));
1242                ipath_stats.sps_ports++;
1243                ret = 0;
1244                goto bail;
1245        }
1246        ret = -EBUSY;
1247
1248bail:
1249        return ret;
1250}
1251
1252static inline int usable(struct ipath_devdata *dd)
1253{
1254        return dd &&
1255                (dd->ipath_flags & IPATH_PRESENT) &&
1256                dd->ipath_kregbase &&
1257                dd->ipath_lid &&
1258                !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_DISABLED
1259                                     | IPATH_LINKUNK));
1260}
1261
1262static int find_free_port(int unit, struct file *fp)
1263{
1264        struct ipath_devdata *dd = ipath_lookup(unit);
1265        int ret, i;
1266
1267        if (!dd) {
1268                ret = -ENODEV;
1269                goto bail;
1270        }
1271
1272        if (!usable(dd)) {
1273                ret = -ENETDOWN;
1274                goto bail;
1275        }
1276
1277        for (i = 0; i < dd->ipath_cfgports; i++) {
1278                ret = try_alloc_port(dd, i, fp);
1279                if (ret != -EBUSY)
1280                        goto bail;
1281        }
1282        ret = -EBUSY;
1283
1284bail:
1285        return ret;
1286}
1287
1288static int find_best_unit(struct file *fp)
1289{
1290        int ret = 0, i, prefunit = -1, devmax;
1291        int maxofallports, npresent, nup;
1292        int ndev;
1293
1294        (void) ipath_count_units(&npresent, &nup, &maxofallports);
1295
1296        /*
1297         * This code is present to allow a knowledgeable person to
1298         * specify the layout of processes to processors before opening
1299         * this driver, and then we'll assign the process to the "closest"
1300         * HT-400 to that processor (we assume reasonable connectivity,
1301         * for now).  This code assumes that if affinity has been set
1302         * before this point, that at most one cpu is set; for now this
1303         * is reasonable.  I check for both cpus_empty() and cpus_full(),
1304         * in case some kernel variant sets none of the bits when no
1305         * affinity is set.  2.6.11 and 12 kernels have all present
1306         * cpus set.  Some day we'll have to fix it up further to handle
1307         * a cpu subset.  This algorithm fails for two HT-400's connected
1308         * in tunnel fashion.  Eventually this needs real topology
1309         * information.  There may be some issues with dual core numbering
1310         * as well.  This needs more work prior to release.
1311         */
1312        if (!cpus_empty(current->cpus_allowed) &&
1313            !cpus_full(current->cpus_allowed)) {
1314                int ncpus = num_online_cpus(), curcpu = -1;
1315                for (i = 0; i < ncpus; i++)
1316                        if (cpu_isset(i, current->cpus_allowed)) {
1317                                ipath_cdbg(PROC, "%s[%u] affinity set for "
1318                                           "cpu %d\n", current->comm,
1319                                           current->pid, i);
1320                                curcpu = i;
1321                        }
1322                if (curcpu != -1) {
1323                        if (npresent) {
1324                                prefunit = curcpu / (ncpus / npresent);
1325                                ipath_dbg("%s[%u] %d chips, %d cpus, "
1326                                          "%d cpus/chip, select unit %d\n",
1327                                          current->comm, current->pid,
1328                                          npresent, ncpus, ncpus / npresent,
1329                                          prefunit);
1330                        }
1331                }
1332        }
1333
1334        /*
1335         * user ports start at 1, kernel port is 0
1336         * For now, we do round-robin access across all chips
1337         */
1338
1339        if (prefunit != -1)
1340                devmax = prefunit + 1;
1341        else
1342                devmax = ipath_count_units(NULL, NULL, NULL);
1343recheck:
1344        for (i = 1; i < maxofallports; i++) {
1345                for (ndev = prefunit != -1 ? prefunit : 0; ndev < devmax;
1346                     ndev++) {
1347                        struct ipath_devdata *dd = ipath_lookup(ndev);
1348
1349                        if (!usable(dd))
1350                                continue; /* can't use this unit */
1351                        if (i >= dd->ipath_cfgports)
1352                                /*
1353                                 * Maxed out on users of this unit. Try
1354                                 * next.
1355                                 */
1356                                continue;
1357                        ret = try_alloc_port(dd, i, fp);
1358                        if (!ret)
1359                                goto done;
1360                }
1361        }
1362
1363        if (npresent) {
1364                if (nup == 0) {
1365                        ret = -ENETDOWN;
1366                        ipath_dbg("No ports available (none initialized "
1367                                  "and ready)\n");
1368                } else {
1369                        if (prefunit > 0) {
1370                                /* if started above 0, retry from 0 */
1371                                ipath_cdbg(PROC,
1372                                           "%s[%u] no ports on prefunit "
1373                                           "%d, clear and re-check\n",
1374                                           current->comm, current->pid,
1375                                           prefunit);
1376                                devmax = ipath_count_units(NULL, NULL,
1377                                                           NULL);
1378                                prefunit = -1;
1379                                goto recheck;
1380                        }
1381                        ret = -EBUSY;
1382                        ipath_dbg("No ports available\n");
1383                }
1384        } else {
1385                ret = -ENXIO;
1386                ipath_dbg("No boards found\n");
1387        }
1388
1389done:
1390        return ret;
1391}
1392
1393static int ipath_open(struct inode *in, struct file *fp)
1394{
1395        int ret, user_minor;
1396
1397        mutex_lock(&ipath_mutex);
1398
1399        user_minor = iminor(in) - IPATH_USER_MINOR_BASE;
1400        ipath_cdbg(VERBOSE, "open on dev %lx (minor %d)\n",
1401                   (long)in->i_rdev, user_minor);
1402
1403        if (user_minor)
1404                ret = find_free_port(user_minor - 1, fp);
1405        else
1406                ret = find_best_unit(fp);
1407
1408        mutex_unlock(&ipath_mutex);
1409        return ret;
1410}
1411
1412/**
1413 * unlock_exptid - unlock any expected TID entries port still had in use
1414 * @pd: port
1415 *
1416 * We don't actually update the chip here, because we do a bulk update
1417 * below, using ipath_f_clear_tids.
1418 */
1419static void unlock_expected_tids(struct ipath_portdata *pd)
1420{
1421        struct ipath_devdata *dd = pd->port_dd;
1422        int port_tidbase = pd->port_port * dd->ipath_rcvtidcnt;
1423        int i, cnt = 0, maxtid = port_tidbase + dd->ipath_rcvtidcnt;
1424
1425        ipath_cdbg(VERBOSE, "Port %u unlocking any locked expTID pages\n",
1426                   pd->port_port);
1427        for (i = port_tidbase; i < maxtid; i++) {
1428                if (!dd->ipath_pageshadow[i])
1429                        continue;
1430
1431                ipath_release_user_pages_on_close(&dd->ipath_pageshadow[i],
1432                                                  1);
1433                dd->ipath_pageshadow[i] = NULL;
1434                cnt++;
1435                ipath_stats.sps_pageunlocks++;
1436        }
1437        if (cnt)
1438                ipath_cdbg(VERBOSE, "Port %u locked %u expTID entries\n",
1439                           pd->port_port, cnt);
1440
1441        if (ipath_stats.sps_pagelocks || ipath_stats.sps_pageunlocks)
1442                ipath_cdbg(VERBOSE, "%llu pages locked, %llu unlocked\n",
1443                           (unsigned long long) ipath_stats.sps_pagelocks,
1444                           (unsigned long long)
1445                           ipath_stats.sps_pageunlocks);
1446}
1447
1448static int ipath_close(struct inode *in, struct file *fp)
1449{
1450        int ret = 0;
1451        struct ipath_portdata *pd;
1452        struct ipath_devdata *dd;
1453        unsigned port;
1454
1455        ipath_cdbg(VERBOSE, "close on dev %lx, private data %p\n",
1456                   (long)in->i_rdev, fp->private_data);
1457
1458        mutex_lock(&ipath_mutex);
1459
1460        pd = port_fp(fp);
1461        port = pd->port_port;
1462        fp->private_data = NULL;
1463        dd = pd->port_dd;
1464
1465        if (pd->port_hdrqfull) {
1466                ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
1467                           "during run\n", pd->port_comm, pd->port_pid,
1468                           pd->port_hdrqfull);
1469                pd->port_hdrqfull = 0;
1470        }
1471
1472        if (pd->port_rcvwait_to || pd->port_piowait_to
1473            || pd->port_rcvnowait || pd->port_pionowait) {
1474                ipath_cdbg(VERBOSE, "port%u, %u rcv, %u pio wait timeo; "
1475                           "%u rcv %u, pio already\n",
1476                           pd->port_port, pd->port_rcvwait_to,
1477                           pd->port_piowait_to, pd->port_rcvnowait,
1478                           pd->port_pionowait);
1479                pd->port_rcvwait_to = pd->port_piowait_to =
1480                        pd->port_rcvnowait = pd->port_pionowait = 0;
1481        }
1482        if (pd->port_flag) {
1483                ipath_dbg("port %u port_flag still set to 0x%lx\n",
1484                          pd->port_port, pd->port_flag);
1485                pd->port_flag = 0;
1486        }
1487
1488        if (dd->ipath_kregbase) {
1489                int i;
1490                /* atomically clear receive enable port. */
1491                clear_bit(INFINIPATH_R_PORTENABLE_SHIFT + port,
1492                          &dd->ipath_rcvctrl);
1493                ipath_write_kreg( dd, dd->ipath_kregs->kr_rcvctrl,
1494                        dd->ipath_rcvctrl);
1495                /* and read back from chip to be sure that nothing
1496                 * else is in flight when we do the rest */
1497                (void)ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1498
1499                /* clean up the pkeys for this port user */
1500                ipath_clean_part_key(pd, dd);
1501
1502
1503                /*
1504                 * be paranoid, and never write 0's to these, just use an
1505                 * unused part of the port 0 tail page.  Of course,
1506                 * rcvhdraddr points to a large chunk of memory, so this
1507                 * could still trash things, but at least it won't trash
1508                 * page 0, and by disabling the port, it should stop "soon",
1509                 * even if a packet or two is in already in flight after we
1510                 * disabled the port.
1511                 */
1512                ipath_write_kreg_port(dd,
1513                        dd->ipath_kregs->kr_rcvhdrtailaddr, port,
1514                        dd->ipath_dummy_hdrq_phys);
1515                ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
1516                        pd->port_port, dd->ipath_dummy_hdrq_phys);
1517
1518                i = dd->ipath_pbufsport * (port - 1);
1519                ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
1520
1521                if (dd->ipath_pageshadow)
1522                        unlock_expected_tids(pd);
1523                ipath_stats.sps_ports--;
1524                ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
1525                           pd->port_comm, pd->port_pid,
1526                           dd->ipath_unit, port);
1527
1528                dd->ipath_f_clear_tids(dd, pd->port_port);
1529        }
1530
1531        pd->port_cnt = 0;
1532        pd->port_pid = 0;
1533
1534        dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
1535        mutex_unlock(&ipath_mutex);
1536        ipath_free_pddata(dd, pd); /* after releasing the mutex */
1537
1538        return ret;
1539}
1540
1541static int ipath_port_info(struct ipath_portdata *pd,
1542                           struct ipath_port_info __user *uinfo)
1543{
1544        struct ipath_port_info info;
1545        int nup;
1546        int ret;
1547
1548        (void) ipath_count_units(NULL, &nup, NULL);
1549        info.num_active = nup;
1550        info.unit = pd->port_dd->ipath_unit;
1551        info.port = pd->port_port;
1552
1553        if (copy_to_user(uinfo, &info, sizeof(info))) {
1554                ret = -EFAULT;
1555                goto bail;
1556        }
1557        ret = 0;
1558
1559bail:
1560        return ret;
1561}
1562
1563static ssize_t ipath_write(struct file *fp, const char __user *data,
1564                           size_t count, loff_t *off)
1565{
1566        const struct ipath_cmd __user *ucmd;
1567        struct ipath_portdata *pd;
1568        const void __user *src;
1569        size_t consumed, copy;
1570        struct ipath_cmd cmd;
1571        ssize_t ret = 0;
1572        void *dest;
1573
1574        if (count < sizeof(cmd.type)) {
1575                ret = -EINVAL;
1576                goto bail;
1577        }
1578
1579        ucmd = (const struct ipath_cmd __user *) data;
1580
1581        if (copy_from_user(&cmd.type, &ucmd->type, sizeof(cmd.type))) {
1582                ret = -EFAULT;
1583                goto bail;
1584        }
1585
1586        consumed = sizeof(cmd.type);
1587
1588        switch (cmd.type) {
1589        case IPATH_CMD_USER_INIT:
1590                copy = sizeof(cmd.cmd.user_info);
1591                dest = &cmd.cmd.user_info;
1592                src = &ucmd->cmd.user_info;
1593                break;
1594        case IPATH_CMD_RECV_CTRL:
1595                copy = sizeof(cmd.cmd.recv_ctrl);
1596                dest = &cmd.cmd.recv_ctrl;
1597                src = &ucmd->cmd.recv_ctrl;
1598                break;
1599        case IPATH_CMD_PORT_INFO:
1600                copy = sizeof(cmd.cmd.port_info);
1601                dest = &cmd.cmd.port_info;
1602                src = &ucmd->cmd.port_info;
1603                break;
1604        case IPATH_CMD_TID_UPDATE:
1605        case IPATH_CMD_TID_FREE:
1606                copy = sizeof(cmd.cmd.tid_info);
1607                dest = &cmd.cmd.tid_info;
1608                src = &ucmd->cmd.tid_info;
1609                break;
1610        case IPATH_CMD_SET_PART_KEY:
1611                copy = sizeof(cmd.cmd.part_key);
1612                dest = &cmd.cmd.part_key;
1613                src = &ucmd->cmd.part_key;
1614                break;
1615        default:
1616                ret = -EINVAL;
1617                goto bail;
1618        }
1619
1620        if ((count - consumed) < copy) {
1621                ret = -EINVAL;
1622                goto bail;
1623        }
1624
1625        if (copy_from_user(dest, src, copy)) {
1626                ret = -EFAULT;
1627                goto bail;
1628        }
1629
1630        consumed += copy;
1631        pd = port_fp(fp);
1632
1633        switch (cmd.type) {
1634        case IPATH_CMD_USER_INIT:
1635                ret = ipath_do_user_init(pd, &cmd.cmd.user_info);
1636                if (ret < 0)
1637                        goto bail;
1638                ret = ipath_get_base_info(
1639                        pd, (void __user *) (unsigned long)
1640                        cmd.cmd.user_info.spu_base_info,
1641                        cmd.cmd.user_info.spu_base_info_size);
1642                break;
1643        case IPATH_CMD_RECV_CTRL:
1644                ret = ipath_manage_rcvq(pd, cmd.cmd.recv_ctrl);
1645                break;
1646        case IPATH_CMD_PORT_INFO:
1647                ret = ipath_port_info(pd,
1648                                      (struct ipath_port_info __user *)
1649                                      (unsigned long) cmd.cmd.port_info);
1650                break;
1651        case IPATH_CMD_TID_UPDATE:
1652                ret = ipath_tid_update(pd, &cmd.cmd.tid_info);
1653                break;
1654        case IPATH_CMD_TID_FREE:
1655                ret = ipath_tid_free(pd, &cmd.cmd.tid_info);
1656                break;
1657        case IPATH_CMD_SET_PART_KEY:
1658                ret = ipath_set_part_key(pd, cmd.cmd.part_key);
1659                break;
1660        }
1661
1662        if (ret >= 0)
1663                ret = consumed;
1664
1665bail:
1666        return ret;
1667}
1668
1669static struct class *ipath_class;
1670
1671static int init_cdev(int minor, char *name, struct file_operations *fops,
1672                     struct cdev **cdevp, struct class_device **class_devp)
1673{
1674        const dev_t dev = MKDEV(IPATH_MAJOR, minor);
1675        struct cdev *cdev = NULL;
1676        struct class_device *class_dev = NULL;
1677        int ret;
1678
1679        cdev = cdev_alloc();
1680        if (!cdev) {
1681                printk(KERN_ERR IPATH_DRV_NAME
1682                       ": Could not allocate cdev for minor %d, %s\n",
1683                       minor, name);
1684                ret = -ENOMEM;
1685                goto done;
1686        }
1687
1688        cdev->owner = THIS_MODULE;
1689        cdev->ops = fops;
1690        kobject_set_name(&cdev->kobj, name);
1691
1692        ret = cdev_add(cdev, dev, 1);
1693        if (ret < 0) {
1694                printk(KERN_ERR IPATH_DRV_NAME
1695                       ": Could not add cdev for minor %d, %s (err %d)\n",
1696                       minor, name, -ret);
1697                goto err_cdev;
1698        }
1699
1700        class_dev = class_device_create(ipath_class, NULL, dev, NULL, name);
1701
1702        if (IS_ERR(class_dev)) {
1703                ret = PTR_ERR(class_dev);
1704                printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
1705                       "class_dev for minor %d, %s (err %d)\n",
1706                       minor, name, -ret);
1707                goto err_cdev;
1708        }
1709
1710        goto done;
1711
1712err_cdev:
1713        cdev_del(cdev);
1714        cdev = NULL;
1715
1716done:
1717        if (ret >= 0) {
1718                *cdevp = cdev;
1719                *class_devp = class_dev;
1720        } else {
1721                *cdevp = NULL;
1722                *class_devp = NULL;
1723        }
1724
1725        return ret;
1726}
1727
/**
 * ipath_cdev_init - non-static entry point for init_cdev()
 * @minor: minor number for the new device
 * @name: device name
 * @fops: file operations attached to the new cdev
 * @cdevp: receives the new cdev, or NULL on failure
 * @class_devp: receives the new class device, or NULL on failure
 *
 * Returns whatever init_cdev() returns.
 */
int ipath_cdev_init(int minor, char *name, struct file_operations *fops,
		    struct cdev **cdevp, struct class_device **class_devp)
{
	int status = init_cdev(minor, name, fops, cdevp, class_devp);

	return status;
}
1733
1734static void cleanup_cdev(struct cdev **cdevp,
1735                         struct class_device **class_devp)
1736{
1737        struct class_device *class_dev = *class_devp;
1738
1739        if (class_dev) {
1740                class_device_unregister(class_dev);
1741                *class_devp = NULL;
1742        }
1743
1744        if (*cdevp) {
1745                cdev_del(*cdevp);
1746                *cdevp = NULL;
1747        }
1748}
1749
/**
 * ipath_cdev_cleanup - non-static entry point for cleanup_cdev()
 * @cdevp: location of the cdev pointer to delete and clear
 * @class_devp: location of the class device pointer to unregister and
 *	clear
 */
void ipath_cdev_cleanup(struct cdev **cdevp,
			struct class_device **class_devp)
{
	/* all of the work, including NULL handling, is in the helper */
	cleanup_cdev(cdevp, class_devp);
}
1755
1756static struct cdev *wildcard_cdev;
1757static struct class_device *wildcard_class_dev;
1758
1759static const dev_t dev = MKDEV(IPATH_MAJOR, 0);
1760
1761static int user_init(void)
1762{
1763        int ret;
1764
1765        ret = register_chrdev_region(dev, IPATH_NMINORS, IPATH_DRV_NAME);
1766        if (ret < 0) {
1767                printk(KERN_ERR IPATH_DRV_NAME ": Could not register "
1768                       "chrdev region (err %d)\n", -ret);
1769                goto done;
1770        }
1771
1772        ipath_class = class_create(THIS_MODULE, IPATH_DRV_NAME);
1773
1774        if (IS_ERR(ipath_class)) {
1775                ret = PTR_ERR(ipath_class);
1776                printk(KERN_ERR IPATH_DRV_NAME ": Could not create "
1777                       "device class (err %d)\n", -ret);
1778                goto bail;
1779        }
1780
1781        goto done;
1782bail:
1783        unregister_chrdev_region(dev, IPATH_NMINORS);
1784done:
1785        return ret;
1786}
1787
1788static void user_cleanup(void)
1789{
1790        if (ipath_class) {
1791                class_destroy(ipath_class);
1792                ipath_class = NULL;
1793        }
1794
1795        unregister_chrdev_region(dev, IPATH_NMINORS);
1796}
1797
1798static atomic_t user_count = ATOMIC_INIT(0);
1799static atomic_t user_setup = ATOMIC_INIT(0);
1800
1801int ipath_user_add(struct ipath_devdata *dd)
1802{
1803        char name[10];
1804        int ret;
1805
1806        if (atomic_inc_return(&user_count) == 1) {
1807                ret = user_init();
1808                if (ret < 0) {
1809                        ipath_dev_err(dd, "Unable to set up user support: "
1810                                      "error %d\n", -ret);
1811                        goto bail;
1812                }
1813                ret = init_cdev(0, "ipath", &ipath_file_ops, &wildcard_cdev,
1814                                &wildcard_class_dev);
1815                if (ret < 0) {
1816                        ipath_dev_err(dd, "Could not create wildcard "
1817                                      "minor: error %d\n", -ret);
1818                        goto bail_sma;
1819                }
1820
1821                atomic_set(&user_setup, 1);
1822        }
1823
1824        snprintf(name, sizeof(name), "ipath%d", dd->ipath_unit);
1825
1826        ret = init_cdev(dd->ipath_unit + 1, name, &ipath_file_ops,
1827                        &dd->user_cdev, &dd->user_class_dev);
1828        if (ret < 0)
1829                ipath_dev_err(dd, "Could not create user minor %d, %s\n",
1830                              dd->ipath_unit + 1, name);
1831
1832        goto bail;
1833
1834bail_sma:
1835        user_cleanup();
1836bail:
1837        return ret;
1838}
1839
1840void ipath_user_remove(struct ipath_devdata *dd)
1841{
1842        cleanup_cdev(&dd->user_cdev, &dd->user_class_dev);
1843
1844        if (atomic_dec_return(&user_count) == 0) {
1845                if (atomic_read(&user_setup) == 0)
1846                        goto bail;
1847
1848                cleanup_cdev(&wildcard_cdev, &wildcard_class_dev);
1849                user_cleanup();
1850
1851                atomic_set(&user_setup, 0);
1852        }
1853bail:
1854        return;
1855}
1856
1857
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.