linux/crypto/async_tx/async_tx.c
/*
 * core routines for the asynchronous memory transfer/transform api
 *
 * Copyright © 2006, Intel Corporation.
 *
 *      Dan Williams <dan.j.williams@intel.com>
 *
 *      with architecture considerations by:
 *      Neil Brown <neilb@suse.de>
 *      Jeff Garzik <jeff@garzik.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
#include <linux/rculist.h>
#include <linux/kernel.h>
#include <linux/async_tx.h>

#ifdef CONFIG_DMA_ENGINE
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
        struct dma_chan *chan, enum dma_state state);

static struct dma_client async_tx_dma = {
        .event_callback = dma_channel_add_remove,
        /* .cap_mask == 0 defaults to all channels */
};

/**
 * dma_cap_mask_all - enable iteration over all operation types
 */
static dma_cap_mask_t dma_cap_mask_all;

/**
 * chan_ref_percpu - tracks channel allocations per core/operation
 */
struct chan_ref_percpu {
        struct dma_chan_ref *ref;
};

static int channel_table_initialized;
static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];

/**
 * async_tx_lock - protect modification of async_tx_master_list and serialize
 *      rebalance operations
 */
static spinlock_t async_tx_lock;

static LIST_HEAD(async_tx_master_list);

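/*
 * How the pieces above fit together: async_tx_master_list is the
 * RCU-protected list of every channel granted to this client, while
 * channel_table[<operation type>] caches, per cpu, the channel that cpu
 * should prefer for that operation.  For illustration only (this mirrors
 * __async_tx_find_channel() below rather than adding a new code path), a
 * lookup for a memcpy-capable channel on the current cpu is roughly:
 *
 *      int cpu = get_cpu();
 *      struct dma_chan_ref *ref =
 *              per_cpu_ptr(channel_table[DMA_MEMCPY], cpu)->ref;
 *      put_cpu();
 *
 * where a NULL ref means the caller should fall back to the synchronous
 * path.
 */
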
/* async_tx_issue_pending_all - start all transactions on all channels */
void async_tx_issue_pending_all(void)
{
        struct dma_chan_ref *ref;

        rcu_read_lock();
        list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                ref->chan->device->device_issue_pending(ref->chan);
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);

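/*
 * Typical use, sketched only: a subsystem such as md/raid456 queues a batch
 * of operations through the async_* frontends and then flushes them to
 * hardware in one go (assuming the async_memcpy() prototype from
 * <linux/async_tx.h>):
 *
 *      tx = async_memcpy(dest, src, 0, 0, len, ASYNC_TX_ACK,
 *                        NULL, NULL, NULL);
 *      ...further async_* calls...
 *      async_tx_issue_pending_all();
 *
 * The flush itself is cheap: every registered channel simply gets a
 * device_issue_pending() call.
 */
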
/* dma_wait_for_async_tx - spin wait for a transaction to complete
 * @tx: transaction to wait on
 */
enum dma_status
dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
{
        enum dma_status status;
        struct dma_async_tx_descriptor *iter;
        struct dma_async_tx_descriptor *parent;

        if (!tx)
                return DMA_SUCCESS;

        /* poll through the dependency chain, return when tx is complete */
        do {
                iter = tx;

                /* find the root of the unsubmitted dependency chain */
                do {
                        parent = iter->parent;
                        if (!parent)
                                break;
                        else
                                iter = parent;
                } while (parent);

                /* there is a small window for ->parent == NULL and
                 * ->cookie == -EBUSY
                 */
                while (iter->cookie == -EBUSY)
                        cpu_relax();

                status = dma_sync_wait(iter->chan, iter->cookie);
        } while (status == DMA_IN_PROGRESS || (iter != tx));

        return status;
}
EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);

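/*
 * Note on the polling loop above: a descriptor whose cookie is still -EBUSY
 * has been prepared but not yet submitted to its channel.  The walk climbs
 * ->parent links to the oldest such ancestor, spins until that ancestor is
 * actually submitted (its cookie becomes valid), and only then asks
 * dma_sync_wait() for its status.  The outer loop repeats until the walk
 * terminates at @tx itself and @tx is no longer DMA_IN_PROGRESS.
 */
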
/* async_tx_run_dependencies - helper routine for dma drivers to process
 *      (start) dependent operations on their target channel
 * @tx: transaction with dependencies
 */
void
async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
{
        struct dma_async_tx_descriptor *next = tx->next;
        struct dma_chan *chan;

        if (!next)
                return;

        tx->next = NULL;
        chan = next->chan;

        /* keep submitting up until a channel switch is detected
         * in that case we will be called again as a result of
         * processing the interrupt from async_tx_channel_switch
         */
        while (next && next->chan == chan) {
                struct dma_async_tx_descriptor *_next;

                spin_lock_bh(&next->lock);
                next->parent = NULL;
                _next = next->next;
                if (_next && _next->chan == chan)
                        next->next = NULL;
                spin_unlock_bh(&next->lock);

                next->tx_submit(next);
                next = _next;
        }

        chan->device->device_issue_pending(chan);
}
EXPORT_SYMBOL_GPL(async_tx_run_dependencies);

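/*
 * async_tx_run_dependencies() is intended for dma drivers: once a driver's
 * clean-up path has marked a descriptor complete, it calls this helper so
 * that operations depending on that descriptor get submitted.  A
 * hypothetical driver fragment (the descriptor field name is illustrative):
 *
 *      ...in the driver's descriptor clean-up routine, after recording
 *      the completed cookie...
 *      async_tx_run_dependencies(&desc->async_tx);
 *
 * Dependents that live on a different channel are not submitted here; they
 * are reached later via the interrupt descriptor queued by
 * async_tx_channel_switch(), which is why the loop above stops at the first
 * channel change.
 */
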
static void
free_dma_chan_ref(struct rcu_head *rcu)
{
        struct dma_chan_ref *ref;
        ref = container_of(rcu, struct dma_chan_ref, rcu);
        kfree(ref);
}

static void
init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
{
        INIT_LIST_HEAD(&ref->node);
        INIT_RCU_HEAD(&ref->rcu);
        ref->chan = chan;
        atomic_set(&ref->count, 0);
}

/**
 * get_chan_ref_by_cap - returns the nth channel of the given capability
 *      defaults to returning the channel with the desired capability and the
 *      lowest reference count if the index can not be satisfied
 * @cap: capability to match
 * @index: nth channel desired, passing -1 has the effect of forcing the
 *  default return value
 */
static struct dma_chan_ref *
get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
{
        struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;

        rcu_read_lock();
        list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
                        if (!min_ref)
                                min_ref = ref;
                        else if (atomic_read(&ref->count) <
                                atomic_read(&min_ref->count))
                                min_ref = ref;

                        if (index-- == 0) {
                                ret_ref = ref;
                                break;
                        }
                }
        rcu_read_unlock();

        if (!ret_ref)
                ret_ref = min_ref;

        if (ret_ref)
                atomic_inc(&ret_ref->count);

        return ret_ref;
}

/**
 * async_tx_rebalance - redistribute the available channels, optimize
 * for cpu isolation in the SMP case, and operation isolation in the
 * uniprocessor case
 */
static void async_tx_rebalance(void)
{
        int cpu, cap, cpu_idx = 0;
        unsigned long flags;

        if (!channel_table_initialized)
                return;

        spin_lock_irqsave(&async_tx_lock, flags);

        /* undo the last distribution */
        for_each_dma_cap_mask(cap, dma_cap_mask_all)
                for_each_possible_cpu(cpu) {
                        struct dma_chan_ref *ref =
                                per_cpu_ptr(channel_table[cap], cpu)->ref;
                        if (ref) {
                                atomic_set(&ref->count, 0);
                                per_cpu_ptr(channel_table[cap], cpu)->ref =
                                                                        NULL;
                        }
                }

        for_each_dma_cap_mask(cap, dma_cap_mask_all)
                for_each_online_cpu(cpu) {
                        struct dma_chan_ref *new;
                        if (NR_CPUS > 1)
                                new = get_chan_ref_by_cap(cap, cpu_idx++);
                        else
                                new = get_chan_ref_by_cap(cap, -1);

                        per_cpu_ptr(channel_table[cap], cpu)->ref = new;
                }

        spin_unlock_irqrestore(&async_tx_lock, flags);
}

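/*
 * A worked example of the distribution above: with two channels that both
 * advertise a capability and four online cpus, the first two
 * get_chan_ref_by_cap() calls satisfy their index and return distinct
 * channels, while later calls overflow the list and fall back to whichever
 * channel currently has the lowest reference count, so the load stays
 * roughly balanced.  Note that cpu_idx keeps counting across capabilities
 * as well as cpus.  On a uniprocessor build (NR_CPUS == 1) every capability
 * simply takes the least-referenced channel, isolating operation types from
 * one another instead of cpus.
 */
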
static enum dma_state_client
dma_channel_add_remove(struct dma_client *client,
        struct dma_chan *chan, enum dma_state state)
{
        unsigned long found, flags;
        struct dma_chan_ref *master_ref, *ref;
        enum dma_state_client ack = DMA_DUP; /* default: take no action */

        switch (state) {
        case DMA_RESOURCE_AVAILABLE:
                found = 0;
                rcu_read_lock();
                list_for_each_entry_rcu(ref, &async_tx_master_list, node)
                        if (ref->chan == chan) {
                                found = 1;
                                break;
                        }
                rcu_read_unlock();

                pr_debug("async_tx: dma resource available [%s]\n",
                        found ? "old" : "new");

                if (!found)
                        ack = DMA_ACK;
                else
                        break;

                /* add the channel to the generic management list */
                master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
                if (master_ref) {
                        /* keep a reference until async_tx is unloaded */
                        dma_chan_get(chan);
                        init_dma_chan_ref(master_ref, chan);
                        spin_lock_irqsave(&async_tx_lock, flags);
                        list_add_tail_rcu(&master_ref->node,
                                &async_tx_master_list);
                        spin_unlock_irqrestore(&async_tx_lock,
                                flags);
                } else {
                        printk(KERN_WARNING "async_tx: unable to create"
                                " new master entry in response to"
                                " a DMA_RESOURCE_ADDED event"
                                " (-ENOMEM)\n");
                        return 0;
                }

                async_tx_rebalance();
                break;
        case DMA_RESOURCE_REMOVED:
                found = 0;
                spin_lock_irqsave(&async_tx_lock, flags);
                list_for_each_entry(ref, &async_tx_master_list, node)
                        if (ref->chan == chan) {
                                /* permit backing devices to go away */
                                dma_chan_put(ref->chan);
                                list_del_rcu(&ref->node);
                                call_rcu(&ref->rcu, free_dma_chan_ref);
                                found = 1;
                                break;
                        }
                spin_unlock_irqrestore(&async_tx_lock, flags);

                pr_debug("async_tx: dma resource removed [%s]\n",
                        found ? "ours" : "not ours");

                if (found)
                        ack = DMA_ACK;
                else
                        break;

                async_tx_rebalance();
                break;
        case DMA_RESOURCE_SUSPEND:
        case DMA_RESOURCE_RESUME:
                printk(KERN_WARNING "async_tx: does not support dma channel"
                        " suspend/resume\n");
                break;
        default:
                BUG();
        }

        return ack;
}

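/*
 * The return value feeds the dmaengine core's client bookkeeping: DMA_DUP
 * (the default above) means "take no action for this client", while DMA_ACK
 * either claims a newly available channel or acknowledges that a removed
 * channel was ours so its backing device may go away.
 */
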
static int __init
async_tx_init(void)
{
        enum dma_transaction_type cap;

        spin_lock_init(&async_tx_lock);
        bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);

        /* an interrupt will never be an explicit operation type.
         * clearing this bit prevents allocation to a slot in 'channel_table'
         */
        clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);

        for_each_dma_cap_mask(cap, dma_cap_mask_all) {
                channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
                if (!channel_table[cap])
                        goto err;
        }

        channel_table_initialized = 1;
        dma_async_client_register(&async_tx_dma);
        dma_async_client_chan_request(&async_tx_dma);

        printk(KERN_INFO "async_tx: api initialized (async)\n");

        return 0;
err:
        printk(KERN_ERR "async_tx: initialization failure\n");

        while (--cap >= 0)
                free_percpu(channel_table[cap]);

        return 1;
}

static void __exit async_tx_exit(void)
{
        enum dma_transaction_type cap;

        channel_table_initialized = 0;

        for_each_dma_cap_mask(cap, dma_cap_mask_all)
                if (channel_table[cap])
                        free_percpu(channel_table[cap]);

        dma_async_client_unregister(&async_tx_dma);
}

/**
 * __async_tx_find_channel - find a channel to carry out the operation or let
 *      the transaction execute synchronously
 * @depend_tx: transaction dependency
 * @tx_type: transaction type
 */
struct dma_chan *
__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
        enum dma_transaction_type tx_type)
{
        /* see if we can keep the chain on one channel */
        if (depend_tx &&
                dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
                return depend_tx->chan;
        else if (likely(channel_table_initialized)) {
                struct dma_chan_ref *ref;
                int cpu = get_cpu();
                ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
                put_cpu();
                return ref ? ref->chan : NULL;
        } else
                return NULL;
}
EXPORT_SYMBOL_GPL(__async_tx_find_channel);
#else
static int __init async_tx_init(void)
{
        printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
        return 0;
}

static void __exit async_tx_exit(void)
{
        do { } while (0);
}
#endif


/**
 * async_tx_channel_switch - queue an interrupt descriptor with a dependency
 *      pre-attached.
 * @depend_tx: the operation that must finish before the new operation runs
 * @tx: the new operation
 */
static void
async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx,
                        struct dma_async_tx_descriptor *tx)
{
        struct dma_chan *chan;
        struct dma_device *device;
        struct dma_async_tx_descriptor *intr_tx = (void *) ~0;

        /* first check to see if we can still append to depend_tx */
        spin_lock_bh(&depend_tx->lock);
        if (depend_tx->parent && depend_tx->chan == tx->chan) {
                tx->parent = depend_tx;
                depend_tx->next = tx;
                intr_tx = NULL;
        }
        spin_unlock_bh(&depend_tx->lock);

        if (!intr_tx)
                return;

        chan = depend_tx->chan;
        device = chan->device;

        /* see if we can schedule an interrupt
         * otherwise poll for completion
         */
        if (dma_has_cap(DMA_INTERRUPT, device->cap_mask))
                intr_tx = device->device_prep_dma_interrupt(chan, 0);
        else
                intr_tx = NULL;

        if (intr_tx) {
                intr_tx->callback = NULL;
                intr_tx->callback_param = NULL;
                tx->parent = intr_tx;
                /* safe to set ->next outside the lock since we know we are
                 * not submitted yet
                 */
                intr_tx->next = tx;

                /* check if we need to append */
                spin_lock_bh(&depend_tx->lock);
                if (depend_tx->parent) {
                        intr_tx->parent = depend_tx;
                        depend_tx->next = intr_tx;
                        async_tx_ack(intr_tx);
                        intr_tx = NULL;
                }
                spin_unlock_bh(&depend_tx->lock);

                if (intr_tx) {
                        intr_tx->parent = NULL;
                        intr_tx->tx_submit(intr_tx);
                        async_tx_ack(intr_tx);
                }
        } else {
                if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
                        panic("%s: DMA_ERROR waiting for depend_tx\n",
                              __func__);
                tx->tx_submit(tx);
        }
}


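/*
 * Restating the channel-switch mechanism: when a new operation depends on
 * work queued on a different channel, the code above tries to queue a
 * DMA_INTERRUPT descriptor on the old channel with the new operation
 * pre-attached as its dependent.  When that interrupt descriptor completes,
 * the old channel's driver runs async_tx_run_dependencies(), which submits
 * the new operation on its own channel.  If the old channel cannot raise
 * interrupts, the fallback is to spin in dma_wait_for_async_tx() and then
 * submit the new operation directly.
 */
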
/**
 * submit_disposition - while holding depend_tx->lock we must avoid submitting
 *      new operations to prevent a circular locking dependency with
 *      drivers that already hold a channel lock when calling
 *      async_tx_run_dependencies.
 * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock
 * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch
 * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly
 */
enum submit_disposition {
        ASYNC_TX_SUBMITTED,
        ASYNC_TX_CHANNEL_SWITCH,
        ASYNC_TX_DIRECT_SUBMIT,
};

void
async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
        enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
        dma_async_tx_callback cb_fn, void *cb_param)
{
        tx->callback = cb_fn;
        tx->callback_param = cb_param;

        if (depend_tx) {
                enum submit_disposition s;

                /* sanity check the dependency chain:
                 * 1/ if ack is already set then we cannot be sure
                 * we are referring to the correct operation
                 * 2/ dependencies are 1:1 i.e. two transactions can
                 * not depend on the same parent
                 */
                BUG_ON(async_tx_test_ack(depend_tx) || depend_tx->next ||
                       tx->parent);

                /* the lock prevents async_tx_run_dependencies from missing
                 * the setting of ->next when ->parent != NULL
                 */
                spin_lock_bh(&depend_tx->lock);
                if (depend_tx->parent) {
                        /* we have a parent so we can not submit directly
                         * if we are staying on the same channel: append
                         * else: channel switch
                         */
                        if (depend_tx->chan == chan) {
                                tx->parent = depend_tx;
                                depend_tx->next = tx;
                                s = ASYNC_TX_SUBMITTED;
                        } else
                                s = ASYNC_TX_CHANNEL_SWITCH;
                } else {
                        /* we do not have a parent so we may be able to submit
                         * directly if we are staying on the same channel
                         */
                        if (depend_tx->chan == chan)
                                s = ASYNC_TX_DIRECT_SUBMIT;
                        else
                                s = ASYNC_TX_CHANNEL_SWITCH;
                }
                spin_unlock_bh(&depend_tx->lock);

                switch (s) {
                case ASYNC_TX_SUBMITTED:
                        break;
                case ASYNC_TX_CHANNEL_SWITCH:
                        async_tx_channel_switch(depend_tx, tx);
                        break;
                case ASYNC_TX_DIRECT_SUBMIT:
                        tx->parent = NULL;
                        tx->tx_submit(tx);
                        break;
                }
        } else {
                tx->parent = NULL;
                tx->tx_submit(tx);
        }

        if (flags & ASYNC_TX_ACK)
                async_tx_ack(tx);

        if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
                async_tx_ack(depend_tx);
}
EXPORT_SYMBOL_GPL(async_tx_submit);

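/*
 * For context, each async_* frontend follows the same pattern once it has a
 * channel: prepare a hardware descriptor, then hand it to async_tx_submit()
 * together with whatever it depends on.  A condensed sketch, loosely based
 * on the async_memcpy() frontend (mapping and error handling omitted, so
 * treat the details as illustrative):
 *
 *      struct dma_device *device = chan ? chan->device : NULL;
 *      struct dma_async_tx_descriptor *tx = device ?
 *              device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
 *                                             len, 0) : NULL;
 *
 *      if (tx)
 *              async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
 *      else
 *              ...fall back to a synchronous copy and async_tx_sync_epilog()...
 *
 * async_tx_submit() then decides, under depend_tx->lock, whether the new
 * descriptor is appended to its dependency, submitted directly, or routed
 * through async_tx_channel_switch().
 */
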
/**
 * async_trigger_callback - schedules the callback function to be run after
 * any dependent operations have been completed.
 * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
 * @depend_tx: 'callback' requires the completion of this transaction
 * @cb_fn: function to call after depend_tx completes
 * @cb_param: parameter to pass to the callback routine
 */
struct dma_async_tx_descriptor *
async_trigger_callback(enum async_tx_flags flags,
        struct dma_async_tx_descriptor *depend_tx,
        dma_async_tx_callback cb_fn, void *cb_param)
{
        struct dma_chan *chan;
        struct dma_device *device;
        struct dma_async_tx_descriptor *tx;

        if (depend_tx) {
                chan = depend_tx->chan;
                device = chan->device;

                /* see if we can schedule an interrupt
                 * otherwise poll for completion
                 */
                if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
                        device = NULL;

                tx = device ? device->device_prep_dma_interrupt(chan, 0) : NULL;
        } else
                tx = NULL;

        if (tx) {
                pr_debug("%s: (async)\n", __func__);

                async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
        } else {
                pr_debug("%s: (sync)\n", __func__);

                /* wait for any prerequisite operations */
                async_tx_quiesce(&depend_tx);

                async_tx_sync_epilog(cb_fn, cb_param);
        }

        return tx;
}
EXPORT_SYMBOL_GPL(async_trigger_callback);

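/*
 * async_trigger_callback() exists for callers that only want notification,
 * not another transform.  A hypothetical user that has chained several
 * operations might finish the chain with something like:
 *
 *      tx = async_trigger_callback(ASYNC_TX_ACK | ASYNC_TX_DEP_ACK,
 *                                  tx, my_done_fn, my_ctx);
 *
 * where my_done_fn/my_ctx are illustrative names.  If an interrupt-capable
 * channel is involved, the callback runs from that channel's completion
 * path once the chain drains; otherwise the dependency is quiesced and the
 * callback is invoked synchronously right here.
 */
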
/**
 * async_tx_quiesce - ensure tx is complete and freeable upon return
 * @tx - transaction to quiesce
 */
void async_tx_quiesce(struct dma_async_tx_descriptor **tx)
{
        if (*tx) {
                /* if ack is already set then we cannot be sure
                 * we are referring to the correct operation
                 */
                BUG_ON(async_tx_test_ack(*tx));
                if (dma_wait_for_async_tx(*tx) == DMA_ERROR)
                        panic("DMA_ERROR waiting for transaction\n");
                async_tx_ack(*tx);
                *tx = NULL;
        }
}
EXPORT_SYMBOL_GPL(async_tx_quiesce);

module_init(async_tx_init);
module_exit(async_tx_exit);

MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
MODULE_LICENSE("GPL");