linux/block/blk-ioc.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to io context handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/sched/task.h>

#include "blk.h"
#include "blk-mq-sched.h"

/*
 * For io context allocations
 */
static struct kmem_cache *iocontext_cachep;

#ifdef CONFIG_BLK_ICQ
/**
 * get_io_context - increment reference count to io_context
 * @ioc: io_context to get
 *
 * Increment reference count to @ioc.
 */
static void get_io_context(struct io_context *ioc)
{
        BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
        atomic_long_inc(&ioc->refcount);
}

static void icq_free_icq_rcu(struct rcu_head *head)
{
        struct io_cq *icq = container_of(head, struct io_cq, __rcu_head);

        kmem_cache_free(icq->__rcu_icq_cache, icq);
}

/*
 * Exit an icq. Called with ioc locked for blk-mq, and with both ioc
 * and queue locked for legacy.
 */
static void ioc_exit_icq(struct io_cq *icq)
{
        struct elevator_type *et = icq->q->elevator->type;

        if (icq->flags & ICQ_EXITED)
                return;

        if (et->ops.exit_icq)
                et->ops.exit_icq(icq);

        icq->flags |= ICQ_EXITED;
}

static void ioc_exit_icqs(struct io_context *ioc)
{
        struct io_cq *icq;

        spin_lock_irq(&ioc->lock);
        hlist_for_each_entry(icq, &ioc->icq_list, ioc_node)
                ioc_exit_icq(icq);
        spin_unlock_irq(&ioc->lock);
}

/*
 * Release an icq. Called with ioc locked for blk-mq, and with both ioc
 * and queue locked for legacy.
 */
static void ioc_destroy_icq(struct io_cq *icq)
{
        struct io_context *ioc = icq->ioc;
        struct request_queue *q = icq->q;
        struct elevator_type *et = q->elevator->type;

        lockdep_assert_held(&ioc->lock);
        lockdep_assert_held(&q->queue_lock);

        if (icq->flags & ICQ_DESTROYED)
                return;

        radix_tree_delete(&ioc->icq_tree, icq->q->id);
        hlist_del_init(&icq->ioc_node);
        list_del_init(&icq->q_node);

        /*
         * Both setting lookup hint to and clearing it from @icq are done
         * under queue_lock.  If it's not pointing to @icq now, it never
         * will.  Hint assignment itself can race safely.
         */
        if (rcu_access_pointer(ioc->icq_hint) == icq)
                rcu_assign_pointer(ioc->icq_hint, NULL);

        ioc_exit_icq(icq);

        /*
         * @icq->q might have gone away by the time RCU callback runs
         * making it impossible to determine icq_cache.  Record it in @icq.
         */
        icq->__rcu_icq_cache = et->icq_cache;
        icq->flags |= ICQ_DESTROYED;
        call_rcu(&icq->__rcu_head, icq_free_icq_rcu);
}

/*
 * Slow path for ioc release in put_io_context().  Performs double-lock
 * dancing to unlink all icq's and then frees ioc.
 */
static void ioc_release_fn(struct work_struct *work)
{
        struct io_context *ioc = container_of(work, struct io_context,
                                              release_work);
        spin_lock_irq(&ioc->lock);

        while (!hlist_empty(&ioc->icq_list)) {
                struct io_cq *icq = hlist_entry(ioc->icq_list.first,
                                                struct io_cq, ioc_node);
                struct request_queue *q = icq->q;

                if (spin_trylock(&q->queue_lock)) {
                        ioc_destroy_icq(icq);
                        spin_unlock(&q->queue_lock);
                } else {
                        /* Make sure q and icq cannot be freed. */
                        rcu_read_lock();

                        /* Re-acquire the locks in the correct order. */
                        spin_unlock(&ioc->lock);
                        spin_lock(&q->queue_lock);
                        spin_lock(&ioc->lock);

                        ioc_destroy_icq(icq);

                        spin_unlock(&q->queue_lock);
                        rcu_read_unlock();
                }
        }

        spin_unlock_irq(&ioc->lock);

        kmem_cache_free(iocontext_cachep, ioc);
}
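/*
 * Editorial note (not in the original file): the lock nesting order used
 * elsewhere in this file (ioc_create_icq(), ioc_clear_queue()) is
 * queue_lock first, then ioc->lock.  ioc_release_fn() starts out holding
 * only the inner ioc->lock, which is why it either trylocks queue_lock or
 * drops ioc->lock and re-acquires both in the correct order above.
 */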

/*
 * Releasing icqs requires reverse order double locking and we may already be
 * holding a queue_lock.  Do it asynchronously from a workqueue.
 */
static bool ioc_delay_free(struct io_context *ioc)
{
        unsigned long flags;

        spin_lock_irqsave(&ioc->lock, flags);
        if (!hlist_empty(&ioc->icq_list)) {
                queue_work(system_power_efficient_wq, &ioc->release_work);
                spin_unlock_irqrestore(&ioc->lock, flags);
                return true;
        }
        spin_unlock_irqrestore(&ioc->lock, flags);
        return false;
}

/**
 * ioc_clear_queue - break any ioc association with the specified queue
 * @q: request_queue being cleared
 *
 * Walk @q->icq_list and exit all io_cq's.
 */
void ioc_clear_queue(struct request_queue *q)
{
        spin_lock_irq(&q->queue_lock);
        while (!list_empty(&q->icq_list)) {
                struct io_cq *icq =
                        list_first_entry(&q->icq_list, struct io_cq, q_node);

                /*
                 * Other contexts won't hold the ioc lock while waiting for
                 * the queue_lock; see the comments in ioc_release_fn() for
                 * details.
                 */
                spin_lock(&icq->ioc->lock);
                ioc_destroy_icq(icq);
                spin_unlock(&icq->ioc->lock);
        }
        spin_unlock_irq(&q->queue_lock);
}
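/*
 * Usage sketch (assumption, not part of this file): ioc_clear_queue() is
 * expected to be called while a queue tears down its elevator, roughly:
 *
 *        void elevator_exit(struct request_queue *q)
 *        {
 *                ioc_clear_queue(q);
 *                ...
 *        }
 *
 * so that no icq still references elevator data that is about to be freed.
 * The caller name is illustrative; only the ordering requirement is the
 * point.
 */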
#else /* CONFIG_BLK_ICQ */
static inline void ioc_exit_icqs(struct io_context *ioc)
{
}
static inline bool ioc_delay_free(struct io_context *ioc)
{
        return false;
}
#endif /* CONFIG_BLK_ICQ */

/**
 * put_io_context - put a reference to an io_context
 * @ioc: io_context to put
 *
 * Decrement reference count of @ioc and release it if the count reaches
 * zero.
 */
void put_io_context(struct io_context *ioc)
{
        BUG_ON(atomic_long_read(&ioc->refcount) <= 0);
        if (atomic_long_dec_and_test(&ioc->refcount) && !ioc_delay_free(ioc))
                kmem_cache_free(iocontext_cachep, ioc);
}
EXPORT_SYMBOL_GPL(put_io_context);

/* Called by the exiting task */
void exit_io_context(struct task_struct *task)
{
        struct io_context *ioc;

        task_lock(task);
        ioc = task->io_context;
        task->io_context = NULL;
        task_unlock(task);

        if (atomic_dec_and_test(&ioc->active_ref)) {
                ioc_exit_icqs(ioc);
                put_io_context(ioc);
        }
}
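/*
 * Usage sketch (assumption, not part of this file): the task exit path is
 * expected to call this once per task that owns an io_context, e.g.:
 *
 *        if (tsk->io_context)
 *                exit_io_context(tsk);
 *
 * so that the last active user exits the icqs and drops the base reference
 * taken at allocation time.
 */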

static struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
{
        struct io_context *ioc;

        ioc = kmem_cache_alloc_node(iocontext_cachep, gfp_flags | __GFP_ZERO,
                                    node);
        if (unlikely(!ioc))
                return NULL;

        atomic_long_set(&ioc->refcount, 1);
        atomic_set(&ioc->active_ref, 1);
#ifdef CONFIG_BLK_ICQ
        spin_lock_init(&ioc->lock);
        INIT_RADIX_TREE(&ioc->icq_tree, GFP_ATOMIC);
        INIT_HLIST_HEAD(&ioc->icq_list);
        INIT_WORK(&ioc->release_work, ioc_release_fn);
#endif
        ioc->ioprio = IOPRIO_DEFAULT;

        return ioc;
}

int set_task_ioprio(struct task_struct *task, int ioprio)
{
        int err;
        const struct cred *cred = current_cred(), *tcred;

        rcu_read_lock();
        tcred = __task_cred(task);
        if (!uid_eq(tcred->uid, cred->euid) &&
            !uid_eq(tcred->uid, cred->uid) && !capable(CAP_SYS_NICE)) {
                rcu_read_unlock();
                return -EPERM;
        }
        rcu_read_unlock();

        err = security_task_setioprio(task, ioprio);
        if (err)
                return err;

        task_lock(task);
        if (unlikely(!task->io_context)) {
                struct io_context *ioc;

                task_unlock(task);

                ioc = alloc_io_context(GFP_ATOMIC, NUMA_NO_NODE);
                if (!ioc)
                        return -ENOMEM;

                task_lock(task);
                if (task->flags & PF_EXITING) {
                        kmem_cache_free(iocontext_cachep, ioc);
                        goto out;
                }
                if (task->io_context)
                        kmem_cache_free(iocontext_cachep, ioc);
                else
                        task->io_context = ioc;
        }
        task->io_context->ioprio = ioprio;
out:
        task_unlock(task);
        return 0;
}
EXPORT_SYMBOL_GPL(set_task_ioprio);
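/*
 * Usage sketch (assumption, not part of this file): the ioprio_set(2)
 * syscall path is expected to resolve its target task(s) and then call
 *
 *        ret = set_task_ioprio(p, ioprio);
 *
 * relying on this function for the credential and LSM checks as well as
 * for lazily allocating p->io_context.
 */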

int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
{
        struct io_context *ioc = current->io_context;

        /*
         * Share io context with parent, if CLONE_IO is set
         */
        if (clone_flags & CLONE_IO) {
                atomic_inc(&ioc->active_ref);
                tsk->io_context = ioc;
        } else if (ioprio_valid(ioc->ioprio)) {
                tsk->io_context = alloc_io_context(GFP_KERNEL, NUMA_NO_NODE);
                if (!tsk->io_context)
                        return -ENOMEM;
                tsk->io_context->ioprio = ioc->ioprio;
        }

        return 0;
}
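/*
 * Illustrative note (assumption about the calling convention, not part of
 * this file): __copy_io() is reached through a copy_io() wrapper in the
 * fork path that returns early when the parent has no io_context, e.g.:
 *
 *        static inline int copy_io(unsigned long clone_flags,
 *                                  struct task_struct *tsk)
 *        {
 *                if (!current->io_context)
 *                        return 0;
 *                return __copy_io(clone_flags, tsk);
 *        }
 *
 * which is why current->io_context is dereferenced above without a NULL
 * check.
 */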

#ifdef CONFIG_BLK_ICQ
/**
 * ioc_lookup_icq - lookup io_cq from ioc
 * @q: the associated request_queue
 *
 * Look up the io_cq associated with the current task's io_context and @q.
 * Must be called with @q->queue_lock held.
 */
struct io_cq *ioc_lookup_icq(struct request_queue *q)
{
        struct io_context *ioc = current->io_context;
        struct io_cq *icq;

        lockdep_assert_held(&q->queue_lock);

        /*
         * icq's are indexed from @ioc using radix tree and hint pointer,
         * both of which are protected with RCU.  All removals are done
         * holding both q and ioc locks, and we're holding q lock - if we
         * find an icq which points to us, it's guaranteed to be valid.
         */
        rcu_read_lock();
        icq = rcu_dereference(ioc->icq_hint);
        if (icq && icq->q == q)
                goto out;

        icq = radix_tree_lookup(&ioc->icq_tree, q->id);
        if (icq && icq->q == q)
                rcu_assign_pointer(ioc->icq_hint, icq); /* allowed to race */
        else
                icq = NULL;
out:
        rcu_read_unlock();
        return icq;
}
EXPORT_SYMBOL(ioc_lookup_icq);

/**
 * ioc_create_icq - create and link io_cq
 * @q: request_queue of interest
 *
 * Make sure an io_cq linking the current task's io_context and @q exists.
 * If it doesn't, a new one is allocated and linked.
 *
 * The caller is responsible for ensuring the io_context won't go away and
 * that @q is alive and will stay alive until this function returns.
 */
static struct io_cq *ioc_create_icq(struct request_queue *q)
{
        struct io_context *ioc = current->io_context;
        struct elevator_type *et = q->elevator->type;
        struct io_cq *icq;

        /* allocate stuff */
        icq = kmem_cache_alloc_node(et->icq_cache, GFP_ATOMIC | __GFP_ZERO,
                                    q->node);
        if (!icq)
                return NULL;

        if (radix_tree_maybe_preload(GFP_ATOMIC) < 0) {
                kmem_cache_free(et->icq_cache, icq);
                return NULL;
        }

        icq->ioc = ioc;
        icq->q = q;
        INIT_LIST_HEAD(&icq->q_node);
        INIT_HLIST_NODE(&icq->ioc_node);

        /* lock both q and ioc and try to link @icq */
        spin_lock_irq(&q->queue_lock);
        spin_lock(&ioc->lock);

        if (likely(!radix_tree_insert(&ioc->icq_tree, q->id, icq))) {
                hlist_add_head(&icq->ioc_node, &ioc->icq_list);
                list_add(&icq->q_node, &q->icq_list);
                if (et->ops.init_icq)
                        et->ops.init_icq(icq);
        } else {
                kmem_cache_free(et->icq_cache, icq);
                icq = ioc_lookup_icq(q);
                if (!icq)
                        printk(KERN_ERR "cfq: icq link failed!\n");
        }

        spin_unlock(&ioc->lock);
        spin_unlock_irq(&q->queue_lock);
        radix_tree_preload_end();
        return icq;
}

struct io_cq *ioc_find_get_icq(struct request_queue *q)
{
        struct io_context *ioc = current->io_context;
        struct io_cq *icq = NULL;

        if (unlikely(!ioc)) {
                ioc = alloc_io_context(GFP_ATOMIC, q->node);
                if (!ioc)
                        return NULL;

                task_lock(current);
                if (current->io_context) {
                        kmem_cache_free(iocontext_cachep, ioc);
                        ioc = current->io_context;
                } else {
                        current->io_context = ioc;
                }

                get_io_context(ioc);
                task_unlock(current);
        } else {
                get_io_context(ioc);

                spin_lock_irq(&q->queue_lock);
                icq = ioc_lookup_icq(q);
                spin_unlock_irq(&q->queue_lock);
        }

        if (!icq) {
                icq = ioc_create_icq(q);
                if (!icq) {
                        put_io_context(ioc);
                        return NULL;
                }
        }
        return icq;
}
EXPORT_SYMBOL_GPL(ioc_find_get_icq);
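/*
 * Usage sketch (assumption, not part of this file): an I/O scheduler that
 * uses icqs would typically call ioc_find_get_icq() from its
 * ->prepare_request() hook and attach the result to the request, roughly:
 *
 *        static void my_prepare_request(struct request *rq)
 *        {
 *                rq->elv.icq = ioc_find_get_icq(rq->q);
 *        }
 *
 * "my_prepare_request" is hypothetical.  Note that ioc_find_get_icq()
 * takes a reference on the io_context, which must eventually be dropped
 * with put_io_context() once the request is done with the icq.
 */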
#endif /* CONFIG_BLK_ICQ */

static int __init blk_ioc_init(void)
{
        iocontext_cachep = kmem_cache_create("blkdev_ioc",
                        sizeof(struct io_context), 0, SLAB_PANIC, NULL);
        return 0;
}
subsys_initcall(blk_ioc_init);