linux/block/ioprio.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * block/ioprio.c
   4 *
   5 * Copyright (C) 2004 Jens Axboe <axboe@kernel.dk>
   6 *
   7 * Helper functions for setting/querying io priorities of processes. The
   8 * system calls closely mimic getpriority/setpriority, see the man page for
   9 * those. The prio argument is a composite of prio class and prio data, where
  10 * the data argument has meaning within that class. The standard scheduling
  11 * classes have 8 distinct prio levels, with 0 being the highest prio and 7
  12 * being the lowest.
  13 *
  14 * IOW, setting BE scheduling class with prio 2 is done ala:
  15 *
  16 * unsigned int prio = (IOPRIO_CLASS_BE << IOPRIO_CLASS_SHIFT) | 2;
  17 *
  18 * ioprio_set(PRIO_PROCESS, pid, prio);
  19 *
  20 * See also Documentation/block/ioprio.rst
  21 *
  22 */
  23#include <linux/gfp.h>
  24#include <linux/kernel.h>
  25#include <linux/ioprio.h>
  26#include <linux/cred.h>
  27#include <linux/blkdev.h>
  28#include <linux/capability.h>
  29#include <linux/syscalls.h>
  30#include <linux/security.h>
  31#include <linux/pid_namespace.h>
  32
  33int ioprio_check_cap(int ioprio)
  34{
  35        int class = IOPRIO_PRIO_CLASS(ioprio);
  36        int level = IOPRIO_PRIO_LEVEL(ioprio);
  37
  38        switch (class) {
  39                case IOPRIO_CLASS_RT:
  40                        /*
  41                         * Originally this only checked for CAP_SYS_ADMIN,
  42                         * which was implicitly allowed for pid 0 by security
  43                         * modules such as SELinux. Make sure we check
  44                         * CAP_SYS_ADMIN first to avoid a denial/avc for
  45                         * possibly missing CAP_SYS_NICE permission.
  46                         */
  47                        if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
  48                                return -EPERM;
  49                        fallthrough;
  50                        /* rt has prio field too */
  51                case IOPRIO_CLASS_BE:
  52                        if (level >= IOPRIO_NR_LEVELS)
  53                                return -EINVAL;
  54                        break;
  55                case IOPRIO_CLASS_IDLE:
  56                        break;
  57                case IOPRIO_CLASS_NONE:
  58                        if (level)
  59                                return -EINVAL;
  60                        break;
  61                case IOPRIO_CLASS_INVALID:
  62                default:
  63                        return -EINVAL;
  64        }
  65
  66        return 0;
  67}
  68
  69SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
  70{
  71        struct task_struct *p, *g;
  72        struct user_struct *user;
  73        struct pid *pgrp;
  74        kuid_t uid;
  75        int ret;
  76
  77        ret = ioprio_check_cap(ioprio);
  78        if (ret)
  79                return ret;
  80
  81        ret = -ESRCH;
  82        rcu_read_lock();
  83        switch (which) {
  84                case IOPRIO_WHO_PROCESS:
  85                        if (!who)
  86                                p = current;
  87                        else
  88                                p = find_task_by_vpid(who);
  89                        if (p)
  90                                ret = set_task_ioprio(p, ioprio);
  91                        break;
  92                case IOPRIO_WHO_PGRP:
  93                        if (!who)
  94                                pgrp = task_pgrp(current);
  95                        else
  96                                pgrp = find_vpid(who);
  97
  98                        read_lock(&tasklist_lock);
  99                        do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
 100                                ret = set_task_ioprio(p, ioprio);
 101                                if (ret) {
 102                                        read_unlock(&tasklist_lock);
 103                                        goto out;
 104                                }
 105                        } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
 106                        read_unlock(&tasklist_lock);
 107
 108                        break;
 109                case IOPRIO_WHO_USER:
 110                        uid = make_kuid(current_user_ns(), who);
 111                        if (!uid_valid(uid))
 112                                break;
 113                        if (!who)
 114                                user = current_user();
 115                        else
 116                                user = find_user(uid);
 117
 118                        if (!user)
 119                                break;
 120
 121                        for_each_process_thread(g, p) {
 122                                if (!uid_eq(task_uid(p), uid) ||
 123                                    !task_pid_vnr(p))
 124                                        continue;
 125                                ret = set_task_ioprio(p, ioprio);
 126                                if (ret)
 127                                        goto free_uid;
 128                        }
 129free_uid:
 130                        if (who)
 131                                free_uid(user);
 132                        break;
 133                default:
 134                        ret = -EINVAL;
 135        }
 136
 137out:
 138        rcu_read_unlock();
 139        return ret;
 140}
 141
 142/*
 143 * If the task has set an I/O priority, use that. Otherwise, return
 144 * the default I/O priority.
 145 *
 146 * Expected to be called for current task or with task_lock() held to keep
 147 * io_context stable.
 148 */
 149int __get_task_ioprio(struct task_struct *p)
 150{
 151        struct io_context *ioc = p->io_context;
 152        int prio;
 153
 154        if (p != current)
 155                lockdep_assert_held(&p->alloc_lock);
 156        if (ioc)
 157                prio = ioc->ioprio;
 158        else
 159                prio = IOPRIO_DEFAULT;
 160
 161        if (IOPRIO_PRIO_CLASS(prio) == IOPRIO_CLASS_NONE)
 162                prio = IOPRIO_PRIO_VALUE(task_nice_ioclass(p),
 163                                         task_nice_ioprio(p));
 164        return prio;
 165}
 166EXPORT_SYMBOL_GPL(__get_task_ioprio);
 167
 168static int get_task_ioprio(struct task_struct *p)
 169{
 170        int ret;
 171
 172        ret = security_task_getioprio(p);
 173        if (ret)
 174                goto out;
 175        task_lock(p);
 176        ret = __get_task_ioprio(p);
 177        task_unlock(p);
 178out:
 179        return ret;
 180}
 181
 182/*
 183 * Return raw IO priority value as set by userspace. We use this for
 184 * ioprio_get(pid, IOPRIO_WHO_PROCESS) so that we keep historical behavior and
 185 * also so that userspace can distinguish unset IO priority (which just gets
 186 * overriden based on task's nice value) from IO priority set to some value.
 187 */
 188static int get_task_raw_ioprio(struct task_struct *p)
 189{
 190        int ret;
 191
 192        ret = security_task_getioprio(p);
 193        if (ret)
 194                goto out;
 195        task_lock(p);
 196        if (p->io_context)
 197                ret = p->io_context->ioprio;
 198        else
 199                ret = IOPRIO_DEFAULT;
 200        task_unlock(p);
 201out:
 202        return ret;
 203}
 204
 205static int ioprio_best(unsigned short aprio, unsigned short bprio)
 206{
 207        return min(aprio, bprio);
 208}
 209
 210SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 211{
 212        struct task_struct *g, *p;
 213        struct user_struct *user;
 214        struct pid *pgrp;
 215        kuid_t uid;
 216        int ret = -ESRCH;
 217        int tmpio;
 218
 219        rcu_read_lock();
 220        switch (which) {
 221                case IOPRIO_WHO_PROCESS:
 222                        if (!who)
 223                                p = current;
 224                        else
 225                                p = find_task_by_vpid(who);
 226                        if (p)
 227                                ret = get_task_raw_ioprio(p);
 228                        break;
 229                case IOPRIO_WHO_PGRP:
 230                        if (!who)
 231                                pgrp = task_pgrp(current);
 232                        else
 233                                pgrp = find_vpid(who);
 234                        read_lock(&tasklist_lock);
 235                        do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
 236                                tmpio = get_task_ioprio(p);
 237                                if (tmpio < 0)
 238                                        continue;
 239                                if (ret == -ESRCH)
 240                                        ret = tmpio;
 241                                else
 242                                        ret = ioprio_best(ret, tmpio);
 243                        } while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
 244                        read_unlock(&tasklist_lock);
 245
 246                        break;
 247                case IOPRIO_WHO_USER:
 248                        uid = make_kuid(current_user_ns(), who);
 249                        if (!who)
 250                                user = current_user();
 251                        else
 252                                user = find_user(uid);
 253
 254                        if (!user)
 255                                break;
 256
 257                        for_each_process_thread(g, p) {
 258                                if (!uid_eq(task_uid(p), user->uid) ||
 259                                    !task_pid_vnr(p))
 260                                        continue;
 261                                tmpio = get_task_ioprio(p);
 262                                if (tmpio < 0)
 263                                        continue;
 264                                if (ret == -ESRCH)
 265                                        ret = tmpio;
 266                                else
 267                                        ret = ioprio_best(ret, tmpio);
 268                        }
 269
 270                        if (who)
 271                                free_uid(user);
 272                        break;
 273                default:
 274                        ret = -EINVAL;
 275        }
 276
 277        rcu_read_unlock();
 278        return ret;
 279}
 280