linux/security/device_cgroup.c
<<
>>
Prefs
   1/*
   2 * device_cgroup.c - device cgroup subsystem
   3 *
   4 * Copyright 2007 IBM Corp
   5 */
   6
   7#include <linux/device_cgroup.h>
   8#include <linux/cgroup.h>
   9#include <linux/ctype.h>
  10#include <linux/list.h>
  11#include <linux/uaccess.h>
  12#include <linux/seq_file.h>
  13#include <linux/slab.h>
  14#include <linux/rcupdate.h>
  15#include <linux/mutex.h>
  16
  17#define ACC_MKNOD 1
  18#define ACC_READ  2
  19#define ACC_WRITE 4
  20#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)
  21
  22#define DEV_BLOCK 1
  23#define DEV_CHAR  2
  24#define DEV_ALL   4  /* this represents all devices */
  25
  26static DEFINE_MUTEX(devcgroup_mutex);
  27
  28/*
  29 * exception list locking rules:
  30 * hold devcgroup_mutex for update/read.
  31 * hold rcu_read_lock() for read.
  32 */
  33
  34struct dev_exception_item {
  35        u32 major, minor;
  36        short type;
  37        short access;
  38        struct list_head list;
  39        struct rcu_head rcu;
  40};
  41
  42struct dev_cgroup {
  43        struct cgroup_subsys_state css;
  44        struct list_head exceptions;
  45        enum {
  46                DEVCG_DEFAULT_ALLOW,
  47                DEVCG_DEFAULT_DENY,
  48        } behavior;
  49};
  50
  51static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
  52{
  53        return container_of(s, struct dev_cgroup, css);
  54}
  55
  56static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
  57{
  58        return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
  59}
  60
  61static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
  62{
  63        return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
  64}
  65
  66struct cgroup_subsys devices_subsys;
  67
  68static int devcgroup_can_attach(struct cgroup *new_cgrp,
  69                                struct cgroup_taskset *set)
  70{
  71        struct task_struct *task = cgroup_taskset_first(set);
  72
  73        if (current != task && !capable(CAP_SYS_ADMIN))
  74                return -EPERM;
  75        return 0;
  76}
  77
  78/*
  79 * called under devcgroup_mutex
  80 */
  81static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
  82{
  83        struct dev_exception_item *ex, *tmp, *new;
  84
  85        lockdep_assert_held(&devcgroup_mutex);
  86
  87        list_for_each_entry(ex, orig, list) {
  88                new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
  89                if (!new)
  90                        goto free_and_exit;
  91                list_add_tail(&new->list, dest);
  92        }
  93
  94        return 0;
  95
  96free_and_exit:
  97        list_for_each_entry_safe(ex, tmp, dest, list) {
  98                list_del(&ex->list);
  99                kfree(ex);
 100        }
 101        return -ENOMEM;
 102}
 103
 104/*
 105 * called under devcgroup_mutex
 106 */
 107static int dev_exception_add(struct dev_cgroup *dev_cgroup,
 108                             struct dev_exception_item *ex)
 109{
 110        struct dev_exception_item *excopy, *walk;
 111
 112        lockdep_assert_held(&devcgroup_mutex);
 113
 114        excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
 115        if (!excopy)
 116                return -ENOMEM;
 117
 118        list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
 119                if (walk->type != ex->type)
 120                        continue;
 121                if (walk->major != ex->major)
 122                        continue;
 123                if (walk->minor != ex->minor)
 124                        continue;
 125
 126                walk->access |= ex->access;
 127                kfree(excopy);
 128                excopy = NULL;
 129        }
 130
 131        if (excopy != NULL)
 132                list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
 133        return 0;
 134}
 135
 136/*
 137 * called under devcgroup_mutex
 138 */
 139static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
 140                             struct dev_exception_item *ex)
 141{
 142        struct dev_exception_item *walk, *tmp;
 143
 144        lockdep_assert_held(&devcgroup_mutex);
 145
 146        list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
 147                if (walk->type != ex->type)
 148                        continue;
 149                if (walk->major != ex->major)
 150                        continue;
 151                if (walk->minor != ex->minor)
 152                        continue;
 153
 154                walk->access &= ~ex->access;
 155                if (!walk->access) {
 156                        list_del_rcu(&walk->list);
 157                        kfree_rcu(walk, rcu);
 158                }
 159        }
 160}
 161
 162/**
 163 * dev_exception_clean - frees all entries of the exception list
 164 * @dev_cgroup: dev_cgroup with the exception list to be cleaned
 165 *
 166 * called under devcgroup_mutex
 167 */
 168static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
 169{
 170        struct dev_exception_item *ex, *tmp;
 171
 172        lockdep_assert_held(&devcgroup_mutex);
 173
 174        list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) {
 175                list_del_rcu(&ex->list);
 176                kfree_rcu(ex, rcu);
 177        }
 178}
 179
 180/*
 181 * called from kernel/cgroup.c with cgroup_lock() held.
 182 */
 183static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
 184{
 185        struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
 186        struct cgroup *parent_cgroup;
 187        int ret;
 188
 189        dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
 190        if (!dev_cgroup)
 191                return ERR_PTR(-ENOMEM);
 192        INIT_LIST_HEAD(&dev_cgroup->exceptions);
 193        parent_cgroup = cgroup->parent;
 194
 195        if (parent_cgroup == NULL)
 196                dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
 197        else {
 198                parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
 199                mutex_lock(&devcgroup_mutex);
 200                ret = dev_exceptions_copy(&dev_cgroup->exceptions,
 201                                          &parent_dev_cgroup->exceptions);
 202                dev_cgroup->behavior = parent_dev_cgroup->behavior;
 203                mutex_unlock(&devcgroup_mutex);
 204                if (ret) {
 205                        kfree(dev_cgroup);
 206                        return ERR_PTR(ret);
 207                }
 208        }
 209
 210        return &dev_cgroup->css;
 211}
 212
 213static void devcgroup_css_free(struct cgroup *cgroup)
 214{
 215        struct dev_cgroup *dev_cgroup;
 216
 217        dev_cgroup = cgroup_to_devcgroup(cgroup);
 218        mutex_lock(&devcgroup_mutex);
 219        dev_exception_clean(dev_cgroup);
 220        mutex_unlock(&devcgroup_mutex);
 221        kfree(dev_cgroup);
 222}
 223
 224#define DEVCG_ALLOW 1
 225#define DEVCG_DENY 2
 226#define DEVCG_LIST 3
 227
 228#define MAJMINLEN 13
 229#define ACCLEN 4
 230
 231static void set_access(char *acc, short access)
 232{
 233        int idx = 0;
 234        memset(acc, 0, ACCLEN);
 235        if (access & ACC_READ)
 236                acc[idx++] = 'r';
 237        if (access & ACC_WRITE)
 238                acc[idx++] = 'w';
 239        if (access & ACC_MKNOD)
 240                acc[idx++] = 'm';
 241}
 242
 243static char type_to_char(short type)
 244{
 245        if (type == DEV_ALL)
 246                return 'a';
 247        if (type == DEV_CHAR)
 248                return 'c';
 249        if (type == DEV_BLOCK)
 250                return 'b';
 251        return 'X';
 252}
 253
 254static void set_majmin(char *str, unsigned m)
 255{
 256        if (m == ~0)
 257                strcpy(str, "*");
 258        else
 259                sprintf(str, "%u", m);
 260}
 261
 262static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
 263                                struct seq_file *m)
 264{
 265        struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
 266        struct dev_exception_item *ex;
 267        char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 268
 269        rcu_read_lock();
 270        /*
 271         * To preserve the compatibility:
 272         * - Only show the "all devices" when the default policy is to allow
 273         * - List the exceptions in case the default policy is to deny
 274         * This way, the file remains as a "whitelist of devices"
 275         */
 276        if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
 277                set_access(acc, ACC_MASK);
 278                set_majmin(maj, ~0);
 279                set_majmin(min, ~0);
 280                seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
 281                           maj, min, acc);
 282        } else {
 283                list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
 284                        set_access(acc, ex->access);
 285                        set_majmin(maj, ex->major);
 286                        set_majmin(min, ex->minor);
 287                        seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
 288                                   maj, min, acc);
 289                }
 290        }
 291        rcu_read_unlock();
 292
 293        return 0;
 294}
 295
 296/**
 297 * may_access - verifies if a new exception is part of what is allowed
 298 *              by a dev cgroup based on the default policy +
 299 *              exceptions. This is used to make sure a child cgroup
 300 *              won't have more privileges than its parent or to
 301 *              verify if a certain access is allowed.
 302 * @dev_cgroup: dev cgroup to be tested against
 303 * @refex: new exception
 304 */
 305static int may_access(struct dev_cgroup *dev_cgroup,
 306                      struct dev_exception_item *refex)
 307{
 308        struct dev_exception_item *ex;
 309        bool match = false;
 310
 311        rcu_lockdep_assert(rcu_read_lock_held() ||
 312                           lockdep_is_held(&devcgroup_mutex),
 313                           "device_cgroup::may_access() called without proper synchronization");
 314
 315        list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) {
 316                if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
 317                        continue;
 318                if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR))
 319                        continue;
 320                if (ex->major != ~0 && ex->major != refex->major)
 321                        continue;
 322                if (ex->minor != ~0 && ex->minor != refex->minor)
 323                        continue;
 324                if (refex->access & (~ex->access))
 325                        continue;
 326                match = true;
 327                break;
 328        }
 329
 330        /*
 331         * In two cases we'll consider this new exception valid:
 332         * - the dev cgroup has its default policy to allow + exception list:
 333         *   the new exception should *not* match any of the exceptions
 334         *   (behavior == DEVCG_DEFAULT_ALLOW, !match)
 335         * - the dev cgroup has its default policy to deny + exception list:
 336         *   the new exception *should* match the exceptions
 337         *   (behavior == DEVCG_DEFAULT_DENY, match)
 338         */
 339        if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match)
 340                return 1;
 341        return 0;
 342}
 343
 344/*
 345 * parent_has_perm:
 346 * when adding a new allow rule to a device exception list, the rule
 347 * must be allowed in the parent device
 348 */
 349static int parent_has_perm(struct dev_cgroup *childcg,
 350                                  struct dev_exception_item *ex)
 351{
 352        struct cgroup *pcg = childcg->css.cgroup->parent;
 353        struct dev_cgroup *parent;
 354
 355        if (!pcg)
 356                return 1;
 357        parent = cgroup_to_devcgroup(pcg);
 358        return may_access(parent, ex);
 359}
 360
 361/**
 362 * may_allow_all - checks if it's possible to change the behavior to
 363 *                 allow based on parent's rules.
 364 * @parent: device cgroup's parent
 365 * returns: != 0 in case it's allowed, 0 otherwise
 366 */
 367static inline int may_allow_all(struct dev_cgroup *parent)
 368{
 369        if (!parent)
 370                return 1;
 371        return parent->behavior == DEVCG_DEFAULT_ALLOW;
 372}
 373
 374/*
 375 * Modify the exception list using allow/deny rules.
 376 * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
 377 * so we can give a container CAP_MKNOD to let it create devices but not
 378 * modify the exception list.
 379 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 380 * us to also grant CAP_SYS_ADMIN to containers without giving away the
 381 * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
 382 *
 383 * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
 384 * new access is only allowed if you're in the top-level cgroup, or your
 385 * parent cgroup has the access you're asking for.
 386 */
 387static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 388                                   int filetype, const char *buffer)
 389{
 390        const char *b;
 391        char temp[12];          /* 11 + 1 characters needed for a u32 */
 392        int count, rc;
 393        struct dev_exception_item ex;
 394        struct cgroup *p = devcgroup->css.cgroup;
 395        struct dev_cgroup *parent = NULL;
 396
 397        if (!capable(CAP_SYS_ADMIN))
 398                return -EPERM;
 399
 400        if (p->parent)
 401                parent = cgroup_to_devcgroup(p->parent);
 402
 403        memset(&ex, 0, sizeof(ex));
 404        b = buffer;
 405
 406        switch (*b) {
 407        case 'a':
 408                switch (filetype) {
 409                case DEVCG_ALLOW:
 410                        if (!may_allow_all(parent))
 411                                return -EPERM;
 412                        dev_exception_clean(devcgroup);
 413                        devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
 414                        if (!parent)
 415                                break;
 416
 417                        rc = dev_exceptions_copy(&devcgroup->exceptions,
 418                                                 &parent->exceptions);
 419                        if (rc)
 420                                return rc;
 421                        break;
 422                case DEVCG_DENY:
 423                        dev_exception_clean(devcgroup);
 424                        devcgroup->behavior = DEVCG_DEFAULT_DENY;
 425                        break;
 426                default:
 427                        return -EINVAL;
 428                }
 429                return 0;
 430        case 'b':
 431                ex.type = DEV_BLOCK;
 432                break;
 433        case 'c':
 434                ex.type = DEV_CHAR;
 435                break;
 436        default:
 437                return -EINVAL;
 438        }
 439        b++;
 440        if (!isspace(*b))
 441                return -EINVAL;
 442        b++;
 443        if (*b == '*') {
 444                ex.major = ~0;
 445                b++;
 446        } else if (isdigit(*b)) {
 447                memset(temp, 0, sizeof(temp));
 448                for (count = 0; count < sizeof(temp) - 1; count++) {
 449                        temp[count] = *b;
 450                        b++;
 451                        if (!isdigit(*b))
 452                                break;
 453                }
 454                rc = kstrtou32(temp, 10, &ex.major);
 455                if (rc)
 456                        return -EINVAL;
 457        } else {
 458                return -EINVAL;
 459        }
 460        if (*b != ':')
 461                return -EINVAL;
 462        b++;
 463
 464        /* read minor */
 465        if (*b == '*') {
 466                ex.minor = ~0;
 467                b++;
 468        } else if (isdigit(*b)) {
 469                memset(temp, 0, sizeof(temp));
 470                for (count = 0; count < sizeof(temp) - 1; count++) {
 471                        temp[count] = *b;
 472                        b++;
 473                        if (!isdigit(*b))
 474                                break;
 475                }
 476                rc = kstrtou32(temp, 10, &ex.minor);
 477                if (rc)
 478                        return -EINVAL;
 479        } else {
 480                return -EINVAL;
 481        }
 482        if (!isspace(*b))
 483                return -EINVAL;
 484        for (b++, count = 0; count < 3; count++, b++) {
 485                switch (*b) {
 486                case 'r':
 487                        ex.access |= ACC_READ;
 488                        break;
 489                case 'w':
 490                        ex.access |= ACC_WRITE;
 491                        break;
 492                case 'm':
 493                        ex.access |= ACC_MKNOD;
 494                        break;
 495                case '\n':
 496                case '\0':
 497                        count = 3;
 498                        break;
 499                default:
 500                        return -EINVAL;
 501                }
 502        }
 503
 504        switch (filetype) {
 505        case DEVCG_ALLOW:
 506                if (!parent_has_perm(devcgroup, &ex))
 507                        return -EPERM;
 508                /*
 509                 * If the default policy is to allow by default, try to remove
 510                 * an matching exception instead. And be silent about it: we
 511                 * don't want to break compatibility
 512                 */
 513                if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
 514                        dev_exception_rm(devcgroup, &ex);
 515                        return 0;
 516                }
 517                return dev_exception_add(devcgroup, &ex);
 518        case DEVCG_DENY:
 519                /*
 520                 * If the default policy is to deny by default, try to remove
 521                 * an matching exception instead. And be silent about it: we
 522                 * don't want to break compatibility
 523                 */
 524                if (devcgroup->behavior == DEVCG_DEFAULT_DENY) {
 525                        dev_exception_rm(devcgroup, &ex);
 526                        return 0;
 527                }
 528                return dev_exception_add(devcgroup, &ex);
 529        default:
 530                return -EINVAL;
 531        }
 532        return 0;
 533}
 534
 535static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
 536                                  const char *buffer)
 537{
 538        int retval;
 539
 540        mutex_lock(&devcgroup_mutex);
 541        retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
 542                                         cft->private, buffer);
 543        mutex_unlock(&devcgroup_mutex);
 544        return retval;
 545}
 546
 547static struct cftype dev_cgroup_files[] = {
 548        {
 549                .name = "allow",
 550                .write_string  = devcgroup_access_write,
 551                .private = DEVCG_ALLOW,
 552        },
 553        {
 554                .name = "deny",
 555                .write_string = devcgroup_access_write,
 556                .private = DEVCG_DENY,
 557        },
 558        {
 559                .name = "list",
 560                .read_seq_string = devcgroup_seq_read,
 561                .private = DEVCG_LIST,
 562        },
 563        { }     /* terminate */
 564};
 565
 566struct cgroup_subsys devices_subsys = {
 567        .name = "devices",
 568        .can_attach = devcgroup_can_attach,
 569        .css_alloc = devcgroup_css_alloc,
 570        .css_free = devcgroup_css_free,
 571        .subsys_id = devices_subsys_id,
 572        .base_cftypes = dev_cgroup_files,
 573
 574        /*
 575         * While devices cgroup has the rudimentary hierarchy support which
 576         * checks the parent's restriction, it doesn't properly propagates
 577         * config changes in ancestors to their descendents.  A child
 578         * should only be allowed to add more restrictions to the parent's
 579         * configuration.  Fix it and remove the following.
 580         */
 581        .broken_hierarchy = true,
 582};
 583
 584/**
 585 * __devcgroup_check_permission - checks if an inode operation is permitted
 586 * @dev_cgroup: the dev cgroup to be tested against
 587 * @type: device type
 588 * @major: device major number
 589 * @minor: device minor number
 590 * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
 591 *
 592 * returns 0 on success, -EPERM case the operation is not permitted
 593 */
 594static int __devcgroup_check_permission(short type, u32 major, u32 minor,
 595                                        short access)
 596{
 597        struct dev_cgroup *dev_cgroup;
 598        struct dev_exception_item ex;
 599        int rc;
 600
 601        memset(&ex, 0, sizeof(ex));
 602        ex.type = type;
 603        ex.major = major;
 604        ex.minor = minor;
 605        ex.access = access;
 606
 607        rcu_read_lock();
 608        dev_cgroup = task_devcgroup(current);
 609        rc = may_access(dev_cgroup, &ex);
 610        rcu_read_unlock();
 611
 612        if (!rc)
 613                return -EPERM;
 614
 615        return 0;
 616}
 617
 618int __devcgroup_inode_permission(struct inode *inode, int mask)
 619{
 620        short type, access = 0;
 621
 622        if (S_ISBLK(inode->i_mode))
 623                type = DEV_BLOCK;
 624        if (S_ISCHR(inode->i_mode))
 625                type = DEV_CHAR;
 626        if (mask & MAY_WRITE)
 627                access |= ACC_WRITE;
 628        if (mask & MAY_READ)
 629                access |= ACC_READ;
 630
 631        return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
 632                        access);
 633}
 634
 635int devcgroup_inode_mknod(int mode, dev_t dev)
 636{
 637        short type;
 638
 639        if (!S_ISBLK(mode) && !S_ISCHR(mode))
 640                return 0;
 641
 642        if (S_ISBLK(mode))
 643                type = DEV_BLOCK;
 644        else
 645                type = DEV_CHAR;
 646
 647        return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
 648                        ACC_MKNOD);
 649
 650}
 651