linux/security/device_cgroup.c
<<
>>
Prefs
   1/*
   2 * device_cgroup.c - device cgroup subsystem
   3 *
   4 * Copyright 2007 IBM Corp
   5 */
   6
   7#include <linux/device_cgroup.h>
   8#include <linux/cgroup.h>
   9#include <linux/ctype.h>
  10#include <linux/list.h>
  11#include <linux/uaccess.h>
  12#include <linux/seq_file.h>
  13#include <linux/slab.h>
  14#include <linux/rcupdate.h>
  15#include <linux/mutex.h>
  16
  /* Access bits stored in dev_exception_item.access (may be OR-ed together). */
  #define ACC_MKNOD 0x1
  #define ACC_READ  0x2
  #define ACC_WRITE 0x4
  #define ACC_MASK  (ACC_MKNOD | ACC_READ | ACC_WRITE)

  /* Device type values stored in dev_exception_item.type. */
  #define DEV_BLOCK 0x1
  #define DEV_CHAR  0x2
  #define DEV_ALL   0x4  /* this represents all devices */
  25
  /*
   * Exception list locking rules:
   *  - updates are made with devcgroup_mutex held;
   *  - readers hold either devcgroup_mutex or rcu_read_lock().
   */
  static DEFINE_MUTEX(devcgroup_mutex);
  33
  34struct dev_exception_item {
  35        u32 major, minor;
  36        short type;
  37        short access;
  38        struct list_head list;
  39        struct rcu_head rcu;
  40};
  41
  42struct dev_cgroup {
  43        struct cgroup_subsys_state css;
  44        struct list_head exceptions;
  45        enum {
  46                DEVCG_DEFAULT_ALLOW,
  47                DEVCG_DEFAULT_DENY,
  48        } behavior;
  49};
  50
  51static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
  52{
  53        return container_of(s, struct dev_cgroup, css);
  54}
  55
  56static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
  57{
  58        return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
  59}
  60
  61static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
  62{
  63        return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
  64}
  65
  66struct cgroup_subsys devices_subsys;
  67
  68static int devcgroup_can_attach(struct cgroup *new_cgrp,
  69                                struct cgroup_taskset *set)
  70{
  71        struct task_struct *task = cgroup_taskset_first(set);
  72
  73        if (current != task && !capable(CAP_SYS_ADMIN))
  74                return -EPERM;
  75        return 0;
  76}
  77
  78/*
  79 * called under devcgroup_mutex
  80 */
  81static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
  82{
  83        struct dev_exception_item *ex, *tmp, *new;
  84
  85        lockdep_assert_held(&devcgroup_mutex);
  86
  87        list_for_each_entry(ex, orig, list) {
  88                new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
  89                if (!new)
  90                        goto free_and_exit;
  91                list_add_tail(&new->list, dest);
  92        }
  93
  94        return 0;
  95
  96free_and_exit:
  97        list_for_each_entry_safe(ex, tmp, dest, list) {
  98                list_del(&ex->list);
  99                kfree(ex);
 100        }
 101        return -ENOMEM;
 102}
 103
 104/*
 105 * called under devcgroup_mutex
 106 */
 107static int dev_exception_add(struct dev_cgroup *dev_cgroup,
 108                             struct dev_exception_item *ex)
 109{
 110        struct dev_exception_item *excopy, *walk;
 111
 112        lockdep_assert_held(&devcgroup_mutex);
 113
 114        excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
 115        if (!excopy)
 116                return -ENOMEM;
 117
 118        list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
 119                if (walk->type != ex->type)
 120                        continue;
 121                if (walk->major != ex->major)
 122                        continue;
 123                if (walk->minor != ex->minor)
 124                        continue;
 125
 126                walk->access |= ex->access;
 127                kfree(excopy);
 128                excopy = NULL;
 129        }
 130
 131        if (excopy != NULL)
 132                list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
 133        return 0;
 134}
 135
 136/*
 137 * called under devcgroup_mutex
 138 */
 139static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
 140                             struct dev_exception_item *ex)
 141{
 142        struct dev_exception_item *walk, *tmp;
 143
 144        lockdep_assert_held(&devcgroup_mutex);
 145
 146        list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
 147                if (walk->type != ex->type)
 148                        continue;
 149                if (walk->major != ex->major)
 150                        continue;
 151                if (walk->minor != ex->minor)
 152                        continue;
 153
 154                walk->access &= ~ex->access;
 155                if (!walk->access) {
 156                        list_del_rcu(&walk->list);
 157                        kfree_rcu(walk, rcu);
 158                }
 159        }
 160}
 161
 162static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
 163{
 164        struct dev_exception_item *ex, *tmp;
 165
 166        list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) {
 167                list_del_rcu(&ex->list);
 168                kfree_rcu(ex, rcu);
 169        }
 170}
 171
 172/**
 173 * dev_exception_clean - frees all entries of the exception list
 174 * @dev_cgroup: dev_cgroup with the exception list to be cleaned
 175 *
 176 * called under devcgroup_mutex
 177 */
 178static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
 179{
 180        lockdep_assert_held(&devcgroup_mutex);
 181
 182        __dev_exception_clean(dev_cgroup);
 183}
 184
 185/*
 186 * called from kernel/cgroup.c with cgroup_lock() held.
 187 */
 188static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
 189{
 190        struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
 191        struct cgroup *parent_cgroup;
 192        int ret;
 193
 194        dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
 195        if (!dev_cgroup)
 196                return ERR_PTR(-ENOMEM);
 197        INIT_LIST_HEAD(&dev_cgroup->exceptions);
 198        parent_cgroup = cgroup->parent;
 199
 200        if (parent_cgroup == NULL)
 201                dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
 202        else {
 203                parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
 204                mutex_lock(&devcgroup_mutex);
 205                ret = dev_exceptions_copy(&dev_cgroup->exceptions,
 206                                          &parent_dev_cgroup->exceptions);
 207                dev_cgroup->behavior = parent_dev_cgroup->behavior;
 208                mutex_unlock(&devcgroup_mutex);
 209                if (ret) {
 210                        kfree(dev_cgroup);
 211                        return ERR_PTR(ret);
 212                }
 213        }
 214
 215        return &dev_cgroup->css;
 216}
 217
 /* Release the devices-controller state of @cgroup: exceptions first, then the state itself. */
 static void devcgroup_css_free(struct cgroup *cgroup)
 {
         struct dev_cgroup *devcg = cgroup_to_devcgroup(cgroup);

         __dev_exception_clean(devcg);
         kfree(devcg);
 }
 226
 /* Values of cftype.private identifying which control file is being accessed. */
 #define DEVCG_ALLOW     1
 #define DEVCG_DENY      2
 #define DEVCG_LIST      3

 /* Sizes of the text buffers used when listing exceptions. */
 #define MAJMINLEN       13
 #define ACCLEN          4
 233
 234static void set_access(char *acc, short access)
 235{
 236        int idx = 0;
 237        memset(acc, 0, ACCLEN);
 238        if (access & ACC_READ)
 239                acc[idx++] = 'r';
 240        if (access & ACC_WRITE)
 241                acc[idx++] = 'w';
 242        if (access & ACC_MKNOD)
 243                acc[idx++] = 'm';
 244}
 245
 246static char type_to_char(short type)
 247{
 248        if (type == DEV_ALL)
 249                return 'a';
 250        if (type == DEV_CHAR)
 251                return 'c';
 252        if (type == DEV_BLOCK)
 253                return 'b';
 254        return 'X';
 255}
 256
 /*
  * Write the textual form of major/minor number @m into @str: "*" when @m
  * is the wildcard value ~0, its unsigned decimal representation otherwise.
  * The caller supplies a buffer large enough for the result.
  */
 static void set_majmin(char *str, unsigned m)
 {
         if (m != ~0) {
                 sprintf(str, "%u", m);
                 return;
         }

         strcpy(str, "*");
 }
 264
 265static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
 266                                struct seq_file *m)
 267{
 268        struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
 269        struct dev_exception_item *ex;
 270        char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 271
 272        rcu_read_lock();
 273        /*
 274         * To preserve the compatibility:
 275         * - Only show the "all devices" when the default policy is to allow
 276         * - List the exceptions in case the default policy is to deny
 277         * This way, the file remains as a "whitelist of devices"
 278         */
 279        if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
 280                set_access(acc, ACC_MASK);
 281                set_majmin(maj, ~0);
 282                set_majmin(min, ~0);
 283                seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
 284                           maj, min, acc);
 285        } else {
 286                list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
 287                        set_access(acc, ex->access);
 288                        set_majmin(maj, ex->major);
 289                        set_majmin(min, ex->minor);
 290                        seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
 291                                   maj, min, acc);
 292                }
 293        }
 294        rcu_read_unlock();
 295
 296        return 0;
 297}
 298
 299/**
 300 * may_access - verifies if a new exception is part of what is allowed
 301 *              by a dev cgroup based on the default policy +
 302 *              exceptions. This is used to make sure a child cgroup
 303 *              won't have more privileges than its parent or to
 304 *              verify if a certain access is allowed.
 305 * @dev_cgroup: dev cgroup to be tested against
 306 * @refex: new exception
 307 */
 308static int may_access(struct dev_cgroup *dev_cgroup,
 309                      struct dev_exception_item *refex)
 310{
 311        struct dev_exception_item *ex;
 312        bool match = false;
 313
 314        rcu_lockdep_assert(rcu_read_lock_held() ||
 315                           lockdep_is_held(&devcgroup_mutex),
 316                           "device_cgroup::may_access() called without proper synchronization");
 317
 318        list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) {
 319                if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
 320                        continue;
 321                if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR))
 322                        continue;
 323                if (ex->major != ~0 && ex->major != refex->major)
 324                        continue;
 325                if (ex->minor != ~0 && ex->minor != refex->minor)
 326                        continue;
 327                if (refex->access & (~ex->access))
 328                        continue;
 329                match = true;
 330                break;
 331        }
 332
 333        /*
 334         * In two cases we'll consider this new exception valid:
 335         * - the dev cgroup has its default policy to allow + exception list:
 336         *   the new exception should *not* match any of the exceptions
 337         *   (behavior == DEVCG_DEFAULT_ALLOW, !match)
 338         * - the dev cgroup has its default policy to deny + exception list:
 339         *   the new exception *should* match the exceptions
 340         *   (behavior == DEVCG_DEFAULT_DENY, match)
 341         */
 342        if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match)
 343                return 1;
 344        return 0;
 345}
 346
 347/*
 348 * parent_has_perm:
 349 * when adding a new allow rule to a device exception list, the rule
 350 * must be allowed in the parent device
 351 */
 352static int parent_has_perm(struct dev_cgroup *childcg,
 353                                  struct dev_exception_item *ex)
 354{
 355        struct cgroup *pcg = childcg->css.cgroup->parent;
 356        struct dev_cgroup *parent;
 357
 358        if (!pcg)
 359                return 1;
 360        parent = cgroup_to_devcgroup(pcg);
 361        return may_access(parent, ex);
 362}
 363
 364/**
 365 * may_allow_all - checks if it's possible to change the behavior to
 366 *                 allow based on parent's rules.
 367 * @parent: device cgroup's parent
 368 * returns: != 0 in case it's allowed, 0 otherwise
 369 */
 370static inline int may_allow_all(struct dev_cgroup *parent)
 371{
 372        if (!parent)
 373                return 1;
 374        return parent->behavior == DEVCG_DEFAULT_ALLOW;
 375}
 376
 377/*
 378 * Modify the exception list using allow/deny rules.
 379 * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
 380 * so we can give a container CAP_MKNOD to let it create devices but not
 381 * modify the exception list.
 382 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 383 * us to also grant CAP_SYS_ADMIN to containers without giving away the
 384 * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
 385 *
 386 * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
 387 * new access is only allowed if you're in the top-level cgroup, or your
 388 * parent cgroup has the access you're asking for.
 389 */
 390static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 391                                   int filetype, const char *buffer)
 392{
 393        const char *b;
 394        char temp[12];          /* 11 + 1 characters needed for a u32 */
 395        int count, rc;
 396        struct dev_exception_item ex;
 397        struct cgroup *p = devcgroup->css.cgroup;
 398        struct dev_cgroup *parent = NULL;
 399
 400        if (!capable(CAP_SYS_ADMIN))
 401                return -EPERM;
 402
 403        if (p->parent)
 404                parent = cgroup_to_devcgroup(p->parent);
 405
 406        memset(&ex, 0, sizeof(ex));
 407        b = buffer;
 408
 409        switch (*b) {
 410        case 'a':
 411                switch (filetype) {
 412                case DEVCG_ALLOW:
 413                        if (!may_allow_all(parent))
 414                                return -EPERM;
 415                        dev_exception_clean(devcgroup);
 416                        devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
 417                        if (!parent)
 418                                break;
 419
 420                        rc = dev_exceptions_copy(&devcgroup->exceptions,
 421                                                 &parent->exceptions);
 422                        if (rc)
 423                                return rc;
 424                        break;
 425                case DEVCG_DENY:
 426                        dev_exception_clean(devcgroup);
 427                        devcgroup->behavior = DEVCG_DEFAULT_DENY;
 428                        break;
 429                default:
 430                        return -EINVAL;
 431                }
 432                return 0;
 433        case 'b':
 434                ex.type = DEV_BLOCK;
 435                break;
 436        case 'c':
 437                ex.type = DEV_CHAR;
 438                break;
 439        default:
 440                return -EINVAL;
 441        }
 442        b++;
 443        if (!isspace(*b))
 444                return -EINVAL;
 445        b++;
 446        if (*b == '*') {
 447                ex.major = ~0;
 448                b++;
 449        } else if (isdigit(*b)) {
 450                memset(temp, 0, sizeof(temp));
 451                for (count = 0; count < sizeof(temp) - 1; count++) {
 452                        temp[count] = *b;
 453                        b++;
 454                        if (!isdigit(*b))
 455                                break;
 456                }
 457                rc = kstrtou32(temp, 10, &ex.major);
 458                if (rc)
 459                        return -EINVAL;
 460        } else {
 461                return -EINVAL;
 462        }
 463        if (*b != ':')
 464                return -EINVAL;
 465        b++;
 466
 467        /* read minor */
 468        if (*b == '*') {
 469                ex.minor = ~0;
 470                b++;
 471        } else if (isdigit(*b)) {
 472                memset(temp, 0, sizeof(temp));
 473                for (count = 0; count < sizeof(temp) - 1; count++) {
 474                        temp[count] = *b;
 475                        b++;
 476                        if (!isdigit(*b))
 477                                break;
 478                }
 479                rc = kstrtou32(temp, 10, &ex.minor);
 480                if (rc)
 481                        return -EINVAL;
 482        } else {
 483                return -EINVAL;
 484        }
 485        if (!isspace(*b))
 486                return -EINVAL;
 487        for (b++, count = 0; count < 3; count++, b++) {
 488                switch (*b) {
 489                case 'r':
 490                        ex.access |= ACC_READ;
 491                        break;
 492                case 'w':
 493                        ex.access |= ACC_WRITE;
 494                        break;
 495                case 'm':
 496                        ex.access |= ACC_MKNOD;
 497                        break;
 498                case '\n':
 499                case '\0':
 500                        count = 3;
 501                        break;
 502                default:
 503                        return -EINVAL;
 504                }
 505        }
 506
 507        switch (filetype) {
 508        case DEVCG_ALLOW:
 509                if (!parent_has_perm(devcgroup, &ex))
 510                        return -EPERM;
 511                /*
 512                 * If the default policy is to allow by default, try to remove
 513                 * an matching exception instead. And be silent about it: we
 514                 * don't want to break compatibility
 515                 */
 516                if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
 517                        dev_exception_rm(devcgroup, &ex);
 518                        return 0;
 519                }
 520                return dev_exception_add(devcgroup, &ex);
 521        case DEVCG_DENY:
 522                /*
 523                 * If the default policy is to deny by default, try to remove
 524                 * an matching exception instead. And be silent about it: we
 525                 * don't want to break compatibility
 526                 */
 527                if (devcgroup->behavior == DEVCG_DEFAULT_DENY) {
 528                        dev_exception_rm(devcgroup, &ex);
 529                        return 0;
 530                }
 531                return dev_exception_add(devcgroup, &ex);
 532        default:
 533                return -EINVAL;
 534        }
 535        return 0;
 536}
 537
 538static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
 539                                  const char *buffer)
 540{
 541        int retval;
 542
 543        mutex_lock(&devcgroup_mutex);
 544        retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
 545                                         cft->private, buffer);
 546        mutex_unlock(&devcgroup_mutex);
 547        return retval;
 548}
 549
 550static struct cftype dev_cgroup_files[] = {
 551        {
 552                .name = "allow",
 553                .write_string  = devcgroup_access_write,
 554                .private = DEVCG_ALLOW,
 555        },
 556        {
 557                .name = "deny",
 558                .write_string = devcgroup_access_write,
 559                .private = DEVCG_DENY,
 560        },
 561        {
 562                .name = "list",
 563                .read_seq_string = devcgroup_seq_read,
 564                .private = DEVCG_LIST,
 565        },
 566        { }     /* terminate */
 567};
 568
 569struct cgroup_subsys devices_subsys = {
 570        .name = "devices",
 571        .can_attach = devcgroup_can_attach,
 572        .css_alloc = devcgroup_css_alloc,
 573        .css_free = devcgroup_css_free,
 574        .subsys_id = devices_subsys_id,
 575        .base_cftypes = dev_cgroup_files,
 576
 577        /*
 578         * While devices cgroup has the rudimentary hierarchy support which
 579         * checks the parent's restriction, it doesn't properly propagates
 580         * config changes in ancestors to their descendents.  A child
 581         * should only be allowed to add more restrictions to the parent's
 582         * configuration.  Fix it and remove the following.
 583         */
 584        .broken_hierarchy = true,
 585};
 586
 587/**
 588 * __devcgroup_check_permission - checks if an inode operation is permitted
 589 * @dev_cgroup: the dev cgroup to be tested against
 590 * @type: device type
 591 * @major: device major number
 592 * @minor: device minor number
 593 * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
 594 *
 595 * returns 0 on success, -EPERM case the operation is not permitted
 596 */
 597static int __devcgroup_check_permission(short type, u32 major, u32 minor,
 598                                        short access)
 599{
 600        struct dev_cgroup *dev_cgroup;
 601        struct dev_exception_item ex;
 602        int rc;
 603
 604        memset(&ex, 0, sizeof(ex));
 605        ex.type = type;
 606        ex.major = major;
 607        ex.minor = minor;
 608        ex.access = access;
 609
 610        rcu_read_lock();
 611        dev_cgroup = task_devcgroup(current);
 612        rc = may_access(dev_cgroup, &ex);
 613        rcu_read_unlock();
 614
 615        if (!rc)
 616                return -EPERM;
 617
 618        return 0;
 619}
 620
 621int __devcgroup_inode_permission(struct inode *inode, int mask)
 622{
 623        short type, access = 0;
 624
 625        if (S_ISBLK(inode->i_mode))
 626                type = DEV_BLOCK;
 627        if (S_ISCHR(inode->i_mode))
 628                type = DEV_CHAR;
 629        if (mask & MAY_WRITE)
 630                access |= ACC_WRITE;
 631        if (mask & MAY_READ)
 632                access |= ACC_READ;
 633
 634        return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
 635                        access);
 636}
 637
 638int devcgroup_inode_mknod(int mode, dev_t dev)
 639{
 640        short type;
 641
 642        if (!S_ISBLK(mode) && !S_ISCHR(mode))
 643                return 0;
 644
 645        if (S_ISBLK(mode))
 646                type = DEV_BLOCK;
 647        else
 648                type = DEV_CHAR;
 649
 650        return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
 651                        ACC_MKNOD);
 652
 653}
 654
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.