linux/security/device_cgroup.c
<<
>>
Prefs
   1/*
   2 * device_cgroup.c - device cgroup subsystem
   3 *
   4 * Copyright 2007 IBM Corp
   5 */
   6
   7#include <linux/device_cgroup.h>
   8#include <linux/cgroup.h>
   9#include <linux/ctype.h>
  10#include <linux/list.h>
  11#include <linux/uaccess.h>
  12#include <linux/seq_file.h>
  13#include <linux/slab.h>
  14#include <linux/rcupdate.h>
  15#include <linux/mutex.h>
  16
  17#define ACC_MKNOD 1
  18#define ACC_READ  2
  19#define ACC_WRITE 4
  20#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)
  21
  22#define DEV_BLOCK 1
  23#define DEV_CHAR  2
  24#define DEV_ALL   4  /* this represents all devices */
  25
  26static DEFINE_MUTEX(devcgroup_mutex);
  27
  28/*
  29 * exception list locking rules:
  30 * hold devcgroup_mutex for update/read.
  31 * hold rcu_read_lock() for read.
  32 */
  33
  34struct dev_exception_item {
  35        u32 major, minor;
  36        short type;
  37        short access;
  38        struct list_head list;
  39        struct rcu_head rcu;
  40};
  41
  42struct dev_cgroup {
  43        struct cgroup_subsys_state css;
  44        struct list_head exceptions;
  45        enum {
  46                DEVCG_DEFAULT_ALLOW,
  47                DEVCG_DEFAULT_DENY,
  48        } behavior;
  49};
  50
  51static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
  52{
  53        return container_of(s, struct dev_cgroup, css);
  54}
  55
  56static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
  57{
  58        return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
  59}
  60
  61static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
  62{
  63        return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
  64}
  65
  66struct cgroup_subsys devices_subsys;
  67
  68static int devcgroup_can_attach(struct cgroup *new_cgrp,
  69                                struct cgroup_taskset *set)
  70{
  71        struct task_struct *task = cgroup_taskset_first(set);
  72
  73        if (current != task && !capable(CAP_SYS_ADMIN))
  74                return -EPERM;
  75        return 0;
  76}
  77
  78/*
  79 * called under devcgroup_mutex
  80 */
  81static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
  82{
  83        struct dev_exception_item *ex, *tmp, *new;
  84
  85        list_for_each_entry(ex, orig, list) {
  86                new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
  87                if (!new)
  88                        goto free_and_exit;
  89                list_add_tail(&new->list, dest);
  90        }
  91
  92        return 0;
  93
  94free_and_exit:
  95        list_for_each_entry_safe(ex, tmp, dest, list) {
  96                list_del(&ex->list);
  97                kfree(ex);
  98        }
  99        return -ENOMEM;
 100}
 101
 102/*
 103 * called under devcgroup_mutex
 104 */
 105static int dev_exception_add(struct dev_cgroup *dev_cgroup,
 106                             struct dev_exception_item *ex)
 107{
 108        struct dev_exception_item *excopy, *walk;
 109
 110        excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
 111        if (!excopy)
 112                return -ENOMEM;
 113
 114        list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
 115                if (walk->type != ex->type)
 116                        continue;
 117                if (walk->major != ex->major)
 118                        continue;
 119                if (walk->minor != ex->minor)
 120                        continue;
 121
 122                walk->access |= ex->access;
 123                kfree(excopy);
 124                excopy = NULL;
 125        }
 126
 127        if (excopy != NULL)
 128                list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
 129        return 0;
 130}
 131
 132/*
 133 * called under devcgroup_mutex
 134 */
 135static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
 136                             struct dev_exception_item *ex)
 137{
 138        struct dev_exception_item *walk, *tmp;
 139
 140        list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
 141                if (walk->type != ex->type)
 142                        continue;
 143                if (walk->major != ex->major)
 144                        continue;
 145                if (walk->minor != ex->minor)
 146                        continue;
 147
 148                walk->access &= ~ex->access;
 149                if (!walk->access) {
 150                        list_del_rcu(&walk->list);
 151                        kfree_rcu(walk, rcu);
 152                }
 153        }
 154}
 155
 156/**
 157 * dev_exception_clean - frees all entries of the exception list
 158 * @dev_cgroup: dev_cgroup with the exception list to be cleaned
 159 *
 160 * called under devcgroup_mutex
 161 */
 162static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
 163{
 164        struct dev_exception_item *ex, *tmp;
 165
 166        list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) {
 167                list_del_rcu(&ex->list);
 168                kfree_rcu(ex, rcu);
 169        }
 170}
 171
 172/*
 173 * called from kernel/cgroup.c with cgroup_lock() held.
 174 */
 175static struct cgroup_subsys_state *devcgroup_create(struct cgroup *cgroup)
 176{
 177        struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
 178        struct cgroup *parent_cgroup;
 179        int ret;
 180
 181        dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
 182        if (!dev_cgroup)
 183                return ERR_PTR(-ENOMEM);
 184        INIT_LIST_HEAD(&dev_cgroup->exceptions);
 185        parent_cgroup = cgroup->parent;
 186
 187        if (parent_cgroup == NULL)
 188                dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
 189        else {
 190                parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
 191                mutex_lock(&devcgroup_mutex);
 192                ret = dev_exceptions_copy(&dev_cgroup->exceptions,
 193                                          &parent_dev_cgroup->exceptions);
 194                dev_cgroup->behavior = parent_dev_cgroup->behavior;
 195                mutex_unlock(&devcgroup_mutex);
 196                if (ret) {
 197                        kfree(dev_cgroup);
 198                        return ERR_PTR(ret);
 199                }
 200        }
 201
 202        return &dev_cgroup->css;
 203}
 204
 205static void devcgroup_destroy(struct cgroup *cgroup)
 206{
 207        struct dev_cgroup *dev_cgroup;
 208
 209        dev_cgroup = cgroup_to_devcgroup(cgroup);
 210        dev_exception_clean(dev_cgroup);
 211        kfree(dev_cgroup);
 212}
 213
 214#define DEVCG_ALLOW 1
 215#define DEVCG_DENY 2
 216#define DEVCG_LIST 3
 217
 218#define MAJMINLEN 13
 219#define ACCLEN 4
 220
 221static void set_access(char *acc, short access)
 222{
 223        int idx = 0;
 224        memset(acc, 0, ACCLEN);
 225        if (access & ACC_READ)
 226                acc[idx++] = 'r';
 227        if (access & ACC_WRITE)
 228                acc[idx++] = 'w';
 229        if (access & ACC_MKNOD)
 230                acc[idx++] = 'm';
 231}
 232
 233static char type_to_char(short type)
 234{
 235        if (type == DEV_ALL)
 236                return 'a';
 237        if (type == DEV_CHAR)
 238                return 'c';
 239        if (type == DEV_BLOCK)
 240                return 'b';
 241        return 'X';
 242}
 243
 244static void set_majmin(char *str, unsigned m)
 245{
 246        if (m == ~0)
 247                strcpy(str, "*");
 248        else
 249                sprintf(str, "%u", m);
 250}
 251
 252static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
 253                                struct seq_file *m)
 254{
 255        struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
 256        struct dev_exception_item *ex;
 257        char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 258
 259        rcu_read_lock();
 260        /*
 261         * To preserve the compatibility:
 262         * - Only show the "all devices" when the default policy is to allow
 263         * - List the exceptions in case the default policy is to deny
 264         * This way, the file remains as a "whitelist of devices"
 265         */
 266        if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
 267                set_access(acc, ACC_MASK);
 268                set_majmin(maj, ~0);
 269                set_majmin(min, ~0);
 270                seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
 271                           maj, min, acc);
 272        } else {
 273                list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
 274                        set_access(acc, ex->access);
 275                        set_majmin(maj, ex->major);
 276                        set_majmin(min, ex->minor);
 277                        seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
 278                                   maj, min, acc);
 279                }
 280        }
 281        rcu_read_unlock();
 282
 283        return 0;
 284}
 285
 286/**
 287 * may_access - verifies if a new exception is part of what is allowed
 288 *              by a dev cgroup based on the default policy +
 289 *              exceptions. This is used to make sure a child cgroup
 290 *              won't have more privileges than its parent or to
 291 *              verify if a certain access is allowed.
 292 * @dev_cgroup: dev cgroup to be tested against
 293 * @refex: new exception
 294 */
 295static int may_access(struct dev_cgroup *dev_cgroup,
 296                      struct dev_exception_item *refex)
 297{
 298        struct dev_exception_item *ex;
 299        bool match = false;
 300
 301        list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) {
 302                if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
 303                        continue;
 304                if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR))
 305                        continue;
 306                if (ex->major != ~0 && ex->major != refex->major)
 307                        continue;
 308                if (ex->minor != ~0 && ex->minor != refex->minor)
 309                        continue;
 310                if (refex->access & (~ex->access))
 311                        continue;
 312                match = true;
 313                break;
 314        }
 315
 316        /*
 317         * In two cases we'll consider this new exception valid:
 318         * - the dev cgroup has its default policy to allow + exception list:
 319         *   the new exception should *not* match any of the exceptions
 320         *   (behavior == DEVCG_DEFAULT_ALLOW, !match)
 321         * - the dev cgroup has its default policy to deny + exception list:
 322         *   the new exception *should* match the exceptions
 323         *   (behavior == DEVCG_DEFAULT_DENY, match)
 324         */
 325        if ((dev_cgroup->behavior == DEVCG_DEFAULT_DENY) == match)
 326                return 1;
 327        return 0;
 328}
 329
 330/*
 331 * parent_has_perm:
 332 * when adding a new allow rule to a device exception list, the rule
 333 * must be allowed in the parent device
 334 */
 335static int parent_has_perm(struct dev_cgroup *childcg,
 336                                  struct dev_exception_item *ex)
 337{
 338        struct cgroup *pcg = childcg->css.cgroup->parent;
 339        struct dev_cgroup *parent;
 340
 341        if (!pcg)
 342                return 1;
 343        parent = cgroup_to_devcgroup(pcg);
 344        return may_access(parent, ex);
 345}
 346
 347/**
 348 * may_allow_all - checks if it's possible to change the behavior to
 349 *                 allow based on parent's rules.
 350 * @parent: device cgroup's parent
 351 * returns: != 0 in case it's allowed, 0 otherwise
 352 */
 353static inline int may_allow_all(struct dev_cgroup *parent)
 354{
 355        if (!parent)
 356                return 1;
 357        return parent->behavior == DEVCG_DEFAULT_ALLOW;
 358}
 359
 360/*
 361 * Modify the exception list using allow/deny rules.
 362 * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
 363 * so we can give a container CAP_MKNOD to let it create devices but not
 364 * modify the exception list.
 365 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 366 * us to also grant CAP_SYS_ADMIN to containers without giving away the
 367 * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
 368 *
 369 * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
 370 * new access is only allowed if you're in the top-level cgroup, or your
 371 * parent cgroup has the access you're asking for.
 372 */
 373static int devcgroup_update_access(struct dev_cgroup *devcgroup,
 374                                   int filetype, const char *buffer)
 375{
 376        const char *b;
 377        char temp[12];          /* 11 + 1 characters needed for a u32 */
 378        int count, rc;
 379        struct dev_exception_item ex;
 380        struct cgroup *p = devcgroup->css.cgroup;
 381        struct dev_cgroup *parent = NULL;
 382
 383        if (!capable(CAP_SYS_ADMIN))
 384                return -EPERM;
 385
 386        if (p->parent)
 387                parent = cgroup_to_devcgroup(p->parent);
 388
 389        memset(&ex, 0, sizeof(ex));
 390        b = buffer;
 391
 392        switch (*b) {
 393        case 'a':
 394                switch (filetype) {
 395                case DEVCG_ALLOW:
 396                        if (!may_allow_all(parent))
 397                                return -EPERM;
 398                        dev_exception_clean(devcgroup);
 399                        devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
 400                        if (!parent)
 401                                break;
 402
 403                        rc = dev_exceptions_copy(&devcgroup->exceptions,
 404                                                 &parent->exceptions);
 405                        if (rc)
 406                                return rc;
 407                        break;
 408                case DEVCG_DENY:
 409                        dev_exception_clean(devcgroup);
 410                        devcgroup->behavior = DEVCG_DEFAULT_DENY;
 411                        break;
 412                default:
 413                        return -EINVAL;
 414                }
 415                return 0;
 416        case 'b':
 417                ex.type = DEV_BLOCK;
 418                break;
 419        case 'c':
 420                ex.type = DEV_CHAR;
 421                break;
 422        default:
 423                return -EINVAL;
 424        }
 425        b++;
 426        if (!isspace(*b))
 427                return -EINVAL;
 428        b++;
 429        if (*b == '*') {
 430                ex.major = ~0;
 431                b++;
 432        } else if (isdigit(*b)) {
 433                memset(temp, 0, sizeof(temp));
 434                for (count = 0; count < sizeof(temp) - 1; count++) {
 435                        temp[count] = *b;
 436                        b++;
 437                        if (!isdigit(*b))
 438                                break;
 439                }
 440                rc = kstrtou32(temp, 10, &ex.major);
 441                if (rc)
 442                        return -EINVAL;
 443        } else {
 444                return -EINVAL;
 445        }
 446        if (*b != ':')
 447                return -EINVAL;
 448        b++;
 449
 450        /* read minor */
 451        if (*b == '*') {
 452                ex.minor = ~0;
 453                b++;
 454        } else if (isdigit(*b)) {
 455                memset(temp, 0, sizeof(temp));
 456                for (count = 0; count < sizeof(temp) - 1; count++) {
 457                        temp[count] = *b;
 458                        b++;
 459                        if (!isdigit(*b))
 460                                break;
 461                }
 462                rc = kstrtou32(temp, 10, &ex.minor);
 463                if (rc)
 464                        return -EINVAL;
 465        } else {
 466                return -EINVAL;
 467        }
 468        if (!isspace(*b))
 469                return -EINVAL;
 470        for (b++, count = 0; count < 3; count++, b++) {
 471                switch (*b) {
 472                case 'r':
 473                        ex.access |= ACC_READ;
 474                        break;
 475                case 'w':
 476                        ex.access |= ACC_WRITE;
 477                        break;
 478                case 'm':
 479                        ex.access |= ACC_MKNOD;
 480                        break;
 481                case '\n':
 482                case '\0':
 483                        count = 3;
 484                        break;
 485                default:
 486                        return -EINVAL;
 487                }
 488        }
 489
 490        switch (filetype) {
 491        case DEVCG_ALLOW:
 492                if (!parent_has_perm(devcgroup, &ex))
 493                        return -EPERM;
 494                /*
 495                 * If the default policy is to allow by default, try to remove
 496                 * an matching exception instead. And be silent about it: we
 497                 * don't want to break compatibility
 498                 */
 499                if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
 500                        dev_exception_rm(devcgroup, &ex);
 501                        return 0;
 502                }
 503                return dev_exception_add(devcgroup, &ex);
 504        case DEVCG_DENY:
 505                /*
 506                 * If the default policy is to deny by default, try to remove
 507                 * an matching exception instead. And be silent about it: we
 508                 * don't want to break compatibility
 509                 */
 510                if (devcgroup->behavior == DEVCG_DEFAULT_DENY) {
 511                        dev_exception_rm(devcgroup, &ex);
 512                        return 0;
 513                }
 514                return dev_exception_add(devcgroup, &ex);
 515        default:
 516                return -EINVAL;
 517        }
 518        return 0;
 519}
 520
 521static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
 522                                  const char *buffer)
 523{
 524        int retval;
 525
 526        mutex_lock(&devcgroup_mutex);
 527        retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
 528                                         cft->private, buffer);
 529        mutex_unlock(&devcgroup_mutex);
 530        return retval;
 531}
 532
 533static struct cftype dev_cgroup_files[] = {
 534        {
 535                .name = "allow",
 536                .write_string  = devcgroup_access_write,
 537                .private = DEVCG_ALLOW,
 538        },
 539        {
 540                .name = "deny",
 541                .write_string = devcgroup_access_write,
 542                .private = DEVCG_DENY,
 543        },
 544        {
 545                .name = "list",
 546                .read_seq_string = devcgroup_seq_read,
 547                .private = DEVCG_LIST,
 548        },
 549        { }     /* terminate */
 550};
 551
 552struct cgroup_subsys devices_subsys = {
 553        .name = "devices",
 554        .can_attach = devcgroup_can_attach,
 555        .create = devcgroup_create,
 556        .destroy = devcgroup_destroy,
 557        .subsys_id = devices_subsys_id,
 558        .base_cftypes = dev_cgroup_files,
 559
 560        /*
 561         * While devices cgroup has the rudimentary hierarchy support which
 562         * checks the parent's restriction, it doesn't properly propagates
 563         * config changes in ancestors to their descendents.  A child
 564         * should only be allowed to add more restrictions to the parent's
 565         * configuration.  Fix it and remove the following.
 566         */
 567        .broken_hierarchy = true,
 568};
 569
 570/**
 571 * __devcgroup_check_permission - checks if an inode operation is permitted
 572 * @dev_cgroup: the dev cgroup to be tested against
 573 * @type: device type
 574 * @major: device major number
 575 * @minor: device minor number
 576 * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
 577 *
 578 * returns 0 on success, -EPERM case the operation is not permitted
 579 */
 580static int __devcgroup_check_permission(short type, u32 major, u32 minor,
 581                                        short access)
 582{
 583        struct dev_cgroup *dev_cgroup;
 584        struct dev_exception_item ex;
 585        int rc;
 586
 587        memset(&ex, 0, sizeof(ex));
 588        ex.type = type;
 589        ex.major = major;
 590        ex.minor = minor;
 591        ex.access = access;
 592
 593        rcu_read_lock();
 594        dev_cgroup = task_devcgroup(current);
 595        rc = may_access(dev_cgroup, &ex);
 596        rcu_read_unlock();
 597
 598        if (!rc)
 599                return -EPERM;
 600
 601        return 0;
 602}
 603
 604int __devcgroup_inode_permission(struct inode *inode, int mask)
 605{
 606        short type, access = 0;
 607
 608        if (S_ISBLK(inode->i_mode))
 609                type = DEV_BLOCK;
 610        if (S_ISCHR(inode->i_mode))
 611                type = DEV_CHAR;
 612        if (mask & MAY_WRITE)
 613                access |= ACC_WRITE;
 614        if (mask & MAY_READ)
 615                access |= ACC_READ;
 616
 617        return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
 618                        access);
 619}
 620
 621int devcgroup_inode_mknod(int mode, dev_t dev)
 622{
 623        short type;
 624
 625        if (!S_ISBLK(mode) && !S_ISCHR(mode))
 626                return 0;
 627
 628        if (S_ISBLK(mode))
 629                type = DEV_BLOCK;
 630        else
 631                type = DEV_CHAR;
 632
 633        return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
 634                        ACC_MKNOD);
 635
 636}
 637
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.