linux/security/device_cgroup.c
<<
>>
Prefs
   1/*
   2 * dev_cgroup.c - device cgroup subsystem
   3 *
   4 * Copyright 2007 IBM Corp
   5 */
   6
   7#include <linux/device_cgroup.h>
   8#include <linux/cgroup.h>
   9#include <linux/ctype.h>
  10#include <linux/list.h>
  11#include <linux/uaccess.h>
  12#include <linux/seq_file.h>
  13
  14#define ACC_MKNOD 1
  15#define ACC_READ  2
  16#define ACC_WRITE 4
  17#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)
  18
  19#define DEV_BLOCK 1
  20#define DEV_CHAR  2
  21#define DEV_ALL   4  /* this represents all devices */
  22
  23/*
  24 * whitelist locking rules:
  25 * cgroup_lock() cannot be taken under dev_cgroup->lock.
  26 * dev_cgroup->lock can be taken with or without cgroup_lock().
  27 *
  28 * modifications always require cgroup_lock
  29 * modifications to a list which is visible require the
  30 *   dev_cgroup->lock *and* cgroup_lock()
  31 * walking the list requires dev_cgroup->lock or cgroup_lock().
  32 *
  33 * reasoning: dev_whitelist_copy() needs to kmalloc, so needs
  34 *   a mutex, which the cgroup_lock() is.  Since modifying
  35 *   a visible list requires both locks, either lock can be
  36 *   taken for walking the list.
  37 */
  38
  39struct dev_whitelist_item {
  40        u32 major, minor;
  41        short type;
  42        short access;
  43        struct list_head list;
  44};
  45
  46struct dev_cgroup {
  47        struct cgroup_subsys_state css;
  48        struct list_head whitelist;
  49        spinlock_t lock;
  50};
  51
  52static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
  53{
  54        return container_of(s, struct dev_cgroup, css);
  55}
  56
  57static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
  58{
  59        return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
  60}
  61
  62struct cgroup_subsys devices_subsys;
  63
  64static int devcgroup_can_attach(struct cgroup_subsys *ss,
  65                struct cgroup *new_cgroup, struct task_struct *task)
  66{
  67        if (current != task && !capable(CAP_SYS_ADMIN))
  68                        return -EPERM;
  69
  70        return 0;
  71}
  72
  73/*
  74 * called under cgroup_lock()
  75 */
  76static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
  77{
  78        struct dev_whitelist_item *wh, *tmp, *new;
  79
  80        list_for_each_entry(wh, orig, list) {
  81                new = kmalloc(sizeof(*wh), GFP_KERNEL);
  82                if (!new)
  83                        goto free_and_exit;
  84                new->major = wh->major;
  85                new->minor = wh->minor;
  86                new->type = wh->type;
  87                new->access = wh->access;
  88                list_add_tail(&new->list, dest);
  89        }
  90
  91        return 0;
  92
  93free_and_exit:
  94        list_for_each_entry_safe(wh, tmp, dest, list) {
  95                list_del(&wh->list);
  96                kfree(wh);
  97        }
  98        return -ENOMEM;
  99}
 100
 101/* Stupid prototype - don't bother combining existing entries */
 102/*
 103 * called under cgroup_lock()
 104 * since the list is visible to other tasks, we need the spinlock also
 105 */
 106static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
 107                        struct dev_whitelist_item *wh)
 108{
 109        struct dev_whitelist_item *whcopy, *walk;
 110
 111        whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL);
 112        if (!whcopy)
 113                return -ENOMEM;
 114
 115        memcpy(whcopy, wh, sizeof(*whcopy));
 116        spin_lock(&dev_cgroup->lock);
 117        list_for_each_entry(walk, &dev_cgroup->whitelist, list) {
 118                if (walk->type != wh->type)
 119                        continue;
 120                if (walk->major != wh->major)
 121                        continue;
 122                if (walk->minor != wh->minor)
 123                        continue;
 124
 125                walk->access |= wh->access;
 126                kfree(whcopy);
 127                whcopy = NULL;
 128        }
 129
 130        if (whcopy != NULL)
 131                list_add_tail(&whcopy->list, &dev_cgroup->whitelist);
 132        spin_unlock(&dev_cgroup->lock);
 133        return 0;
 134}
 135
 136/*
 137 * called under cgroup_lock()
 138 * since the list is visible to other tasks, we need the spinlock also
 139 */
 140static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
 141                        struct dev_whitelist_item *wh)
 142{
 143        struct dev_whitelist_item *walk, *tmp;
 144
 145        spin_lock(&dev_cgroup->lock);
 146        list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) {
 147                if (walk->type == DEV_ALL)
 148                        goto remove;
 149                if (walk->type != wh->type)
 150                        continue;
 151                if (walk->major != ~0 && walk->major != wh->major)
 152                        continue;
 153                if (walk->minor != ~0 && walk->minor != wh->minor)
 154                        continue;
 155
 156remove:
 157                walk->access &= ~wh->access;
 158                if (!walk->access) {
 159                        list_del(&walk->list);
 160                        kfree(walk);
 161                }
 162        }
 163        spin_unlock(&dev_cgroup->lock);
 164}
 165
 166/*
 167 * called from kernel/cgroup.c with cgroup_lock() held.
 168 */
 169static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
 170                                                struct cgroup *cgroup)
 171{
 172        struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
 173        struct cgroup *parent_cgroup;
 174        int ret;
 175
 176        dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
 177        if (!dev_cgroup)
 178                return ERR_PTR(-ENOMEM);
 179        INIT_LIST_HEAD(&dev_cgroup->whitelist);
 180        parent_cgroup = cgroup->parent;
 181
 182        if (parent_cgroup == NULL) {
 183                struct dev_whitelist_item *wh;
 184                wh = kmalloc(sizeof(*wh), GFP_KERNEL);
 185                if (!wh) {
 186                        kfree(dev_cgroup);
 187                        return ERR_PTR(-ENOMEM);
 188                }
 189                wh->minor = wh->major = ~0;
 190                wh->type = DEV_ALL;
 191                wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE;
 192                list_add(&wh->list, &dev_cgroup->whitelist);
 193        } else {
 194                parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
 195                ret = dev_whitelist_copy(&dev_cgroup->whitelist,
 196                                &parent_dev_cgroup->whitelist);
 197                if (ret) {
 198                        kfree(dev_cgroup);
 199                        return ERR_PTR(ret);
 200                }
 201        }
 202
 203        spin_lock_init(&dev_cgroup->lock);
 204        return &dev_cgroup->css;
 205}
 206
 207static void devcgroup_destroy(struct cgroup_subsys *ss,
 208                        struct cgroup *cgroup)
 209{
 210        struct dev_cgroup *dev_cgroup;
 211        struct dev_whitelist_item *wh, *tmp;
 212
 213        dev_cgroup = cgroup_to_devcgroup(cgroup);
 214        list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) {
 215                list_del(&wh->list);
 216                kfree(wh);
 217        }
 218        kfree(dev_cgroup);
 219}
 220
 221#define DEVCG_ALLOW 1
 222#define DEVCG_DENY 2
 223#define DEVCG_LIST 3
 224
 225#define MAJMINLEN 13
 226#define ACCLEN 4
 227
 228static void set_access(char *acc, short access)
 229{
 230        int idx = 0;
 231        memset(acc, 0, ACCLEN);
 232        if (access & ACC_READ)
 233                acc[idx++] = 'r';
 234        if (access & ACC_WRITE)
 235                acc[idx++] = 'w';
 236        if (access & ACC_MKNOD)
 237                acc[idx++] = 'm';
 238}
 239
 240static char type_to_char(short type)
 241{
 242        if (type == DEV_ALL)
 243                return 'a';
 244        if (type == DEV_CHAR)
 245                return 'c';
 246        if (type == DEV_BLOCK)
 247                return 'b';
 248        return 'X';
 249}
 250
 251static void set_majmin(char *str, unsigned m)
 252{
 253        memset(str, 0, MAJMINLEN);
 254        if (m == ~0)
 255                sprintf(str, "*");
 256        else
 257                snprintf(str, MAJMINLEN, "%u", m);
 258}
 259
 260static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
 261                                struct seq_file *m)
 262{
 263        struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
 264        struct dev_whitelist_item *wh;
 265        char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
 266
 267        spin_lock(&devcgroup->lock);
 268        list_for_each_entry(wh, &devcgroup->whitelist, list) {
 269                set_access(acc, wh->access);
 270                set_majmin(maj, wh->major);
 271                set_majmin(min, wh->minor);
 272                seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type),
 273                           maj, min, acc);
 274        }
 275        spin_unlock(&devcgroup->lock);
 276
 277        return 0;
 278}
 279
 280/*
 281 * may_access_whitelist:
 282 * does the access granted to dev_cgroup c contain the access
 283 * requested in whitelist item refwh.
 284 * return 1 if yes, 0 if no.
 285 * call with c->lock held
 286 */
 287static int may_access_whitelist(struct dev_cgroup *c,
 288                                       struct dev_whitelist_item *refwh)
 289{
 290        struct dev_whitelist_item *whitem;
 291
 292        list_for_each_entry(whitem, &c->whitelist, list) {
 293                if (whitem->type & DEV_ALL)
 294                        return 1;
 295                if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK))
 296                        continue;
 297                if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR))
 298                        continue;
 299                if (whitem->major != ~0 && whitem->major != refwh->major)
 300                        continue;
 301                if (whitem->minor != ~0 && whitem->minor != refwh->minor)
 302                        continue;
 303                if (refwh->access & (~whitem->access))
 304                        continue;
 305                return 1;
 306        }
 307        return 0;
 308}
 309
 310/*
 311 * parent_has_perm:
 312 * when adding a new allow rule to a device whitelist, the rule
 313 * must be allowed in the parent device
 314 */
 315static int parent_has_perm(struct cgroup *childcg,
 316                                  struct dev_whitelist_item *wh)
 317{
 318        struct cgroup *pcg = childcg->parent;
 319        struct dev_cgroup *parent;
 320        int ret;
 321
 322        if (!pcg)
 323                return 1;
 324        parent = cgroup_to_devcgroup(pcg);
 325        spin_lock(&parent->lock);
 326        ret = may_access_whitelist(parent, wh);
 327        spin_unlock(&parent->lock);
 328        return ret;
 329}
 330
 331/*
 332 * Modify the whitelist using allow/deny rules.
 333 * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
 334 * so we can give a container CAP_MKNOD to let it create devices but not
 335 * modify the whitelist.
 336 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 337 * us to also grant CAP_SYS_ADMIN to containers without giving away the
 338 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN
 339 *
 340 * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
 341 * new access is only allowed if you're in the top-level cgroup, or your
 342 * parent cgroup has the access you're asking for.
 343 */
 344static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
 345                                struct file *file, const char __user *userbuf,
 346                                size_t nbytes, loff_t *ppos)
 347{
 348        struct cgroup *cur_cgroup;
 349        struct dev_cgroup *devcgroup, *cur_devcgroup;
 350        int filetype = cft->private;
 351        char *buffer, *b;
 352        int retval = 0, count;
 353        struct dev_whitelist_item wh;
 354
 355        if (!capable(CAP_SYS_ADMIN))
 356                return -EPERM;
 357
 358        devcgroup = cgroup_to_devcgroup(cgroup);
 359        cur_cgroup = task_cgroup(current, devices_subsys.subsys_id);
 360        cur_devcgroup = cgroup_to_devcgroup(cur_cgroup);
 361
 362        buffer = kmalloc(nbytes+1, GFP_KERNEL);
 363        if (!buffer)
 364                return -ENOMEM;
 365
 366        if (copy_from_user(buffer, userbuf, nbytes)) {
 367                retval = -EFAULT;
 368                goto out1;
 369        }
 370        buffer[nbytes] = 0;     /* nul-terminate */
 371
 372        cgroup_lock();
 373        if (cgroup_is_removed(cgroup)) {
 374                retval = -ENODEV;
 375                goto out2;
 376        }
 377
 378        memset(&wh, 0, sizeof(wh));
 379        b = buffer;
 380
 381        switch (*b) {
 382        case 'a':
 383                wh.type = DEV_ALL;
 384                wh.access = ACC_MASK;
 385                wh.major = ~0;
 386                wh.minor = ~0;
 387                goto handle;
 388        case 'b':
 389                wh.type = DEV_BLOCK;
 390                break;
 391        case 'c':
 392                wh.type = DEV_CHAR;
 393                break;
 394        default:
 395                retval = -EINVAL;
 396                goto out2;
 397        }
 398        b++;
 399        if (!isspace(*b)) {
 400                retval = -EINVAL;
 401                goto out2;
 402        }
 403        b++;
 404        if (*b == '*') {
 405                wh.major = ~0;
 406                b++;
 407        } else if (isdigit(*b)) {
 408                wh.major = 0;
 409                while (isdigit(*b)) {
 410                        wh.major = wh.major*10+(*b-'0');
 411                        b++;
 412                }
 413        } else {
 414                retval = -EINVAL;
 415                goto out2;
 416        }
 417        if (*b != ':') {
 418                retval = -EINVAL;
 419                goto out2;
 420        }
 421        b++;
 422
 423        /* read minor */
 424        if (*b == '*') {
 425                wh.minor = ~0;
 426                b++;
 427        } else if (isdigit(*b)) {
 428                wh.minor = 0;
 429                while (isdigit(*b)) {
 430                        wh.minor = wh.minor*10+(*b-'0');
 431                        b++;
 432                }
 433        } else {
 434                retval = -EINVAL;
 435                goto out2;
 436        }
 437        if (!isspace(*b)) {
 438                retval = -EINVAL;
 439                goto out2;
 440        }
 441        for (b++, count = 0; count < 3; count++, b++) {
 442                switch (*b) {
 443                case 'r':
 444                        wh.access |= ACC_READ;
 445                        break;
 446                case 'w':
 447                        wh.access |= ACC_WRITE;
 448                        break;
 449                case 'm':
 450                        wh.access |= ACC_MKNOD;
 451                        break;
 452                case '\n':
 453                case '\0':
 454                        count = 3;
 455                        break;
 456                default:
 457                        retval = -EINVAL;
 458                        goto out2;
 459                }
 460        }
 461
 462handle:
 463        retval = 0;
 464        switch (filetype) {
 465        case DEVCG_ALLOW:
 466                if (!parent_has_perm(cgroup, &wh))
 467                        retval = -EPERM;
 468                else
 469                        retval = dev_whitelist_add(devcgroup, &wh);
 470                break;
 471        case DEVCG_DENY:
 472                dev_whitelist_rm(devcgroup, &wh);
 473                break;
 474        default:
 475                retval = -EINVAL;
 476                goto out2;
 477        }
 478
 479        if (retval == 0)
 480                retval = nbytes;
 481
 482out2:
 483        cgroup_unlock();
 484out1:
 485        kfree(buffer);
 486        return retval;
 487}
 488
 489static struct cftype dev_cgroup_files[] = {
 490        {
 491                .name = "allow",
 492                .write  = devcgroup_access_write,
 493                .private = DEVCG_ALLOW,
 494        },
 495        {
 496                .name = "deny",
 497                .write = devcgroup_access_write,
 498                .private = DEVCG_DENY,
 499        },
 500        {
 501                .name = "list",
 502                .read_seq_string = devcgroup_seq_read,
 503                .private = DEVCG_LIST,
 504        },
 505};
 506
 507static int devcgroup_populate(struct cgroup_subsys *ss,
 508                                struct cgroup *cgroup)
 509{
 510        return cgroup_add_files(cgroup, ss, dev_cgroup_files,
 511                                        ARRAY_SIZE(dev_cgroup_files));
 512}
 513
 514struct cgroup_subsys devices_subsys = {
 515        .name = "devices",
 516        .can_attach = devcgroup_can_attach,
 517        .create = devcgroup_create,
 518        .destroy  = devcgroup_destroy,
 519        .populate = devcgroup_populate,
 520        .subsys_id = devices_subsys_id,
 521};
 522
 523int devcgroup_inode_permission(struct inode *inode, int mask)
 524{
 525        struct dev_cgroup *dev_cgroup;
 526        struct dev_whitelist_item *wh;
 527
 528        dev_t device = inode->i_rdev;
 529        if (!device)
 530                return 0;
 531        if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))
 532                return 0;
 533        dev_cgroup = css_to_devcgroup(task_subsys_state(current,
 534                                devices_subsys_id));
 535        if (!dev_cgroup)
 536                return 0;
 537
 538        spin_lock(&dev_cgroup->lock);
 539        list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
 540                if (wh->type & DEV_ALL)
 541                        goto acc_check;
 542                if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
 543                        continue;
 544                if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode))
 545                        continue;
 546                if (wh->major != ~0 && wh->major != imajor(inode))
 547                        continue;
 548                if (wh->minor != ~0 && wh->minor != iminor(inode))
 549                        continue;
 550acc_check:
 551                if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE))
 552                        continue;
 553                if ((mask & MAY_READ) && !(wh->access & ACC_READ))
 554                        continue;
 555                spin_unlock(&dev_cgroup->lock);
 556                return 0;
 557        }
 558        spin_unlock(&dev_cgroup->lock);
 559
 560        return -EPERM;
 561}
 562
 563int devcgroup_inode_mknod(int mode, dev_t dev)
 564{
 565        struct dev_cgroup *dev_cgroup;
 566        struct dev_whitelist_item *wh;
 567
 568        dev_cgroup = css_to_devcgroup(task_subsys_state(current,
 569                                devices_subsys_id));
 570        if (!dev_cgroup)
 571                return 0;
 572
 573        spin_lock(&dev_cgroup->lock);
 574        list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
 575                if (wh->type & DEV_ALL)
 576                        goto acc_check;
 577                if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode))
 578                        continue;
 579                if ((wh->type & DEV_CHAR) && !S_ISCHR(mode))
 580                        continue;
 581                if (wh->major != ~0 && wh->major != MAJOR(dev))
 582                        continue;
 583                if (wh->minor != ~0 && wh->minor != MINOR(dev))
 584                        continue;
 585acc_check:
 586                if (!(wh->access & ACC_MKNOD))
 587                        continue;
 588                spin_unlock(&dev_cgroup->lock);
 589                return 0;
 590        }
 591        spin_unlock(&dev_cgroup->lock);
 592        return -EPERM;
 593}
 594
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.