linux/kernel/user_namespace.c
<<
>>
Prefs
   1/*
   2 *  This program is free software; you can redistribute it and/or
   3 *  modify it under the terms of the GNU General Public License as
   4 *  published by the Free Software Foundation, version 2 of the
   5 *  License.
   6 */
   7
   8#include <linux/export.h>
   9#include <linux/nsproxy.h>
  10#include <linux/slab.h>
  11#include <linux/user_namespace.h>
  12#include <linux/highuid.h>
  13#include <linux/cred.h>
  14#include <linux/securebits.h>
  15#include <linux/keyctl.h>
  16#include <linux/key-type.h>
  17#include <keys/user-type.h>
  18#include <linux/seq_file.h>
  19#include <linux/fs.h>
  20#include <linux/uaccess.h>
  21#include <linux/ctype.h>
  22
  23static struct kmem_cache *user_ns_cachep __read_mostly;
  24
  25static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
  26                                struct uid_gid_map *map);
  27
  28/*
  29 * Create a new user namespace, deriving the creator from the user in the
  30 * passed credentials, and replacing that user with the new root user for the
  31 * new namespace.
  32 *
  33 * This is called by copy_creds(), which will finish setting the target task's
  34 * credentials.
  35 */
  36int create_user_ns(struct cred *new)
  37{
  38        struct user_namespace *ns, *parent_ns = new->user_ns;
  39        kuid_t owner = new->euid;
  40        kgid_t group = new->egid;
  41
  42        /* The creator needs a mapping in the parent user namespace
  43         * or else we won't be able to reasonably tell userspace who
  44         * created a user_namespace.
  45         */
  46        if (!kuid_has_mapping(parent_ns, owner) ||
  47            !kgid_has_mapping(parent_ns, group))
  48                return -EPERM;
  49
  50        ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
  51        if (!ns)
  52                return -ENOMEM;
  53
  54        kref_init(&ns->kref);
  55        ns->parent = parent_ns;
  56        ns->owner = owner;
  57        ns->group = group;
  58
  59        /* Start with the same capabilities as init but useless for doing
  60         * anything as the capabilities are bound to the new user namespace.
  61         */
  62        new->securebits = SECUREBITS_DEFAULT;
  63        new->cap_inheritable = CAP_EMPTY_SET;
  64        new->cap_permitted = CAP_FULL_SET;
  65        new->cap_effective = CAP_FULL_SET;
  66        new->cap_bset = CAP_FULL_SET;
  67#ifdef CONFIG_KEYS
  68        key_put(new->request_key_auth);
  69        new->request_key_auth = NULL;
  70#endif
  71        /* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
  72
  73        /* Leave the new->user_ns reference with the new user namespace. */
  74        /* Leave the reference to our user_ns with the new cred. */
  75        new->user_ns = ns;
  76
  77        return 0;
  78}
  79
  80void free_user_ns(struct kref *kref)
  81{
  82        struct user_namespace *parent, *ns =
  83                container_of(kref, struct user_namespace, kref);
  84
  85        parent = ns->parent;
  86        kmem_cache_free(user_ns_cachep, ns);
  87        put_user_ns(parent);
  88}
  89EXPORT_SYMBOL(free_user_ns);
  90
  91static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count)
  92{
  93        unsigned idx, extents;
  94        u32 first, last, id2;
  95
  96        id2 = id + count - 1;
  97
  98        /* Find the matching extent */
  99        extents = map->nr_extents;
 100        smp_read_barrier_depends();
 101        for (idx = 0; idx < extents; idx++) {
 102                first = map->extent[idx].first;
 103                last = first + map->extent[idx].count - 1;
 104                if (id >= first && id <= last &&
 105                    (id2 >= first && id2 <= last))
 106                        break;
 107        }
 108        /* Map the id or note failure */
 109        if (idx < extents)
 110                id = (id - first) + map->extent[idx].lower_first;
 111        else
 112                id = (u32) -1;
 113
 114        return id;
 115}
 116
 117static u32 map_id_down(struct uid_gid_map *map, u32 id)
 118{
 119        unsigned idx, extents;
 120        u32 first, last;
 121
 122        /* Find the matching extent */
 123        extents = map->nr_extents;
 124        smp_read_barrier_depends();
 125        for (idx = 0; idx < extents; idx++) {
 126                first = map->extent[idx].first;
 127                last = first + map->extent[idx].count - 1;
 128                if (id >= first && id <= last)
 129                        break;
 130        }
 131        /* Map the id or note failure */
 132        if (idx < extents)
 133                id = (id - first) + map->extent[idx].lower_first;
 134        else
 135                id = (u32) -1;
 136
 137        return id;
 138}
 139
 140static u32 map_id_up(struct uid_gid_map *map, u32 id)
 141{
 142        unsigned idx, extents;
 143        u32 first, last;
 144
 145        /* Find the matching extent */
 146        extents = map->nr_extents;
 147        smp_read_barrier_depends();
 148        for (idx = 0; idx < extents; idx++) {
 149                first = map->extent[idx].lower_first;
 150                last = first + map->extent[idx].count - 1;
 151                if (id >= first && id <= last)
 152                        break;
 153        }
 154        /* Map the id or note failure */
 155        if (idx < extents)
 156                id = (id - first) + map->extent[idx].first;
 157        else
 158                id = (u32) -1;
 159
 160        return id;
 161}
 162
 163/**
 164 *      make_kuid - Map a user-namespace uid pair into a kuid.
 165 *      @ns:  User namespace that the uid is in
 166 *      @uid: User identifier
 167 *
 168 *      Maps a user-namespace uid pair into a kernel internal kuid,
 169 *      and returns that kuid.
 170 *
 171 *      When there is no mapping defined for the user-namespace uid
 172 *      pair INVALID_UID is returned.  Callers are expected to test
 173 *      for and handle handle INVALID_UID being returned.  INVALID_UID
 174 *      may be tested for using uid_valid().
 175 */
 176kuid_t make_kuid(struct user_namespace *ns, uid_t uid)
 177{
 178        /* Map the uid to a global kernel uid */
 179        return KUIDT_INIT(map_id_down(&ns->uid_map, uid));
 180}
 181EXPORT_SYMBOL(make_kuid);
 182
 183/**
 184 *      from_kuid - Create a uid from a kuid user-namespace pair.
 185 *      @targ: The user namespace we want a uid in.
 186 *      @kuid: The kernel internal uid to start with.
 187 *
 188 *      Map @kuid into the user-namespace specified by @targ and
 189 *      return the resulting uid.
 190 *
 191 *      There is always a mapping into the initial user_namespace.
 192 *
 193 *      If @kuid has no mapping in @targ (uid_t)-1 is returned.
 194 */
 195uid_t from_kuid(struct user_namespace *targ, kuid_t kuid)
 196{
 197        /* Map the uid from a global kernel uid */
 198        return map_id_up(&targ->uid_map, __kuid_val(kuid));
 199}
 200EXPORT_SYMBOL(from_kuid);
 201
 202/**
 203 *      from_kuid_munged - Create a uid from a kuid user-namespace pair.
 204 *      @targ: The user namespace we want a uid in.
 205 *      @kuid: The kernel internal uid to start with.
 206 *
 207 *      Map @kuid into the user-namespace specified by @targ and
 208 *      return the resulting uid.
 209 *
 210 *      There is always a mapping into the initial user_namespace.
 211 *
 212 *      Unlike from_kuid from_kuid_munged never fails and always
 213 *      returns a valid uid.  This makes from_kuid_munged appropriate
 214 *      for use in syscalls like stat and getuid where failing the
 215 *      system call and failing to provide a valid uid are not an
 216 *      options.
 217 *
 218 *      If @kuid has no mapping in @targ overflowuid is returned.
 219 */
 220uid_t from_kuid_munged(struct user_namespace *targ, kuid_t kuid)
 221{
 222        uid_t uid;
 223        uid = from_kuid(targ, kuid);
 224
 225        if (uid == (uid_t) -1)
 226                uid = overflowuid;
 227        return uid;
 228}
 229EXPORT_SYMBOL(from_kuid_munged);
 230
 231/**
 232 *      make_kgid - Map a user-namespace gid pair into a kgid.
 233 *      @ns:  User namespace that the gid is in
 234 *      @uid: group identifier
 235 *
 236 *      Maps a user-namespace gid pair into a kernel internal kgid,
 237 *      and returns that kgid.
 238 *
 239 *      When there is no mapping defined for the user-namespace gid
 240 *      pair INVALID_GID is returned.  Callers are expected to test
 241 *      for and handle INVALID_GID being returned.  INVALID_GID may be
 242 *      tested for using gid_valid().
 243 */
 244kgid_t make_kgid(struct user_namespace *ns, gid_t gid)
 245{
 246        /* Map the gid to a global kernel gid */
 247        return KGIDT_INIT(map_id_down(&ns->gid_map, gid));
 248}
 249EXPORT_SYMBOL(make_kgid);
 250
 251/**
 252 *      from_kgid - Create a gid from a kgid user-namespace pair.
 253 *      @targ: The user namespace we want a gid in.
 254 *      @kgid: The kernel internal gid to start with.
 255 *
 256 *      Map @kgid into the user-namespace specified by @targ and
 257 *      return the resulting gid.
 258 *
 259 *      There is always a mapping into the initial user_namespace.
 260 *
 261 *      If @kgid has no mapping in @targ (gid_t)-1 is returned.
 262 */
 263gid_t from_kgid(struct user_namespace *targ, kgid_t kgid)
 264{
 265        /* Map the gid from a global kernel gid */
 266        return map_id_up(&targ->gid_map, __kgid_val(kgid));
 267}
 268EXPORT_SYMBOL(from_kgid);
 269
 270/**
 271 *      from_kgid_munged - Create a gid from a kgid user-namespace pair.
 272 *      @targ: The user namespace we want a gid in.
 273 *      @kgid: The kernel internal gid to start with.
 274 *
 275 *      Map @kgid into the user-namespace specified by @targ and
 276 *      return the resulting gid.
 277 *
 278 *      There is always a mapping into the initial user_namespace.
 279 *
 280 *      Unlike from_kgid from_kgid_munged never fails and always
 281 *      returns a valid gid.  This makes from_kgid_munged appropriate
 282 *      for use in syscalls like stat and getgid where failing the
 283 *      system call and failing to provide a valid gid are not options.
 284 *
 285 *      If @kgid has no mapping in @targ overflowgid is returned.
 286 */
 287gid_t from_kgid_munged(struct user_namespace *targ, kgid_t kgid)
 288{
 289        gid_t gid;
 290        gid = from_kgid(targ, kgid);
 291
 292        if (gid == (gid_t) -1)
 293                gid = overflowgid;
 294        return gid;
 295}
 296EXPORT_SYMBOL(from_kgid_munged);
 297
 298static int uid_m_show(struct seq_file *seq, void *v)
 299{
 300        struct user_namespace *ns = seq->private;
 301        struct uid_gid_extent *extent = v;
 302        struct user_namespace *lower_ns;
 303        uid_t lower;
 304
 305        lower_ns = current_user_ns();
 306        if ((lower_ns == ns) && lower_ns->parent)
 307                lower_ns = lower_ns->parent;
 308
 309        lower = from_kuid(lower_ns, KUIDT_INIT(extent->lower_first));
 310
 311        seq_printf(seq, "%10u %10u %10u\n",
 312                extent->first,
 313                lower,
 314                extent->count);
 315
 316        return 0;
 317}
 318
 319static int gid_m_show(struct seq_file *seq, void *v)
 320{
 321        struct user_namespace *ns = seq->private;
 322        struct uid_gid_extent *extent = v;
 323        struct user_namespace *lower_ns;
 324        gid_t lower;
 325
 326        lower_ns = current_user_ns();
 327        if ((lower_ns == ns) && lower_ns->parent)
 328                lower_ns = lower_ns->parent;
 329
 330        lower = from_kgid(lower_ns, KGIDT_INIT(extent->lower_first));
 331
 332        seq_printf(seq, "%10u %10u %10u\n",
 333                extent->first,
 334                lower,
 335                extent->count);
 336
 337        return 0;
 338}
 339
 340static void *m_start(struct seq_file *seq, loff_t *ppos, struct uid_gid_map *map)
 341{
 342        struct uid_gid_extent *extent = NULL;
 343        loff_t pos = *ppos;
 344
 345        if (pos < map->nr_extents)
 346                extent = &map->extent[pos];
 347
 348        return extent;
 349}
 350
 351static void *uid_m_start(struct seq_file *seq, loff_t *ppos)
 352{
 353        struct user_namespace *ns = seq->private;
 354
 355        return m_start(seq, ppos, &ns->uid_map);
 356}
 357
 358static void *gid_m_start(struct seq_file *seq, loff_t *ppos)
 359{
 360        struct user_namespace *ns = seq->private;
 361
 362        return m_start(seq, ppos, &ns->gid_map);
 363}
 364
 365static void *m_next(struct seq_file *seq, void *v, loff_t *pos)
 366{
 367        (*pos)++;
 368        return seq->op->start(seq, pos);
 369}
 370
 371static void m_stop(struct seq_file *seq, void *v)
 372{
 373        return;
 374}
 375
 376struct seq_operations proc_uid_seq_operations = {
 377        .start = uid_m_start,
 378        .stop = m_stop,
 379        .next = m_next,
 380        .show = uid_m_show,
 381};
 382
 383struct seq_operations proc_gid_seq_operations = {
 384        .start = gid_m_start,
 385        .stop = m_stop,
 386        .next = m_next,
 387        .show = gid_m_show,
 388};
 389
 390static DEFINE_MUTEX(id_map_mutex);
 391
 392static ssize_t map_write(struct file *file, const char __user *buf,
 393                         size_t count, loff_t *ppos,
 394                         int cap_setid,
 395                         struct uid_gid_map *map,
 396                         struct uid_gid_map *parent_map)
 397{
 398        struct seq_file *seq = file->private_data;
 399        struct user_namespace *ns = seq->private;
 400        struct uid_gid_map new_map;
 401        unsigned idx;
 402        struct uid_gid_extent *extent, *last = NULL;
 403        unsigned long page = 0;
 404        char *kbuf, *pos, *next_line;
 405        ssize_t ret = -EINVAL;
 406
 407        /*
 408         * The id_map_mutex serializes all writes to any given map.
 409         *
 410         * Any map is only ever written once.
 411         *
 412         * An id map fits within 1 cache line on most architectures.
 413         *
 414         * On read nothing needs to be done unless you are on an
 415         * architecture with a crazy cache coherency model like alpha.
 416         *
 417         * There is a one time data dependency between reading the
 418         * count of the extents and the values of the extents.  The
 419         * desired behavior is to see the values of the extents that
 420         * were written before the count of the extents.
 421         *
 422         * To achieve this smp_wmb() is used on guarantee the write
 423         * order and smp_read_barrier_depends() is guaranteed that we
 424         * don't have crazy architectures returning stale data.
 425         *
 426         */
 427        mutex_lock(&id_map_mutex);
 428
 429        ret = -EPERM;
 430        /* Only allow one successful write to the map */
 431        if (map->nr_extents != 0)
 432                goto out;
 433
 434        /* Require the appropriate privilege CAP_SETUID or CAP_SETGID
 435         * over the user namespace in order to set the id mapping.
 436         */
 437        if (!ns_capable(ns, cap_setid))
 438                goto out;
 439
 440        /* Get a buffer */
 441        ret = -ENOMEM;
 442        page = __get_free_page(GFP_TEMPORARY);
 443        kbuf = (char *) page;
 444        if (!page)
 445                goto out;
 446
 447        /* Only allow <= page size writes at the beginning of the file */
 448        ret = -EINVAL;
 449        if ((*ppos != 0) || (count >= PAGE_SIZE))
 450                goto out;
 451
 452        /* Slurp in the user data */
 453        ret = -EFAULT;
 454        if (copy_from_user(kbuf, buf, count))
 455                goto out;
 456        kbuf[count] = '\0';
 457
 458        /* Parse the user data */
 459        ret = -EINVAL;
 460        pos = kbuf;
 461        new_map.nr_extents = 0;
 462        for (;pos; pos = next_line) {
 463                extent = &new_map.extent[new_map.nr_extents];
 464
 465                /* Find the end of line and ensure I don't look past it */
 466                next_line = strchr(pos, '\n');
 467                if (next_line) {
 468                        *next_line = '\0';
 469                        next_line++;
 470                        if (*next_line == '\0')
 471                                next_line = NULL;
 472                }
 473
 474                pos = skip_spaces(pos);
 475                extent->first = simple_strtoul(pos, &pos, 10);
 476                if (!isspace(*pos))
 477                        goto out;
 478
 479                pos = skip_spaces(pos);
 480                extent->lower_first = simple_strtoul(pos, &pos, 10);
 481                if (!isspace(*pos))
 482                        goto out;
 483
 484                pos = skip_spaces(pos);
 485                extent->count = simple_strtoul(pos, &pos, 10);
 486                if (*pos && !isspace(*pos))
 487                        goto out;
 488
 489                /* Verify there is not trailing junk on the line */
 490                pos = skip_spaces(pos);
 491                if (*pos != '\0')
 492                        goto out;
 493
 494                /* Verify we have been given valid starting values */
 495                if ((extent->first == (u32) -1) ||
 496                    (extent->lower_first == (u32) -1 ))
 497                        goto out;
 498
 499                /* Verify count is not zero and does not cause the extent to wrap */
 500                if ((extent->first + extent->count) <= extent->first)
 501                        goto out;
 502                if ((extent->lower_first + extent->count) <= extent->lower_first)
 503                        goto out;
 504
 505                /* For now only accept extents that are strictly in order */
 506                if (last &&
 507                    (((last->first + last->count) > extent->first) ||
 508                     ((last->lower_first + last->count) > extent->lower_first)))
 509                        goto out;
 510
 511                new_map.nr_extents++;
 512                last = extent;
 513
 514                /* Fail if the file contains too many extents */
 515                if ((new_map.nr_extents == UID_GID_MAP_MAX_EXTENTS) &&
 516                    (next_line != NULL))
 517                        goto out;
 518        }
 519        /* Be very certaint the new map actually exists */
 520        if (new_map.nr_extents == 0)
 521                goto out;
 522
 523        ret = -EPERM;
 524        /* Validate the user is allowed to use user id's mapped to. */
 525        if (!new_idmap_permitted(ns, cap_setid, &new_map))
 526                goto out;
 527
 528        /* Map the lower ids from the parent user namespace to the
 529         * kernel global id space.
 530         */
 531        for (idx = 0; idx < new_map.nr_extents; idx++) {
 532                u32 lower_first;
 533                extent = &new_map.extent[idx];
 534
 535                lower_first = map_id_range_down(parent_map,
 536                                                extent->lower_first,
 537                                                extent->count);
 538
 539                /* Fail if we can not map the specified extent to
 540                 * the kernel global id space.
 541                 */
 542                if (lower_first == (u32) -1)
 543                        goto out;
 544
 545                extent->lower_first = lower_first;
 546        }
 547
 548        /* Install the map */
 549        memcpy(map->extent, new_map.extent,
 550                new_map.nr_extents*sizeof(new_map.extent[0]));
 551        smp_wmb();
 552        map->nr_extents = new_map.nr_extents;
 553
 554        *ppos = count;
 555        ret = count;
 556out:
 557        mutex_unlock(&id_map_mutex);
 558        if (page)
 559                free_page(page);
 560        return ret;
 561}
 562
 563ssize_t proc_uid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
 564{
 565        struct seq_file *seq = file->private_data;
 566        struct user_namespace *ns = seq->private;
 567
 568        if (!ns->parent)
 569                return -EPERM;
 570
 571        return map_write(file, buf, size, ppos, CAP_SETUID,
 572                         &ns->uid_map, &ns->parent->uid_map);
 573}
 574
 575ssize_t proc_gid_map_write(struct file *file, const char __user *buf, size_t size, loff_t *ppos)
 576{
 577        struct seq_file *seq = file->private_data;
 578        struct user_namespace *ns = seq->private;
 579
 580        if (!ns->parent)
 581                return -EPERM;
 582
 583        return map_write(file, buf, size, ppos, CAP_SETGID,
 584                         &ns->gid_map, &ns->parent->gid_map);
 585}
 586
 587static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
 588                                struct uid_gid_map *new_map)
 589{
 590        /* Allow the specified ids if we have the appropriate capability
 591         * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
 592         */
 593        if (ns_capable(ns->parent, cap_setid))
 594                return true;
 595
 596        return false;
 597}
 598
 599static __init int user_namespaces_init(void)
 600{
 601        user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC);
 602        return 0;
 603}
 604module_init(user_namespaces_init);
 605
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.