linux/kernel/ucount.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2
   3#include <linux/stat.h>
   4#include <linux/sysctl.h>
   5#include <linux/slab.h>
   6#include <linux/cred.h>
   7#include <linux/hash.h>
   8#include <linux/kmemleak.h>
   9#include <linux/user_namespace.h>
  10
  11struct ucounts init_ucounts = {
  12        .ns    = &init_user_ns,
  13        .uid   = GLOBAL_ROOT_UID,
  14        .count = ATOMIC_INIT(1),
  15};
  16
  17#define UCOUNTS_HASHTABLE_BITS 10
  18static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)];
  19static DEFINE_SPINLOCK(ucounts_lock);
  20
  21#define ucounts_hashfn(ns, uid)                                         \
  22        hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \
  23                  UCOUNTS_HASHTABLE_BITS)
  24#define ucounts_hashentry(ns, uid)      \
  25        (ucounts_hashtable + ucounts_hashfn(ns, uid))
  26
  27
  28#ifdef CONFIG_SYSCTL
  29static struct ctl_table_set *
  30set_lookup(struct ctl_table_root *root)
  31{
  32        return &current_user_ns()->set;
  33}
  34
  35static int set_is_seen(struct ctl_table_set *set)
  36{
  37        return &current_user_ns()->set == set;
  38}
  39
  40static int set_permissions(struct ctl_table_header *head,
  41                                  struct ctl_table *table)
  42{
  43        struct user_namespace *user_ns =
  44                container_of(head->set, struct user_namespace, set);
  45        int mode;
  46
  47        /* Allow users with CAP_SYS_RESOURCE unrestrained access */
  48        if (ns_capable(user_ns, CAP_SYS_RESOURCE))
  49                mode = (table->mode & S_IRWXU) >> 6;
  50        else
  51        /* Allow all others at most read-only access */
  52                mode = table->mode & S_IROTH;
  53        return (mode << 6) | (mode << 3) | mode;
  54}
  55
  56static struct ctl_table_root set_root = {
  57        .lookup = set_lookup,
  58        .permissions = set_permissions,
  59};
  60
  61static long ue_zero = 0;
  62static long ue_int_max = INT_MAX;
  63
  64#define UCOUNT_ENTRY(name)                                      \
  65        {                                                       \
  66                .procname       = name,                         \
  67                .maxlen         = sizeof(long),                 \
  68                .mode           = 0644,                         \
  69                .proc_handler   = proc_doulongvec_minmax,       \
  70                .extra1         = &ue_zero,                     \
  71                .extra2         = &ue_int_max,                  \
  72        }
  73static struct ctl_table user_table[] = {
  74        UCOUNT_ENTRY("max_user_namespaces"),
  75        UCOUNT_ENTRY("max_pid_namespaces"),
  76        UCOUNT_ENTRY("max_uts_namespaces"),
  77        UCOUNT_ENTRY("max_ipc_namespaces"),
  78        UCOUNT_ENTRY("max_net_namespaces"),
  79        UCOUNT_ENTRY("max_mnt_namespaces"),
  80        UCOUNT_ENTRY("max_cgroup_namespaces"),
  81        UCOUNT_ENTRY("max_time_namespaces"),
  82#ifdef CONFIG_INOTIFY_USER
  83        UCOUNT_ENTRY("max_inotify_instances"),
  84        UCOUNT_ENTRY("max_inotify_watches"),
  85#endif
  86#ifdef CONFIG_FANOTIFY
  87        UCOUNT_ENTRY("max_fanotify_groups"),
  88        UCOUNT_ENTRY("max_fanotify_marks"),
  89#endif
  90        { },
  91        { },
  92        { },
  93        { },
  94        { }
  95};
  96#endif /* CONFIG_SYSCTL */
  97
  98bool setup_userns_sysctls(struct user_namespace *ns)
  99{
 100#ifdef CONFIG_SYSCTL
 101        struct ctl_table *tbl;
 102
 103        BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS + 1);
 104        setup_sysctl_set(&ns->set, &set_root, set_is_seen);
 105        tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL);
 106        if (tbl) {
 107                int i;
 108                for (i = 0; i < UCOUNT_COUNTS; i++) {
 109                        tbl[i].data = &ns->ucount_max[i];
 110                }
 111                ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl);
 112        }
 113        if (!ns->sysctls) {
 114                kfree(tbl);
 115                retire_sysctl_set(&ns->set);
 116                return false;
 117        }
 118#endif
 119        return true;
 120}
 121
 /*
  * Tear down what setup_userns_sysctls() created for @ns: unregister the
  * table, retire the sysctl set and free the duplicated table. Does nothing
  * when CONFIG_SYSCTL is disabled.
  */
 void retire_userns_sysctls(struct user_namespace *ns)
 {
 #ifdef CONFIG_SYSCTL
         /*
          * Fetch the table pointer before unregistering.
          * NOTE(review): presumably the header must not be touched once
          * unregister_sysctl_table() has run - confirm.
          */
         struct ctl_table *owned = ns->sysctls->ctl_table_arg;

         unregister_sysctl_table(ns->sysctls);
         retire_sysctl_set(&ns->set);
         kfree(owned);
 #endif
 }
 133
 134static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struct hlist_head *hashent)
 135{
 136        struct ucounts *ucounts;
 137
 138        hlist_for_each_entry(ucounts, hashent, node) {
 139                if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns))
 140                        return ucounts;
 141        }
 142        return NULL;
 143}
 144
 145static void hlist_add_ucounts(struct ucounts *ucounts)
 146{
 147        struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid);
 148        spin_lock_irq(&ucounts_lock);
 149        hlist_add_head(&ucounts->node, hashent);
 150        spin_unlock_irq(&ucounts_lock);
 151}
 152
 153struct ucounts *get_ucounts(struct ucounts *ucounts)
 154{
 155        if (ucounts && atomic_add_negative(1, &ucounts->count)) {
 156                put_ucounts(ucounts);
 157                ucounts = NULL;
 158        }
 159        return ucounts;
 160}
 161
 162struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid)
 163{
 164        struct hlist_head *hashent = ucounts_hashentry(ns, uid);
 165        struct ucounts *ucounts, *new;
 166        long overflow;
 167
 168        spin_lock_irq(&ucounts_lock);
 169        ucounts = find_ucounts(ns, uid, hashent);
 170        if (!ucounts) {
 171                spin_unlock_irq(&ucounts_lock);
 172
 173                new = kzalloc(sizeof(*new), GFP_KERNEL);
 174                if (!new)
 175                        return NULL;
 176
 177                new->ns = ns;
 178                new->uid = uid;
 179                atomic_set(&new->count, 1);
 180
 181                spin_lock_irq(&ucounts_lock);
 182                ucounts = find_ucounts(ns, uid, hashent);
 183                if (ucounts) {
 184                        kfree(new);
 185                } else {
 186                        hlist_add_head(&new->node, hashent);
 187                        spin_unlock_irq(&ucounts_lock);
 188                        return new;
 189                }
 190        }
 191        overflow = atomic_add_negative(1, &ucounts->count);
 192        spin_unlock_irq(&ucounts_lock);
 193        if (overflow) {
 194                put_ucounts(ucounts);
 195                return NULL;
 196        }
 197        return ucounts;
 198}
 199
 200void put_ucounts(struct ucounts *ucounts)
 201{
 202        unsigned long flags;
 203
 204        if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) {
 205                hlist_del_init(&ucounts->node);
 206                spin_unlock_irqrestore(&ucounts_lock, flags);
 207                kfree(ucounts);
 208        }
 209}
 210
 211static inline bool atomic_long_inc_below(atomic_long_t *v, int u)
 212{
 213        long c, old;
 214        c = atomic_long_read(v);
 215        for (;;) {
 216                if (unlikely(c >= u))
 217                        return false;
 218                old = atomic_long_cmpxchg(v, c, c+1);
 219                if (likely(old == c))
 220                        return true;
 221                c = old;
 222        }
 223}
 224
 225struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid,
 226                           enum ucount_type type)
 227{
 228        struct ucounts *ucounts, *iter, *bad;
 229        struct user_namespace *tns;
 230        ucounts = alloc_ucounts(ns, uid);
 231        for (iter = ucounts; iter; iter = tns->ucounts) {
 232                long max;
 233                tns = iter->ns;
 234                max = READ_ONCE(tns->ucount_max[type]);
 235                if (!atomic_long_inc_below(&iter->ucount[type], max))
 236                        goto fail;
 237        }
 238        return ucounts;
 239fail:
 240        bad = iter;
 241        for (iter = ucounts; iter != bad; iter = iter->ns->ucounts)
 242                atomic_long_dec(&iter->ucount[type]);
 243
 244        put_ucounts(ucounts);
 245        return NULL;
 246}
 247
 248void dec_ucount(struct ucounts *ucounts, enum ucount_type type)
 249{
 250        struct ucounts *iter;
 251        for (iter = ucounts; iter; iter = iter->ns->ucounts) {
 252                long dec = atomic_long_dec_if_positive(&iter->ucount[type]);
 253                WARN_ON_ONCE(dec < 0);
 254        }
 255        put_ucounts(ucounts);
 256}
 257
 258long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
 259{
 260        struct ucounts *iter;
 261        long ret = 0;
 262
 263        for (iter = ucounts; iter; iter = iter->ns->ucounts) {
 264                long max = READ_ONCE(iter->ns->ucount_max[type]);
 265                long new = atomic_long_add_return(v, &iter->ucount[type]);
 266                if (new < 0 || new > max)
 267                        ret = LONG_MAX;
 268                else if (iter == ucounts)
 269                        ret = new;
 270        }
 271        return ret;
 272}
 273
 274bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v)
 275{
 276        struct ucounts *iter;
 277        long new = -1; /* Silence compiler warning */
 278        for (iter = ucounts; iter; iter = iter->ns->ucounts) {
 279                long dec = atomic_long_add_return(-v, &iter->ucount[type]);
 280                WARN_ON_ONCE(dec < 0);
 281                if (iter == ucounts)
 282                        new = dec;
 283        }
 284        return (new == 0);
 285}
 286
 287bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max)
 288{
 289        struct ucounts *iter;
 290        if (get_ucounts_value(ucounts, type) > max)
 291                return true;
 292        for (iter = ucounts; iter; iter = iter->ns->ucounts) {
 293                max = READ_ONCE(iter->ns->ucount_max[type]);
 294                if (get_ucounts_value(iter, type) > max)
 295                        return true;
 296        }
 297        return false;
 298}
 299
 300static __init int user_namespace_sysctl_init(void)
 301{
 302#ifdef CONFIG_SYSCTL
 303        static struct ctl_table_header *user_header;
 304        static struct ctl_table empty[1];
 305        /*
 306         * It is necessary to register the user directory in the
 307         * default set so that registrations in the child sets work
 308         * properly.
 309         */
 310        user_header = register_sysctl("user", empty);
 311        kmemleak_ignore(user_header);
 312        BUG_ON(!user_header);
 313        BUG_ON(!setup_userns_sysctls(&init_user_ns));
 314#endif
 315        hlist_add_ucounts(&init_ucounts);
 316        inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1);
 317        return 0;
 318}
 319subsys_initcall(user_namespace_sysctl_init);
 320