linux/net/core/net_namespace.c
<<
>>
Prefs
   1#include <linux/workqueue.h>
   2#include <linux/rtnetlink.h>
   3#include <linux/cache.h>
   4#include <linux/slab.h>
   5#include <linux/list.h>
   6#include <linux/delay.h>
   7#include <linux/sched.h>
   8#include <linux/idr.h>
   9#include <linux/rculist.h>
  10#include <linux/nsproxy.h>
  11#include <linux/proc_fs.h>
  12#include <linux/file.h>
  13#include <linux/export.h>
  14#include <net/net_namespace.h>
  15#include <net/netns/generic.h>
  16
/*
 *      Our network namespace constructor/destructor lists
 */

/*
 * Registered pernet_operations.  setup_net() walks this list forwards
 * to initialise a namespace; cleanup_net() walks it in reverse.
 */
static LIST_HEAD(pernet_list);
/*
 * List position that register_pernet_subsys() inserts in front of.
 * It is &pernet_list until register_pernet_device() points it at the
 * first registered device ops.
 */
static struct list_head *first_device = &pernet_list;
/* Held around setup_net(), cleanup_net() and pernet ops (un)registration. */
static DEFINE_MUTEX(net_mutex);

/*
 * All network namespaces.  This file modifies the list with the RCU
 * list helpers while holding rtnl_lock().
 */
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

/* The initial network namespace; set up by net_ns_init(). */
struct net init_net;
EXPORT_SYMBOL(init_net);

#define INITIAL_NET_GEN_PTRS    13 /* +1 for len +2 for rcu_head */

/*
 * Number of ptr[] slots net_alloc_generic() allocates.  Raised by
 * register_pernet_operations() when a larger generic id is handed out.
 */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
  34
  35static struct net_generic *net_alloc_generic(void)
  36{
  37        struct net_generic *ng;
  38        size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
  39
  40        ng = kzalloc(generic_size, GFP_KERNEL);
  41        if (ng)
  42                ng->len = max_gen_ptrs;
  43
  44        return ng;
  45}
  46
  47static int net_assign_generic(struct net *net, int id, void *data)
  48{
  49        struct net_generic *ng, *old_ng;
  50
  51        BUG_ON(!mutex_is_locked(&net_mutex));
  52        BUG_ON(id == 0);
  53
  54        old_ng = rcu_dereference_protected(net->gen,
  55                                           lockdep_is_held(&net_mutex));
  56        ng = old_ng;
  57        if (old_ng->len >= id)
  58                goto assign;
  59
  60        ng = net_alloc_generic();
  61        if (ng == NULL)
  62                return -ENOMEM;
  63
  64        /*
  65         * Some synchronisation notes:
  66         *
  67         * The net_generic explores the net->gen array inside rcu
  68         * read section. Besides once set the net->gen->ptr[x]
  69         * pointer never changes (see rules in netns/generic.h).
  70         *
  71         * That said, we simply duplicate this array and schedule
  72         * the old copy for kfree after a grace period.
  73         */
  74
  75        memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
  76
  77        rcu_assign_pointer(net->gen, ng);
  78        kfree_rcu(old_ng, rcu);
  79assign:
  80        ng->ptr[id - 1] = data;
  81        return 0;
  82}
  83
  84static int ops_init(const struct pernet_operations *ops, struct net *net)
  85{
  86        int err;
  87        if (ops->id && ops->size) {
  88                void *data = kzalloc(ops->size, GFP_KERNEL);
  89                if (!data)
  90                        return -ENOMEM;
  91
  92                err = net_assign_generic(net, *ops->id, data);
  93                if (err) {
  94                        kfree(data);
  95                        return err;
  96                }
  97        }
  98        if (ops->init)
  99                return ops->init(net);
 100        return 0;
 101}
 102
 103static void ops_free(const struct pernet_operations *ops, struct net *net)
 104{
 105        if (ops->id && ops->size) {
 106                int id = *ops->id;
 107                kfree(net_generic(net, id));
 108        }
 109}
 110
 111static void ops_exit_list(const struct pernet_operations *ops,
 112                          struct list_head *net_exit_list)
 113{
 114        struct net *net;
 115        if (ops->exit) {
 116                list_for_each_entry(net, net_exit_list, exit_list)
 117                        ops->exit(net);
 118        }
 119        if (ops->exit_batch)
 120                ops->exit_batch(net_exit_list);
 121}
 122
 123static void ops_free_list(const struct pernet_operations *ops,
 124                          struct list_head *net_exit_list)
 125{
 126        struct net *net;
 127        if (ops->size && ops->id) {
 128                list_for_each_entry(net, net_exit_list, exit_list)
 129                        ops_free(ops, net);
 130        }
 131}
 132
 133/*
 134 * setup_net runs the initializers for the network namespace object.
 135 */
 136static __net_init int setup_net(struct net *net)
 137{
 138        /* Must be called with net_mutex held */
 139        const struct pernet_operations *ops, *saved_ops;
 140        int error = 0;
 141        LIST_HEAD(net_exit_list);
 142
 143        atomic_set(&net->count, 1);
 144        atomic_set(&net->passive, 1);
 145        net->dev_base_seq = 1;
 146
 147#ifdef NETNS_REFCNT_DEBUG
 148        atomic_set(&net->use_count, 0);
 149#endif
 150
 151        list_for_each_entry(ops, &pernet_list, list) {
 152                error = ops_init(ops, net);
 153                if (error < 0)
 154                        goto out_undo;
 155        }
 156out:
 157        return error;
 158
 159out_undo:
 160        /* Walk through the list backwards calling the exit functions
 161         * for the pernet modules whose init functions did not fail.
 162         */
 163        list_add(&net->exit_list, &net_exit_list);
 164        saved_ops = ops;
 165        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 166                ops_exit_list(ops, &net_exit_list);
 167
 168        ops = saved_ops;
 169        list_for_each_entry_continue_reverse(ops, &pernet_list, list)
 170                ops_free_list(ops, &net_exit_list);
 171
 172        rcu_barrier();
 173        goto out;
 174}
 175
 176
 177#ifdef CONFIG_NET_NS
/* Slab cache for struct net; created in net_ns_init(). */
static struct kmem_cache *net_cachep;
/* Single-threaded workqueue that runs cleanup_net(); created in net_ns_init(). */
static struct workqueue_struct *netns_wq;
 180
 181static struct net *net_alloc(void)
 182{
 183        struct net *net = NULL;
 184        struct net_generic *ng;
 185
 186        ng = net_alloc_generic();
 187        if (!ng)
 188                goto out;
 189
 190        net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
 191        if (!net)
 192                goto out_free;
 193
 194        rcu_assign_pointer(net->gen, ng);
 195out:
 196        return net;
 197
 198out_free:
 199        kfree(ng);
 200        goto out;
 201}
 202
/*
 * Free a namespace structure and its generic pointer array.
 *
 * With NETNS_REFCNT_DEBUG, a namespace whose use_count is still
 * non-zero is reported and deliberately not freed.
 */
static void net_free(struct net *net)
{
#ifdef NETNS_REFCNT_DEBUG
	if (unlikely(atomic_read(&net->use_count) != 0)) {
		printk(KERN_EMERG "network namespace not free! Usage: %d\n",
			atomic_read(&net->use_count));
		return;
	}
#endif
	kfree(net->gen);
	kmem_cache_free(net_cachep, net);
}
 215
 216void net_drop_ns(void *p)
 217{
 218        struct net *ns = p;
 219        if (ns && atomic_dec_and_test(&ns->passive))
 220                net_free(ns);
 221}
 222
 223struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 224{
 225        struct net *net;
 226        int rv;
 227
 228        if (!(flags & CLONE_NEWNET))
 229                return get_net(old_net);
 230
 231        net = net_alloc();
 232        if (!net)
 233                return ERR_PTR(-ENOMEM);
 234        mutex_lock(&net_mutex);
 235        rv = setup_net(net);
 236        if (rv == 0) {
 237                rtnl_lock();
 238                list_add_tail_rcu(&net->list, &net_namespace_list);
 239                rtnl_unlock();
 240        }
 241        mutex_unlock(&net_mutex);
 242        if (rv < 0) {
 243                net_drop_ns(net);
 244                return ERR_PTR(rv);
 245        }
 246        return net;
 247}
 248
 249static DEFINE_SPINLOCK(cleanup_list_lock);
 250static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */
 251
 252static void cleanup_net(struct work_struct *work)
 253{
 254        const struct pernet_operations *ops;
 255        struct net *net, *tmp;
 256        LIST_HEAD(net_kill_list);
 257        LIST_HEAD(net_exit_list);
 258
 259        /* Atomically snapshot the list of namespaces to cleanup */
 260        spin_lock_irq(&cleanup_list_lock);
 261        list_replace_init(&cleanup_list, &net_kill_list);
 262        spin_unlock_irq(&cleanup_list_lock);
 263
 264        mutex_lock(&net_mutex);
 265
 266        /* Don't let anyone else find us. */
 267        rtnl_lock();
 268        list_for_each_entry(net, &net_kill_list, cleanup_list) {
 269                list_del_rcu(&net->list);
 270                list_add_tail(&net->exit_list, &net_exit_list);
 271        }
 272        rtnl_unlock();
 273
 274        /*
 275         * Another CPU might be rcu-iterating the list, wait for it.
 276         * This needs to be before calling the exit() notifiers, so
 277         * the rcu_barrier() below isn't sufficient alone.
 278         */
 279        synchronize_rcu();
 280
 281        /* Run all of the network namespace exit methods */
 282        list_for_each_entry_reverse(ops, &pernet_list, list)
 283                ops_exit_list(ops, &net_exit_list);
 284
 285        /* Free the net generic variables */
 286        list_for_each_entry_reverse(ops, &pernet_list, list)
 287                ops_free_list(ops, &net_exit_list);
 288
 289        mutex_unlock(&net_mutex);
 290
 291        /* Ensure there are no outstanding rcu callbacks using this
 292         * network namespace.
 293         */
 294        rcu_barrier();
 295
 296        /* Finally it is safe to free my network namespace structure */
 297        list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
 298                list_del_init(&net->exit_list);
 299                net_drop_ns(net);
 300        }
 301}
 302static DECLARE_WORK(net_cleanup_work, cleanup_net);
 303
 304void __put_net(struct net *net)
 305{
 306        /* Cleanup the network namespace in process context */
 307        unsigned long flags;
 308
 309        spin_lock_irqsave(&cleanup_list_lock, flags);
 310        list_add(&net->cleanup_list, &cleanup_list);
 311        spin_unlock_irqrestore(&cleanup_list_lock, flags);
 312
 313        queue_work(netns_wq, &net_cleanup_work);
 314}
 315EXPORT_SYMBOL_GPL(__put_net);
 316
 317struct net *get_net_ns_by_fd(int fd)
 318{
 319        struct proc_inode *ei;
 320        struct file *file;
 321        struct net *net;
 322
 323        file = proc_ns_fget(fd);
 324        if (IS_ERR(file))
 325                return ERR_CAST(file);
 326
 327        ei = PROC_I(file->f_dentry->d_inode);
 328        if (ei->ns_ops == &netns_operations)
 329                net = get_net(ei->ns);
 330        else
 331                net = ERR_PTR(-EINVAL);
 332
 333        fput(file);
 334        return net;
 335}
 336
 337#else
 338struct net *copy_net_ns(unsigned long flags, struct net *old_net)
 339{
 340        if (flags & CLONE_NEWNET)
 341                return ERR_PTR(-EINVAL);
 342        return old_net;
 343}
 344
/*
 * get_net_ns_by_fd - variant for kernels built without CONFIG_NET_NS.
 *
 * Always fails with ERR_PTR(-EINVAL); @fd is not looked at.
 */
struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
 349#endif
 350
 351struct net *get_net_ns_by_pid(pid_t pid)
 352{
 353        struct task_struct *tsk;
 354        struct net *net;
 355
 356        /* Lookup the network namespace */
 357        net = ERR_PTR(-ESRCH);
 358        rcu_read_lock();
 359        tsk = find_task_by_vpid(pid);
 360        if (tsk) {
 361                struct nsproxy *nsproxy;
 362                nsproxy = task_nsproxy(tsk);
 363                if (nsproxy)
 364                        net = get_net(nsproxy->net_ns);
 365        }
 366        rcu_read_unlock();
 367        return net;
 368}
 369EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
 370
 371static int __init net_ns_init(void)
 372{
 373        struct net_generic *ng;
 374
 375#ifdef CONFIG_NET_NS
 376        net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
 377                                        SMP_CACHE_BYTES,
 378                                        SLAB_PANIC, NULL);
 379
 380        /* Create workqueue for cleanup */
 381        netns_wq = create_singlethread_workqueue("netns");
 382        if (!netns_wq)
 383                panic("Could not create netns workq");
 384#endif
 385
 386        ng = net_alloc_generic();
 387        if (!ng)
 388                panic("Could not allocate generic netns");
 389
 390        rcu_assign_pointer(init_net.gen, ng);
 391
 392        mutex_lock(&net_mutex);
 393        if (setup_net(&init_net))
 394                panic("Could not setup the initial network namespace");
 395
 396        rtnl_lock();
 397        list_add_tail_rcu(&init_net.list, &net_namespace_list);
 398        rtnl_unlock();
 399
 400        mutex_unlock(&net_mutex);
 401
 402        return 0;
 403}
 404
 405pure_initcall(net_ns_init);
 406
 407#ifdef CONFIG_NET_NS
 408static int __register_pernet_operations(struct list_head *list,
 409                                        struct pernet_operations *ops)
 410{
 411        struct net *net;
 412        int error;
 413        LIST_HEAD(net_exit_list);
 414
 415        list_add_tail(&ops->list, list);
 416        if (ops->init || (ops->id && ops->size)) {
 417                for_each_net(net) {
 418                        error = ops_init(ops, net);
 419                        if (error)
 420                                goto out_undo;
 421                        list_add_tail(&net->exit_list, &net_exit_list);
 422                }
 423        }
 424        return 0;
 425
 426out_undo:
 427        /* If I have an error cleanup all namespaces I initialized */
 428        list_del(&ops->list);
 429        ops_exit_list(ops, &net_exit_list);
 430        ops_free_list(ops, &net_exit_list);
 431        return error;
 432}
 433
 434static void __unregister_pernet_operations(struct pernet_operations *ops)
 435{
 436        struct net *net;
 437        LIST_HEAD(net_exit_list);
 438
 439        list_del(&ops->list);
 440        for_each_net(net)
 441                list_add_tail(&net->exit_list, &net_exit_list);
 442        ops_exit_list(ops, &net_exit_list);
 443        ops_free_list(ops, &net_exit_list);
 444}
 445
 446#else
 447
 448static int __register_pernet_operations(struct list_head *list,
 449                                        struct pernet_operations *ops)
 450{
 451        int err = 0;
 452        err = ops_init(ops, &init_net);
 453        if (err)
 454                ops_free(ops, &init_net);
 455        return err;
 456        
 457}
 458
 459static void __unregister_pernet_operations(struct pernet_operations *ops)
 460{
 461        LIST_HEAD(net_exit_list);
 462        list_add(&init_net.exit_list, &net_exit_list);
 463        ops_exit_list(ops, &net_exit_list);
 464        ops_free_list(ops, &net_exit_list);
 465}
 466
 467#endif /* CONFIG_NET_NS */
 468
 469static DEFINE_IDA(net_generic_ids);
 470
 471static int register_pernet_operations(struct list_head *list,
 472                                      struct pernet_operations *ops)
 473{
 474        int error;
 475
 476        if (ops->id) {
 477again:
 478                error = ida_get_new_above(&net_generic_ids, 1, ops->id);
 479                if (error < 0) {
 480                        if (error == -EAGAIN) {
 481                                ida_pre_get(&net_generic_ids, GFP_KERNEL);
 482                                goto again;
 483                        }
 484                        return error;
 485                }
 486                max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
 487        }
 488        error = __register_pernet_operations(list, ops);
 489        if (error) {
 490                rcu_barrier();
 491                if (ops->id)
 492                        ida_remove(&net_generic_ids, *ops->id);
 493        }
 494
 495        return error;
 496}
 497
/*
 * Undo register_pernet_operations(): run the exit/free steps of @ops
 * through __unregister_pernet_operations(), wait for rcu_barrier(), and
 * then release the generic id if @ops has one.
 */
static void unregister_pernet_operations(struct pernet_operations *ops)
{
	
	__unregister_pernet_operations(ops);
	rcu_barrier();
	if (ops->id)
		ida_remove(&net_generic_ids, *ops->id);
}
 506
 507/**
 508 *      register_pernet_subsys - register a network namespace subsystem
 509 *      @ops:  pernet operations structure for the subsystem
 510 *
 511 *      Register a subsystem which has init and exit functions
 512 *      that are called when network namespaces are created and
 513 *      destroyed respectively.
 514 *
 515 *      When registered all network namespace init functions are
 516 *      called for every existing network namespace.  Allowing kernel
 517 *      modules to have a race free view of the set of network namespaces.
 518 *
 519 *      When a new network namespace is created all of the init
 520 *      methods are called in the order in which they were registered.
 521 *
 522 *      When a network namespace is destroyed all of the exit methods
 523 *      are called in the reverse of the order with which they were
 524 *      registered.
 525 */
 526int register_pernet_subsys(struct pernet_operations *ops)
 527{
 528        int error;
 529        mutex_lock(&net_mutex);
 530        error =  register_pernet_operations(first_device, ops);
 531        mutex_unlock(&net_mutex);
 532        return error;
 533}
 534EXPORT_SYMBOL_GPL(register_pernet_subsys);
 535
 536/**
 537 *      unregister_pernet_subsys - unregister a network namespace subsystem
 538 *      @ops: pernet operations structure to manipulate
 539 *
 540 *      Remove the pernet operations structure from the list to be
 541 *      used when network namespaces are created or destroyed.  In
 542 *      addition run the exit method for all existing network
 543 *      namespaces.
 544 */
 545void unregister_pernet_subsys(struct pernet_operations *ops)
 546{
 547        mutex_lock(&net_mutex);
 548        unregister_pernet_operations(ops);
 549        mutex_unlock(&net_mutex);
 550}
 551EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
 552
 553/**
 554 *      register_pernet_device - register a network namespace device
 555 *      @ops:  pernet operations structure for the subsystem
 556 *
 557 *      Register a device which has init and exit functions
 558 *      that are called when network namespaces are created and
 559 *      destroyed respectively.
 560 *
 561 *      When registered all network namespace init functions are
 562 *      called for every existing network namespace.  Allowing kernel
 563 *      modules to have a race free view of the set of network namespaces.
 564 *
 565 *      When a new network namespace is created all of the init
 566 *      methods are called in the order in which they were registered.
 567 *
 568 *      When a network namespace is destroyed all of the exit methods
 569 *      are called in the reverse of the order with which they were
 570 *      registered.
 571 */
 572int register_pernet_device(struct pernet_operations *ops)
 573{
 574        int error;
 575        mutex_lock(&net_mutex);
 576        error = register_pernet_operations(&pernet_list, ops);
 577        if (!error && (first_device == &pernet_list))
 578                first_device = &ops->list;
 579        mutex_unlock(&net_mutex);
 580        return error;
 581}
 582EXPORT_SYMBOL_GPL(register_pernet_device);
 583
 584/**
 585 *      unregister_pernet_device - unregister a network namespace netdevice
 586 *      @ops: pernet operations structure to manipulate
 587 *
 588 *      Remove the pernet operations structure from the list to be
 589 *      used when network namespaces are created or destroyed.  In
 590 *      addition run the exit method for all existing network
 591 *      namespaces.
 592 */
 593void unregister_pernet_device(struct pernet_operations *ops)
 594{
 595        mutex_lock(&net_mutex);
 596        if (&ops->list == first_device)
 597                first_device = first_device->next;
 598        unregister_pernet_operations(ops);
 599        mutex_unlock(&net_mutex);
 600}
 601EXPORT_SYMBOL_GPL(unregister_pernet_device);
 602
 603#ifdef CONFIG_NET_NS
 604static void *netns_get(struct task_struct *task)
 605{
 606        struct net *net = NULL;
 607        struct nsproxy *nsproxy;
 608
 609        rcu_read_lock();
 610        nsproxy = task_nsproxy(task);
 611        if (nsproxy)
 612                net = get_net(nsproxy->net_ns);
 613        rcu_read_unlock();
 614
 615        return net;
 616}
 617
/* proc_ns_operations ->put hook: drop a reference on namespace @ns. */
static void netns_put(void *ns)
{
	put_net(ns);
}
 622
 623static int netns_install(struct nsproxy *nsproxy, void *ns)
 624{
 625        put_net(nsproxy->net_ns);
 626        nsproxy->net_ns = get_net(ns);
 627        return 0;
 628}
 629
/*
 * proc namespace operations for network namespaces.  get_net_ns_by_fd()
 * compares a proc inode's ns_ops against this object to recognise
 * network namespace files.
 */
const struct proc_ns_operations netns_operations = {
	.name           = "net",
	.type           = CLONE_NEWNET,
	.get            = netns_get,
	.put            = netns_put,
	.install        = netns_install,
};
 637#endif
 638
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.