linux/net/netlink/af_netlink.c
/*
 * NETLINK      Kernel-user communication protocol.
 *
 *              Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
 *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *                               - inc module use count of module that owns
 *                                 the kernel socket in case userspace opens
 *                                 socket of same protocol
 *                               - remove all module support, since netlink is
 *                                 mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#define NLGRPSZ(x)      (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)   (NLGRPSZ(x)/sizeof(unsigned long))
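/*
 * E.g. with the default 32 groups on a 64-bit machine, NLGRPSZ(32) rounds
 * 32 bits up to one unsigned long (64 bits) and yields 8 bytes of bitmap,
 * so NLGRPLONGS(32) == 1.
 */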

struct netlink_sock {
        /* struct sock has to be the first member of netlink_sock */
        struct sock             sk;
        u32                     pid;
        u32                     dst_pid;
        u32                     dst_group;
        u32                     flags;
        u32                     subscriptions;
        u32                     ngroups;
        unsigned long           *groups;
        unsigned long           state;
        wait_queue_head_t       wait;
        struct netlink_callback *cb;
        struct mutex            *cb_mutex;
        struct mutex            cb_def_mutex;
        void                    (*netlink_rcv)(struct sk_buff *skb);
        void                    (*netlink_bind)(int group);
        struct module           *module;
};

struct listeners {
        struct rcu_head         rcu;
        unsigned long           masks[0];
};

#define NETLINK_KERNEL_SOCKET   0x1
#define NETLINK_RECV_PKTINFO    0x2
#define NETLINK_BROADCAST_SEND_ERROR    0x4
#define NETLINK_RECV_NO_ENOBUFS 0x8

static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
        return container_of(sk, struct netlink_sock, sk);
}

static inline int netlink_is_kernel(struct sock *sk)
{
        return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct nl_pid_hash {
        struct hlist_head       *table;
        unsigned long           rehash_time;

        unsigned int            mask;
        unsigned int            shift;

        unsigned int            entries;
        unsigned int            max_shift;

        u32                     rnd;
};

struct netlink_table {
        struct nl_pid_hash      hash;
        struct hlist_head       mc_list;
        struct listeners __rcu  *listeners;
        unsigned int            nl_nonroot;
        unsigned int            groups;
        struct mutex            *cb_mutex;
        struct module           *module;
        void                    (*bind)(int group);
        int                     registered;
};

static struct netlink_table *nl_table;

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);

static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static inline u32 netlink_group_mask(u32 group)
{
        return group ? 1 << (group - 1) : 0;
}

static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
        return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}

static void netlink_destroy_callback(struct netlink_callback *cb)
{
        kfree_skb(cb->skb);
        kfree(cb);
}

static void netlink_consume_callback(struct netlink_callback *cb)
{
        consume_skb(cb->skb);
        kfree(cb);
}

static void netlink_sock_destruct(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->cb) {
                if (nlk->cb->done)
                        nlk->cb->done(nlk->cb);
                netlink_destroy_callback(nlk->cb);
        }

        skb_queue_purge(&sk->sk_receive_queue);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
                return;
        }

        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
        WARN_ON(nlk_sk(sk)->groups);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP, but it is _very_ bad on
 * SMP: when several writers sleep and a reader wakes them up, all but one
 * immediately hit the write lock and grab all the CPUs. Exclusive sleep
 * solves this, _but_ remember that it adds useless work on UP machines.
 */

void netlink_table_grab(void)
        __acquires(nl_table_lock)
{
        might_sleep();

        write_lock_irq(&nl_table_lock);

        if (atomic_read(&nl_table_users)) {
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(&nl_table_wait, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&nl_table_users) == 0)
                                break;
                        write_unlock_irq(&nl_table_lock);
                        schedule();
                        write_lock_irq(&nl_table_lock);
                }

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nl_table_wait, &wait);
        }
}

void netlink_table_ungrab(void)
        __releases(nl_table_lock)
{
        write_unlock_irq(&nl_table_lock);
        wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
        /* read_lock() synchronizes us with netlink_table_grab() */

        read_lock(&nl_table_lock);
        atomic_inc(&nl_table_users);
        read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
        if (atomic_dec_and_test(&nl_table_users))
                wake_up(&nl_table_wait);
}

static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
{
        struct nl_pid_hash *hash = &nl_table[protocol].hash;
        struct hlist_head *head;
        struct sock *sk;
        struct hlist_node *node;

        read_lock(&nl_table_lock);
        head = nl_pid_hashfn(hash, pid);
        sk_for_each(sk, node, head) {
                if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
                        sock_hold(sk);
                        goto found;
                }
        }
        sk = NULL;
found:
        read_unlock(&nl_table_lock);
        return sk;
}

static struct hlist_head *nl_pid_hash_zalloc(size_t size)
{
        if (size <= PAGE_SIZE)
                return kzalloc(size, GFP_ATOMIC);
        else
                return (struct hlist_head *)
                        __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                         get_order(size));
}

static void nl_pid_hash_free(struct hlist_head *table, size_t size)
{
        if (size <= PAGE_SIZE)
                kfree(table);
        else
                free_pages((unsigned long)table, get_order(size));
}

static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
{
        unsigned int omask, mask, shift;
        size_t osize, size;
        struct hlist_head *otable, *table;
        int i;

        omask = mask = hash->mask;
        osize = size = (mask + 1) * sizeof(*table);
        shift = hash->shift;

        if (grow) {
                if (++shift > hash->max_shift)
                        return 0;
                mask = mask * 2 + 1;
                size *= 2;
        }

        table = nl_pid_hash_zalloc(size);
        if (!table)
                return 0;

        otable = hash->table;
        hash->table = table;
        hash->mask = mask;
        hash->shift = shift;
        get_random_bytes(&hash->rnd, sizeof(hash->rnd));

        for (i = 0; i <= omask; i++) {
                struct sock *sk;
                struct hlist_node *node, *tmp;

                sk_for_each_safe(sk, node, tmp, &otable[i])
                        __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
        }

        nl_pid_hash_free(otable, osize);
        hash->rehash_time = jiffies + 10 * 60 * HZ;
        return 1;
}

static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
{
        int avg = hash->entries >> hash->shift;

        if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
                return 1;

        if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
                nl_pid_hash_rehash(hash, 0);
                return 1;
        }

        return 0;
}

static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
        struct hlist_node *node;
        unsigned long mask;
        unsigned int i;

        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
                mask = 0;
                sk_for_each_bound(sk, node, &tbl->mc_list) {
                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
                                mask |= nlk_sk(sk)->groups[i];
                }
                tbl->listeners->masks[i] = mask;
        }
        /* this function is only called with the netlink table "grabbed", which
         * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
        struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        int err = -EADDRINUSE;
        struct sock *osk;
        struct hlist_node *node;
        int len;

        netlink_table_grab();
        head = nl_pid_hashfn(hash, pid);
        len = 0;
        sk_for_each(osk, node, head) {
                if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
                        break;
                len++;
        }
        if (node)
                goto err;

        err = -EBUSY;
        if (nlk_sk(sk)->pid)
                goto err;

        err = -ENOMEM;
        if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
                goto err;

        if (len && nl_pid_hash_dilute(hash, len))
                head = nl_pid_hashfn(hash, pid);
        hash->entries++;
        nlk_sk(sk)->pid = pid;
        sk_add_node(sk, head);
        err = 0;

err:
        netlink_table_ungrab();
        return err;
}

static void netlink_remove(struct sock *sk)
{
        netlink_table_grab();
        if (sk_del_node_init(sk))
                nl_table[sk->sk_protocol].hash.entries--;
        if (nlk_sk(sk)->subscriptions)
                __sk_del_bind_node(sk);
        netlink_table_ungrab();
}

static struct proto netlink_proto = {
        .name     = "NETLINK",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
                            struct mutex *cb_mutex, int protocol)
{
        struct sock *sk;
        struct netlink_sock *nlk;

        sock->ops = &netlink_ops;

        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);

        nlk = nlk_sk(sk);
        if (cb_mutex) {
                nlk->cb_mutex = cb_mutex;
        } else {
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
        }
        init_waitqueue_head(&nlk->wait);

        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol,
                          int kern)
{
        struct module *module = NULL;
        struct mutex *cb_mutex;
        struct netlink_sock *nlk;
        void (*bind)(int group);
        int err = 0;

        sock->state = SS_UNCONNECTED;

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        if (protocol < 0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;

        netlink_lock_table();
#ifdef CONFIG_MODULES
        if (!nl_table[protocol].registered) {
                netlink_unlock_table();
                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
                netlink_lock_table();
        }
#endif
        if (nl_table[protocol].registered &&
            try_module_get(nl_table[protocol].module))
                module = nl_table[protocol].module;
        else
                err = -EPROTONOSUPPORT;
        cb_mutex = nl_table[protocol].cb_mutex;
        bind = nl_table[protocol].bind;
        netlink_unlock_table();

        if (err < 0)
                goto out;

        err = __netlink_create(net, sock, cb_mutex, protocol);
        if (err < 0)
                goto out_module;

        local_bh_disable();
        sock_prot_inuse_add(net, &netlink_proto, 1);
        local_bh_enable();

        nlk = nlk_sk(sock->sk);
        nlk->module = module;
        nlk->netlink_bind = bind;
out:
        return err;

out_module:
        module_put(module);
        goto out;
}

static int netlink_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk;

        if (!sk)
                return 0;

        netlink_remove(sk);
        sock_orphan(sk);
        nlk = nlk_sk(sk);

        /*
         * OK. Socket is unlinked, any packets that arrive now
         * will be purged.
         */

        sock->sk = NULL;
        wake_up_interruptible_all(&nlk->wait);

        skb_queue_purge(&sk->sk_write_queue);

        if (nlk->pid) {
                struct netlink_notify n = {
                                                .net = sock_net(sk),
                                                .protocol = sk->sk_protocol,
                                                .pid = nlk->pid,
                                          };
                atomic_notifier_call_chain(&netlink_chain,
                                NETLINK_URELEASE, &n);
        }

        module_put(nlk->module);

        netlink_table_grab();
        if (netlink_is_kernel(sk)) {
                BUG_ON(nl_table[sk->sk_protocol].registered == 0);
                if (--nl_table[sk->sk_protocol].registered == 0) {
                        kfree(nl_table[sk->sk_protocol].listeners);
                        nl_table[sk->sk_protocol].module = NULL;
                        nl_table[sk->sk_protocol].registered = 0;
                }
        } else if (nlk->subscriptions) {
                netlink_update_listeners(sk);
        }
        netlink_table_ungrab();

        kfree(nlk->groups);
        nlk->groups = NULL;

        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
        local_bh_enable();
        sock_put(sk);
        return 0;
}

static int netlink_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        struct sock *osk;
        struct hlist_node *node;
        s32 pid = task_tgid_vnr(current);
        int err;
        static s32 rover = -4097;

retry:
        cond_resched();
        netlink_table_grab();
        head = nl_pid_hashfn(hash, pid);
        sk_for_each(osk, node, head) {
                if (!net_eq(sock_net(osk), net))
                        continue;
                if (nlk_sk(osk)->pid == pid) {
                        /* Bind collision, search negative pid values. */
                        pid = rover--;
                        if (rover > -4097)
                                rover = -4097;
                        netlink_table_ungrab();
                        goto retry;
                }
        }
        netlink_table_ungrab();

        err = netlink_insert(sk, net, pid);
        if (err == -EADDRINUSE)
                goto retry;

        /* If 2 threads race to autobind, that is fine.  */
        if (err == -EBUSY)
                err = 0;

        return err;
}

static inline int netlink_capable(const struct socket *sock, unsigned int flag)
{
        return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
               capable(CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->subscriptions && !subscriptions)
                __sk_del_bind_node(sk);
        else if (!nlk->subscriptions && subscriptions)
                sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
        nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int groups;
        unsigned long *new_groups;
        int err = 0;

        netlink_table_grab();

        groups = nl_table[sk->sk_protocol].groups;
        if (!nl_table[sk->sk_protocol].registered) {
                err = -ENOENT;
                goto out_unlock;
        }

        if (nlk->ngroups >= groups)
                goto out_unlock;

        new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
        if (new_groups == NULL) {
                err = -ENOMEM;
                goto out_unlock;
        }
        memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
               NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

        nlk->groups = new_groups;
        nlk->ngroups = groups;
 out_unlock:
        netlink_table_ungrab();
        return err;
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr,
                        int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
        int err;

        if (nladdr->nl_family != AF_NETLINK)
                return -EINVAL;

        /* Only the superuser is allowed to listen to multicasts */
        if (nladdr->nl_groups) {
                if (!netlink_capable(sock, NL_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
        }

        if (nlk->pid) {
                if (nladdr->nl_pid != nlk->pid)
                        return -EINVAL;
        } else {
                err = nladdr->nl_pid ?
                        netlink_insert(sk, net, nladdr->nl_pid) :
                        netlink_autobind(sock);
                if (err)
                        return err;
        }

        if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
                return 0;

        netlink_table_grab();
        netlink_update_subscriptions(sk, nlk->subscriptions +
                                         hweight32(nladdr->nl_groups) -
                                         hweight32(nlk->groups[0]));
        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
        netlink_update_listeners(sk);
        netlink_table_ungrab();

        if (nlk->netlink_bind && nlk->groups[0]) {
                int i;

                for (i = 0; i < nlk->ngroups; i++) {
                        if (test_bit(i, nlk->groups))
                                nlk->netlink_bind(i);
                }
        }

        return 0;
}
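
/*
 * A minimal userspace counterpart to the bind path above (illustrative
 * sketch only; NETLINK_ROUTE and RTMGRP_LINK are just example choices):
 *
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	struct sockaddr_nl sa = {
 *		.nl_family = AF_NETLINK,
 *		.nl_pid    = 0,            // 0 lets the kernel autobind a pid
 *		.nl_groups = RTMGRP_LINK,  // groups 1..32 as a bitmask
 *	};
 *	bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * Binding with nl_groups != 0 is subject to the NL_NONROOT_RECV /
 * CAP_NET_ADMIN check above; groups beyond the first 32 must be joined
 * via the NETLINK_ADD_MEMBERSHIP socket option instead.
 */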

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
                           int alen, int flags)
{
        int err = 0;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

        if (alen < sizeof(addr->sa_family))
                return -EINVAL;

        if (addr->sa_family == AF_UNSPEC) {
                sk->sk_state    = NETLINK_UNCONNECTED;
                nlk->dst_pid    = 0;
                nlk->dst_group  = 0;
                return 0;
        }
        if (addr->sa_family != AF_NETLINK)
                return -EINVAL;

        /* Only the superuser is allowed to send multicasts */
        if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
                return -EPERM;

        if (!nlk->pid)
                err = netlink_autobind(sock);

        if (err == 0) {
                sk->sk_state    = NETLINK_CONNECTED;
                nlk->dst_pid    = nladdr->nl_pid;
                nlk->dst_group  = ffs(nladdr->nl_groups);
        }

        return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
                           int *addr_len, int peer)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

        nladdr->nl_family = AF_NETLINK;
        nladdr->nl_pad = 0;
        *addr_len = sizeof(*nladdr);

        if (peer) {
                nladdr->nl_pid = nlk->dst_pid;
                nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
        } else {
                nladdr->nl_pid = nlk->pid;
                nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
        }
        return 0;
}

static void netlink_overrun(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
                if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
                        sk->sk_err = ENOBUFS;
                        sk->sk_error_report(sk);
                }
        }
        atomic_inc(&sk->sk_drops);
}

static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
        struct sock *sock;
        struct netlink_sock *nlk;

        sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
        if (!sock)
                return ERR_PTR(-ECONNREFUSED);

        /* Don't bother queuing skb if kernel socket has no input function */
        nlk = nlk_sk(sock);
        if (sock->sk_state == NETLINK_CONNECTED &&
            nlk->dst_pid != nlk_sk(ssk)->pid) {
                sock_put(sock);
                return ERR_PTR(-ECONNREFUSED);
        }
        return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct sock *sock;

        if (!S_ISSOCK(inode->i_mode))
                return ERR_PTR(-ENOTSOCK);

        sock = SOCKET_I(inode)->sk;
        if (sock->sk_family != AF_NETLINK)
                return ERR_PTR(-EINVAL);

        sock_hold(sock);
        return sock;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory
 *    (see the retry loop in netlink_unicast() below).
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
                      long *timeo, struct sock *ssk)
{
        struct netlink_sock *nlk;

        nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
            test_bit(0, &nlk->state)) {
                DECLARE_WAITQUEUE(wait, current);
                if (!*timeo) {
                        if (!ssk || netlink_is_kernel(ssk))
                                netlink_overrun(sk);
                        sock_put(sk);
                        kfree_skb(skb);
                        return -EAGAIN;
                }

                __set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&nlk->wait, &wait);

                if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
                     test_bit(0, &nlk->state)) &&
                    !sock_flag(sk, SOCK_DEAD))
                        *timeo = schedule_timeout(*timeo);

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nlk->wait, &wait);
                sock_put(sk);

                if (signal_pending(current)) {
                        kfree_skb(skb);
                        return sock_intr_errno(*timeo);
                }
                return 1;
        }
        skb_set_owner_r(skb, sk);
        return 0;
}

static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk, len);
        return len;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = __netlink_sendskb(sk, skb);

        sock_put(sk);
        return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
        kfree_skb(skb);
        sock_put(sk);
}

static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
        int delta;

        skb_orphan(skb);

        delta = skb->end - skb->tail;
        if (delta * 2 < skb->truesize)
                return skb;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, allocation);
                if (!nskb)
                        return skb;
                consume_skb(skb);
                skb = nskb;
        }

        if (!pskb_expand_head(skb, 0, -delta, allocation))
                skb->truesize -= delta;

        return skb;
}

static void netlink_rcv_wake(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (skb_queue_empty(&sk->sk_receive_queue))
                clear_bit(0, &nlk->state);
        if (!test_bit(0, &nlk->state))
                wake_up_interruptible(&nlk->wait);
}

static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
{
        int ret;
        struct netlink_sock *nlk = nlk_sk(sk);

        ret = -ECONNREFUSED;
        if (nlk->netlink_rcv != NULL) {
                ret = skb->len;
                skb_set_owner_r(skb, sk);
                nlk->netlink_rcv(skb);
                consume_skb(skb);
        } else {
                kfree_skb(skb);
        }
        sock_put(sk);
        return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
                    u32 pid, int nonblock)
{
        struct sock *sk;
        int err;
        long timeo;

        skb = netlink_trim(skb, gfp_any());

        timeo = sock_sndtimeo(ssk, nonblock);
retry:
        sk = netlink_getsockbypid(ssk, pid);
        if (IS_ERR(sk)) {
                kfree_skb(skb);
                return PTR_ERR(sk);
        }
        if (netlink_is_kernel(sk))
                return netlink_unicast_kernel(sk, skb);

        if (sk_filter(sk, skb)) {
                err = skb->len;
                kfree_skb(skb);
                sock_put(sk);
                return err;
        }

        err = netlink_attachskb(sk, skb, &timeo, ssk);
        if (err == 1)
                goto retry;
        if (err)
                return err;

        return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);
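
/*
 * The usual kernel-side pattern around netlink_unicast() (sketch only;
 * my_sock and MY_MSG_TYPE stand for a driver's kernel socket and a
 * protocol-private message type): the input callback replies to the
 * sender's pid recorded in the skb's control block.
 *
 *	static void my_input(struct sk_buff *skb)
 *	{
 *		struct nlmsghdr *nlh = nlmsg_hdr(skb);
 *		u32 pid = NETLINK_CB(skb).pid;
 *		struct sk_buff *reply;
 *
 *		reply = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 *		if (!reply)
 *			return;
 *		if (!nlmsg_put(reply, 0, nlh->nlmsg_seq, MY_MSG_TYPE, 0, 0)) {
 *			kfree_skb(reply);
 *			return;
 *		}
 *		netlink_unicast(my_sock, reply, pid, MSG_DONTWAIT);
 *	}
 */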

int netlink_has_listeners(struct sock *sk, unsigned int group)
{
        int res = 0;
        struct listeners *listeners;

        BUG_ON(!netlink_is_kernel(sk));

        rcu_read_lock();
        listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

        if (group - 1 < nl_table[sk->sk_protocol].groups)
                res = test_bit(group - 1, listeners->masks);

        rcu_read_unlock();

        return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
            !test_bit(0, &nlk->state)) {
                skb_set_owner_r(skb, sk);
                __netlink_sendskb(sk, skb);
                return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
        }
        return -1;
}

struct netlink_broadcast_data {
        struct sock *exclude_sk;
        struct net *net;
        u32 pid;
        u32 group;
        int failure;
        int delivery_failure;
        int congested;
        int delivered;
        gfp_t allocation;
        struct sk_buff *skb, *skb2;
        int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
        void *tx_data;
};

static int do_one_broadcast(struct sock *sk,
                                   struct netlink_broadcast_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int val;

        if (p->exclude_sk == sk)
                goto out;

        if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (!net_eq(sock_net(sk), p->net))
                goto out;

        if (p->failure) {
                netlink_overrun(sk);
                goto out;
        }

        sock_hold(sk);
        if (p->skb2 == NULL) {
                if (skb_shared(p->skb)) {
                        p->skb2 = skb_clone(p->skb, p->allocation);
                } else {
                        p->skb2 = skb_get(p->skb);
                        /*
                         * skb ownership may have been set when
                         * delivered to a previous socket.
                         */
                        skb_orphan(p->skb2);
                }
        }
        if (p->skb2 == NULL) {
                netlink_overrun(sk);
                /* Clone failed. Notify ALL listeners. */
                p->failure = 1;
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if (sk_filter(sk, p->skb2)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
                netlink_overrun(sk);
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else {
                p->congested |= val;
                p->delivered = 1;
                p->skb2 = NULL;
        }
        sock_put(sk);

out:
        return 0;
}

int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
        u32 group, gfp_t allocation,
        int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
        void *filter_data)
{
        struct net *net = sock_net(ssk);
        struct netlink_broadcast_data info;
        struct hlist_node *node;
        struct sock *sk;

        skb = netlink_trim(skb, allocation);

        info.exclude_sk = ssk;
        info.net = net;
        info.pid = pid;
        info.group = group;
        info.failure = 0;
        info.delivery_failure = 0;
        info.congested = 0;
        info.delivered = 0;
        info.allocation = allocation;
        info.skb = skb;
        info.skb2 = NULL;
        info.tx_filter = filter;
        info.tx_data = filter_data;

        /* While we sleep in clone, do not allow the socket list to change */

        netlink_lock_table();

        sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
                do_one_broadcast(sk, &info);

        consume_skb(skb);

        netlink_unlock_table();

        if (info.delivery_failure) {
                kfree_skb(info.skb2);
                return -ENOBUFS;
        }
        consume_skb(info.skb2);

        if (info.delivered) {
                if (info.congested && (allocation & __GFP_WAIT))
                        yield();
                return 0;
        }
        return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
                      u32 group, gfp_t allocation)
{
        return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
                NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);
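
/*
 * Typical kernel-side use (sketch only; my_sock, MY_MSG_TYPE and MY_GROUP
 * are placeholders for a driver's kernel socket, message type and
 * multicast group number):
 *
 *	struct sk_buff *skb = nlmsg_new(payload_len, GFP_KERNEL);
 *	struct nlmsghdr *nlh;
 *
 *	if (!skb)
 *		return -ENOMEM;
 *	nlh = nlmsg_put(skb, 0, 0, MY_MSG_TYPE, payload_len, 0);
 *	if (!nlh) {
 *		kfree_skb(skb);
 *		return -EMSGSIZE;
 *	}
 *	memcpy(nlmsg_data(nlh), payload, payload_len);
 *	return netlink_broadcast(my_sock, skb, 0, MY_GROUP, GFP_KERNEL);
 *
 * A return value of -ESRCH only means that nobody currently listens to
 * the group; callers usually do not treat it as a hard error.
 */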

struct netlink_set_err_data {
        struct sock *exclude_sk;
        u32 pid;
        u32 group;
        int code;
};

static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int ret = 0;

        if (sk == p->exclude_sk)
                goto out;

        if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
                goto out;

        if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
                ret = 1;
                goto out;
        }

        sk->sk_err = p->code;
        sk->sk_error_report(sk);
out:
        return ret;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @pid: the PID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_RECV_NO_ENOBUFS socket option.
 */
int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
        struct netlink_set_err_data info;
        struct hlist_node *node;
        struct sock *sk;
        int ret = 0;

        info.exclude_sk = ssk;
        info.pid = pid;
        info.group = group;
        /* sk->sk_err wants a positive error value */
        info.code = -code;

        read_lock(&nl_table_lock);

        sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
                ret += do_one_set_err(sk, &info);

        read_unlock(&nl_table_lock);
        return ret;
}
EXPORT_SYMBOL(netlink_set_err);
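
/*
 * For example, a subsystem such as rtnetlink can report a lost multicast
 * message roughly as netlink_set_err(rtnl, 0, RTNLGRP_LINK, -ENOBUFS);
 * listeners that opted into NETLINK_NO_ENOBUFS are skipped and merely
 * counted in the return value.
 */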

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
                                     unsigned int group,
                                     int is_new)
{
        int old, new = !!is_new, subscriptions;

        old = test_bit(group - 1, nlk->groups);
        subscriptions = nlk->subscriptions - old + new;
        if (new)
                __set_bit(group - 1, nlk->groups);
        else
                __clear_bit(group - 1, nlk->groups);
        netlink_update_subscriptions(&nlk->sk, subscriptions);
        netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int val = 0;
        int err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (optlen >= sizeof(int) &&
            get_user(val, (unsigned int __user *)optval))
                return -EFAULT;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (val)
                        nlk->flags |= NETLINK_RECV_PKTINFO;
                else
                        nlk->flags &= ~NETLINK_RECV_PKTINFO;
                err = 0;
                break;
        case NETLINK_ADD_MEMBERSHIP:
        case NETLINK_DROP_MEMBERSHIP: {
                if (!netlink_capable(sock, NL_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
                if (!val || val - 1 >= nlk->ngroups)
                        return -EINVAL;
                netlink_table_grab();
                netlink_update_socket_mc(nlk, val,
                                         optname == NETLINK_ADD_MEMBERSHIP);
                netlink_table_ungrab();

                if (nlk->netlink_bind)
                        nlk->netlink_bind(val);

                err = 0;
                break;
        }
        case NETLINK_BROADCAST_ERROR:
                if (val)
                        nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
                else
                        nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (val) {
                        nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
                        clear_bit(0, &nlk->state);
                        wake_up_interruptible(&nlk->wait);
                } else {
                        nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
                }
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}
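
/*
 * From userspace, NETLINK_ADD_MEMBERSHIP / NETLINK_DROP_MEMBERSHIP take a
 * group number and are the only way to join groups beyond the 32 that fit
 * into sockaddr_nl.nl_groups (illustrative sketch):
 *
 *	unsigned int grp = 33;	// any group number valid for the protocol
 *
 *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp));
 *	...
 *	setsockopt(fd, SOL_NETLINK, NETLINK_DROP_MEMBERSHIP, &grp, sizeof(grp));
 */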

static int netlink_getsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int len, val, err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_BROADCAST_ERROR:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}

static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
        struct nl_pktinfo info;

        info.group = NETLINK_CB(skb).dst_group;
        put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}

static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *addr = msg->msg_name;
        u32 dst_pid;
        u32 dst_group;
        struct sk_buff *skb;
        int err;
        struct scm_cookie scm;

        if (msg->msg_flags&MSG_OOB)
                return -EOPNOTSUPP;

        if (NULL == siocb->scm)
                siocb->scm = &scm;

        err = scm_send(sock, msg, siocb->scm, true);
        if (err < 0)
                return err;

        if (msg->msg_namelen) {
                err = -EINVAL;
                if (addr->nl_family != AF_NETLINK)
                        goto out;
                dst_pid = addr->nl_pid;
                dst_group = ffs(addr->nl_groups);
                err = -EPERM;
                if ((dst_group || dst_pid) &&
                    !netlink_capable(sock, NL_NONROOT_SEND))
                        goto out;
        } else {
                dst_pid = nlk->dst_pid;
                dst_group = nlk->dst_group;
        }

        if (!nlk->pid) {
                err = netlink_autobind(sock);
                if (err)
                        goto out;
        }

        err = -EMSGSIZE;
        if (len > sk->sk_sndbuf - 32)
                goto out;
        err = -ENOBUFS;
        skb = alloc_skb(len, GFP_KERNEL);
        if (skb == NULL)
                goto out;

        NETLINK_CB(skb).pid     = nlk->pid;
        NETLINK_CB(skb).dst_group = dst_group;
        memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));

        err = -EFAULT;
        if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
                kfree_skb(skb);
                goto out;
        }

        err = security_netlink_send(sk, skb);
        if (err) {
                kfree_skb(skb);
                goto out;
        }

        if (dst_group) {
                atomic_inc(&skb->users);
                netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
        }
        err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);

out:
        scm_destroy(siocb->scm);
        return err;
}

static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len,
                           int flags)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct scm_cookie scm;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int noblock = flags&MSG_DONTWAIT;
        size_t copied;
        struct sk_buff *skb, *data_skb;
        int err, ret;

        if (flags&MSG_OOB)
                return -EOPNOTSUPP;

        copied = 0;

        skb = skb_recv_datagram(sk, flags, noblock, &err);
        if (skb == NULL)
                goto out;

        data_skb = skb;

#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
        if (unlikely(skb_shinfo(skb)->frag_list)) {
                /*
                 * If this skb has a frag_list, it means that we will have to
                 * use the frag_list skb's data for compat tasks and the
                 * regular skb's data for normal (non-compat) tasks.
                 *
                 * If we need to send the compat skb, assign it to the
                 * 'data_skb' variable so that it will be used below for data
                 * copying. We keep 'skb' for everything else, including
                 * freeing both later.
                 */
                if (flags & MSG_CMSG_COMPAT)
                        data_skb = skb_shinfo(skb)->frag_list;
        }
#endif

        msg->msg_namelen = 0;

        copied = data_skb->len;
        if (len < copied) {
                msg->msg_flags |= MSG_TRUNC;
                copied = len;
        }

        skb_reset_transport_header(data_skb);
        err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);

        if (msg->msg_name) {
                struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
                addr->nl_family = AF_NETLINK;
                addr->nl_pad    = 0;
                addr->nl_pid    = NETLINK_CB(skb).pid;
                addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
                msg->msg_namelen = sizeof(*addr);
        }

        if (nlk->flags & NETLINK_RECV_PKTINFO)
                netlink_cmsg_recv_pktinfo(msg, skb);

        if (NULL == siocb->scm) {
                memset(&scm, 0, sizeof(scm));
                siocb->scm = &scm;
        }
        siocb->scm->creds = *NETLINK_CREDS(skb);
        if (flags & MSG_TRUNC)
                copied = data_skb->len;

        skb_free_datagram(sk, skb);

        if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
                ret = netlink_dump(sk);
                if (ret) {
                        sk->sk_err = ret;
                        sk->sk_error_report(sk);
                }
        }

        scm_recv(sock, msg, siocb->scm, flags);
out:
        netlink_rcv_wake(sk);
        return err ? : copied;
}

static void netlink_data_ready(struct sock *sk, int len)
{
        BUG();
}

/*
 *      We export these functions to other modules. They provide a
 *      complete set of kernel non-blocking support for message
 *      queueing.
 */

struct sock *
netlink_kernel_create(struct net *net, int unit,
                      struct module *module,
                      struct netlink_kernel_cfg *cfg)
{
        struct socket *sock;
        struct sock *sk;
        struct netlink_sock *nlk;
        struct listeners *listeners = NULL;
        struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
        unsigned int groups;

        BUG_ON(!nl_table);

        if (unit < 0 || unit >= MAX_LINKS)
                return NULL;

        if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
                return NULL;

        /*
         * We have to just have a reference on the net from sk, but don't
         * get_net() it. Besides, we cannot get and then put the net here.
         * So we create one inside init_net and then move it to net.
         */

        if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
                goto out_sock_release_nosk;

        sk = sock->sk;
        sk_change_net(sk, net);

        if (!cfg || cfg->groups < 32)
                groups = 32;
        else
                groups = cfg->groups;

        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
        if (!listeners)
                goto out_sock_release;

        sk->sk_data_ready = netlink_data_ready;
        if (cfg && cfg->input)
                nlk_sk(sk)->netlink_rcv = cfg->input;

        if (netlink_insert(sk, net, 0))
                goto out_sock_release;

        nlk = nlk_sk(sk);
        nlk->flags |= NETLINK_KERNEL_SOCKET;

        netlink_table_grab();
        if (!nl_table[unit].registered) {
                nl_table[unit].groups = groups;
                rcu_assign_pointer(nl_table[unit].listeners, listeners);
                nl_table[unit].cb_mutex = cb_mutex;
                nl_table[unit].module = module;
                nl_table[unit].bind = cfg ? cfg->bind : NULL;
                nl_table[unit].registered = 1;
        } else {
                kfree(listeners);
                nl_table[unit].registered++;
        }
        netlink_table_ungrab();
        return sk;

out_sock_release:
        kfree(listeners);
        netlink_kernel_release(sk);
        return NULL;

out_sock_release_nosk:
        sock_release(sock);
        return NULL;
}
EXPORT_SYMBOL(netlink_kernel_create);
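
/*
 * A minimal caller, e.g. from a module's init path (sketch only; MY_PROTO
 * stands for an otherwise unused unit below MAX_LINKS, and my_input for
 * an input callback like the one sketched above netlink_unicast()):
 *
 *	static struct sock *my_sock;
 *
 *	static int __init my_init(void)
 *	{
 *		struct netlink_kernel_cfg cfg = {
 *			.groups = 32,
 *			.input  = my_input,
 *		};
 *
 *		my_sock = netlink_kernel_create(&init_net, MY_PROTO,
 *						THIS_MODULE, &cfg);
 *		return my_sock ? 0 : -ENOMEM;
 *	}
 */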


void
netlink_kernel_release(struct sock *sk)
{
        sk_release_kernel(sk);
}
EXPORT_SYMBOL(netlink_kernel_release);

int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
        struct listeners *new, *old;
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];

        if (groups < 32)
                groups = 32;

        if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
                new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
                if (!new)
                        return -ENOMEM;
                old = rcu_dereference_protected(tbl->listeners, 1);
                memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
                rcu_assign_pointer(tbl->listeners, new);

                kfree_rcu(old, rcu);
        }
        tbl->groups = groups;

        return 0;
}

/**
 * netlink_change_ngroups - change number of multicast groups
 *
 * This changes the number of multicast groups that are available
 * on a certain netlink family. Note that it is not possible to
 * change the number of groups to below 32. Also note that it does
 * not implicitly call netlink_clear_multicast_users() when the
 * number of groups is reduced.
 *
 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
 * @groups: The new number of groups.
 */
int netlink_change_ngroups(struct sock *sk, unsigned int groups)
{
        int err;

        netlink_table_grab();
        err = __netlink_change_ngroups(sk, groups);
        netlink_table_ungrab();

        return err;
}

void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
        struct sock *sk;
        struct hlist_node *node;
        struct netlink_table *tbl = &nl_table[ksk->sk_protocol];

        sk_for_each_bound(sk, node, &tbl->mc_list)
                netlink_update_socket_mc(nlk_sk(sk), group, 0);
}

/**
 * netlink_clear_multicast_users - kick all listeners out of a multicast group
 *
 * This function removes all listeners from the given group.
 * @ksk: The kernel netlink socket, as returned by
 *      netlink_kernel_create().
 * @group: The multicast group to clear.
 */
1673void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1674{
1675        netlink_table_grab();
1676        __netlink_clear_multicast_users(ksk, group);
1677        netlink_table_ungrab();
1678}
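/*
 * Illustrative sketch, not part of the original file: because
 * netlink_change_ngroups() does not kick existing subscribers when
 * shrinking, a caller retiring group 5 on the hypothetical my_sk
 * clears it explicitly before reducing the group count:
 *
 *	netlink_clear_multicast_users(my_sk, 5);
 *	netlink_change_ngroups(my_sk, 32);
 */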
1679
1680void netlink_set_nonroot(int protocol, unsigned int flags)
1681{
1682        if ((unsigned int)protocol < MAX_LINKS)
1683                nl_table[protocol].nl_nonroot = flags;
1684}
1685EXPORT_SYMBOL(netlink_set_nonroot);
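/*
 * Illustrative sketch, not part of the original file: a protocol can
 * open selected operations to unprivileged processes, e.g. letting
 * anyone subscribe to its multicast groups the way generic netlink
 * does at init time:
 *
 *	netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
 */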
1686
1687struct nlmsghdr *
1688__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
1689{
1690        struct nlmsghdr *nlh;
1691        int size = NLMSG_LENGTH(len);
1692
1693        nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
1694        nlh->nlmsg_type = type;
1695        nlh->nlmsg_len = size;
1696        nlh->nlmsg_flags = flags;
1697        nlh->nlmsg_pid = pid;
1698        nlh->nlmsg_seq = seq;
1699        if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
1700                memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
1701        return nlh;
1702}
1703EXPORT_SYMBOL(__nlmsg_put);
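/*
 * Illustrative sketch, not part of the original file: in-kernel
 * callers normally go through the nlmsg_put() wrapper from
 * <net/netlink.h> rather than calling __nlmsg_put() directly;
 * MY_MSG_TYPE and struct my_payload are hypothetical:
 *
 *	nlh = nlmsg_put(skb, pid, seq, MY_MSG_TYPE,
 *			sizeof(struct my_payload), NLM_F_MULTI);
 *	if (nlh == NULL)
 *		goto out_free;
 */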
1704
1705/*
1706 * It looks a bit ugly.
1707 * It would be better to create a kernel thread.
1708 */
1709
1710static int netlink_dump(struct sock *sk)
1711{
1712        struct netlink_sock *nlk = nlk_sk(sk);
1713        struct netlink_callback *cb;
1714        struct sk_buff *skb = NULL;
1715        struct nlmsghdr *nlh;
1716        int len, err = -ENOBUFS;
1717        int alloc_size;
1718
1719        mutex_lock(nlk->cb_mutex);
1720
1721        cb = nlk->cb;
1722        if (cb == NULL) {
1723                err = -EINVAL;
1724                goto errout_skb;
1725        }
1726
1727        alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
1728
1729        skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
1730        if (!skb)
1731                goto errout_skb;
1732
1733        len = cb->dump(skb, cb);
1734
1735        if (len > 0) {
1736                mutex_unlock(nlk->cb_mutex);
1737
1738                if (sk_filter(sk, skb))
1739                        kfree_skb(skb);
1740                else
1741                        __netlink_sendskb(sk, skb);
1742                return 0;
1743        }
1744
1745        nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1746        if (!nlh)
1747                goto errout_skb;
1748
1749        nl_dump_check_consistent(cb, nlh);
1750
1751        memcpy(nlmsg_data(nlh), &len, sizeof(len));
1752
1753        if (sk_filter(sk, skb))
1754                kfree_skb(skb);
1755        else
1756                __netlink_sendskb(sk, skb);
1757
1758        if (cb->done)
1759                cb->done(cb);
1760        nlk->cb = NULL;
1761        mutex_unlock(nlk->cb_mutex);
1762
1763        netlink_consume_callback(cb);
1764        return 0;
1765
1766errout_skb:
1767        mutex_unlock(nlk->cb_mutex);
1768        kfree_skb(skb);
1769        return err;
1770}
1771
1772int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1773                       const struct nlmsghdr *nlh,
1774                       struct netlink_dump_control *control)
1775{
1776        struct netlink_callback *cb;
1777        struct sock *sk;
1778        struct netlink_sock *nlk;
1779        int ret;
1780
1781        cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1782        if (cb == NULL)
1783                return -ENOBUFS;
1784
1785        cb->dump = control->dump;
1786        cb->done = control->done;
1787        cb->nlh = nlh;
1788        cb->data = control->data;
1789        cb->min_dump_alloc = control->min_dump_alloc;
1790        atomic_inc(&skb->users);
1791        cb->skb = skb;
1792
1793        sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
1794        if (sk == NULL) {
1795                netlink_destroy_callback(cb);
1796                return -ECONNREFUSED;
1797        }
1798        nlk = nlk_sk(sk);
1799        /* Only one dump at a time; bail out if one is already in progress */
1800        mutex_lock(nlk->cb_mutex);
1801        if (nlk->cb) {
1802                mutex_unlock(nlk->cb_mutex);
1803                netlink_destroy_callback(cb);
1804                sock_put(sk);
1805                return -EBUSY;
1806        }
1807        nlk->cb = cb;
1808        mutex_unlock(nlk->cb_mutex);
1809
1810        ret = netlink_dump(sk);
1811
1812        sock_put(sk);
1813
1814        if (ret)
1815                return ret;
1816
1817        /* We successfully started a dump, by returning -EINTR we
1818         * signal not to send ACK even if it was requested.
1819         */
1820        return -EINTR;
1821}
1822EXPORT_SYMBOL(netlink_dump_start);
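/*
 * Illustrative sketch, not part of the original file: a request
 * handler usually starts the dump and returns the result directly,
 * so the -EINTR above reaches netlink_rcv_skb() and suppresses the
 * ACK; my_sk, my_dump() and my_done() are hypothetical:
 *
 *	if (nlh->nlmsg_flags & NLM_F_DUMP) {
 *		struct netlink_dump_control c = {
 *			.dump = my_dump,
 *			.done = my_done,
 *		};
 *		return netlink_dump_start(my_sk, skb, nlh, &c);
 *	}
 */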
1823
1824void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1825{
1826        struct sk_buff *skb;
1827        struct nlmsghdr *rep;
1828        struct nlmsgerr *errmsg;
1829        size_t payload = sizeof(*errmsg);
1830
1831        /* error messages get the original request appended */
1832        if (err)
1833                payload += nlmsg_len(nlh);
1834
1835        skb = nlmsg_new(payload, GFP_KERNEL);
1836        if (!skb) {
1837                struct sock *sk;
1838
1839                sk = netlink_lookup(sock_net(in_skb->sk),
1840                                    in_skb->sk->sk_protocol,
1841                                    NETLINK_CB(in_skb).pid);
1842                if (sk) {
1843                        sk->sk_err = ENOBUFS;
1844                        sk->sk_error_report(sk);
1845                        sock_put(sk);
1846                }
1847                return;
1848        }
1849
1850        rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
1851                          NLMSG_ERROR, payload, 0);
1852        errmsg = nlmsg_data(rep);
1853        errmsg->error = err;
1854        memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1855        netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1856}
1857EXPORT_SYMBOL(netlink_ack);
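/*
 * Illustrative note, not part of the original file: err == 0 yields
 * a pure ACK (an NLMSG_ERROR message whose nlmsgerr.error is 0,
 * carrying only the original header), while a negative errno echoes
 * the whole request back to the sender:
 *
 *	netlink_ack(skb, nlh, -EOPNOTSUPP);
 */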
1858
1859int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1860                                                     struct nlmsghdr *))
1861{
1862        struct nlmsghdr *nlh;
1863        int err;
1864
1865        while (skb->len >= nlmsg_total_size(0)) {
1866                int msglen;
1867
1868                nlh = nlmsg_hdr(skb);
1869                err = 0;
1870
1871                if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1872                        return 0;
1873
1874                /* Only requests are handled by the kernel */
1875                if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1876                        goto ack;
1877
1878                /* Skip control messages */
1879                if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
1880                        goto ack;
1881
1882                err = cb(skb, nlh);
1883                if (err == -EINTR)
1884                        goto skip;
1885
1886ack:
1887                if (nlh->nlmsg_flags & NLM_F_ACK || err)
1888                        netlink_ack(skb, nlh, err);
1889
1890skip:
1891                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1892                if (msglen > skb->len)
1893                        msglen = skb->len;
1894                skb_pull(skb, msglen);
1895        }
1896
1897        return 0;
1898}
1899EXPORT_SYMBOL(netlink_rcv_skb);
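/*
 * Illustrative sketch, not part of the original file: a protocol's
 * input callback typically just feeds each queued skb through this
 * demultiplexer, much as rtnetlink does; my_rcv_msg() is a
 * hypothetical per-message handler:
 *
 *	static void my_input(struct sk_buff *skb)
 *	{
 *		netlink_rcv_skb(skb, &my_rcv_msg);
 *	}
 */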
1900
1901/**
1902 * nlmsg_notify - send a notification netlink message
1903 * @sk: netlink socket to use
1904 * @skb: notification message
1905 * @pid: destination netlink pid for reports or 0
1906 * @group: destination multicast group or 0
1907 * @report: 1 to also unicast the message back to @pid, 0 to disable
1908 * @flags: allocation flags
1909 */
1910int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1911                 unsigned int group, int report, gfp_t flags)
1912{
1913        int err = 0;
1914
1915        if (group) {
1916                int exclude_pid = 0;
1917
1918                if (report) {
1919                        atomic_inc(&skb->users);
1920                        exclude_pid = pid;
1921                }
1922
1923                /* errors reported via destination sk->sk_err, but propagate
1924                 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
1925                err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
1926        }
1927
1928        if (report) {
1929                int err2;
1930
1931                err2 = nlmsg_unicast(sk, skb, pid);
1932                if (!err || err == -ESRCH)
1933                        err = err2;
1934        }
1935
1936        return err;
1937}
1938EXPORT_SYMBOL(nlmsg_notify);
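/*
 * Illustrative sketch, not part of the original file: rtnetlink-style
 * notifiers pass the requester's pid and derive @report from the
 * request's NLM_F_ECHO flag via nlmsg_report(); MY_GROUP is
 * hypothetical:
 *
 *	err = nlmsg_notify(sk, skb, NETLINK_CB(oskb).pid, MY_GROUP,
 *			   nlmsg_report(nlh), GFP_KERNEL);
 */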
1939
1940#ifdef CONFIG_PROC_FS
1941struct nl_seq_iter {
1942        struct seq_net_private p;
1943        int link;
1944        int hash_idx;
1945};
1946
1947static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1948{
1949        struct nl_seq_iter *iter = seq->private;
1950        int i, j;
1951        struct sock *s;
1952        struct hlist_node *node;
1953        loff_t off = 0;
1954
1955        for (i = 0; i < MAX_LINKS; i++) {
1956                struct nl_pid_hash *hash = &nl_table[i].hash;
1957
1958                for (j = 0; j <= hash->mask; j++) {
1959                        sk_for_each(s, node, &hash->table[j]) {
1960                                if (sock_net(s) != seq_file_net(seq))
1961                                        continue;
1962                                if (off == pos) {
1963                                        iter->link = i;
1964                                        iter->hash_idx = j;
1965                                        return s;
1966                                }
1967                                ++off;
1968                        }
1969                }
1970        }
1971        return NULL;
1972}
1973
1974static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1975        __acquires(nl_table_lock)
1976{
1977        read_lock(&nl_table_lock);
1978        return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
1979}
1980
1981static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1982{
1983        struct sock *s;
1984        struct nl_seq_iter *iter;
1985        int i, j;
1986
1987        ++*pos;
1988
1989        if (v == SEQ_START_TOKEN)
1990                return netlink_seq_socket_idx(seq, 0);
1991
1992        iter = seq->private;
1993        s = v;
1994        do {
1995                s = sk_next(s);
1996        } while (s && sock_net(s) != seq_file_net(seq));
1997        if (s)
1998                return s;
1999
2000        i = iter->link;
2001        j = iter->hash_idx + 1;
2002
2003        do {
2004                struct nl_pid_hash *hash = &nl_table[i].hash;
2005
2006                for (; j <= hash->mask; j++) {
2007                        s = sk_head(&hash->table[j]);
2008                        while (s && sock_net(s) != seq_file_net(seq))
2009                                s = sk_next(s);
2010                        if (s) {
2011                                iter->link = i;
2012                                iter->hash_idx = j;
2013                                return s;
2014                        }
2015                }
2016
2017                j = 0;
2018        } while (++i < MAX_LINKS);
2019
2020        return NULL;
2021}
2022
2023static void netlink_seq_stop(struct seq_file *seq, void *v)
2024        __releases(nl_table_lock)
2025{
2026        read_unlock(&nl_table_lock);
2027}
2028
2029
2030static int netlink_seq_show(struct seq_file *seq, void *v)
2031{
2032        if (v == SEQ_START_TOKEN) {
2033                seq_puts(seq,
2034                         "sk       Eth Pid    Groups   "
2035                         "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
2036        } else {
2037                struct sock *s = v;
2038                struct netlink_sock *nlk = nlk_sk(s);
2039
2040                seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
2041                           s,
2042                           s->sk_protocol,
2043                           nlk->pid,
2044                           nlk->groups ? (u32)nlk->groups[0] : 0,
2045                           sk_rmem_alloc_get(s),
2046                           sk_wmem_alloc_get(s),
2047                           nlk->cb,
2048                           atomic_read(&s->sk_refcnt),
2049                           atomic_read(&s->sk_drops),
2050                           sock_i_ino(s)
2051                        );
2052
2053        }
2054        return 0;
2055}
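/*
 * Illustrative sample of the resulting /proc/net/netlink output (all
 * values invented; the sk and Dump columns are %pK-formatted pointers
 * and may read as zeroes depending on kptr_restrict):
 *
 * sk       Eth Pid    Groups   Rmem     Wmem     Dump     Locks     Drops     Inode
 * ffff88003b2c6000 0   4082   00000111 0        0        0000000000000000 2        0         11034
 */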
2056
2057static const struct seq_operations netlink_seq_ops = {
2058        .start  = netlink_seq_start,
2059        .next   = netlink_seq_next,
2060        .stop   = netlink_seq_stop,
2061        .show   = netlink_seq_show,
2062};
2063
2064
2065static int netlink_seq_open(struct inode *inode, struct file *file)
2066{
2067        return seq_open_net(inode, file, &netlink_seq_ops,
2068                                sizeof(struct nl_seq_iter));
2069}
2070
2071static const struct file_operations netlink_seq_fops = {
2072        .owner          = THIS_MODULE,
2073        .open           = netlink_seq_open,
2074        .read           = seq_read,
2075        .llseek         = seq_lseek,
2076        .release        = seq_release_net,
2077};
2078
2079#endif
2080
2081int netlink_register_notifier(struct notifier_block *nb)
2082{
2083        return atomic_notifier_chain_register(&netlink_chain, nb);
2084}
2085EXPORT_SYMBOL(netlink_register_notifier);
2086
2087int netlink_unregister_notifier(struct notifier_block *nb)
2088{
2089        return atomic_notifier_chain_unregister(&netlink_chain, nb);
2090}
2091EXPORT_SYMBOL(netlink_unregister_notifier);
2092
2093static const struct proto_ops netlink_ops = {
2094        .family =       PF_NETLINK,
2095        .owner =        THIS_MODULE,
2096        .release =      netlink_release,
2097        .bind =         netlink_bind,
2098        .connect =      netlink_connect,
2099        .socketpair =   sock_no_socketpair,
2100        .accept =       sock_no_accept,
2101        .getname =      netlink_getname,
2102        .poll =         datagram_poll,
2103        .ioctl =        sock_no_ioctl,
2104        .listen =       sock_no_listen,
2105        .shutdown =     sock_no_shutdown,
2106        .setsockopt =   netlink_setsockopt,
2107        .getsockopt =   netlink_getsockopt,
2108        .sendmsg =      netlink_sendmsg,
2109        .recvmsg =      netlink_recvmsg,
2110        .mmap =         sock_no_mmap,
2111        .sendpage =     sock_no_sendpage,
2112};
2113
2114static const struct net_proto_family netlink_family_ops = {
2115        .family = PF_NETLINK,
2116        .create = netlink_create,
2117        .owner  = THIS_MODULE,  /* for consistency 8) */
2118};
2119
2120static int __net_init netlink_net_init(struct net *net)
2121{
2122#ifdef CONFIG_PROC_FS
2123        if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
2124                return -ENOMEM;
2125#endif
2126        return 0;
2127}
2128
2129static void __net_exit netlink_net_exit(struct net *net)
2130{
2131#ifdef CONFIG_PROC_FS
2132        proc_net_remove(net, "netlink");
2133#endif
2134}
2135
2136static void __init netlink_add_usersock_entry(void)
2137{
2138        struct listeners *listeners;
2139        int groups = 32;
2140
2141        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2142        if (!listeners)
2143                panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2144
2145        netlink_table_grab();
2146
2147        nl_table[NETLINK_USERSOCK].groups = groups;
2148        rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2149        nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2150        nl_table[NETLINK_USERSOCK].registered = 1;
2151        nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND;
2152
2153        netlink_table_ungrab();
2154}
2155
2156static struct pernet_operations __net_initdata netlink_net_ops = {
2157        .init = netlink_net_init,
2158        .exit = netlink_net_exit,
2159};
2160
2161static int __init netlink_proto_init(void)
2162{
2163        struct sk_buff *dummy_skb;
2164        int i;
2165        unsigned long limit;
2166        unsigned int order;
2167        int err = proto_register(&netlink_proto, 0);
2168
2169        if (err != 0)
2170                goto out;
2171
2172        BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
2173
2174        nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2175        if (!nl_table)
2176                goto panic;
2177
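        /*
         * Budget the pid-hash bucket array at roughly 1/512th of RAM
         * (1/2048th on machines below 128K pages, i.e. 512MB with 4KB
         * pages); the resulting order caps how far each hash may grow.
         */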
2178        if (totalram_pages >= (128 * 1024))
2179                limit = totalram_pages >> (21 - PAGE_SHIFT);
2180        else
2181                limit = totalram_pages >> (23 - PAGE_SHIFT);
2182
2183        order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2184        limit = (1UL << order) / sizeof(struct hlist_head);
2185        order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
2186
2187        for (i = 0; i < MAX_LINKS; i++) {
2188                struct nl_pid_hash *hash = &nl_table[i].hash;
2189
2190                hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
2191                if (!hash->table) {
2192                        while (i-- > 0)
2193                                nl_pid_hash_free(nl_table[i].hash.table,
2194                                                 1 * sizeof(*hash->table));
2195                        kfree(nl_table);
2196                        goto panic;
2197                }
2198                hash->max_shift = order;
2199                hash->shift = 0;
2200                hash->mask = 0;
2201                hash->rehash_time = jiffies;
2202        }
2203
2204        netlink_add_usersock_entry();
2205
2206        sock_register(&netlink_family_ops);
2207        register_pernet_subsys(&netlink_net_ops);
2208        /* The netlink device handler may be needed early. */
2209        rtnetlink_init();
2210out:
2211        return err;
2212panic:
2213        panic("netlink_init: Cannot allocate nl_table\n");
2214}
2215
2216core_initcall(netlink_proto_init);
2217