linux/net/netlink/af_netlink.c
/*
 * NETLINK      Kernel-user communication protocol.
 *
 *              Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
 *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *                               - inc module use count of module that owns
 *                                 the kernel socket in case userspace opens
 *                                 socket of same protocol
 *                               - remove all module support, since netlink is
 *                                 mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#define NLGRPSZ(x)      (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)   (NLGRPSZ(x)/sizeof(unsigned long))
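/*
 * Worked example (illustrative): for the default 32 groups,
 * NLGRPSZ(32) = ALIGN(32, BITS_PER_LONG) / 8 bytes of bitmap, i.e.
 * 8 bytes on a 64-bit kernel and 4 bytes on a 32-bit one, and
 * NLGRPLONGS(32) = 1 unsigned long in both cases.
 */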

struct netlink_sock {
        /* struct sock has to be the first member of netlink_sock */
        struct sock             sk;
        u32                     pid;
        u32                     dst_pid;
        u32                     dst_group;
        u32                     flags;
        u32                     subscriptions;
        u32                     ngroups;
        unsigned long           *groups;
        unsigned long           state;
        wait_queue_head_t       wait;
        struct netlink_callback *cb;
        struct mutex            *cb_mutex;
        struct mutex            cb_def_mutex;
        void                    (*netlink_rcv)(struct sk_buff *skb);
        void                    (*netlink_bind)(int group);
        struct module           *module;
};

struct listeners {
        struct rcu_head         rcu;
        unsigned long           masks[0];
};

#define NETLINK_KERNEL_SOCKET   0x1
#define NETLINK_RECV_PKTINFO    0x2
#define NETLINK_BROADCAST_SEND_ERROR    0x4
#define NETLINK_RECV_NO_ENOBUFS 0x8

static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
        return container_of(sk, struct netlink_sock, sk);
}

static inline int netlink_is_kernel(struct sock *sk)
{
        return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct nl_pid_hash {
        struct hlist_head       *table;
        unsigned long           rehash_time;

        unsigned int            mask;
        unsigned int            shift;

        unsigned int            entries;
        unsigned int            max_shift;

        u32                     rnd;
};

struct netlink_table {
        struct nl_pid_hash      hash;
        struct hlist_head       mc_list;
        struct listeners __rcu  *listeners;
        unsigned int            nl_nonroot;
        unsigned int            groups;
        struct mutex            *cb_mutex;
        struct module           *module;
        void                    (*bind)(int group);
        int                     registered;
};

static struct netlink_table *nl_table;

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);

static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

static inline u32 netlink_group_mask(u32 group)
{
        return group ? 1 << (group - 1) : 0;
}

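/*
 * E.g. netlink_group_mask(1) == 0x1 and netlink_group_mask(3) == 0x4,
 * while group 0 ("no group") maps to an empty mask.  This 32-bit mask
 * only covers groups 1..32; higher groups are tracked in the per-socket
 * nlk->groups bitmap and joined via setsockopt() instead.
 */
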
static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
        return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}

static void netlink_destroy_callback(struct netlink_callback *cb)
{
        kfree_skb(cb->skb);
        kfree(cb);
}

static void netlink_consume_callback(struct netlink_callback *cb)
{
        consume_skb(cb->skb);
        kfree(cb);
}

static void netlink_sock_destruct(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->cb) {
                if (nlk->cb->done)
                        nlk->cb->done(nlk->cb);

                module_put(nlk->cb->module);
                netlink_destroy_callback(nlk->cb);
        }

        skb_queue_purge(&sk->sk_receive_queue);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
                return;
        }

        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
        WARN_ON(nlk_sk(sk)->groups);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP. Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

void netlink_table_grab(void)
        __acquires(nl_table_lock)
{
        might_sleep();

        write_lock_irq(&nl_table_lock);

        if (atomic_read(&nl_table_users)) {
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(&nl_table_wait, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&nl_table_users) == 0)
                                break;
                        write_unlock_irq(&nl_table_lock);
                        schedule();
                        write_lock_irq(&nl_table_lock);
                }

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nl_table_wait, &wait);
        }
}

void netlink_table_ungrab(void)
        __releases(nl_table_lock)
{
        write_unlock_irq(&nl_table_lock);
        wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
        /* read_lock() synchronizes us to netlink_table_grab */

        read_lock(&nl_table_lock);
        atomic_inc(&nl_table_users);
        read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
        if (atomic_dec_and_test(&nl_table_users))
                wake_up(&nl_table_wait);
}

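/*
 * Pairing sketch: readers bracket table walks with netlink_lock_table()
 * and netlink_unlock_table(), while writers use netlink_table_grab(),
 * which sleeps until every reader has drained:
 *
 *      netlink_table_grab();
 *      ... modify nl_table[] ...
 *      netlink_table_ungrab();
 */
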
static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
{
        struct nl_pid_hash *hash = &nl_table[protocol].hash;
        struct hlist_head *head;
        struct sock *sk;
        struct hlist_node *node;

        read_lock(&nl_table_lock);
        head = nl_pid_hashfn(hash, pid);
        sk_for_each(sk, node, head) {
                if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) {
                        sock_hold(sk);
                        goto found;
                }
        }
        sk = NULL;
found:
        read_unlock(&nl_table_lock);
        return sk;
}

static struct hlist_head *nl_pid_hash_zalloc(size_t size)
{
        if (size <= PAGE_SIZE)
                return kzalloc(size, GFP_ATOMIC);
        else
                return (struct hlist_head *)
                        __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                         get_order(size));
}

static void nl_pid_hash_free(struct hlist_head *table, size_t size)
{
        if (size <= PAGE_SIZE)
                kfree(table);
        else
                free_pages((unsigned long)table, get_order(size));
}

static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
{
        unsigned int omask, mask, shift;
        size_t osize, size;
        struct hlist_head *otable, *table;
        int i;

        omask = mask = hash->mask;
        osize = size = (mask + 1) * sizeof(*table);
        shift = hash->shift;

        if (grow) {
                if (++shift > hash->max_shift)
                        return 0;
                mask = mask * 2 + 1;
                size *= 2;
        }

        table = nl_pid_hash_zalloc(size);
        if (!table)
                return 0;

        otable = hash->table;
        hash->table = table;
        hash->mask = mask;
        hash->shift = shift;
        get_random_bytes(&hash->rnd, sizeof(hash->rnd));

        for (i = 0; i <= omask; i++) {
                struct sock *sk;
                struct hlist_node *node, *tmp;

                sk_for_each_safe(sk, node, tmp, &otable[i])
                        __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
        }

        nl_pid_hash_free(otable, osize);
        hash->rehash_time = jiffies + 10 * 60 * HZ;
        return 1;
}

static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
{
        int avg = hash->entries >> hash->shift;

        if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
                return 1;

        if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
                nl_pid_hash_rehash(hash, 0);
                return 1;
        }

        return 0;
}

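/*
 * Rehash policy in numbers (illustrative, assuming 2^shift buckets):
 * with shift == 4 there are 16 buckets, so avg = entries >> shift
 * exceeds 1 once 32 or more sockets are hashed and the table grows.
 * A non-growing rehash (grow == 0) keeps the size but picks a fresh
 * hash->rnd to break up a long chain, rate-limited to once per
 * 10 minutes via hash->rehash_time.
 */
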
static const struct proto_ops netlink_ops;

static void
netlink_update_listeners(struct sock *sk)
{
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
        struct hlist_node *node;
        unsigned long mask;
        unsigned int i;

        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
                mask = 0;
                sk_for_each_bound(sk, node, &tbl->mc_list) {
                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
                                mask |= nlk_sk(sk)->groups[i];
                }
                tbl->listeners->masks[i] = mask;
        }
        /* this function is only called with the netlink table "grabbed", which
         * makes sure updates are visible before bind or setsockopt return. */
}

static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
{
        struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        int err = -EADDRINUSE;
        struct sock *osk;
        struct hlist_node *node;
        int len;

        netlink_table_grab();
        head = nl_pid_hashfn(hash, pid);
        len = 0;
        sk_for_each(osk, node, head) {
                if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid))
                        break;
                len++;
        }
        if (node)
                goto err;

        err = -EBUSY;
        if (nlk_sk(sk)->pid)
                goto err;

        err = -ENOMEM;
        if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
                goto err;

        if (len && nl_pid_hash_dilute(hash, len))
                head = nl_pid_hashfn(hash, pid);
        hash->entries++;
        nlk_sk(sk)->pid = pid;
        sk_add_node(sk, head);
        err = 0;

err:
        netlink_table_ungrab();
        return err;
}

static void netlink_remove(struct sock *sk)
{
        netlink_table_grab();
        if (sk_del_node_init(sk))
                nl_table[sk->sk_protocol].hash.entries--;
        if (nlk_sk(sk)->subscriptions)
                __sk_del_bind_node(sk);
        netlink_table_ungrab();
}

static struct proto netlink_proto = {
        .name     = "NETLINK",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
                            struct mutex *cb_mutex, int protocol)
{
        struct sock *sk;
        struct netlink_sock *nlk;

        sock->ops = &netlink_ops;

        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);

        nlk = nlk_sk(sk);
        if (cb_mutex) {
                nlk->cb_mutex = cb_mutex;
        } else {
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
        }
        init_waitqueue_head(&nlk->wait);

        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol,
                          int kern)
{
        struct module *module = NULL;
        struct mutex *cb_mutex;
        struct netlink_sock *nlk;
        void (*bind)(int group);
        int err = 0;

        sock->state = SS_UNCONNECTED;

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        if (protocol < 0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;

        netlink_lock_table();
#ifdef CONFIG_MODULES
        if (!nl_table[protocol].registered) {
                netlink_unlock_table();
                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
                netlink_lock_table();
        }
#endif
        if (nl_table[protocol].registered &&
            try_module_get(nl_table[protocol].module))
                module = nl_table[protocol].module;
        else
                err = -EPROTONOSUPPORT;
        cb_mutex = nl_table[protocol].cb_mutex;
        bind = nl_table[protocol].bind;
        netlink_unlock_table();

        if (err < 0)
                goto out;

        err = __netlink_create(net, sock, cb_mutex, protocol);
        if (err < 0)
                goto out_module;

        local_bh_disable();
        sock_prot_inuse_add(net, &netlink_proto, 1);
        local_bh_enable();

        nlk = nlk_sk(sock->sk);
        nlk->module = module;
        nlk->netlink_bind = bind;
out:
        return err;

out_module:
        module_put(module);
        goto out;
}

static int netlink_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk;

        if (!sk)
                return 0;

        netlink_remove(sk);
        sock_orphan(sk);
        nlk = nlk_sk(sk);

        /*
         * OK. Socket is unlinked, any packets that arrive now
         * will be purged.
         */

        sock->sk = NULL;
        wake_up_interruptible_all(&nlk->wait);

        skb_queue_purge(&sk->sk_write_queue);

        if (nlk->pid) {
                struct netlink_notify n = {
                                                .net = sock_net(sk),
                                                .protocol = sk->sk_protocol,
                                                .pid = nlk->pid,
                                          };
                atomic_notifier_call_chain(&netlink_chain,
                                NETLINK_URELEASE, &n);
        }

        module_put(nlk->module);

        netlink_table_grab();
        if (netlink_is_kernel(sk)) {
                BUG_ON(nl_table[sk->sk_protocol].registered == 0);
                if (--nl_table[sk->sk_protocol].registered == 0) {
                        kfree(nl_table[sk->sk_protocol].listeners);
                        nl_table[sk->sk_protocol].module = NULL;
                        nl_table[sk->sk_protocol].registered = 0;
                }
        } else if (nlk->subscriptions) {
                netlink_update_listeners(sk);
        }
        netlink_table_ungrab();

        kfree(nlk->groups);
        nlk->groups = NULL;

        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
        local_bh_enable();
        sock_put(sk);
        return 0;
}

static int netlink_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        struct sock *osk;
        struct hlist_node *node;
        s32 pid = task_tgid_vnr(current);
        int err;
        static s32 rover = -4097;

retry:
        cond_resched();
        netlink_table_grab();
        head = nl_pid_hashfn(hash, pid);
        sk_for_each(osk, node, head) {
                if (!net_eq(sock_net(osk), net))
                        continue;
                if (nlk_sk(osk)->pid == pid) {
                        /* Bind collision, search negative pid values. */
                        pid = rover--;
                        if (rover > -4097)
                                rover = -4097;
                        netlink_table_ungrab();
                        goto retry;
                }
        }
        netlink_table_ungrab();

        err = netlink_insert(sk, net, pid);
        if (err == -EADDRINUSE)
                goto retry;

        /* If 2 threads race to autobind, that is fine.  */
        if (err == -EBUSY)
                err = 0;

        return err;
}

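/*
 * Userspace view (hedged sketch, error handling omitted): autobinding
 * happens when a socket is used without an explicit pid, e.g.
 *
 *      struct sockaddr_nl sa = { .nl_family = AF_NETLINK };
 *      int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *
 *      bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * With nl_pid == 0 the kernel picks a unique pid: normally the caller's
 * thread group id, falling back to negative values from -4097 downwards
 * on collision.
 */
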
static inline int netlink_capable(const struct socket *sock, unsigned int flag)
{
        return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
               capable(CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->subscriptions && !subscriptions)
                __sk_del_bind_node(sk);
        else if (!nlk->subscriptions && subscriptions)
                sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
        nlk->subscriptions = subscriptions;
}

static int netlink_realloc_groups(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int groups;
        unsigned long *new_groups;
        int err = 0;

        netlink_table_grab();

        groups = nl_table[sk->sk_protocol].groups;
        if (!nl_table[sk->sk_protocol].registered) {
                err = -ENOENT;
                goto out_unlock;
        }

        if (nlk->ngroups >= groups)
                goto out_unlock;

        new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
        if (new_groups == NULL) {
                err = -ENOMEM;
                goto out_unlock;
        }
        memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
               NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

        nlk->groups = new_groups;
        nlk->ngroups = groups;
 out_unlock:
        netlink_table_ungrab();
        return err;
}

static int netlink_bind(struct socket *sock, struct sockaddr *addr,
                        int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
        int err;

        if (nladdr->nl_family != AF_NETLINK)
                return -EINVAL;

        /* Only superuser is allowed to listen to multicasts */
        if (nladdr->nl_groups) {
                if (!netlink_capable(sock, NL_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
        }

        if (nlk->pid) {
                if (nladdr->nl_pid != nlk->pid)
                        return -EINVAL;
        } else {
                err = nladdr->nl_pid ?
                        netlink_insert(sk, net, nladdr->nl_pid) :
                        netlink_autobind(sock);
                if (err)
                        return err;
        }

        if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
                return 0;

        netlink_table_grab();
        netlink_update_subscriptions(sk, nlk->subscriptions +
                                         hweight32(nladdr->nl_groups) -
                                         hweight32(nlk->groups[0]));
        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
        netlink_update_listeners(sk);
        netlink_table_ungrab();

        if (nlk->netlink_bind && nlk->groups[0]) {
                int i;

                for (i = 0; i < nlk->ngroups; i++) {
                        if (test_bit(i, nlk->groups))
                                nlk->netlink_bind(i);
                }
        }

        return 0;
}

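/*
 * Hedged userspace sketch of subscribing at bind() time; nl_groups is a
 * bitmask of groups 1..32 (RTMGRP_LINK is the rtnetlink link-event
 * group):
 *
 *      struct sockaddr_nl sa = {
 *              .nl_family = AF_NETLINK,
 *              .nl_groups = RTMGRP_LINK,
 *      };
 *      int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *
 *      bind(fd, (struct sockaddr *)&sa, sizeof(sa));
 *
 * Groups above 32 do not fit in nl_groups and must be joined with
 * setsockopt(SOL_NETLINK, NETLINK_ADD_MEMBERSHIP) instead.
 */
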
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
                           int alen, int flags)
{
        int err = 0;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

        if (alen < sizeof(addr->sa_family))
                return -EINVAL;

        if (addr->sa_family == AF_UNSPEC) {
                sk->sk_state    = NETLINK_UNCONNECTED;
                nlk->dst_pid    = 0;
                nlk->dst_group  = 0;
                return 0;
        }
        if (addr->sa_family != AF_NETLINK)
                return -EINVAL;

        /* Only superuser is allowed to send multicasts */
        if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
                return -EPERM;

        if (!nlk->pid)
                err = netlink_autobind(sock);

        if (err == 0) {
                sk->sk_state    = NETLINK_CONNECTED;
                nlk->dst_pid    = nladdr->nl_pid;
                nlk->dst_group  = ffs(nladdr->nl_groups);
        }

        return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
                           int *addr_len, int peer)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

        nladdr->nl_family = AF_NETLINK;
        nladdr->nl_pad = 0;
        *addr_len = sizeof(*nladdr);

        if (peer) {
                nladdr->nl_pid = nlk->dst_pid;
                nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
        } else {
                nladdr->nl_pid = nlk->pid;
                nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
        }
        return 0;
}

static void netlink_overrun(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
                if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
                        sk->sk_err = ENOBUFS;
                        sk->sk_error_report(sk);
                }
        }
        atomic_inc(&sk->sk_drops);
}

static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
        struct sock *sock;
        struct netlink_sock *nlk;

        sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid);
        if (!sock)
                return ERR_PTR(-ECONNREFUSED);

        /* Don't bother queuing skb if kernel socket has no input function */
        nlk = nlk_sk(sock);
        if (sock->sk_state == NETLINK_CONNECTED &&
            nlk->dst_pid != nlk_sk(ssk)->pid) {
                sock_put(sock);
                return ERR_PTR(-ECONNREFUSED);
        }
        return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct sock *sock;

        if (!S_ISSOCK(inode->i_mode))
                return ERR_PTR(-ENOTSOCK);

        sock = SOCKET_I(inode)->sk;
        if (sock->sk_family != AF_NETLINK)
                return ERR_PTR(-EINVAL);

        sock_hold(sock);
        return sock;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; only the
 * error checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
                      long *timeo, struct sock *ssk)
{
        struct netlink_sock *nlk;

        nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
            test_bit(0, &nlk->state)) {
                DECLARE_WAITQUEUE(wait, current);
                if (!*timeo) {
                        if (!ssk || netlink_is_kernel(ssk))
                                netlink_overrun(sk);
                        sock_put(sk);
                        kfree_skb(skb);
                        return -EAGAIN;
                }

                __set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&nlk->wait, &wait);

                if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
                     test_bit(0, &nlk->state)) &&
                    !sock_flag(sk, SOCK_DEAD))
                        *timeo = schedule_timeout(*timeo);

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nlk->wait, &wait);
                sock_put(sk);

                if (signal_pending(current)) {
                        kfree_skb(skb);
                        return sock_intr_errno(*timeo);
                }
                return 1;
        }
        skb_set_owner_r(skb, sk);
        return 0;
}

static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk, len);
        return len;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = __netlink_sendskb(sk, skb);

        sock_put(sk);
        return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
        kfree_skb(skb);
        sock_put(sk);
}

static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
        int delta;

        skb_orphan(skb);

        delta = skb->end - skb->tail;
        if (delta * 2 < skb->truesize)
                return skb;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, allocation);
                if (!nskb)
                        return skb;
                consume_skb(skb);
                skb = nskb;
        }

        if (!pskb_expand_head(skb, 0, -delta, allocation))
                skb->truesize -= delta;

        return skb;
}

static void netlink_rcv_wake(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (skb_queue_empty(&sk->sk_receive_queue))
                clear_bit(0, &nlk->state);
        if (!test_bit(0, &nlk->state))
                wake_up_interruptible(&nlk->wait);
}

static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb)
{
        int ret;
        struct netlink_sock *nlk = nlk_sk(sk);

        ret = -ECONNREFUSED;
        if (nlk->netlink_rcv != NULL) {
                ret = skb->len;
                skb_set_owner_r(skb, sk);
                nlk->netlink_rcv(skb);
                consume_skb(skb);
        } else {
                kfree_skb(skb);
        }
        sock_put(sk);
        return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
                    u32 pid, int nonblock)
{
        struct sock *sk;
        int err;
        long timeo;

        skb = netlink_trim(skb, gfp_any());

        timeo = sock_sndtimeo(ssk, nonblock);
retry:
        sk = netlink_getsockbypid(ssk, pid);
        if (IS_ERR(sk)) {
                kfree_skb(skb);
                return PTR_ERR(sk);
        }
        if (netlink_is_kernel(sk))
                return netlink_unicast_kernel(sk, skb);

        if (sk_filter(sk, skb)) {
                err = skb->len;
                kfree_skb(skb);
                sock_put(sk);
                return err;
        }

        err = netlink_attachskb(sk, skb, &timeo, ssk);
        if (err == 1)
                goto retry;
        if (err)
                return err;

        return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);

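/*
 * Kernel-side usage sketch (assumes a kernel socket "nlsk" created with
 * netlink_kernel_create() and a destination pid taken from a received
 * request):
 *
 *      struct sk_buff *skb = nlmsg_new(payload_len, GFP_KERNEL);
 *
 *      if (skb) {
 *              ... build the message with nlmsg_put() etc ...
 *              netlink_unicast(nlsk, skb, dst_pid, MSG_DONTWAIT);
 *      }
 *
 * netlink_unicast() consumes the skb on success and on failure, so the
 * caller must not touch it afterwards.
 */
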
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
        int res = 0;
        struct listeners *listeners;

        BUG_ON(!netlink_is_kernel(sk));

        rcu_read_lock();
        listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

        if (group - 1 < nl_table[sk->sk_protocol].groups)
                res = test_bit(group - 1, listeners->masks);

        rcu_read_unlock();

        return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
            !test_bit(0, &nlk->state)) {
                skb_set_owner_r(skb, sk);
                __netlink_sendskb(sk, skb);
                return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
        }
        return -1;
}

struct netlink_broadcast_data {
        struct sock *exclude_sk;
        struct net *net;
        u32 pid;
        u32 group;
        int failure;
        int delivery_failure;
        int congested;
        int delivered;
        gfp_t allocation;
        struct sk_buff *skb, *skb2;
        int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
        void *tx_data;
};

static int do_one_broadcast(struct sock *sk,
                                   struct netlink_broadcast_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int val;

        if (p->exclude_sk == sk)
                goto out;

        if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (!net_eq(sock_net(sk), p->net))
                goto out;

        if (p->failure) {
                netlink_overrun(sk);
                goto out;
        }

        sock_hold(sk);
        if (p->skb2 == NULL) {
                if (skb_shared(p->skb)) {
                        p->skb2 = skb_clone(p->skb, p->allocation);
                } else {
                        p->skb2 = skb_get(p->skb);
                        /*
                         * skb ownership may have been set when
                         * delivered to a previous socket.
                         */
                        skb_orphan(p->skb2);
                }
        }
        if (p->skb2 == NULL) {
                netlink_overrun(sk);
                /* Clone failed. Notify ALL listeners. */
                p->failure = 1;
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if (sk_filter(sk, p->skb2)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
                netlink_overrun(sk);
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else {
                p->congested |= val;
                p->delivered = 1;
                p->skb2 = NULL;
        }
        sock_put(sk);

out:
        return 0;
}

int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
        u32 group, gfp_t allocation,
        int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
        void *filter_data)
{
        struct net *net = sock_net(ssk);
        struct netlink_broadcast_data info;
        struct hlist_node *node;
        struct sock *sk;

        skb = netlink_trim(skb, allocation);

        info.exclude_sk = ssk;
        info.net = net;
        info.pid = pid;
        info.group = group;
        info.failure = 0;
        info.delivery_failure = 0;
        info.congested = 0;
        info.delivered = 0;
        info.allocation = allocation;
        info.skb = skb;
        info.skb2 = NULL;
        info.tx_filter = filter;
        info.tx_data = filter_data;

        /* While we sleep in clone, do not allow the socket list to change */

        netlink_lock_table();

        sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
                do_one_broadcast(sk, &info);

        consume_skb(skb);

        netlink_unlock_table();

        if (info.delivery_failure) {
                kfree_skb(info.skb2);
                return -ENOBUFS;
        }
        consume_skb(info.skb2);

        if (info.delivered) {
                if (info.congested && (allocation & __GFP_WAIT))
                        yield();
                return 0;
        }
        return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
                      u32 group, gfp_t allocation)
{
        return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
                NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);

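/*
 * Broadcast usage sketch (MY_MCAST_GROUP is a hypothetical group
 * number; GFP_KERNEL is fine from process context):
 *
 *      if (netlink_has_listeners(nlsk, MY_MCAST_GROUP)) {
 *              ... build skb ...
 *              err = netlink_broadcast(nlsk, skb, 0, MY_MCAST_GROUP,
 *                                      GFP_KERNEL);
 *      }
 *
 * A return of -ESRCH just means nobody was listening and is usually not
 * treated as a hard error; checking netlink_has_listeners() first
 * avoids building the message at all in that case.
 */
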
struct netlink_set_err_data {
        struct sock *exclude_sk;
        u32 pid;
        u32 group;
        int code;
};

static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int ret = 0;

        if (sk == p->exclude_sk)
                goto out;

        if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
                goto out;

        if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
                ret = 1;
                goto out;
        }

        sk->sk_err = p->code;
        sk->sk_error_report(sk);
out:
        return ret;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @pid: the PID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_RECV_NO_ENOBUFS socket option.
 */
int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
        struct netlink_set_err_data info;
        struct hlist_node *node;
        struct sock *sk;
        int ret = 0;

        info.exclude_sk = ssk;
        info.pid = pid;
        info.group = group;
        /* sk->sk_err wants a positive error value */
        info.code = -code;

        read_lock(&nl_table_lock);

        sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
                ret += do_one_set_err(sk, &info);

        read_unlock(&nl_table_lock);
        return ret;
}
EXPORT_SYMBOL(netlink_set_err);

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
                                     unsigned int group,
                                     int is_new)
{
        int old, new = !!is_new, subscriptions;

        old = test_bit(group - 1, nlk->groups);
        subscriptions = nlk->subscriptions - old + new;
        if (new)
                __set_bit(group - 1, nlk->groups);
        else
                __clear_bit(group - 1, nlk->groups);
        netlink_update_subscriptions(&nlk->sk, subscriptions);
        netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int val = 0;
        int err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (optlen >= sizeof(int) &&
            get_user(val, (unsigned int __user *)optval))
                return -EFAULT;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (val)
                        nlk->flags |= NETLINK_RECV_PKTINFO;
                else
                        nlk->flags &= ~NETLINK_RECV_PKTINFO;
                err = 0;
                break;
        case NETLINK_ADD_MEMBERSHIP:
        case NETLINK_DROP_MEMBERSHIP: {
                if (!netlink_capable(sock, NL_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
                if (!val || val - 1 >= nlk->ngroups)
                        return -EINVAL;
                netlink_table_grab();
                netlink_update_socket_mc(nlk, val,
                                         optname == NETLINK_ADD_MEMBERSHIP);
                netlink_table_ungrab();

                if (nlk->netlink_bind)
                        nlk->netlink_bind(val);

                err = 0;
                break;
        }
        case NETLINK_BROADCAST_ERROR:
                if (val)
                        nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
                else
                        nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (val) {
                        nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
                        clear_bit(0, &nlk->state);
                        wake_up_interruptible(&nlk->wait);
                } else {
                        nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
                }
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}

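/*
 * Userspace view of the membership options (sketch): unlike nl_groups
 * at bind() time, NETLINK_ADD_MEMBERSHIP takes the group *number*, not
 * a bitmask, which is what makes groups beyond 32 reachable:
 *
 *      int grp = 33;   // illustrative group number
 *
 *      setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
 *                 &grp, sizeof(grp));
 */
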
static int netlink_getsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int len, val, err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_BROADCAST_ERROR:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}

static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
        struct nl_pktinfo info;

        info.group = NETLINK_CB(skb).dst_group;
        put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}

static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *addr = msg->msg_name;
        u32 dst_pid;
        u32 dst_group;
        struct sk_buff *skb;
        int err;
        struct scm_cookie scm;

        if (msg->msg_flags&MSG_OOB)
                return -EOPNOTSUPP;

        if (NULL == siocb->scm)
                siocb->scm = &scm;

        err = scm_send(sock, msg, siocb->scm, true);
        if (err < 0)
                return err;

        if (msg->msg_namelen) {
                err = -EINVAL;
                if (addr->nl_family != AF_NETLINK)
                        goto out;
                dst_pid = addr->nl_pid;
                dst_group = ffs(addr->nl_groups);
                err = -EPERM;
                if ((dst_group || dst_pid) &&
                    !netlink_capable(sock, NL_NONROOT_SEND))
                        goto out;
        } else {
                dst_pid = nlk->dst_pid;
                dst_group = nlk->dst_group;
        }

        if (!nlk->pid) {
                err = netlink_autobind(sock);
                if (err)
                        goto out;
        }

        err = -EMSGSIZE;
        if (len > sk->sk_sndbuf - 32)
                goto out;
        err = -ENOBUFS;
        skb = alloc_skb(len, GFP_KERNEL);
        if (skb == NULL)
                goto out;

        NETLINK_CB(skb).pid     = nlk->pid;
        NETLINK_CB(skb).dst_group = dst_group;
        memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));

        err = -EFAULT;
        if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
                kfree_skb(skb);
                goto out;
        }

        err = security_netlink_send(sk, skb);
        if (err) {
                kfree_skb(skb);
                goto out;
        }

        if (dst_group) {
                atomic_inc(&skb->users);
                netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
        }
        err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);

out:
        scm_destroy(siocb->scm);
        return err;
}

static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len,
                           int flags)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct scm_cookie scm;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int noblock = flags&MSG_DONTWAIT;
        size_t copied;
        struct sk_buff *skb, *data_skb;
        int err, ret;

        if (flags&MSG_OOB)
                return -EOPNOTSUPP;

        copied = 0;

        skb = skb_recv_datagram(sk, flags, noblock, &err);
        if (skb == NULL)
                goto out;

        data_skb = skb;

#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
        if (unlikely(skb_shinfo(skb)->frag_list)) {
                /*
                 * If this skb has a frag_list, then here that means that we
                 * will have to use the frag_list skb's data for compat tasks
                 * and the regular skb's data for normal (non-compat) tasks.
                 *
                 * If we need to send the compat skb, assign it to the
                 * 'data_skb' variable so that it will be used below for data
                 * copying. We keep 'skb' for everything else, including
                 * freeing both later.
                 */
                if (flags & MSG_CMSG_COMPAT)
                        data_skb = skb_shinfo(skb)->frag_list;
        }
#endif

        msg->msg_namelen = 0;

        copied = data_skb->len;
        if (len < copied) {
                msg->msg_flags |= MSG_TRUNC;
                copied = len;
        }

        skb_reset_transport_header(data_skb);
        err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);

        if (msg->msg_name) {
                struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
                addr->nl_family = AF_NETLINK;
                addr->nl_pad    = 0;
                addr->nl_pid    = NETLINK_CB(skb).pid;
                addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
                msg->msg_namelen = sizeof(*addr);
        }

        if (nlk->flags & NETLINK_RECV_PKTINFO)
                netlink_cmsg_recv_pktinfo(msg, skb);

        if (NULL == siocb->scm) {
                memset(&scm, 0, sizeof(scm));
                siocb->scm = &scm;
        }
        siocb->scm->creds = *NETLINK_CREDS(skb);
        if (flags & MSG_TRUNC)
                copied = data_skb->len;

        skb_free_datagram(sk, skb);

        if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
                ret = netlink_dump(sk);
                if (ret) {
                        sk->sk_err = ret;
                        sk->sk_error_report(sk);
                }
        }

        scm_recv(sock, msg, siocb->scm, flags);
out:
        netlink_rcv_wake(sk);
        return err ? : copied;
}

static void netlink_data_ready(struct sock *sk, int len)
{
        BUG();
}

/*
 *      We export these functions to other modules. They provide a
 *      complete set of kernel non-blocking support for message
 *      queueing.
 */

struct sock *
netlink_kernel_create(struct net *net, int unit,
                      struct module *module,
                      struct netlink_kernel_cfg *cfg)
{
        struct socket *sock;
        struct sock *sk;
        struct netlink_sock *nlk;
        struct listeners *listeners = NULL;
        struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
        unsigned int groups;

        BUG_ON(!nl_table);

        if (unit < 0 || unit >= MAX_LINKS)
                return NULL;

        if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
                return NULL;

        /*
         * We have to just have a reference on the net from sk, but don't
         * get_net it. Besides, we cannot get and then put the net here.
         * So we create one inside init_net and then move it to net.
         */

        if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
                goto out_sock_release_nosk;

        sk = sock->sk;
        sk_change_net(sk, net);

        if (!cfg || cfg->groups < 32)
                groups = 32;
        else
                groups = cfg->groups;

        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
        if (!listeners)
                goto out_sock_release;

        sk->sk_data_ready = netlink_data_ready;
        if (cfg && cfg->input)
                nlk_sk(sk)->netlink_rcv = cfg->input;

        if (netlink_insert(sk, net, 0))
                goto out_sock_release;

        nlk = nlk_sk(sk);
        nlk->flags |= NETLINK_KERNEL_SOCKET;

        netlink_table_grab();
        if (!nl_table[unit].registered) {
                nl_table[unit].groups = groups;
                rcu_assign_pointer(nl_table[unit].listeners, listeners);
                nl_table[unit].cb_mutex = cb_mutex;
                nl_table[unit].module = module;
                nl_table[unit].bind = cfg ? cfg->bind : NULL;
                nl_table[unit].registered = 1;
        } else {
                kfree(listeners);
                nl_table[unit].registered++;
        }
        netlink_table_ungrab();
        return sk;

out_sock_release:
        kfree(listeners);
        netlink_kernel_release(sk);
        return NULL;

out_sock_release_nosk:
        sock_release(sock);
        return NULL;
}
EXPORT_SYMBOL(netlink_kernel_create);

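/*
 * Creation sketch for a protocol handler (names here are illustrative,
 * not from this file):
 *
 *      static void my_input(struct sk_buff *skb)
 *      {
 *              // runs synchronously in the sender's context;
 *              // NETLINK_CB(skb).pid identifies the sender
 *      }
 *
 *      static struct netlink_kernel_cfg cfg = {
 *              .input  = my_input,
 *      };
 *
 *      nlsk = netlink_kernel_create(&init_net, NETLINK_USERSOCK,
 *                                   THIS_MODULE, &cfg);
 *
 * A NULL return means the socket could not be set up; pair every
 * successful create with netlink_kernel_release() on teardown.
 */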
1603
1604void
1605netlink_kernel_release(struct sock *sk)
1606{
1607        sk_release_kernel(sk);
1608}
1609EXPORT_SYMBOL(netlink_kernel_release);
1610
1611int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1612{
1613        struct listeners *new, *old;
1614        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1615
1616        if (groups < 32)
1617                groups = 32;
1618
1619        if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1620                new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1621                if (!new)
1622                        return -ENOMEM;
1623                old = rcu_dereference_protected(tbl->listeners, 1);
1624                memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1625                rcu_assign_pointer(tbl->listeners, new);
1626
1627                kfree_rcu(old, rcu);
1628        }
1629        tbl->groups = groups;
1630
1631        return 0;
1632}
1633
1634/**
1635 * netlink_change_ngroups - change number of multicast groups
1636 *
1637 * This changes the number of multicast groups that are available
1638 * on a certain netlink family. Note that it is not possible to
1639 * change the number of groups to below 32. Also note that it does
1640 * not implicitly call netlink_clear_multicast_users() when the
1641 * number of groups is reduced.
1642 *
1643 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
1644 * @groups: The new number of groups.
1645 */
1646int netlink_change_ngroups(struct sock *sk, unsigned int groups)
1647{
1648        int err;
1649
1650        netlink_table_grab();
1651        err = __netlink_change_ngroups(sk, groups);
1652        netlink_table_ungrab();
1653
1654        return err;
1655}

void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
        struct sock *sk;
        struct hlist_node *node;
        struct netlink_table *tbl = &nl_table[ksk->sk_protocol];

        sk_for_each_bound(sk, node, &tbl->mc_list)
                netlink_update_socket_mc(nlk_sk(sk), group, 0);
}

/**
 * netlink_clear_multicast_users - remove all listeners from a multicast group
 *
 * This function removes all listeners from the given group.
 * @ksk: The kernel netlink socket, as returned by
 *      netlink_kernel_create().
 * @group: The multicast group to clear.
 */
void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
{
        netlink_table_grab();
        __netlink_clear_multicast_users(ksk, group);
        netlink_table_ungrab();
}
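
/*
 * Example (sketch; my_nlsk and my_grp are placeholders): when a group
 * goes away, its listeners have to be kicked out explicitly, since
 * shrinking the group count does not do it implicitly:
 *
 *      netlink_clear_multicast_users(my_nlsk, my_grp);
 *      netlink_change_ngroups(my_nlsk, my_grp - 1);
 */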

void netlink_set_nonroot(int protocol, unsigned int flags)
{
        if ((unsigned int)protocol < MAX_LINKS)
                nl_table[protocol].nl_nonroot = flags;
}
EXPORT_SYMBOL(netlink_set_nonroot);

struct nlmsghdr *
__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags)
{
        struct nlmsghdr *nlh;
        int size = NLMSG_LENGTH(len);

        nlh = (struct nlmsghdr *)skb_put(skb, NLMSG_ALIGN(size));
        nlh->nlmsg_type = type;
        nlh->nlmsg_len = size;
        nlh->nlmsg_flags = flags;
        nlh->nlmsg_pid = pid;
        nlh->nlmsg_seq = seq;
        if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
                memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
        return nlh;
}
EXPORT_SYMBOL(__nlmsg_put);
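
/*
 * Example (sketch; MY_MSG_TYPE and struct my_msg are placeholders):
 * building a message with __nlmsg_put().  Note that __nlmsg_put()
 * assumes the skb already has room; callers that cannot guarantee
 * that should use nlmsg_put(), which checks the tailroom first:
 *
 *      struct sk_buff *skb = nlmsg_new(sizeof(struct my_msg), GFP_KERNEL);
 *      struct nlmsghdr *nlh;
 *
 *      if (!skb)
 *              return -ENOMEM;
 *      nlh = __nlmsg_put(skb, pid, seq, MY_MSG_TYPE,
 *                        sizeof(struct my_msg), 0);
 *      memcpy(nlmsg_data(nlh), &msg, sizeof(struct my_msg));
 */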

/*
 * This looks a bit ugly; it would be cleaner to run the dump from a
 * kernel thread.
 */

static int netlink_dump(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        struct netlink_callback *cb;
        struct sk_buff *skb = NULL;
        struct nlmsghdr *nlh;
        int len, err = -ENOBUFS;
        int alloc_size;

        mutex_lock(nlk->cb_mutex);

        cb = nlk->cb;
        if (cb == NULL) {
                err = -EINVAL;
                goto errout_skb;
        }

        alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);

        skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
        if (!skb)
                goto errout_skb;

        len = cb->dump(skb, cb);

        if (len > 0) {
                mutex_unlock(nlk->cb_mutex);

                if (sk_filter(sk, skb))
                        kfree_skb(skb);
                else
                        __netlink_sendskb(sk, skb);
                return 0;
        }

        nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
        if (!nlh)
                goto errout_skb;

        nl_dump_check_consistent(cb, nlh);

        memcpy(nlmsg_data(nlh), &len, sizeof(len));

        if (sk_filter(sk, skb))
                kfree_skb(skb);
        else
                __netlink_sendskb(sk, skb);

        if (cb->done)
                cb->done(cb);
        nlk->cb = NULL;
        mutex_unlock(nlk->cb_mutex);

        module_put(cb->module);
        netlink_consume_callback(cb);
        return 0;

errout_skb:
        mutex_unlock(nlk->cb_mutex);
        kfree_skb(skb);
        return err;
}

int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
                         const struct nlmsghdr *nlh,
                         struct netlink_dump_control *control)
{
        struct netlink_callback *cb;
        struct sock *sk;
        struct netlink_sock *nlk;
        int ret;

        cb = kzalloc(sizeof(*cb), GFP_KERNEL);
        if (cb == NULL)
                return -ENOBUFS;

        cb->dump = control->dump;
        cb->done = control->done;
        cb->nlh = nlh;
        cb->data = control->data;
        cb->module = control->module;
        cb->min_dump_alloc = control->min_dump_alloc;
        atomic_inc(&skb->users);
        cb->skb = skb;

        sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid);
        if (sk == NULL) {
                netlink_destroy_callback(cb);
                return -ECONNREFUSED;
        }
        nlk = nlk_sk(sk);

        mutex_lock(nlk->cb_mutex);
        /* A dump is already in progress... */
        if (nlk->cb) {
                mutex_unlock(nlk->cb_mutex);
                netlink_destroy_callback(cb);
                ret = -EBUSY;
                goto out;
        }
        /* take a reference on the module that cb->dump belongs to */
        if (!try_module_get(cb->module)) {
                mutex_unlock(nlk->cb_mutex);
                netlink_destroy_callback(cb);
                ret = -EPROTONOSUPPORT;
                goto out;
        }

        nlk->cb = cb;
        mutex_unlock(nlk->cb_mutex);

        ret = netlink_dump(sk);
out:
        sock_put(sk);

        if (ret)
                return ret;

        /* We successfully started a dump; by returning -EINTR we
         * signal not to send an ACK even if one was requested.
         */
        return -EINTR;
}
EXPORT_SYMBOL(__netlink_dump_start);
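
/*
 * Example (sketch; my_nlsk, my_dump and my_done are placeholders):
 * request handlers usually start a dump through the
 * netlink_dump_start() wrapper in <linux/netlink.h>, which fills in
 * control->module and forwards here:
 *
 *      if (nlh->nlmsg_flags & NLM_F_DUMP) {
 *              struct netlink_dump_control c = {
 *                      .dump = my_dump,
 *                      .done = my_done,
 *              };
 *              return netlink_dump_start(my_nlsk, skb, nlh, &c);
 *      }
 */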

void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
        struct sk_buff *skb;
        struct nlmsghdr *rep;
        struct nlmsgerr *errmsg;
        size_t payload = sizeof(*errmsg);

        /* error messages get the original request appended */
        if (err)
                payload += nlmsg_len(nlh);

        skb = nlmsg_new(payload, GFP_KERNEL);
        if (!skb) {
                struct sock *sk;

                sk = netlink_lookup(sock_net(in_skb->sk),
                                    in_skb->sk->sk_protocol,
                                    NETLINK_CB(in_skb).pid);
                if (sk) {
                        sk->sk_err = ENOBUFS;
                        sk->sk_error_report(sk);
                        sock_put(sk);
                }
                return;
        }

        rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
                          NLMSG_ERROR, payload, 0);
        errmsg = nlmsg_data(rep);
        errmsg->error = err;
        memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
        netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
EXPORT_SYMBOL(netlink_ack);

int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
                                                   struct nlmsghdr *))
{
        struct nlmsghdr *nlh;
        int err;

        while (skb->len >= nlmsg_total_size(0)) {
                int msglen;

                nlh = nlmsg_hdr(skb);
                err = 0;

                if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
                        return 0;

                /* Only requests are handled by the kernel */
                if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
                        goto ack;

                /* Skip control messages */
                if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
                        goto ack;

                err = cb(skb, nlh);
                if (err == -EINTR)
                        goto skip;

ack:
                if (nlh->nlmsg_flags & NLM_F_ACK || err)
                        netlink_ack(skb, nlh, err);

skip:
                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
                if (msglen > skb->len)
                        msglen = skb->len;
                skb_pull(skb, msglen);
        }

        return 0;
}
EXPORT_SYMBOL(netlink_rcv_skb);
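
/*
 * Example (sketch; my_rcv_msg, my_handle and MY_MSG_TYPE are
 * placeholders): a kernel socket's input callback typically hands
 * each skb to netlink_rcv_skb() with a per-message dispatcher,
 * mirroring what rtnetlink does:
 *
 *      static int my_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 *      {
 *              switch (nlh->nlmsg_type) {
 *              case MY_MSG_TYPE:
 *                      return my_handle(skb, nlh);
 *              default:
 *                      return -EINVAL;
 *              }
 *      }
 *
 *      static void my_input(struct sk_buff *skb)
 *      {
 *              netlink_rcv_skb(skb, &my_rcv_msg);
 *      }
 */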

/**
 * nlmsg_notify - send a notification netlink message
 * @sk: netlink socket to use
 * @skb: notification message
 * @pid: destination netlink pid for reports or 0
 * @group: destination multicast group or 0
 * @report: 1 to echo the message back to @pid, 0 to disable
 * @flags: allocation flags
 */
int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
                 unsigned int group, int report, gfp_t flags)
{
        int err = 0;

        if (group) {
                int exclude_pid = 0;

                if (report) {
                        atomic_inc(&skb->users);
                        exclude_pid = pid;
                }

                /* errors are reported via the destination sk->sk_err;
                 * delivery errors are propagated only if the
                 * NETLINK_BROADCAST_ERROR flag is set */
                err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
        }

        if (report) {
                int err2;

                err2 = nlmsg_unicast(sk, skb, pid);
                if (!err || err == -ESRCH)
                        err = err2;
        }

        return err;
}
EXPORT_SYMBOL(nlmsg_notify);
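
/*
 * Example (sketch; my_nlsk, oskb and MY_GRP are placeholders): a
 * notification built from a request is typically echoed back to the
 * sender only when NLM_F_ECHO was set, as rtnetlink does:
 *
 *      err = nlmsg_notify(my_nlsk, skb, NETLINK_CB(oskb).pid, MY_GRP,
 *                         nlh->nlmsg_flags & NLM_F_ECHO, GFP_KERNEL);
 */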

#ifdef CONFIG_PROC_FS
struct nl_seq_iter {
        struct seq_net_private p;
        int link;
        int hash_idx;
};

static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
{
        struct nl_seq_iter *iter = seq->private;
        int i, j;
        struct sock *s;
        struct hlist_node *node;
        loff_t off = 0;

        for (i = 0; i < MAX_LINKS; i++) {
                struct nl_pid_hash *hash = &nl_table[i].hash;

                for (j = 0; j <= hash->mask; j++) {
                        sk_for_each(s, node, &hash->table[j]) {
                                if (sock_net(s) != seq_file_net(seq))
                                        continue;
                                if (off == pos) {
                                        iter->link = i;
                                        iter->hash_idx = j;
                                        return s;
                                }
                                ++off;
                        }
                }
        }
        return NULL;
}

static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(nl_table_lock)
{
        read_lock(&nl_table_lock);
        return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct sock *s;
        struct nl_seq_iter *iter;
        int i, j;

        ++*pos;

        if (v == SEQ_START_TOKEN)
                return netlink_seq_socket_idx(seq, 0);

        iter = seq->private;
        s = v;
        do {
                s = sk_next(s);
        } while (s && sock_net(s) != seq_file_net(seq));
        if (s)
                return s;

        i = iter->link;
        j = iter->hash_idx + 1;

        do {
                struct nl_pid_hash *hash = &nl_table[i].hash;

                for (; j <= hash->mask; j++) {
                        s = sk_head(&hash->table[j]);
                        while (s && sock_net(s) != seq_file_net(seq))
                                s = sk_next(s);
                        if (s) {
                                iter->link = i;
                                iter->hash_idx = j;
                                return s;
                        }
                }

                j = 0;
        } while (++i < MAX_LINKS);

        return NULL;
}

static void netlink_seq_stop(struct seq_file *seq, void *v)
        __releases(nl_table_lock)
{
        read_unlock(&nl_table_lock);
}


static int netlink_seq_show(struct seq_file *seq, void *v)
{
        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "sk       Eth Pid    Groups   "
                         "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
        } else {
                struct sock *s = v;
                struct netlink_sock *nlk = nlk_sk(s);

                seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
                           s,
                           s->sk_protocol,
                           nlk->pid,
                           nlk->groups ? (u32)nlk->groups[0] : 0,
                           sk_rmem_alloc_get(s),
                           sk_wmem_alloc_get(s),
                           nlk->cb,
                           atomic_read(&s->sk_refcnt),
                           atomic_read(&s->sk_drops),
                           sock_i_ino(s));
        }
        return 0;
}

static const struct seq_operations netlink_seq_ops = {
        .start  = netlink_seq_start,
        .next   = netlink_seq_next,
        .stop   = netlink_seq_stop,
        .show   = netlink_seq_show,
};


static int netlink_seq_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &netlink_seq_ops,
                            sizeof(struct nl_seq_iter));
}

static const struct file_operations netlink_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = netlink_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
};

#endif

int netlink_register_notifier(struct notifier_block *nb)
{
        return atomic_notifier_chain_register(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_register_notifier);

int netlink_unregister_notifier(struct notifier_block *nb)
{
        return atomic_notifier_chain_unregister(&netlink_chain, nb);
}
EXPORT_SYMBOL(netlink_unregister_notifier);

static const struct proto_ops netlink_ops = {
        .family =       PF_NETLINK,
        .owner =        THIS_MODULE,
        .release =      netlink_release,
        .bind =         netlink_bind,
        .connect =      netlink_connect,
        .socketpair =   sock_no_socketpair,
        .accept =       sock_no_accept,
        .getname =      netlink_getname,
        .poll =         datagram_poll,
        .ioctl =        sock_no_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     sock_no_shutdown,
        .setsockopt =   netlink_setsockopt,
        .getsockopt =   netlink_getsockopt,
        .sendmsg =      netlink_sendmsg,
        .recvmsg =      netlink_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
};

static const struct net_proto_family netlink_family_ops = {
        .family = PF_NETLINK,
        .create = netlink_create,
        .owner  = THIS_MODULE,  /* for consistency 8) */
};

static int __net_init netlink_net_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
        if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
                return -ENOMEM;
#endif
        return 0;
}

static void __net_exit netlink_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(net, "netlink");
#endif
}

static void __init netlink_add_usersock_entry(void)
{
        struct listeners *listeners;
        int groups = 32;

        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
        if (!listeners)
                panic("netlink_add_usersock_entry: Cannot allocate listeners\n");

        netlink_table_grab();

        nl_table[NETLINK_USERSOCK].groups = groups;
        rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
        nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
        nl_table[NETLINK_USERSOCK].registered = 1;
        nl_table[NETLINK_USERSOCK].nl_nonroot = NL_NONROOT_SEND;

        netlink_table_ungrab();
}

static struct pernet_operations __net_initdata netlink_net_ops = {
        .init = netlink_net_init,
        .exit = netlink_net_exit,
};

static int __init netlink_proto_init(void)
{
        struct sk_buff *dummy_skb;
        int i;
        unsigned long limit;
        unsigned int order;
        int err = proto_register(&netlink_proto, 0);

        if (err != 0)
                goto out;

        BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));

        nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
        if (!nl_table)
                goto panic;

        if (totalram_pages >= (128 * 1024))
                limit = totalram_pages >> (21 - PAGE_SHIFT);
        else
                limit = totalram_pages >> (23 - PAGE_SHIFT);

        order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
        limit = (1UL << order) / sizeof(struct hlist_head);
        order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
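
        /*
         * Worked example (assuming 4 KiB pages and 8-byte hlist_heads):
         * with 1 GiB of RAM, totalram_pages = 2^18 >= 128 * 1024, so
         * limit = 2^18 >> (21 - 12) = 2^9, giving a table budget of
         * order = 9 + 12 = 21 bits, i.e. 2 MiB.  That budget holds
         * 2^21 / 8 = 2^18 buckets, so max_shift below ends up as 18.
         */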

        for (i = 0; i < MAX_LINKS; i++) {
                struct nl_pid_hash *hash = &nl_table[i].hash;

                hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table));
                if (!hash->table) {
                        while (i-- > 0)
                                nl_pid_hash_free(nl_table[i].hash.table,
                                                 1 * sizeof(*hash->table));
                        kfree(nl_table);
                        goto panic;
                }
                hash->max_shift = order;
                hash->shift = 0;
                hash->mask = 0;
                hash->rehash_time = jiffies;
        }

        netlink_add_usersock_entry();

        sock_register(&netlink_family_ops);
        register_pernet_subsys(&netlink_net_ops);
        /* The netlink device handler may be needed early. */
        rtnetlink_init();
out:
        return err;
panic:
        panic("netlink_init: Cannot allocate nl_table\n");
}

core_initcall(netlink_proto_init);