linux/net/netlink/af_netlink.c
/*
 * NETLINK      Kernel-user communication protocol.
 *
 *              Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>
 *                              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 *                               - inc module use count of module that owns
 *                                 the kernel socket in case userspace opens
 *                                 socket of same protocol
 *                               - remove all module support, since netlink is
 *                                 mandatory if CONFIG_NET=y these days
 */

#include <linux/module.h>

#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/security.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
#include <linux/random.h>
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/scm.h>
#include <net/netlink.h>

#define NLGRPSZ(x)      (ALIGN(x, sizeof(unsigned long) * 8) / 8)
#define NLGRPLONGS(x)   (NLGRPSZ(x)/sizeof(unsigned long))

struct netlink_sock {
        /* struct sock has to be the first member of netlink_sock */
        struct sock             sk;
        u32                     portid;
        u32                     dst_portid;
        u32                     dst_group;
        u32                     flags;
        u32                     subscriptions;
        u32                     ngroups;
        unsigned long           *groups;
        unsigned long           state;
        wait_queue_head_t       wait;
        struct netlink_callback *cb;
        struct mutex            *cb_mutex;
        struct mutex            cb_def_mutex;
        void                    (*netlink_rcv)(struct sk_buff *skb);
        void                    (*netlink_bind)(int group);
        struct module           *module;
};

struct listeners {
        struct rcu_head         rcu;
        unsigned long           masks[0];
};

#define NETLINK_KERNEL_SOCKET   0x1
#define NETLINK_RECV_PKTINFO    0x2
#define NETLINK_BROADCAST_SEND_ERROR    0x4
#define NETLINK_RECV_NO_ENOBUFS 0x8

static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
        return container_of(sk, struct netlink_sock, sk);
}

static inline int netlink_is_kernel(struct sock *sk)
{
        return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
}

struct nl_portid_hash {
        struct hlist_head       *table;
        unsigned long           rehash_time;

        unsigned int            mask;
        unsigned int            shift;

        unsigned int            entries;
        unsigned int            max_shift;

        u32                     rnd;
};

struct netlink_table {
        struct nl_portid_hash   hash;
        struct hlist_head       mc_list;
        struct listeners __rcu  *listeners;
        unsigned int            flags;
        unsigned int            groups;
        struct mutex            *cb_mutex;
        struct module           *module;
        void                    (*bind)(int group);
        int                     registered;
};

static struct netlink_table *nl_table;

static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);

static int netlink_dump(struct sock *sk);

static DEFINE_RWLOCK(nl_table_lock);
static atomic_t nl_table_users = ATOMIC_INIT(0);

#define nl_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&nl_table_lock))

static ATOMIC_NOTIFIER_HEAD(netlink_chain);

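/* Map a 1-based multicast group number onto its bit in a 32-bit group
 * mask; group 0 ("no group") yields an empty mask.
 */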
static inline u32 netlink_group_mask(u32 group)
{
        return group ? 1 << (group - 1) : 0;
}

static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid)
{
        return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask];
}

static void netlink_destroy_callback(struct netlink_callback *cb)
{
        kfree_skb(cb->skb);
        kfree(cb);
}

static void netlink_consume_callback(struct netlink_callback *cb)
{
        consume_skb(cb->skb);
        kfree(cb);
}

static void netlink_sock_destruct(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->cb) {
                if (nlk->cb->done)
                        nlk->cb->done(nlk->cb);

                module_put(nlk->cb->module);
                netlink_destroy_callback(nlk->cb);
        }

        skb_queue_purge(&sk->sk_receive_queue);

        if (!sock_flag(sk, SOCK_DEAD)) {
                printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
                return;
        }

        WARN_ON(atomic_read(&sk->sk_rmem_alloc));
        WARN_ON(atomic_read(&sk->sk_wmem_alloc));
        WARN_ON(nlk_sk(sk)->groups);
}

/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
 * SMP. Look, when several writers sleep and reader wakes them up, all but one
 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
 * this, _but_ remember, it adds useless work on UP machines.
 */

void netlink_table_grab(void)
        __acquires(nl_table_lock)
{
        might_sleep();

        write_lock_irq(&nl_table_lock);

        if (atomic_read(&nl_table_users)) {
                DECLARE_WAITQUEUE(wait, current);

                add_wait_queue_exclusive(&nl_table_wait, &wait);
                for (;;) {
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        if (atomic_read(&nl_table_users) == 0)
                                break;
                        write_unlock_irq(&nl_table_lock);
                        schedule();
                        write_lock_irq(&nl_table_lock);
                }

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nl_table_wait, &wait);
        }
}

void netlink_table_ungrab(void)
        __releases(nl_table_lock)
{
        write_unlock_irq(&nl_table_lock);
        wake_up(&nl_table_wait);
}

static inline void
netlink_lock_table(void)
{
        /* read_lock() synchronizes us to netlink_table_grab */

        read_lock(&nl_table_lock);
        atomic_inc(&nl_table_users);
        read_unlock(&nl_table_lock);
}

static inline void
netlink_unlock_table(void)
{
        if (atomic_dec_and_test(&nl_table_users))
                wake_up(&nl_table_wait);
}

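/* Find the socket bound to (net, protocol, portid) in the portid hash and
 * return it with its refcount raised, or NULL if no such socket is bound.
 */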
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
{
        struct nl_portid_hash *hash = &nl_table[protocol].hash;
        struct hlist_head *head;
        struct sock *sk;
        struct hlist_node *node;

        read_lock(&nl_table_lock);
        head = nl_portid_hashfn(hash, portid);
        sk_for_each(sk, node, head) {
                if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) {
                        sock_hold(sk);
                        goto found;
                }
        }
        sk = NULL;
found:
        read_unlock(&nl_table_lock);
        return sk;
}

static struct hlist_head *nl_portid_hash_zalloc(size_t size)
{
        if (size <= PAGE_SIZE)
                return kzalloc(size, GFP_ATOMIC);
        else
                return (struct hlist_head *)
                        __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
                                         get_order(size));
}

static void nl_portid_hash_free(struct hlist_head *table, size_t size)
{
        if (size <= PAGE_SIZE)
                kfree(table);
        else
                free_pages((unsigned long)table, get_order(size));
}

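/* Grow (when @grow is set) or merely reseed the portid hash, moving every
 * bound socket into the new table. Returns 1 if the table was replaced.
 */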
static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow)
{
        unsigned int omask, mask, shift;
        size_t osize, size;
        struct hlist_head *otable, *table;
        int i;

        omask = mask = hash->mask;
        osize = size = (mask + 1) * sizeof(*table);
        shift = hash->shift;

        if (grow) {
                if (++shift > hash->max_shift)
                        return 0;
                mask = mask * 2 + 1;
                size *= 2;
        }

        table = nl_portid_hash_zalloc(size);
        if (!table)
                return 0;

        otable = hash->table;
        hash->table = table;
        hash->mask = mask;
        hash->shift = shift;
        get_random_bytes(&hash->rnd, sizeof(hash->rnd));

        for (i = 0; i <= omask; i++) {
                struct sock *sk;
                struct hlist_node *node, *tmp;

                sk_for_each_safe(sk, node, tmp, &otable[i])
                        __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid));
        }

        nl_portid_hash_free(otable, osize);
        hash->rehash_time = jiffies + 10 * 60 * HZ;
        return 1;
}

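/* Decide whether an insertion into a chain of length @len warrants a
 * resize: grow when the average chain length exceeds one, otherwise just
 * reseed at most every ten minutes when this chain is unusually long.
 */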
static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len)
{
        int avg = hash->entries >> hash->shift;

        if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1))
                return 1;

        if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
                nl_portid_hash_rehash(hash, 0);
                return 1;
        }

        return 0;
}

static const struct proto_ops netlink_ops;

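/* Recompute the per-protocol listener bitmap by OR-ing the group masks of
 * every socket bound to a multicast group of this protocol.
 */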
static void
netlink_update_listeners(struct sock *sk)
{
        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
        struct hlist_node *node;
        unsigned long mask;
        unsigned int i;
        struct listeners *listeners;

        listeners = nl_deref_protected(tbl->listeners);
        if (!listeners)
                return;

        for (i = 0; i < NLGRPLONGS(tbl->groups); i++) {
                mask = 0;
                sk_for_each_bound(sk, node, &tbl->mc_list) {
                        if (i < NLGRPLONGS(nlk_sk(sk)->ngroups))
                                mask |= nlk_sk(sk)->groups[i];
                }
                listeners->masks[i] = mask;
        }
        /* this function is only called with the netlink table "grabbed", which
         * makes sure updates are visible before bind or setsockopt return. */
}

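/* Bind @sk to @portid in the hash: fails with -EADDRINUSE if the portid is
 * already taken in this net, or -EBUSY if the socket is already bound.
 */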
static int netlink_insert(struct sock *sk, struct net *net, u32 portid)
{
        struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        int err = -EADDRINUSE;
        struct sock *osk;
        struct hlist_node *node;
        int len;

        netlink_table_grab();
        head = nl_portid_hashfn(hash, portid);
        len = 0;
        sk_for_each(osk, node, head) {
                if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid))
                        break;
                len++;
        }
        if (node)
                goto err;

        err = -EBUSY;
        if (nlk_sk(sk)->portid)
                goto err;

        err = -ENOMEM;
        if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
                goto err;

        if (len && nl_portid_hash_dilute(hash, len))
                head = nl_portid_hashfn(hash, portid);
        hash->entries++;
        nlk_sk(sk)->portid = portid;
        sk_add_node(sk, head);
        err = 0;

err:
        netlink_table_ungrab();
        return err;
}

static void netlink_remove(struct sock *sk)
{
        netlink_table_grab();
        if (sk_del_node_init(sk))
                nl_table[sk->sk_protocol].hash.entries--;
        if (nlk_sk(sk)->subscriptions)
                __sk_del_bind_node(sk);
        netlink_table_ungrab();
}

static struct proto netlink_proto = {
        .name     = "NETLINK",
        .owner    = THIS_MODULE,
        .obj_size = sizeof(struct netlink_sock),
};

static int __netlink_create(struct net *net, struct socket *sock,
                            struct mutex *cb_mutex, int protocol)
{
        struct sock *sk;
        struct netlink_sock *nlk;

        sock->ops = &netlink_ops;

        sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
        if (!sk)
                return -ENOMEM;

        sock_init_data(sock, sk);

        nlk = nlk_sk(sk);
        if (cb_mutex) {
                nlk->cb_mutex = cb_mutex;
        } else {
                nlk->cb_mutex = &nlk->cb_def_mutex;
                mutex_init(nlk->cb_mutex);
        }
        init_waitqueue_head(&nlk->wait);

        sk->sk_destruct = netlink_sock_destruct;
        sk->sk_protocol = protocol;
        return 0;
}

static int netlink_create(struct net *net, struct socket *sock, int protocol,
                          int kern)
{
        struct module *module = NULL;
        struct mutex *cb_mutex;
        struct netlink_sock *nlk;
        void (*bind)(int group);
        int err = 0;

        sock->state = SS_UNCONNECTED;

        if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
                return -ESOCKTNOSUPPORT;

        if (protocol < 0 || protocol >= MAX_LINKS)
                return -EPROTONOSUPPORT;

        netlink_lock_table();
#ifdef CONFIG_MODULES
        if (!nl_table[protocol].registered) {
                netlink_unlock_table();
                request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
                netlink_lock_table();
        }
#endif
        if (nl_table[protocol].registered &&
            try_module_get(nl_table[protocol].module))
                module = nl_table[protocol].module;
        else
                err = -EPROTONOSUPPORT;
        cb_mutex = nl_table[protocol].cb_mutex;
        bind = nl_table[protocol].bind;
        netlink_unlock_table();

        if (err < 0)
                goto out;

        err = __netlink_create(net, sock, cb_mutex, protocol);
        if (err < 0)
                goto out_module;

        local_bh_disable();
        sock_prot_inuse_add(net, &netlink_proto, 1);
        local_bh_enable();

        nlk = nlk_sk(sock->sk);
        nlk->module = module;
        nlk->netlink_bind = bind;
out:
        return err;

out_module:
        module_put(module);
        goto out;
}

static int netlink_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk;

        if (!sk)
                return 0;

        netlink_remove(sk);
        sock_orphan(sk);
        nlk = nlk_sk(sk);

        /*
         * OK. Socket is unlinked, any packets that arrive now
         * will be purged.
         */

        sock->sk = NULL;
        wake_up_interruptible_all(&nlk->wait);

        skb_queue_purge(&sk->sk_write_queue);

        if (nlk->portid) {
                struct netlink_notify n = {
                                                .net = sock_net(sk),
                                                .protocol = sk->sk_protocol,
                                                .portid = nlk->portid,
                                          };
                atomic_notifier_call_chain(&netlink_chain,
                                NETLINK_URELEASE, &n);
        }

        module_put(nlk->module);

        netlink_table_grab();
        if (netlink_is_kernel(sk)) {
                BUG_ON(nl_table[sk->sk_protocol].registered == 0);
                if (--nl_table[sk->sk_protocol].registered == 0) {
                        struct listeners *old;

                        old = nl_deref_protected(nl_table[sk->sk_protocol].listeners);
                        RCU_INIT_POINTER(nl_table[sk->sk_protocol].listeners, NULL);
                        kfree_rcu(old, rcu);
                        nl_table[sk->sk_protocol].module = NULL;
                        nl_table[sk->sk_protocol].bind = NULL;
                        nl_table[sk->sk_protocol].flags = 0;
                        nl_table[sk->sk_protocol].registered = 0;
                }
        } else if (nlk->subscriptions) {
                netlink_update_listeners(sk);
        }
        netlink_table_ungrab();

        kfree(nlk->groups);
        nlk->groups = NULL;

        local_bh_disable();
        sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1);
        local_bh_enable();
        sock_put(sk);
        return 0;
}

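/* Pick a portid automatically: try the thread group id first, then walk
 * negative values downward from -4097 until the insert succeeds.
 */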
static int netlink_autobind(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash;
        struct hlist_head *head;
        struct sock *osk;
        struct hlist_node *node;
        s32 portid = task_tgid_vnr(current);
        int err;
        static s32 rover = -4097;

retry:
        cond_resched();
        netlink_table_grab();
        head = nl_portid_hashfn(hash, portid);
        sk_for_each(osk, node, head) {
                if (!net_eq(sock_net(osk), net))
                        continue;
                if (nlk_sk(osk)->portid == portid) {
                        /* Bind collision, search negative portid values. */
                        portid = rover--;
                        if (rover > -4097)
                                rover = -4097;
                        netlink_table_ungrab();
                        goto retry;
                }
        }
        netlink_table_ungrab();

        err = netlink_insert(sk, net, portid);
        if (err == -EADDRINUSE)
                goto retry;

        /* If 2 threads race to autobind, that is fine.  */
        if (err == -EBUSY)
                err = 0;

        return err;
}

static inline int netlink_capable(const struct socket *sock, unsigned int flag)
{
        return (nl_table[sock->sk->sk_protocol].flags & flag) ||
               capable(CAP_NET_ADMIN);
}

static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (nlk->subscriptions && !subscriptions)
                __sk_del_bind_node(sk);
        else if (!nlk->subscriptions && subscriptions)
                sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
        nlk->subscriptions = subscriptions;
}

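/* Grow the per-socket group bitmap to the protocol's current group count;
 * newly covered groups start out cleared.
 */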
static int netlink_realloc_groups(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int groups;
        unsigned long *new_groups;
        int err = 0;

        netlink_table_grab();

        groups = nl_table[sk->sk_protocol].groups;
        if (!nl_table[sk->sk_protocol].registered) {
                err = -ENOENT;
                goto out_unlock;
        }

        if (nlk->ngroups >= groups)
                goto out_unlock;

        new_groups = krealloc(nlk->groups, NLGRPSZ(groups), GFP_ATOMIC);
        if (new_groups == NULL) {
                err = -ENOMEM;
                goto out_unlock;
        }
        memset((char *)new_groups + NLGRPSZ(nlk->ngroups), 0,
               NLGRPSZ(groups) - NLGRPSZ(nlk->ngroups));

        nlk->groups = new_groups;
        nlk->ngroups = groups;
 out_unlock:
        netlink_table_ungrab();
        return err;
}

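/* Bind to an explicit portid and/or subscribe to the multicast groups set
 * in nl_groups; a zero nl_pid means an automatically chosen portid.
 */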
static int netlink_bind(struct socket *sock, struct sockaddr *addr,
                        int addr_len)
{
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
        int err;

        if (nladdr->nl_family != AF_NETLINK)
                return -EINVAL;

        /* Only superuser is allowed to listen to multicasts */
        if (nladdr->nl_groups) {
                if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
        }

        if (nlk->portid) {
                if (nladdr->nl_pid != nlk->portid)
                        return -EINVAL;
        } else {
                err = nladdr->nl_pid ?
                        netlink_insert(sk, net, nladdr->nl_pid) :
                        netlink_autobind(sock);
                if (err)
                        return err;
        }

        if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
                return 0;

        netlink_table_grab();
        netlink_update_subscriptions(sk, nlk->subscriptions +
                                         hweight32(nladdr->nl_groups) -
                                         hweight32(nlk->groups[0]));
        nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
        netlink_update_listeners(sk);
        netlink_table_ungrab();

        if (nlk->netlink_bind && nlk->groups[0]) {
                int i;

                for (i = 0; i < nlk->ngroups; i++) {
                        if (test_bit(i, nlk->groups))
                                nlk->netlink_bind(i);
                }
        }

        return 0;
}

static int netlink_connect(struct socket *sock, struct sockaddr *addr,
                           int alen, int flags)
{
        int err = 0;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;

        if (alen < sizeof(addr->sa_family))
                return -EINVAL;

        if (addr->sa_family == AF_UNSPEC) {
                sk->sk_state    = NETLINK_UNCONNECTED;
                nlk->dst_portid = 0;
                nlk->dst_group  = 0;
                return 0;
        }
        if (addr->sa_family != AF_NETLINK)
                return -EINVAL;

        /* Only superuser is allowed to send multicasts */
        if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
                return -EPERM;

        if (!nlk->portid)
                err = netlink_autobind(sock);

        if (err == 0) {
                sk->sk_state    = NETLINK_CONNECTED;
                nlk->dst_portid = nladdr->nl_pid;
                nlk->dst_group  = ffs(nladdr->nl_groups);
        }

        return err;
}

static int netlink_getname(struct socket *sock, struct sockaddr *addr,
                           int *addr_len, int peer)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        DECLARE_SOCKADDR(struct sockaddr_nl *, nladdr, addr);

        nladdr->nl_family = AF_NETLINK;
        nladdr->nl_pad = 0;
        *addr_len = sizeof(*nladdr);

        if (peer) {
                nladdr->nl_pid = nlk->dst_portid;
                nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
        } else {
                nladdr->nl_pid = nlk->portid;
                nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
        }
        return 0;
}

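/* Note a receive-queue overrun: report ENOBUFS on the socket once (unless
 * suppressed with NETLINK_NO_ENOBUFS) and count the dropped packet.
 */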
static void netlink_overrun(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
                if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
                        sk->sk_err = ENOBUFS;
                        sk->sk_error_report(sk);
                }
        }
        atomic_inc(&sk->sk_drops);
}

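/* Resolve the unicast destination, refusing delivery when the target is
 * connected to a peer other than the sending socket.
 */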
static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
{
        struct sock *sock;
        struct netlink_sock *nlk;

        sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid);
        if (!sock)
                return ERR_PTR(-ECONNREFUSED);

        /* Don't bother queuing skb if kernel socket has no input function */
        nlk = nlk_sk(sock);
        if (sock->sk_state == NETLINK_CONNECTED &&
            nlk->dst_portid != nlk_sk(ssk)->portid) {
                sock_put(sock);
                return ERR_PTR(-ECONNREFUSED);
        }
        return sock;
}

struct sock *netlink_getsockbyfilp(struct file *filp)
{
        struct inode *inode = filp->f_path.dentry->d_inode;
        struct sock *sock;

        if (!S_ISSOCK(inode->i_mode))
                return ERR_PTR(-ENOTSOCK);

        sock = SOCKET_I(inode)->sk;
        if (sock->sk_family != AF_NETLINK)
                return ERR_PTR(-EINVAL);

        sock_hold(sock);
        return sock;
}

/*
 * Attach a skb to a netlink socket.
 * The caller must hold a reference to the destination socket. On error, the
 * reference is dropped. The skb is not sent to the destination; all error
 * checks are performed and memory in the queue is reserved.
 * Return values:
 * < 0: error. skb freed, reference to sock dropped.
 * 0: continue
 * 1: repeat lookup - reference dropped while waiting for socket memory.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
                      long *timeo, struct sock *ssk)
{
        struct netlink_sock *nlk;

        nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
            test_bit(0, &nlk->state)) {
                DECLARE_WAITQUEUE(wait, current);
                if (!*timeo) {
                        if (!ssk || netlink_is_kernel(ssk))
                                netlink_overrun(sk);
                        sock_put(sk);
                        kfree_skb(skb);
                        return -EAGAIN;
                }

                __set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&nlk->wait, &wait);

                if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
                     test_bit(0, &nlk->state)) &&
                    !sock_flag(sk, SOCK_DEAD))
                        *timeo = schedule_timeout(*timeo);

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&nlk->wait, &wait);
                sock_put(sk);

                if (signal_pending(current)) {
                        kfree_skb(skb);
                        return sock_intr_errno(*timeo);
                }
                return 1;
        }
        skb_set_owner_r(skb, sk);
        return 0;
}

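/* Queue the skb on the receiver and fire its data-ready callback; the skb
 * must already be charged to @sk via skb_set_owner_r().
 */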
static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = skb->len;

        skb_queue_tail(&sk->sk_receive_queue, skb);
        sk->sk_data_ready(sk, len);
        return len;
}

int netlink_sendskb(struct sock *sk, struct sk_buff *skb)
{
        int len = __netlink_sendskb(sk, skb);

        sock_put(sk);
        return len;
}

void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
        kfree_skb(skb);
        sock_put(sk);
}

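/* Release unused tail room from an over-allocated skb so that it charges
 * less truesize to the receive queue; shared skbs are cloned first.
 */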
static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
{
        int delta;

        skb_orphan(skb);

        delta = skb->end - skb->tail;
        if (delta * 2 < skb->truesize)
                return skb;

        if (skb_shared(skb)) {
                struct sk_buff *nskb = skb_clone(skb, allocation);
                if (!nskb)
                        return skb;
                consume_skb(skb);
                skb = nskb;
        }

        if (!pskb_expand_head(skb, 0, -delta, allocation))
                skb->truesize -= delta;

        return skb;
}

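/* Called after a receive: drop the congestion bit once the queue is empty
 * and wake senders blocked in netlink_attachskb().
 */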
static void netlink_rcv_wake(struct sock *sk)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (skb_queue_empty(&sk->sk_receive_queue))
                clear_bit(0, &nlk->state);
        if (!test_bit(0, &nlk->state))
                wake_up_interruptible(&nlk->wait);
}

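/* Hand an skb straight to a kernel socket's input callback, bypassing the
 * receive queue; consumes the skb and the caller's reference on @sk.
 */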
static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb,
                                  struct sock *ssk)
{
        int ret;
        struct netlink_sock *nlk = nlk_sk(sk);

        ret = -ECONNREFUSED;
        if (nlk->netlink_rcv != NULL) {
                ret = skb->len;
                skb_set_owner_r(skb, sk);
                NETLINK_CB(skb).ssk = ssk;
                nlk->netlink_rcv(skb);
                consume_skb(skb);
        } else {
                kfree_skb(skb);
        }
        sock_put(sk);
        return ret;
}

int netlink_unicast(struct sock *ssk, struct sk_buff *skb,
                    u32 portid, int nonblock)
{
        struct sock *sk;
        int err;
        long timeo;

        skb = netlink_trim(skb, gfp_any());

        timeo = sock_sndtimeo(ssk, nonblock);
retry:
        sk = netlink_getsockbyportid(ssk, portid);
        if (IS_ERR(sk)) {
                kfree_skb(skb);
                return PTR_ERR(sk);
        }
        if (netlink_is_kernel(sk))
                return netlink_unicast_kernel(sk, skb, ssk);

        if (sk_filter(sk, skb)) {
                err = skb->len;
                kfree_skb(skb);
                sock_put(sk);
                return err;
        }

        err = netlink_attachskb(sk, skb, &timeo, ssk);
        if (err == 1)
                goto retry;
        if (err)
                return err;

        return netlink_sendskb(sk, skb);
}
EXPORT_SYMBOL(netlink_unicast);

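/* Test whether any socket is currently subscribed to @group on the given
 * kernel socket's protocol, using the RCU-protected listener bitmap.
 */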
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
        int res = 0;
        struct listeners *listeners;

        BUG_ON(!netlink_is_kernel(sk));

        rcu_read_lock();
        listeners = rcu_dereference(nl_table[sk->sk_protocol].listeners);

        if (listeners && group - 1 < nl_table[sk->sk_protocol].groups)
                res = test_bit(group - 1, listeners->masks);

        rcu_read_unlock();

        return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);

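/* Queue one broadcast copy on a receiver if there is room: returns 1 when
 * the receiver is above half of its rcvbuf (congested), 0 when not, and
 * -1 on overrun.
 */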
static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
        struct netlink_sock *nlk = nlk_sk(sk);

        if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
            !test_bit(0, &nlk->state)) {
                skb_set_owner_r(skb, sk);
                __netlink_sendskb(sk, skb);
                return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
        }
        return -1;
}

struct netlink_broadcast_data {
        struct sock *exclude_sk;
        struct net *net;
        u32 portid;
        u32 group;
        int failure;
        int delivery_failure;
        int congested;
        int delivered;
        gfp_t allocation;
        struct sk_buff *skb, *skb2;
        int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
        void *tx_data;
};

static int do_one_broadcast(struct sock *sk,
                                   struct netlink_broadcast_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int val;

        if (p->exclude_sk == sk)
                goto out;

        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (!net_eq(sock_net(sk), p->net))
                goto out;

        if (p->failure) {
                netlink_overrun(sk);
                goto out;
        }

        sock_hold(sk);
        if (p->skb2 == NULL) {
                if (skb_shared(p->skb)) {
                        p->skb2 = skb_clone(p->skb, p->allocation);
                } else {
                        p->skb2 = skb_get(p->skb);
                        /*
                         * skb ownership may have been set when
                         * delivered to a previous socket.
                         */
                        skb_orphan(p->skb2);
                }
        }
        if (p->skb2 == NULL) {
                netlink_overrun(sk);
                /* Clone failed. Notify ALL listeners. */
                p->failure = 1;
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if (sk_filter(sk, p->skb2)) {
                kfree_skb(p->skb2);
                p->skb2 = NULL;
        } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
                netlink_overrun(sk);
                if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
                        p->delivery_failure = 1;
        } else {
                p->congested |= val;
                p->delivered = 1;
                p->skb2 = NULL;
        }
        sock_put(sk);

out:
        return 0;
}

int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid,
        u32 group, gfp_t allocation,
        int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
        void *filter_data)
{
        struct net *net = sock_net(ssk);
        struct netlink_broadcast_data info;
        struct hlist_node *node;
        struct sock *sk;

        skb = netlink_trim(skb, allocation);

        info.exclude_sk = ssk;
        info.net = net;
        info.portid = portid;
        info.group = group;
        info.failure = 0;
        info.delivery_failure = 0;
        info.congested = 0;
        info.delivered = 0;
        info.allocation = allocation;
        info.skb = skb;
        info.skb2 = NULL;
        info.tx_filter = filter;
        info.tx_data = filter_data;

        /* While we sleep in clone, do not allow the socket list to change */

        netlink_lock_table();

        sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
                do_one_broadcast(sk, &info);

        consume_skb(skb);

        netlink_unlock_table();

        if (info.delivery_failure) {
                kfree_skb(info.skb2);
                return -ENOBUFS;
        }
        consume_skb(info.skb2);

        if (info.delivered) {
                if (info.congested && (allocation & __GFP_WAIT))
                        yield();
                return 0;
        }
        return -ESRCH;
}
EXPORT_SYMBOL(netlink_broadcast_filtered);

int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid,
                      u32 group, gfp_t allocation)
{
        return netlink_broadcast_filtered(ssk, skb, portid, group, allocation,
                NULL, NULL);
}
EXPORT_SYMBOL(netlink_broadcast);

struct netlink_set_err_data {
        struct sock *exclude_sk;
        u32 portid;
        u32 group;
        int code;
};

static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
{
        struct netlink_sock *nlk = nlk_sk(sk);
        int ret = 0;

        if (sk == p->exclude_sk)
                goto out;

        if (!net_eq(sock_net(sk), sock_net(p->exclude_sk)))
                goto out;

        if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups ||
            !test_bit(p->group - 1, nlk->groups))
                goto out;

        if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
                ret = 1;
                goto out;
        }

        sk->sk_err = p->code;
        sk->sk_error_report(sk);
out:
        return ret;
}

/**
 * netlink_set_err - report error to broadcast listeners
 * @ssk: the kernel netlink socket, as returned by netlink_kernel_create()
 * @portid: the PORTID of a process that we want to skip (if any)
 * @group: the broadcast group that will notice the error
 * @code: error code, must be negative (as usual in kernelspace)
 *
 * This function returns the number of broadcast listeners that have set the
 * NETLINK_RECV_NO_ENOBUFS socket option.
 */
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
        struct netlink_set_err_data info;
        struct hlist_node *node;
        struct sock *sk;
        int ret = 0;

        info.exclude_sk = ssk;
        info.portid = portid;
        info.group = group;
        /* sk->sk_err wants a positive error value */
        info.code = -code;

        read_lock(&nl_table_lock);

        sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
                ret += do_one_set_err(sk, &info);

        read_unlock(&nl_table_lock);
        return ret;
}
EXPORT_SYMBOL(netlink_set_err);

/* must be called with netlink table grabbed */
static void netlink_update_socket_mc(struct netlink_sock *nlk,
                                     unsigned int group,
                                     int is_new)
{
        int old, new = !!is_new, subscriptions;

        old = test_bit(group - 1, nlk->groups);
        subscriptions = nlk->subscriptions - old + new;
        if (new)
                __set_bit(group - 1, nlk->groups);
        else
                __clear_bit(group - 1, nlk->groups);
        netlink_update_subscriptions(&nlk->sk, subscriptions);
        netlink_update_listeners(&nlk->sk);
}

static int netlink_setsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        unsigned int val = 0;
        int err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (optlen >= sizeof(int) &&
            get_user(val, (unsigned int __user *)optval))
                return -EFAULT;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (val)
                        nlk->flags |= NETLINK_RECV_PKTINFO;
                else
                        nlk->flags &= ~NETLINK_RECV_PKTINFO;
                err = 0;
                break;
        case NETLINK_ADD_MEMBERSHIP:
        case NETLINK_DROP_MEMBERSHIP: {
                if (!netlink_capable(sock, NL_CFG_F_NONROOT_RECV))
                        return -EPERM;
                err = netlink_realloc_groups(sk);
                if (err)
                        return err;
                if (!val || val - 1 >= nlk->ngroups)
                        return -EINVAL;
                netlink_table_grab();
                netlink_update_socket_mc(nlk, val,
                                         optname == NETLINK_ADD_MEMBERSHIP);
                netlink_table_ungrab();

                if (nlk->netlink_bind)
                        nlk->netlink_bind(val);

                err = 0;
                break;
        }
        case NETLINK_BROADCAST_ERROR:
                if (val)
                        nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
                else
                        nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (val) {
                        nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
                        clear_bit(0, &nlk->state);
                        wake_up_interruptible(&nlk->wait);
                } else {
                        nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
                }
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}

static int netlink_getsockopt(struct socket *sock, int level, int optname,
                              char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int len, val, err;

        if (level != SOL_NETLINK)
                return -ENOPROTOOPT;

        if (get_user(len, optlen))
                return -EFAULT;
        if (len < 0)
                return -EINVAL;

        switch (optname) {
        case NETLINK_PKTINFO:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_BROADCAST_ERROR:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        case NETLINK_NO_ENOBUFS:
                if (len < sizeof(int))
                        return -EINVAL;
                len = sizeof(int);
                val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
                if (put_user(len, optlen) ||
                    put_user(val, optval))
                        return -EFAULT;
                err = 0;
                break;
        default:
                err = -ENOPROTOOPT;
        }
        return err;
}

static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
{
        struct nl_pktinfo info;

        info.group = NETLINK_CB(skb).dst_group;
        put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}

static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        struct sockaddr_nl *addr = msg->msg_name;
        u32 dst_portid;
        u32 dst_group;
        struct sk_buff *skb;
        int err;
        struct scm_cookie scm;

        if (msg->msg_flags&MSG_OOB)
                return -EOPNOTSUPP;

        if (NULL == siocb->scm)
                siocb->scm = &scm;

        err = scm_send(sock, msg, siocb->scm, true);
        if (err < 0)
                return err;

        if (msg->msg_namelen) {
                err = -EINVAL;
                if (addr->nl_family != AF_NETLINK)
                        goto out;
                dst_portid = addr->nl_pid;
                dst_group = ffs(addr->nl_groups);
                err = -EPERM;
                if ((dst_group || dst_portid) &&
                    !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
                        goto out;
        } else {
                dst_portid = nlk->dst_portid;
                dst_group = nlk->dst_group;
        }

        if (!nlk->portid) {
                err = netlink_autobind(sock);
                if (err)
                        goto out;
        }

        err = -EMSGSIZE;
        if (len > sk->sk_sndbuf - 32)
                goto out;
        err = -ENOBUFS;
        skb = alloc_skb(len, GFP_KERNEL);
        if (skb == NULL)
                goto out;

        NETLINK_CB(skb).portid  = nlk->portid;
        NETLINK_CB(skb).dst_group = dst_group;
        NETLINK_CB(skb).creds   = siocb->scm->creds;

        err = -EFAULT;
        if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
                kfree_skb(skb);
                goto out;
        }

        err = security_netlink_send(sk, skb);
        if (err) {
                kfree_skb(skb);
                goto out;
        }

        if (dst_group) {
                atomic_inc(&skb->users);
                netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL);
        }
        err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT);

out:
        scm_destroy(siocb->scm);
        return err;
}

static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
                           struct msghdr *msg, size_t len,
                           int flags)
{
        struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
        struct scm_cookie scm;
        struct sock *sk = sock->sk;
        struct netlink_sock *nlk = nlk_sk(sk);
        int noblock = flags&MSG_DONTWAIT;
        size_t copied;
        struct sk_buff *skb, *data_skb;
        int err, ret;

        if (flags&MSG_OOB)
                return -EOPNOTSUPP;

        copied = 0;

        skb = skb_recv_datagram(sk, flags, noblock, &err);
        if (skb == NULL)
                goto out;

        data_skb = skb;

#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
        if (unlikely(skb_shinfo(skb)->frag_list)) {
                /*
                 * If this skb has a frag_list, then here that means that we
                 * will have to use the frag_list skb's data for compat tasks
                 * and the regular skb's data for normal (non-compat) tasks.
                 *
                 * If we need to send the compat skb, assign it to the
                 * 'data_skb' variable so that it will be used below for data
                 * copying. We keep 'skb' for everything else, including
                 * freeing both later.
                 */
                if (flags & MSG_CMSG_COMPAT)
                        data_skb = skb_shinfo(skb)->frag_list;
        }
#endif

        msg->msg_namelen = 0;

        copied = data_skb->len;
        if (len < copied) {
                msg->msg_flags |= MSG_TRUNC;
                copied = len;
        }

        skb_reset_transport_header(data_skb);
        err = skb_copy_datagram_iovec(data_skb, 0, msg->msg_iov, copied);

        if (msg->msg_name) {
                struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
                addr->nl_family = AF_NETLINK;
                addr->nl_pad    = 0;
                addr->nl_pid    = NETLINK_CB(skb).portid;
                addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
                msg->msg_namelen = sizeof(*addr);
        }

        if (nlk->flags & NETLINK_RECV_PKTINFO)
                netlink_cmsg_recv_pktinfo(msg, skb);

        if (NULL == siocb->scm) {
                memset(&scm, 0, sizeof(scm));
                siocb->scm = &scm;
        }
        siocb->scm->creds = *NETLINK_CREDS(skb);
        if (flags & MSG_TRUNC)
                copied = data_skb->len;

        skb_free_datagram(sk, skb);

        if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
                ret = netlink_dump(sk);
                if (ret) {
                        sk->sk_err = ret;
                        sk->sk_error_report(sk);
                }
        }

        scm_recv(sock, msg, siocb->scm, flags);
out:
        netlink_rcv_wake(sk);
        return err ? : copied;
}

static void netlink_data_ready(struct sock *sk, int len)
{
        BUG();
}

/*
 *      We export these functions to other modules. They provide a
 *      complete set of kernel non-blocking support for message
 *      queueing.
 */

struct sock *
__netlink_kernel_create(struct net *net, int unit, struct module *module,
                        struct netlink_kernel_cfg *cfg)
{
        struct socket *sock;
        struct sock *sk;
        struct netlink_sock *nlk;
        struct listeners *listeners = NULL;
        struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL;
        unsigned int groups;

        BUG_ON(!nl_table);

        if (unit < 0 || unit >= MAX_LINKS)
                return NULL;

        if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
                return NULL;

        /*
         * We have to just have a reference on the net from sk, but don't
         * get_net it. Besides, we cannot get and then put the net here.
         * So we create one inside init_net and then move it to net.
         */
1565
1566        if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
1567                goto out_sock_release_nosk;
1568
1569        sk = sock->sk;
1570        sk_change_net(sk, net);
1571
1572        if (!cfg || cfg->groups < 32)
1573                groups = 32;
1574        else
1575                groups = cfg->groups;
1576
1577        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
1578        if (!listeners)
1579                goto out_sock_release;
1580
1581        sk->sk_data_ready = netlink_data_ready;
1582        if (cfg && cfg->input)
1583                nlk_sk(sk)->netlink_rcv = cfg->input;
1584
1585        if (netlink_insert(sk, net, 0))
1586                goto out_sock_release;
1587
1588        nlk = nlk_sk(sk);
1589        nlk->flags |= NETLINK_KERNEL_SOCKET;
1590
1591        netlink_table_grab();
1592        if (!nl_table[unit].registered) {
1593                nl_table[unit].groups = groups;
1594                rcu_assign_pointer(nl_table[unit].listeners, listeners);
1595                nl_table[unit].cb_mutex = cb_mutex;
1596                nl_table[unit].module = module;
1597                if (cfg) {
1598                        nl_table[unit].bind = cfg->bind;
1599                        nl_table[unit].flags = cfg->flags;
1600                }
1601                nl_table[unit].registered = 1;
1602        } else {
1603                kfree(listeners);
1604                nl_table[unit].registered++;
1605        }
1606        netlink_table_ungrab();
1607        return sk;
1608
1609out_sock_release:
1610        kfree(listeners);
1611        netlink_kernel_release(sk);
1612        return NULL;
1613
1614out_sock_release_nosk:
1615        sock_release(sock);
1616        return NULL;
1617}
1618EXPORT_SYMBOL(__netlink_kernel_create);
1619
1620void
1621netlink_kernel_release(struct sock *sk)
1622{
1623        sk_release_kernel(sk);
1624}
1625EXPORT_SYMBOL(netlink_kernel_release);
1626
1627int __netlink_change_ngroups(struct sock *sk, unsigned int groups)
1628{
1629        struct listeners *new, *old;
1630        struct netlink_table *tbl = &nl_table[sk->sk_protocol];
1631
1632        if (groups < 32)
1633                groups = 32;
1634
1635        if (NLGRPSZ(tbl->groups) < NLGRPSZ(groups)) {
1636                new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC);
1637                if (!new)
1638                        return -ENOMEM;
1639                old = nl_deref_protected(tbl->listeners);
1640                memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups));
1641                rcu_assign_pointer(tbl->listeners, new);
1642
1643                kfree_rcu(old, rcu);
1644        }
1645        tbl->groups = groups;
1646
1647        return 0;
1648}
1649
1650/**
1651 * netlink_change_ngroups - change number of multicast groups
1652 *
1653 * This changes the number of multicast groups that are available
1654 * on a certain netlink family. Note that it is not possible to
1655 * change the number of groups to below 32. Also note that it does
1656 * not implicitly call netlink_clear_multicast_users() when the
1657 * number of groups is reduced.
1658 *
1659 * @sk: The kernel netlink socket, as returned by netlink_kernel_create().
1660 * @groups: The new number of groups.
1661 */
1662int netlink_change_ngroups(struct sock *sk, unsigned int groups)
1663{
1664        int err;
1665
1666        netlink_table_grab();
1667        err = __netlink_change_ngroups(sk, groups);
1668        netlink_table_ungrab();
1669
1670        return err;
1671}
1672
1673void __netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1674{
1675        struct sock *sk;
1676        struct hlist_node *node;
1677        struct netlink_table *tbl = &nl_table[ksk->sk_protocol];
1678
1679        sk_for_each_bound(sk, node, &tbl->mc_list)
1680                netlink_update_socket_mc(nlk_sk(sk), group, 0);
1681}
1682
1683/**
1684 * netlink_clear_multicast_users - remove all listeners from a group
1685 *
1686 * This function removes all listeners from the given group.
1687 * @ksk: The kernel netlink socket, as returned by
1688 *      netlink_kernel_create().
1689 * @group: The multicast group to clear.
1690 */
1691void netlink_clear_multicast_users(struct sock *ksk, unsigned int group)
1692{
1693        netlink_table_grab();
1694        __netlink_clear_multicast_users(ksk, group);
1695        netlink_table_ungrab();
1696}
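
/*
 * Example (illustration only): resizing the group space of a kernel
 * socket with the two helpers above.  As the kernel-doc for
 * netlink_change_ngroups() notes, shrinking does not kick existing
 * listeners, so the dropped groups are cleared explicitly first.
 * my_resize_groups() and the group counts are assumptions for the sketch.
 */
#if 0
static int my_resize_groups(struct sock *my_sk)
{
        unsigned int group;
        int err;

        /* grow from the default 32 to 64 groups */
        err = netlink_change_ngroups(my_sk, 64);
        if (err)
                return err;

        /* shrink back: kick listeners out of groups 33..64 first */
        for (group = 33; group <= 64; group++)
                netlink_clear_multicast_users(my_sk, group);
        return netlink_change_ngroups(my_sk, 32);
}
#endif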
1697
1698struct nlmsghdr *
1699__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags)
1700{
1701        struct nlmsghdr *nlh;
1702        int size = NLMSG_LENGTH(len);
1703
1704        nlh = (struct nlmsghdr*)skb_put(skb, NLMSG_ALIGN(size));
1705        nlh->nlmsg_type = type;
1706        nlh->nlmsg_len = size;
1707        nlh->nlmsg_flags = flags;
1708        nlh->nlmsg_pid = portid;
1709        nlh->nlmsg_seq = seq;
1710        if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0)
1711                memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size);
1712        return nlh;
1713}
1714EXPORT_SYMBOL(__nlmsg_put);
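
/*
 * Example (illustration only): filling a message with the checked
 * nlmsg_put() wrapper from <net/netlink.h>, which verifies tailroom and
 * then calls __nlmsg_put() above.  MY_MSG_TYPE and struct my_payload are
 * hypothetical names for the sketch.
 */
#if 0
struct my_payload {
        u32 value;
};

static struct sk_buff *my_build_msg(u32 portid, u32 seq)
{
        struct my_payload *data;
        struct nlmsghdr *nlh;
        struct sk_buff *skb;

        skb = nlmsg_new(sizeof(*data), GFP_KERNEL);
        if (!skb)
                return NULL;

        nlh = nlmsg_put(skb, portid, seq, MY_MSG_TYPE, sizeof(*data), 0);
        if (!nlh) {
                nlmsg_free(skb);
                return NULL;
        }

        data = nlmsg_data(nlh);
        data->value = 42;
        return skb;
}
#endif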
1715
1716/*
1717 * This looks a bit ugly.
1718 * It would be better to create a kernel thread.
1719 */
1720
1721static int netlink_dump(struct sock *sk)
1722{
1723        struct netlink_sock *nlk = nlk_sk(sk);
1724        struct netlink_callback *cb;
1725        struct sk_buff *skb = NULL;
1726        struct nlmsghdr *nlh;
1727        int len, err = -ENOBUFS;
1728        int alloc_size;
1729
1730        mutex_lock(nlk->cb_mutex);
1731
1732        cb = nlk->cb;
1733        if (cb == NULL) {
1734                err = -EINVAL;
1735                goto errout_skb;
1736        }
1737
1738        alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
1739
1740        skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
1741        if (!skb)
1742                goto errout_skb;
1743
1744        len = cb->dump(skb, cb);
1745
1746        if (len > 0) {
1747                mutex_unlock(nlk->cb_mutex);
1748
1749                if (sk_filter(sk, skb))
1750                        kfree_skb(skb);
1751                else
1752                        __netlink_sendskb(sk, skb);
1753                return 0;
1754        }
1755
1756        nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
1757        if (!nlh)
1758                goto errout_skb;
1759
1760        nl_dump_check_consistent(cb, nlh);
1761
1762        memcpy(nlmsg_data(nlh), &len, sizeof(len));
1763
1764        if (sk_filter(sk, skb))
1765                kfree_skb(skb);
1766        else
1767                __netlink_sendskb(sk, skb);
1768
1769        if (cb->done)
1770                cb->done(cb);
1771        nlk->cb = NULL;
1772        mutex_unlock(nlk->cb_mutex);
1773
1774        module_put(cb->module);
1775        netlink_consume_callback(cb);
1776        return 0;
1777
1778errout_skb:
1779        mutex_unlock(nlk->cb_mutex);
1780        kfree_skb(skb);
1781        return err;
1782}
1783
1784int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
1785                         const struct nlmsghdr *nlh,
1786                         struct netlink_dump_control *control)
1787{
1788        struct netlink_callback *cb;
1789        struct sock *sk;
1790        struct netlink_sock *nlk;
1791        int ret;
1792
1793        cb = kzalloc(sizeof(*cb), GFP_KERNEL);
1794        if (cb == NULL)
1795                return -ENOBUFS;
1796
1797        cb->dump = control->dump;
1798        cb->done = control->done;
1799        cb->nlh = nlh;
1800        cb->data = control->data;
1801        cb->module = control->module;
1802        cb->min_dump_alloc = control->min_dump_alloc;
1803        atomic_inc(&skb->users);
1804        cb->skb = skb;
1805
1806        sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
1807        if (sk == NULL) {
1808                netlink_destroy_callback(cb);
1809                return -ECONNREFUSED;
1810        }
1811        nlk = nlk_sk(sk);
1812
1813        mutex_lock(nlk->cb_mutex);
1814        /* A dump is in progress... */
1815        if (nlk->cb) {
1816                mutex_unlock(nlk->cb_mutex);
1817                netlink_destroy_callback(cb);
1818                ret = -EBUSY;
1819                goto out;
1820        }
1821        /* take a reference on the module that cb->dump belongs to */
1822        if (!try_module_get(cb->module)) {
1823                mutex_unlock(nlk->cb_mutex);
1824                netlink_destroy_callback(cb);
1825                ret = -EPROTONOSUPPORT;
1826                goto out;
1827        }
1828
1829        nlk->cb = cb;
1830        mutex_unlock(nlk->cb_mutex);
1831
1832        ret = netlink_dump(sk);
1833out:
1834        sock_put(sk);
1835
1836        if (ret)
1837                return ret;
1838
1839        /* We successfully started a dump; by returning -EINTR we
1840         * signal that no ACK should be sent even if one was requested.
1841         */
1842        return -EINTR;
1843}
1844EXPORT_SYMBOL(__netlink_dump_start);
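
/*
 * Example (illustration only): kicking off a dump from a request handler
 * through the netlink_dump_start() wrapper, which fills in THIS_MODULE
 * and calls __netlink_dump_start() above.  my_dump() and my_doit() are
 * hypothetical; skb->sk is the kernel socket the request arrived on.
 */
#if 0
static int my_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        /*
         * Fill skb with as many records as fit and return > 0 to be
         * called again with a fresh skb; use cb->args[] to remember
         * where to resume.  Returning 0 ends the dump with NLMSG_DONE.
         */
        return 0;
}

static int my_doit(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        if (nlh->nlmsg_flags & NLM_F_DUMP) {
                struct netlink_dump_control c = {
                        .dump = my_dump,
                };

                return netlink_dump_start(skb->sk, skb, nlh, &c);
        }
        return -EOPNOTSUPP;
}
#endif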
1845
1846void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
1847{
1848        struct sk_buff *skb;
1849        struct nlmsghdr *rep;
1850        struct nlmsgerr *errmsg;
1851        size_t payload = sizeof(*errmsg);
1852
1853        /* error messages get the original request appended */
1854        if (err)
1855                payload += nlmsg_len(nlh);
1856
1857        skb = nlmsg_new(payload, GFP_KERNEL);
1858        if (!skb) {
1859                struct sock *sk;
1860
1861                sk = netlink_lookup(sock_net(in_skb->sk),
1862                                    in_skb->sk->sk_protocol,
1863                                    NETLINK_CB(in_skb).portid);
1864                if (sk) {
1865                        sk->sk_err = ENOBUFS;
1866                        sk->sk_error_report(sk);
1867                        sock_put(sk);
1868                }
1869                return;
1870        }
1871
1872        rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
1873                          NLMSG_ERROR, payload, 0);
1874        errmsg = nlmsg_data(rep);
1875        errmsg->error = err;
1876        memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh));
1877        netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT);
1878}
1879EXPORT_SYMBOL(netlink_ack);
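
/*
 * Example (illustration only): an input callback that bypasses
 * netlink_rcv_skb() must ack requests itself.  Passing err == 0 sends a
 * plain ACK; a negative err additionally appends the offending request,
 * as implemented above.  my_raw_input() is hypothetical.
 */
#if 0
static void my_raw_input(struct sk_buff *skb)
{
        struct nlmsghdr *nlh = nlmsg_hdr(skb);

        if (skb->len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
                return;         /* malformed, drop silently */

        netlink_ack(skb, nlh, -EOPNOTSUPP);     /* reject everything */
}
#endif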
1880
1881int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1882                                                     struct nlmsghdr *))
1883{
1884        struct nlmsghdr *nlh;
1885        int err;
1886
1887        while (skb->len >= nlmsg_total_size(0)) {
1888                int msglen;
1889
1890                nlh = nlmsg_hdr(skb);
1891                err = 0;
1892
1893                if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
1894                        return 0;
1895
1896                /* Only requests are handled by the kernel */
1897                if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
1898                        goto ack;
1899
1900                /* Skip control messages */
1901                if (nlh->nlmsg_type < NLMSG_MIN_TYPE)
1902                        goto ack;
1903
1904                err = cb(skb, nlh);
1905                if (err == -EINTR)
1906                        goto skip;
1907
1908ack:
1909                if (nlh->nlmsg_flags & NLM_F_ACK || err)
1910                        netlink_ack(skb, nlh, err);
1911
1912skip:
1913                msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1914                if (msglen > skb->len)
1915                        msglen = skb->len;
1916                skb_pull(skb, msglen);
1917        }
1918
1919        return 0;
1920}
1921EXPORT_SYMBOL(netlink_rcv_skb);
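
/*
 * Example (illustration only): the usual way a kernel socket's input
 * callback feeds requests through netlink_rcv_skb().  Returning 0 from
 * the per-message handler triggers an ACK only when the sender set
 * NLM_F_ACK; a negative return always triggers an error ACK; -EINTR
 * (a freshly started dump) suppresses the ACK, as above.  MY_MSG_TYPE
 * and the handler names are assumptions.
 */
#if 0
static int my_msg_handler(struct sk_buff *skb, struct nlmsghdr *nlh)
{
        switch (nlh->nlmsg_type) {
        case MY_MSG_TYPE:
                return 0;       /* handled; ACK only if NLM_F_ACK */
        default:
                return -EINVAL; /* always acked with the error */
        }
}

static void my_input(struct sk_buff *skb)
{
        netlink_rcv_skb(skb, my_msg_handler);
}
#endif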
1922
1923/**
1924 * nlmsg_notify - send a notification netlink message
1925 * @sk: netlink socket to use
1926 * @skb: notification message
1927 * @portid: destination netlink portid for reports or 0
1928 * @group: destination multicast group or 0
1929 * @report: 1 to also unicast a report to @portid, 0 to disable
1930 * @flags: allocation flags
1931 */
1932int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
1933                 unsigned int group, int report, gfp_t flags)
1934{
1935        int err = 0;
1936
1937        if (group) {
1938                int exclude_portid = 0;
1939
1940                if (report) {
1941                        atomic_inc(&skb->users);
1942                        exclude_portid = portid;
1943                }
1944
1945        /* errors are reported via the destination's sk->sk_err, but delivery
1946         * errors are propagated if the NETLINK_BROADCAST_ERROR flag is set */
1947                err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
1948        }
1949
1950        if (report) {
1951                int err2;
1952
1953                err2 = nlmsg_unicast(sk, skb, portid);
1954                if (!err || err == -ESRCH)
1955                        err = err2;
1956        }
1957
1958        return err;
1959}
1960EXPORT_SYMBOL(nlmsg_notify);
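
/*
 * Example (illustration only): broadcasting an event with nlmsg_notify().
 * With report set, the original requester (@portid) receives a unicast
 * copy and is excluded from the multicast send.  MY_GROUP and the
 * my_build_msg() helper from the earlier sketch are assumptions.
 */
#if 0
static int my_notify(struct sock *my_sk, u32 portid, int report)
{
        struct sk_buff *skb = my_build_msg(portid, 0);

        if (!skb)
                return -ENOBUFS;
        return nlmsg_notify(my_sk, skb, portid, MY_GROUP, report,
                            GFP_KERNEL);
}
#endif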
1961
1962#ifdef CONFIG_PROC_FS
1963struct nl_seq_iter {
1964        struct seq_net_private p;
1965        int link;
1966        int hash_idx;
1967};
1968
1969static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
1970{
1971        struct nl_seq_iter *iter = seq->private;
1972        int i, j;
1973        struct sock *s;
1974        struct hlist_node *node;
1975        loff_t off = 0;
1976
1977        for (i = 0; i < MAX_LINKS; i++) {
1978                struct nl_portid_hash *hash = &nl_table[i].hash;
1979
1980                for (j = 0; j <= hash->mask; j++) {
1981                        sk_for_each(s, node, &hash->table[j]) {
1982                                if (sock_net(s) != seq_file_net(seq))
1983                                        continue;
1984                                if (off == pos) {
1985                                        iter->link = i;
1986                                        iter->hash_idx = j;
1987                                        return s;
1988                                }
1989                                ++off;
1990                        }
1991                }
1992        }
1993        return NULL;
1994}
1995
1996static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
1997        __acquires(nl_table_lock)
1998{
1999        read_lock(&nl_table_lock);
2000        return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2001}
2002
2003static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2004{
2005        struct sock *s;
2006        struct nl_seq_iter *iter;
2007        int i, j;
2008
2009        ++*pos;
2010
2011        if (v == SEQ_START_TOKEN)
2012                return netlink_seq_socket_idx(seq, 0);
2013
2014        iter = seq->private;
2015        s = v;
2016        do {
2017                s = sk_next(s);
2018        } while (s && sock_net(s) != seq_file_net(seq));
2019        if (s)
2020                return s;
2021
2022        i = iter->link;
2023        j = iter->hash_idx + 1;
2024
2025        do {
2026                struct nl_portid_hash *hash = &nl_table[i].hash;
2027
2028                for (; j <= hash->mask; j++) {
2029                        s = sk_head(&hash->table[j]);
2030                        while (s && sock_net(s) != seq_file_net(seq))
2031                                s = sk_next(s);
2032                        if (s) {
2033                                iter->link = i;
2034                                iter->hash_idx = j;
2035                                return s;
2036                        }
2037                }
2038
2039                j = 0;
2040        } while (++i < MAX_LINKS);
2041
2042        return NULL;
2043}
2044
2045static void netlink_seq_stop(struct seq_file *seq, void *v)
2046        __releases(nl_table_lock)
2047{
2048        read_unlock(&nl_table_lock);
2049}
2050
2051
2052static int netlink_seq_show(struct seq_file *seq, void *v)
2053{
2054        if (v == SEQ_START_TOKEN) {
2055                seq_puts(seq,
2056                         "sk       Eth Pid    Groups   "
2057                         "Rmem     Wmem     Dump     Locks     Drops     Inode\n");
2058        } else {
2059                struct sock *s = v;
2060                struct netlink_sock *nlk = nlk_sk(s);
2061
2062                seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n",
2063                           s,
2064                           s->sk_protocol,
2065                           nlk->portid,
2066                           nlk->groups ? (u32)nlk->groups[0] : 0,
2067                           sk_rmem_alloc_get(s),
2068                           sk_wmem_alloc_get(s),
2069                           nlk->cb,
2070                           atomic_read(&s->sk_refcnt),
2071                           atomic_read(&s->sk_drops),
2072                           sock_i_ino(s)
2073                        );
2074
2075        }
2076        return 0;
2077}
2078
2079static const struct seq_operations netlink_seq_ops = {
2080        .start  = netlink_seq_start,
2081        .next   = netlink_seq_next,
2082        .stop   = netlink_seq_stop,
2083        .show   = netlink_seq_show,
2084};
2085
2086
2087static int netlink_seq_open(struct inode *inode, struct file *file)
2088{
2089        return seq_open_net(inode, file, &netlink_seq_ops,
2090                                sizeof(struct nl_seq_iter));
2091}
2092
2093static const struct file_operations netlink_seq_fops = {
2094        .owner          = THIS_MODULE,
2095        .open           = netlink_seq_open,
2096        .read           = seq_read,
2097        .llseek         = seq_lseek,
2098        .release        = seq_release_net,
2099};
2100
2101#endif
2102
2103int netlink_register_notifier(struct notifier_block *nb)
2104{
2105        return atomic_notifier_chain_register(&netlink_chain, nb);
2106}
2107EXPORT_SYMBOL(netlink_register_notifier);
2108
2109int netlink_unregister_notifier(struct notifier_block *nb)
2110{
2111        return atomic_notifier_chain_unregister(&netlink_chain, nb);
2112}
2113EXPORT_SYMBOL(netlink_unregister_notifier);
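
/*
 * Example (illustration only): watching for released netlink sockets via
 * the notifier chain above.  The chain fires NETLINK_URELEASE when a
 * bound socket is released; the protocol filter here is an assumption
 * for the sketch.
 */
#if 0
static int my_netlink_event(struct notifier_block *nb,
                            unsigned long event, void *ptr)
{
        struct netlink_notify *n = ptr;

        if (event == NETLINK_URELEASE && n->protocol == NETLINK_USERSOCK)
                printk(KERN_DEBUG "portid %d released\n", n->portid);
        return NOTIFY_DONE;
}

static struct notifier_block my_nb = {
        .notifier_call = my_netlink_event,
};

/* registered/unregistered with netlink_{register,unregister}_notifier() */
#endif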
2114
2115static const struct proto_ops netlink_ops = {
2116        .family =       PF_NETLINK,
2117        .owner =        THIS_MODULE,
2118        .release =      netlink_release,
2119        .bind =         netlink_bind,
2120        .connect =      netlink_connect,
2121        .socketpair =   sock_no_socketpair,
2122        .accept =       sock_no_accept,
2123        .getname =      netlink_getname,
2124        .poll =         datagram_poll,
2125        .ioctl =        sock_no_ioctl,
2126        .listen =       sock_no_listen,
2127        .shutdown =     sock_no_shutdown,
2128        .setsockopt =   netlink_setsockopt,
2129        .getsockopt =   netlink_getsockopt,
2130        .sendmsg =      netlink_sendmsg,
2131        .recvmsg =      netlink_recvmsg,
2132        .mmap =         sock_no_mmap,
2133        .sendpage =     sock_no_sendpage,
2134};
2135
2136static const struct net_proto_family netlink_family_ops = {
2137        .family = PF_NETLINK,
2138        .create = netlink_create,
2139        .owner  = THIS_MODULE,  /* for consistency 8) */
2140};
2141
2142static int __net_init netlink_net_init(struct net *net)
2143{
2144#ifdef CONFIG_PROC_FS
2145        if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
2146                return -ENOMEM;
2147#endif
2148        return 0;
2149}
2150
2151static void __net_exit netlink_net_exit(struct net *net)
2152{
2153#ifdef CONFIG_PROC_FS
2154        proc_net_remove(net, "netlink");
2155#endif
2156}
2157
2158static void __init netlink_add_usersock_entry(void)
2159{
2160        struct listeners *listeners;
2161        int groups = 32;
2162
2163        listeners = kzalloc(sizeof(*listeners) + NLGRPSZ(groups), GFP_KERNEL);
2164        if (!listeners)
2165                panic("netlink_add_usersock_entry: Cannot allocate listeners\n");
2166
2167        netlink_table_grab();
2168
2169        nl_table[NETLINK_USERSOCK].groups = groups;
2170        rcu_assign_pointer(nl_table[NETLINK_USERSOCK].listeners, listeners);
2171        nl_table[NETLINK_USERSOCK].module = THIS_MODULE;
2172        nl_table[NETLINK_USERSOCK].registered = 1;
2173        nl_table[NETLINK_USERSOCK].flags = NL_CFG_F_NONROOT_SEND;
2174
2175        netlink_table_ungrab();
2176}
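
/*
 * Example (illustration only, userspace): thanks to NL_CFG_F_NONROOT_SEND
 * above, unprivileged processes may talk over NETLINK_USERSOCK, e.g.:
 *
 *      int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_USERSOCK);
 *      struct sockaddr_nl addr = { .nl_family = AF_NETLINK };
 *
 *      bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *      ... exchange nlmsghdr-framed datagrams with a peer portid ...
 */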
2177
2178static struct pernet_operations __net_initdata netlink_net_ops = {
2179        .init = netlink_net_init,
2180        .exit = netlink_net_exit,
2181};
2182
2183static int __init netlink_proto_init(void)
2184{
2185        struct sk_buff *dummy_skb;
2186        int i;
2187        unsigned long limit;
2188        unsigned int order;
2189        int err = proto_register(&netlink_proto, 0);
2190
2191        if (err != 0)
2192                goto out;
2193
2194        BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb));
2195
2196        nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
2197        if (!nl_table)
2198                goto panic;
2199
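        /*
         * Scale the maximum portid-hash size with available memory:
         * budget about one page of hash table per 2MB of RAM (per 8MB
         * on machines with fewer than 128K pages), then convert that
         * byte budget into the maximum bucket-count shift (max_shift)
         * applied to each table below.
         */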
2200        if (totalram_pages >= (128 * 1024))
2201                limit = totalram_pages >> (21 - PAGE_SHIFT);
2202        else
2203                limit = totalram_pages >> (23 - PAGE_SHIFT);
2204
2205        order = get_bitmask_order(limit) - 1 + PAGE_SHIFT;
2206        limit = (1UL << order) / sizeof(struct hlist_head);
2207        order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1;
2208
2209        for (i = 0; i < MAX_LINKS; i++) {
2210                struct nl_portid_hash *hash = &nl_table[i].hash;
2211
2212                hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table));
2213                if (!hash->table) {
2214                        while (i-- > 0)
2215                                nl_portid_hash_free(nl_table[i].hash.table,
2216                                                 1 * sizeof(*hash->table));
2217                        kfree(nl_table);
2218                        goto panic;
2219                }
2220                hash->max_shift = order;
2221                hash->shift = 0;
2222                hash->mask = 0;
2223                hash->rehash_time = jiffies;
2224        }
2225
2226        netlink_add_usersock_entry();
2227
2228        sock_register(&netlink_family_ops);
2229        register_pernet_subsys(&netlink_net_ops);
2230        /* The netlink device handler may be needed early. */
2231        rtnetlink_init();
2232out:
2233        return err;
2234panic:
2235        panic("netlink_init: Cannot allocate nl_table\n");
2236}
2237
2238core_initcall(netlink_proto_init);
2239