linux/net/core/neighbour.c
<<
>>
Prefs
   1/*
   2 *      Generic address resolution entity
   3 *
   4 *      Authors:
   5 *      Pedro Roque             <roque@di.fc.ul.pt>
   6 *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
   7 *
   8 *      This program is free software; you can redistribute it and/or
   9 *      modify it under the terms of the GNU General Public License
  10 *      as published by the Free Software Foundation; either version
  11 *      2 of the License, or (at your option) any later version.
  12 *
  13 *      Fixes:
  14 *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
  15 *      Harald Welte            Add neighbour cache statistics like rtstat
  16 */
  17
  18#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  19
  20#include <linux/slab.h>
  21#include <linux/types.h>
  22#include <linux/kernel.h>
  23#include <linux/module.h>
  24#include <linux/socket.h>
  25#include <linux/netdevice.h>
  26#include <linux/proc_fs.h>
  27#ifdef CONFIG_SYSCTL
  28#include <linux/sysctl.h>
  29#endif
  30#include <linux/times.h>
  31#include <net/net_namespace.h>
  32#include <net/neighbour.h>
  33#include <net/dst.h>
  34#include <net/sock.h>
  35#include <net/netevent.h>
  36#include <net/netlink.h>
  37#include <linux/rtnetlink.h>
  38#include <linux/random.h>
  39#include <linux/string.h>
  40#include <linux/log2.h>
  41
  /*
   * Compile-time debug verbosity for this file:
   *   >= 1 routes NEIGH_PRINTK1() to printk(),
   *   >= 2 routes NEIGH_PRINTK2() to printk() as well.
   * A disabled level expands to an empty statement.
   */
  #define NEIGH_DEBUG 1

  #define NEIGH_PRINTK(x...) printk(x)
  #define NEIGH_NOPRINTK(x...) do { ; } while(0)

  #if NEIGH_DEBUG >= 1
  #define NEIGH_PRINTK1 NEIGH_PRINTK
  #else
  #define NEIGH_PRINTK1 NEIGH_NOPRINTK
  #endif

  #if NEIGH_DEBUG >= 2
  #define NEIGH_PRINTK2 NEIGH_PRINTK
  #else
  #define NEIGH_PRINTK2 NEIGH_NOPRINTK
  #endif

  /* Mask applied to the proxy-neighbour hash; buckets run 0..PNEIGH_HASHMASK. */
  #define PNEIGH_HASHMASK         0xF

  /* Forward declarations for helpers defined later in this file. */
  static void neigh_timer_handler(unsigned long arg);
  static void __neigh_notify(struct neighbour *n, int type, int flags);
  static void neigh_update_notify(struct neighbour *neigh);
  static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);

  /* Head of the list of neighbour tables. */
  static struct neigh_table *neigh_tables;
  #ifdef CONFIG_PROC_FS
  static const struct file_operations neigh_stat_seq_fops;
  #endif
  69
  70/*
  71   Neighbour hash table buckets are protected with rwlock tbl->lock.
  72
     - All scans of and updates to the hash buckets MUST be done under
       this lock.
     - NOTHING clever may be done under this lock: no callbacks
       to protocol backends, no attempts to send anything to the network.
       Doing so will deadlock if the backend/driver wants to use the
       neighbour cache.
     - If an entry requires some non-trivial action, increase
       its reference count and release the table lock.
  80
  81   Neighbour entries are protected:
  82   - with reference count.
  83   - with rwlock neigh->lock
  84
  85   Reference count prevents destruction.
  86
     neigh->lock mainly serializes the ll address data and its validity state.
     However, the same lock is also used to protect other entry fields:
      - timer
      - resolution queue

     Again, nothing clever may be done under neigh->lock;
     the most complicated procedure we allow is dev->hard_header.
     It is assumed that dev->hard_header is simplistic and does
     not make callbacks to neighbour tables.
  96
     The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
     the list of neighbour tables. This list is used only in process context.
   */
 100
 /* rwlock protecting the list of neighbour tables (see the locking notes above). */
 static DEFINE_RWLOCK(neigh_tbl_lock);
 102
 103static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
 104{
 105        kfree_skb(skb);
 106        return -ENETDOWN;
 107}
 108
 109static void neigh_cleanup_and_release(struct neighbour *neigh)
 110{
 111        if (neigh->parms->neigh_cleanup)
 112                neigh->parms->neigh_cleanup(neigh);
 113
 114        __neigh_notify(neigh, RTM_DELNEIGH, 0);
 115        neigh_release(neigh);
 116}
 117
 /*
  * Draw a random reachable time from the interval
  * (1/2)*base ... (3/2)*base; a @base of 0 yields 0.
  * This corresponds to the default IPv6 settings and is deliberately
  * not overridable, because it is a really reasonable choice.
  */

 unsigned long neigh_rand_reach_time(unsigned long base)
 {
         if (!base)
                 return 0;

         return (net_random() % base) + (base >> 1);
 }
 EXPORT_SYMBOL(neigh_rand_reach_time);
 129
 130
 131static int neigh_forced_gc(struct neigh_table *tbl)
 132{
 133        int shrunk = 0;
 134        int i;
 135        struct neigh_hash_table *nht;
 136
 137        NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
 138
 139        write_lock_bh(&tbl->lock);
 140        nht = rcu_dereference_protected(tbl->nht,
 141                                        lockdep_is_held(&tbl->lock));
 142        for (i = 0; i < (1 << nht->hash_shift); i++) {
 143                struct neighbour *n;
 144                struct neighbour __rcu **np;
 145
 146                np = &nht->hash_buckets[i];
 147                while ((n = rcu_dereference_protected(*np,
 148                                        lockdep_is_held(&tbl->lock))) != NULL) {
 149                        /* Neighbour record may be discarded if:
 150                         * - nobody refers to it.
 151                         * - it is not permanent
 152                         */
 153                        write_lock(&n->lock);
 154                        if (atomic_read(&n->refcnt) == 1 &&
 155                            !(n->nud_state & NUD_PERMANENT)) {
 156                                rcu_assign_pointer(*np,
 157                                        rcu_dereference_protected(n->next,
 158                                                  lockdep_is_held(&tbl->lock)));
 159                                n->dead = 1;
 160                                shrunk  = 1;
 161                                write_unlock(&n->lock);
 162                                neigh_cleanup_and_release(n);
 163                                continue;
 164                        }
 165                        write_unlock(&n->lock);
 166                        np = &n->next;
 167                }
 168        }
 169
 170        tbl->last_flush = jiffies;
 171
 172        write_unlock_bh(&tbl->lock);
 173
 174        return shrunk;
 175}
 176
 177static void neigh_add_timer(struct neighbour *n, unsigned long when)
 178{
 179        neigh_hold(n);
 180        if (unlikely(mod_timer(&n->timer, when))) {
 181                printk("NEIGH: BUG, double timer add, state is %x\n",
 182                       n->nud_state);
 183                dump_stack();
 184        }
 185}
 186
 187static int neigh_del_timer(struct neighbour *n)
 188{
 189        if ((n->nud_state & NUD_IN_TIMER) &&
 190            del_timer(&n->timer)) {
 191                neigh_release(n);
 192                return 1;
 193        }
 194        return 0;
 195}
 196
 197static void pneigh_queue_purge(struct sk_buff_head *list)
 198{
 199        struct sk_buff *skb;
 200
 201        while ((skb = skb_dequeue(list)) != NULL) {
 202                dev_put(skb->dev);
 203                kfree_skb(skb);
 204        }
 205}
 206
 207static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 208{
 209        int i;
 210        struct neigh_hash_table *nht;
 211
 212        nht = rcu_dereference_protected(tbl->nht,
 213                                        lockdep_is_held(&tbl->lock));
 214
 215        for (i = 0; i < (1 << nht->hash_shift); i++) {
 216                struct neighbour *n;
 217                struct neighbour __rcu **np = &nht->hash_buckets[i];
 218
 219                while ((n = rcu_dereference_protected(*np,
 220                                        lockdep_is_held(&tbl->lock))) != NULL) {
 221                        if (dev && n->dev != dev) {
 222                                np = &n->next;
 223                                continue;
 224                        }
 225                        rcu_assign_pointer(*np,
 226                                   rcu_dereference_protected(n->next,
 227                                                lockdep_is_held(&tbl->lock)));
 228                        write_lock(&n->lock);
 229                        neigh_del_timer(n);
 230                        n->dead = 1;
 231
 232                        if (atomic_read(&n->refcnt) != 1) {
 233                                /* The most unpleasant situation.
 234                                   We must destroy neighbour entry,
 235                                   but someone still uses it.
 236
 237                                   The destroy will be delayed until
 238                                   the last user releases us, but
 239                                   we must kill timers etc. and move
 240                                   it to safe state.
 241                                 */
 242                                skb_queue_purge(&n->arp_queue);
 243                                n->arp_queue_len_bytes = 0;
 244                                n->output = neigh_blackhole;
 245                                if (n->nud_state & NUD_VALID)
 246                                        n->nud_state = NUD_NOARP;
 247                                else
 248                                        n->nud_state = NUD_NONE;
 249                                NEIGH_PRINTK2("neigh %p is stray.\n", n);
 250                        }
 251                        write_unlock(&n->lock);
 252                        neigh_cleanup_and_release(n);
 253                }
 254        }
 255}
 256
 257void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
 258{
 259        write_lock_bh(&tbl->lock);
 260        neigh_flush_dev(tbl, dev);
 261        write_unlock_bh(&tbl->lock);
 262}
 263EXPORT_SYMBOL(neigh_changeaddr);
 264
 265int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 266{
 267        write_lock_bh(&tbl->lock);
 268        neigh_flush_dev(tbl, dev);
 269        pneigh_ifdown(tbl, dev);
 270        write_unlock_bh(&tbl->lock);
 271
 272        del_timer_sync(&tbl->proxy_timer);
 273        pneigh_queue_purge(&tbl->proxy_queue);
 274        return 0;
 275}
 276EXPORT_SYMBOL(neigh_ifdown);
 277
 278static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
 279{
 280        struct neighbour *n = NULL;
 281        unsigned long now = jiffies;
 282        int entries;
 283
 284        entries = atomic_inc_return(&tbl->entries) - 1;
 285        if (entries >= tbl->gc_thresh3 ||
 286            (entries >= tbl->gc_thresh2 &&
 287             time_after(now, tbl->last_flush + 5 * HZ))) {
 288                if (!neigh_forced_gc(tbl) &&
 289                    entries >= tbl->gc_thresh3)
 290                        goto out_entries;
 291        }
 292
 293        if (tbl->entry_size)
 294                n = kzalloc(tbl->entry_size, GFP_ATOMIC);
 295        else {
 296                int sz = sizeof(*n) + tbl->key_len;
 297
 298                sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
 299                sz += dev->neigh_priv_len;
 300                n = kzalloc(sz, GFP_ATOMIC);
 301        }
 302        if (!n)
 303                goto out_entries;
 304
 305        skb_queue_head_init(&n->arp_queue);
 306        rwlock_init(&n->lock);
 307        seqlock_init(&n->ha_lock);
 308        n->updated        = n->used = now;
 309        n->nud_state      = NUD_NONE;
 310        n->output         = neigh_blackhole;
 311        seqlock_init(&n->hh.hh_lock);
 312        n->parms          = neigh_parms_clone(&tbl->parms);
 313        setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
 314
 315        NEIGH_CACHE_STAT_INC(tbl, allocs);
 316        n->tbl            = tbl;
 317        atomic_set(&n->refcnt, 1);
 318        n->dead           = 1;
 319out:
 320        return n;
 321
 322out_entries:
 323        atomic_dec(&tbl->entries);
 324        goto out;
 325}
 326
 327static void neigh_get_hash_rnd(u32 *x)
 328{
 329        get_random_bytes(x, sizeof(*x));
 330        *x |= 1;
 331}
 332
 333static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
 334{
 335        size_t size = (1 << shift) * sizeof(struct neighbour *);
 336        struct neigh_hash_table *ret;
 337        struct neighbour __rcu **buckets;
 338        int i;
 339
 340        ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
 341        if (!ret)
 342                return NULL;
 343        if (size <= PAGE_SIZE)
 344                buckets = kzalloc(size, GFP_ATOMIC);
 345        else
 346                buckets = (struct neighbour __rcu **)
 347                          __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
 348                                           get_order(size));
 349        if (!buckets) {
 350                kfree(ret);
 351                return NULL;
 352        }
 353        ret->hash_buckets = buckets;
 354        ret->hash_shift = shift;
 355        for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
 356                neigh_get_hash_rnd(&ret->hash_rnd[i]);
 357        return ret;
 358}
 359
 360static void neigh_hash_free_rcu(struct rcu_head *head)
 361{
 362        struct neigh_hash_table *nht = container_of(head,
 363                                                    struct neigh_hash_table,
 364                                                    rcu);
 365        size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
 366        struct neighbour __rcu **buckets = nht->hash_buckets;
 367
 368        if (size <= PAGE_SIZE)
 369                kfree(buckets);
 370        else
 371                free_pages((unsigned long)buckets, get_order(size));
 372        kfree(nht);
 373}
 374
 375static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
 376                                                unsigned long new_shift)
 377{
 378        unsigned int i, hash;
 379        struct neigh_hash_table *new_nht, *old_nht;
 380
 381        NEIGH_CACHE_STAT_INC(tbl, hash_grows);
 382
 383        old_nht = rcu_dereference_protected(tbl->nht,
 384                                            lockdep_is_held(&tbl->lock));
 385        new_nht = neigh_hash_alloc(new_shift);
 386        if (!new_nht)
 387                return old_nht;
 388
 389        for (i = 0; i < (1 << old_nht->hash_shift); i++) {
 390                struct neighbour *n, *next;
 391
 392                for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
 393                                                   lockdep_is_held(&tbl->lock));
 394                     n != NULL;
 395                     n = next) {
 396                        hash = tbl->hash(n->primary_key, n->dev,
 397                                         new_nht->hash_rnd);
 398
 399                        hash >>= (32 - new_nht->hash_shift);
 400                        next = rcu_dereference_protected(n->next,
 401                                                lockdep_is_held(&tbl->lock));
 402
 403                        rcu_assign_pointer(n->next,
 404                                           rcu_dereference_protected(
 405                                                new_nht->hash_buckets[hash],
 406                                                lockdep_is_held(&tbl->lock)));
 407                        rcu_assign_pointer(new_nht->hash_buckets[hash], n);
 408                }
 409        }
 410
 411        rcu_assign_pointer(tbl->nht, new_nht);
 412        call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
 413        return new_nht;
 414}
 415
 416struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
 417                               struct net_device *dev)
 418{
 419        struct neighbour *n;
 420        int key_len = tbl->key_len;
 421        u32 hash_val;
 422        struct neigh_hash_table *nht;
 423
 424        NEIGH_CACHE_STAT_INC(tbl, lookups);
 425
 426        rcu_read_lock_bh();
 427        nht = rcu_dereference_bh(tbl->nht);
 428        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 429
 430        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
 431             n != NULL;
 432             n = rcu_dereference_bh(n->next)) {
 433                if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
 434                        if (!atomic_inc_not_zero(&n->refcnt))
 435                                n = NULL;
 436                        NEIGH_CACHE_STAT_INC(tbl, hits);
 437                        break;
 438                }
 439        }
 440
 441        rcu_read_unlock_bh();
 442        return n;
 443}
 444EXPORT_SYMBOL(neigh_lookup);
 445
 446struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
 447                                     const void *pkey)
 448{
 449        struct neighbour *n;
 450        int key_len = tbl->key_len;
 451        u32 hash_val;
 452        struct neigh_hash_table *nht;
 453
 454        NEIGH_CACHE_STAT_INC(tbl, lookups);
 455
 456        rcu_read_lock_bh();
 457        nht = rcu_dereference_bh(tbl->nht);
 458        hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
 459
 460        for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
 461             n != NULL;
 462             n = rcu_dereference_bh(n->next)) {
 463                if (!memcmp(n->primary_key, pkey, key_len) &&
 464                    net_eq(dev_net(n->dev), net)) {
 465                        if (!atomic_inc_not_zero(&n->refcnt))
 466                                n = NULL;
 467                        NEIGH_CACHE_STAT_INC(tbl, hits);
 468                        break;
 469                }
 470        }
 471
 472        rcu_read_unlock_bh();
 473        return n;
 474}
 475EXPORT_SYMBOL(neigh_lookup_nodev);
 476
 477struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
 478                                 struct net_device *dev, bool want_ref)
 479{
 480        u32 hash_val;
 481        int key_len = tbl->key_len;
 482        int error;
 483        struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
 484        struct neigh_hash_table *nht;
 485
 486        if (!n) {
 487                rc = ERR_PTR(-ENOBUFS);
 488                goto out;
 489        }
 490
 491        memcpy(n->primary_key, pkey, key_len);
 492        n->dev = dev;
 493        dev_hold(dev);
 494
 495        /* Protocol specific setup. */
 496        if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
 497                rc = ERR_PTR(error);
 498                goto out_neigh_release;
 499        }
 500
 501        if (dev->netdev_ops->ndo_neigh_construct) {
 502                error = dev->netdev_ops->ndo_neigh_construct(n);
 503                if (error < 0) {
 504                        rc = ERR_PTR(error);
 505                        goto out_neigh_release;
 506                }
 507        }
 508
 509        /* Device specific setup. */
 510        if (n->parms->neigh_setup &&
 511            (error = n->parms->neigh_setup(n)) < 0) {
 512                rc = ERR_PTR(error);
 513                goto out_neigh_release;
 514        }
 515
 516        n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
 517
 518        write_lock_bh(&tbl->lock);
 519        nht = rcu_dereference_protected(tbl->nht,
 520                                        lockdep_is_held(&tbl->lock));
 521
 522        if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
 523                nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
 524
 525        hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
 526
 527        if (n->parms->dead) {
 528                rc = ERR_PTR(-EINVAL);
 529                goto out_tbl_unlock;
 530        }
 531
 532        for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
 533                                            lockdep_is_held(&tbl->lock));
 534             n1 != NULL;
 535             n1 = rcu_dereference_protected(n1->next,
 536                        lockdep_is_held(&tbl->lock))) {
 537                if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
 538                        if (want_ref)
 539                                neigh_hold(n1);
 540                        rc = n1;
 541                        goto out_tbl_unlock;
 542                }
 543        }
 544
 545        n->dead = 0;
 546        if (want_ref)
 547                neigh_hold(n);
 548        rcu_assign_pointer(n->next,
 549                           rcu_dereference_protected(nht->hash_buckets[hash_val],
 550                                                     lockdep_is_held(&tbl->lock)));
 551        rcu_assign_pointer(nht->hash_buckets[hash_val], n);
 552        write_unlock_bh(&tbl->lock);
 553        NEIGH_PRINTK2("neigh %p is created.\n", n);
 554        rc = n;
 555out:
 556        return rc;
 557out_tbl_unlock:
 558        write_unlock_bh(&tbl->lock);
 559out_neigh_release:
 560        neigh_release(n);
 561        goto out;
 562}
 563EXPORT_SYMBOL(__neigh_create);
 564
 565static u32 pneigh_hash(const void *pkey, int key_len)
 566{
 567        u32 hash_val = *(u32 *)(pkey + key_len - 4);
 568        hash_val ^= (hash_val >> 16);
 569        hash_val ^= hash_val >> 8;
 570        hash_val ^= hash_val >> 4;
 571        hash_val &= PNEIGH_HASHMASK;
 572        return hash_val;
 573}
 574
 575static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
 576                                              struct net *net,
 577                                              const void *pkey,
 578                                              int key_len,
 579                                              struct net_device *dev)
 580{
 581        while (n) {
 582                if (!memcmp(n->key, pkey, key_len) &&
 583                    net_eq(pneigh_net(n), net) &&
 584                    (n->dev == dev || !n->dev))
 585                        return n;
 586                n = n->next;
 587        }
 588        return NULL;
 589}
 590
 591struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
 592                struct net *net, const void *pkey, struct net_device *dev)
 593{
 594        int key_len = tbl->key_len;
 595        u32 hash_val = pneigh_hash(pkey, key_len);
 596
 597        return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
 598                                 net, pkey, key_len, dev);
 599}
 600EXPORT_SYMBOL_GPL(__pneigh_lookup);
 601
 602struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
 603                                    struct net *net, const void *pkey,
 604                                    struct net_device *dev, int creat)
 605{
 606        struct pneigh_entry *n;
 607        int key_len = tbl->key_len;
 608        u32 hash_val = pneigh_hash(pkey, key_len);
 609
 610        read_lock_bh(&tbl->lock);
 611        n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
 612                              net, pkey, key_len, dev);
 613        read_unlock_bh(&tbl->lock);
 614
 615        if (n || !creat)
 616                goto out;
 617
 618        ASSERT_RTNL();
 619
 620        n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
 621        if (!n)
 622                goto out;
 623
 624        write_pnet(&n->net, hold_net(net));
 625        memcpy(n->key, pkey, key_len);
 626        n->dev = dev;
 627        if (dev)
 628                dev_hold(dev);
 629
 630        if (tbl->pconstructor && tbl->pconstructor(n)) {
 631                if (dev)
 632                        dev_put(dev);
 633                release_net(net);
 634                kfree(n);
 635                n = NULL;
 636                goto out;
 637        }
 638
 639        write_lock_bh(&tbl->lock);
 640        n->next = tbl->phash_buckets[hash_val];
 641        tbl->phash_buckets[hash_val] = n;
 642        write_unlock_bh(&tbl->lock);
 643out:
 644        return n;
 645}
 646EXPORT_SYMBOL(pneigh_lookup);
 647
 648
 649int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
 650                  struct net_device *dev)
 651{
 652        struct pneigh_entry *n, **np;
 653        int key_len = tbl->key_len;
 654        u32 hash_val = pneigh_hash(pkey, key_len);
 655
 656        write_lock_bh(&tbl->lock);
 657        for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
 658             np = &n->next) {
 659                if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
 660                    net_eq(pneigh_net(n), net)) {
 661                        *np = n->next;
 662                        write_unlock_bh(&tbl->lock);
 663                        if (tbl->pdestructor)
 664                                tbl->pdestructor(n);
 665                        if (n->dev)
 666                                dev_put(n->dev);
 667                        release_net(pneigh_net(n));
 668                        kfree(n);
 669                        return 0;
 670                }
 671        }
 672        write_unlock_bh(&tbl->lock);
 673        return -ENOENT;
 674}
 675
 676static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 677{
 678        struct pneigh_entry *n, **np;
 679        u32 h;
 680
 681        for (h = 0; h <= PNEIGH_HASHMASK; h++) {
 682                np = &tbl->phash_buckets[h];
 683                while ((n = *np) != NULL) {
 684                        if (!dev || n->dev == dev) {
 685                                *np = n->next;
 686                                if (tbl->pdestructor)
 687                                        tbl->pdestructor(n);
 688                                if (n->dev)
 689                                        dev_put(n->dev);
 690                                release_net(pneigh_net(n));
 691                                kfree(n);
 692                                continue;
 693                        }
 694                        np = &n->next;
 695                }
 696        }
 697        return -ENOENT;
 698}
 699
 700static void neigh_parms_destroy(struct neigh_parms *parms);
 701
 702static inline void neigh_parms_put(struct neigh_parms *parms)
 703{
 704        if (atomic_dec_and_test(&parms->refcnt))
 705                neigh_parms_destroy(parms);
 706}
 707
 708/*
 709 *      neighbour must already be out of the table;
 710 *
 711 */
 712void neigh_destroy(struct neighbour *neigh)
 713{
 714        struct net_device *dev = neigh->dev;
 715
 716        NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
 717
 718        if (!neigh->dead) {
 719                pr_warn("Destroying alive neighbour %p\n", neigh);
 720                dump_stack();
 721                return;
 722        }
 723
 724        if (neigh_del_timer(neigh))
 725                pr_warn("Impossible event\n");
 726
 727        skb_queue_purge(&neigh->arp_queue);
 728        neigh->arp_queue_len_bytes = 0;
 729
 730        if (dev->netdev_ops->ndo_neigh_destroy)
 731                dev->netdev_ops->ndo_neigh_destroy(neigh);
 732
 733        dev_put(dev);
 734        neigh_parms_put(neigh->parms);
 735
 736        NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
 737
 738        atomic_dec(&neigh->tbl->entries);
 739        kfree_rcu(neigh, rcu);
 740}
 741EXPORT_SYMBOL(neigh_destroy);
 742
 743/* Neighbour state is suspicious;
 744   disable fast path.
 745
 746   Called with write_locked neigh.
 747 */
 748static void neigh_suspect(struct neighbour *neigh)
 749{
 750        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
 751
 752        neigh->output = neigh->ops->output;
 753}
 754
 755/* Neighbour state is OK;
 756   enable fast path.
 757
 758   Called with write_locked neigh.
 759 */
 760static void neigh_connect(struct neighbour *neigh)
 761{
 762        NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
 763
 764        neigh->output = neigh->ops->connected_output;
 765}
 766
 767static void neigh_periodic_work(struct work_struct *work)
 768{
 769        struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
 770        struct neighbour *n;
 771        struct neighbour __rcu **np;
 772        unsigned int i;
 773        struct neigh_hash_table *nht;
 774
 775        NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
 776
 777        write_lock_bh(&tbl->lock);
 778        nht = rcu_dereference_protected(tbl->nht,
 779                                        lockdep_is_held(&tbl->lock));
 780
 781        /*
 782         *      periodically recompute ReachableTime from random function
 783         */
 784
 785        if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
 786                struct neigh_parms *p;
 787                tbl->last_rand = jiffies;
 788                for (p = &tbl->parms; p; p = p->next)
 789                        p->reachable_time =
 790                                neigh_rand_reach_time(p->base_reachable_time);
 791        }
 792
 793        for (i = 0 ; i < (1 << nht->hash_shift); i++) {
 794                np = &nht->hash_buckets[i];
 795
 796                while ((n = rcu_dereference_protected(*np,
 797                                lockdep_is_held(&tbl->lock))) != NULL) {
 798                        unsigned int state;
 799
 800                        write_lock(&n->lock);
 801
 802                        state = n->nud_state;
 803                        if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
 804                                write_unlock(&n->lock);
 805                                goto next_elt;
 806                        }
 807
 808                        if (time_before(n->used, n->confirmed))
 809                                n->used = n->confirmed;
 810
 811                        if (atomic_read(&n->refcnt) == 1 &&
 812                            (state == NUD_FAILED ||
 813                             time_after(jiffies, n->used + n->parms->gc_staletime))) {
 814                                *np = n->next;
 815                                n->dead = 1;
 816                                write_unlock(&n->lock);
 817                                neigh_cleanup_and_release(n);
 818                                continue;
 819                        }
 820                        write_unlock(&n->lock);
 821
 822next_elt:
 823                        np = &n->next;
 824                }
 825                /*
 826                 * It's fine to release lock here, even if hash table
 827                 * grows while we are preempted.
 828                 */
 829                write_unlock_bh(&tbl->lock);
 830                cond_resched();
 831                write_lock_bh(&tbl->lock);
 832                nht = rcu_dereference_protected(tbl->nht,
 833                                                lockdep_is_held(&tbl->lock));
 834        }
 835        /* Cycle through all hash buckets every base_reachable_time/2 ticks.
 836         * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
 837         * base_reachable_time.
 838         */
 839        schedule_delayed_work(&tbl->gc_work,
 840                              tbl->parms.base_reachable_time >> 1);
 841        write_unlock_bh(&tbl->lock);
 842}
 843
 844static __inline__ int neigh_max_probes(struct neighbour *n)
 845{
 846        struct neigh_parms *p = n->parms;
 847        return (n->nud_state & NUD_PROBE) ?
 848                p->ucast_probes :
 849                p->ucast_probes + p->app_probes + p->mcast_probes;
 850}
 851
 852static void neigh_invalidate(struct neighbour *neigh)
 853        __releases(neigh->lock)
 854        __acquires(neigh->lock)
 855{
 856        struct sk_buff *skb;
 857
 858        NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
 859        NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
 860        neigh->updated = jiffies;
 861
 862        /* It is very thin place. report_unreachable is very complicated
 863           routine. Particularly, it can hit the same neighbour entry!
 864
 865           So that, we try to be accurate and avoid dead loop. --ANK
 866         */
 867        while (neigh->nud_state == NUD_FAILED &&
 868               (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
 869                write_unlock(&neigh->lock);
 870                neigh->ops->error_report(neigh, skb);
 871                write_lock(&neigh->lock);
 872        }
 873        skb_queue_purge(&neigh->arp_queue);
 874        neigh->arp_queue_len_bytes = 0;
 875}
 876
 877static void neigh_probe(struct neighbour *neigh)
 878        __releases(neigh->lock)
 879{
 880        struct sk_buff *skb = skb_peek(&neigh->arp_queue);
 881        /* keep skb alive even if arp_queue overflows */
 882        if (skb)
 883                skb = skb_copy(skb, GFP_ATOMIC);
 884        write_unlock(&neigh->lock);
 885        neigh->ops->solicit(neigh, skb);
 886        atomic_inc(&neigh->probes);
 887        kfree_skb(skb);
 888}
 889
 890/* Called when a timer expires for a neighbour entry.
     *
     * @arg is the struct neighbour pointer cast to unsigned long.
     *
     * Under neigh->lock (write) the handler advances the entry's NUD state
     * according to how recently it was confirmed/used, declares it FAILED
     * once the probe budget from neigh_max_probes() is exhausted, re-arms
     * the timer while the entry stays in a NUD_IN_TIMER state and sends a
     * probe for INCOMPLETE/PROBE entries.  One reference on the entry is
     * dropped on every exit path.
     */
 891
 892static void neigh_timer_handler(unsigned long arg)
 893{
 894        unsigned long now, next;
 895        struct neighbour *neigh = (struct neighbour *)arg;
 896        unsigned int state;
 897        int notify = 0;
 898
 899        write_lock(&neigh->lock);
 900
 901        state = neigh->nud_state;
 902        now = jiffies;
 903        next = now + HZ;
 904
            /* The entry left the timed states meanwhile: nothing to evaluate. */
 905        if (!(state & NUD_IN_TIMER))
 906                goto out;
 907
 908        if (state & NUD_REACHABLE) {
 909                if (time_before_eq(now,
 910                                   neigh->confirmed + neigh->parms->reachable_time)) {
                            /* Confirmed recently enough: just wait until it would lapse. */
 911                        NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
 912                        next = neigh->confirmed + neigh->parms->reachable_time;
 913                } else if (time_before_eq(now,
 914                                          neigh->used + neigh->parms->delay_probe_time)) {
                            /* Not confirmed but used recently: REACHABLE -> DELAY. */
 915                        NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
 916                        neigh->nud_state = NUD_DELAY;
 917                        neigh->updated = jiffies;
 918                        neigh_suspect(neigh);
 919                        next = now + neigh->parms->delay_probe_time;
 920                } else {
                            /*
                             * Neither confirmed nor used recently: REACHABLE -> STALE.
                             * STALE is tested against NUD_IN_TIMER below before re-arming.
                             */
 921                        NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
 922                        neigh->nud_state = NUD_STALE;
 923                        neigh->updated = jiffies;
 924                        neigh_suspect(neigh);
 925                        notify = 1;
 926                }
 927        } else if (state & NUD_DELAY) {
 928                if (time_before_eq(now,
 929                                   neigh->confirmed + neigh->parms->delay_probe_time)) {
                            /* A confirmation arrived during the delay: back to REACHABLE. */
 930                        NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
 931                        neigh->nud_state = NUD_REACHABLE;
 932                        neigh->updated = jiffies;
 933                        neigh_connect(neigh);
 934                        notify = 1;
 935                        next = neigh->confirmed + neigh->parms->reachable_time;
 936                } else {
                            /* No confirmation: start probing with a fresh probe counter. */
 937                        NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
 938                        neigh->nud_state = NUD_PROBE;
 939                        neigh->updated = jiffies;
 940                        atomic_set(&neigh->probes, 0);
 941                        next = now + neigh->parms->retrans_time;
 942                }
 943        } else {
 944                /* NUD_PROBE|NUD_INCOMPLETE */
 945                next = now + neigh->parms->retrans_time;
 946        }
 947
            /*
             * Tested against the possibly updated neigh->nud_state, not the
             * snapshot in 'state': out of probes means the resolution failed.
             */
 948        if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
 949            atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
 950                neigh->nud_state = NUD_FAILED;
 951                notify = 1;
 952                neigh_invalidate(neigh);
 953        }
 954
 955        if (neigh->nud_state & NUD_IN_TIMER) {
                    /* Never re-arm for sooner than HZ/2 from now. */
 956                if (time_before(next, jiffies + HZ/2))
 957                        next = jiffies + HZ/2;
                    /*
                     * mod_timer() returns 0 when the timer was not pending, in
                     * which case a reference is taken for the newly armed timer.
                     */
 958                if (!mod_timer(&neigh->timer, next))
 959                        neigh_hold(neigh);
 960        }
 961        if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
                    /* neigh_probe() releases neigh->lock itself. */
 962                neigh_probe(neigh);
 963        } else {
 964out:
 965                write_unlock(&neigh->lock);
 966        }
 967
 968        if (notify)
 969                neigh_update_notify(neigh);
 970
            /* NOTE(review): presumably the reference held for the timer that just fired. */
 971        neigh_release(neigh);
 972}
 973
 /*
  * Kick address resolution for @neigh on behalf of an outgoing @skb.
  *
  * Returns 0 when the caller may go on and transmit (entry already in a
  * CONNECTED, DELAY or PROBE state, or a STALE entry that was just moved to
  * DELAY).  Returns 1 when the caller must not transmit: either the entry
  * is INCOMPLETE (a non-NULL @skb has then been appended to arp_queue) or
  * resolution is impossible and @skb has been freed.
  */
 974int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 975{
 976        int rc;
 977        bool immediate_probe = false;
 978
 979        write_lock_bh(&neigh->lock);
 980
 981        rc = 0;
 982        if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
 983                goto out_unlock_bh;
 984
 985        if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
                    /* Neither STALE nor INCOMPLETE: resolution has to be started. */
 986                if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
 987                        unsigned long next, now = jiffies;
 988
                            /*
                             * The counter starts at ucast_probes, so of the
                             * INCOMPLETE budget computed by neigh_max_probes()
                             * only the app + mcast part remains.
                             */
 989                        atomic_set(&neigh->probes, neigh->parms->ucast_probes);
 990                        neigh->nud_state     = NUD_INCOMPLETE;
 991                        neigh->updated = now;
 992                        next = now + max(neigh->parms->retrans_time, HZ/2);
 993                        neigh_add_timer(neigh, next);
 994                        immediate_probe = true;
 995                } else {
                            /* No multicast/application probes configured: fail at once. */
 996                        neigh->nud_state = NUD_FAILED;
 997                        neigh->updated = jiffies;
 998                        write_unlock_bh(&neigh->lock);
 999
1000                        kfree_skb(skb);
1001                        return 1;
1002                }
1003        } else if (neigh->nud_state & NUD_STALE) {
1004                NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
1005                neigh->nud_state = NUD_DELAY;
1006                neigh->updated = jiffies;
1007                neigh_add_timer(neigh,
1008                                jiffies + neigh->parms->delay_probe_time);
1009        }
1010
1011        if (neigh->nud_state == NUD_INCOMPLETE) {
1012                if (skb) {
                            /*
                             * Make room: drop packets from the head of the queue
                             * until the new one fits in queue_len_bytes or the
                             * queue is empty.
                             */
1013                        while (neigh->arp_queue_len_bytes + skb->truesize >
1014                               neigh->parms->queue_len_bytes) {
1015                                struct sk_buff *buff;
1016
1017                                buff = __skb_dequeue(&neigh->arp_queue);
1018                                if (!buff)
1019                                        break;
1020                                neigh->arp_queue_len_bytes -= buff->truesize;
1021                                kfree_skb(buff);
1022                                NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1023                        }
1024                        skb_dst_force(skb);
1025                        __skb_queue_tail(&neigh->arp_queue, skb);
1026                        neigh->arp_queue_len_bytes += skb->truesize;
1027                }
1028                rc = 1;
1029        }
1030out_unlock_bh:
            /*
             * The _bh unlock is done in two steps because neigh_probe()
             * releases neigh->lock itself with a plain write_unlock().
             */
1031        if (immediate_probe)
1032                neigh_probe(neigh);
1033        else
1034                write_unlock(&neigh->lock);
1035        local_bh_enable();
1036        return rc;
1037}
1038EXPORT_SYMBOL(__neigh_event_send);
1039
1040static void neigh_update_hhs(struct neighbour *neigh)
1041{
1042        struct hh_cache *hh;
1043        void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1044                = NULL;
1045
1046        if (neigh->dev->header_ops)
1047                update = neigh->dev->header_ops->cache_update;
1048
1049        if (update) {
1050                hh = &neigh->hh;
1051                if (hh->hh_len) {
1052                        write_seqlock_bh(&hh->hh_lock);
1053                        update(hh, neigh->dev, neigh->ha);
1054                        write_sequnlock_bh(&hh->hh_lock);
1055                }
1056        }
1057}
1058
1059
1060
1061/* Generic update routine.
1062   -- lladdr is new lladdr or NULL, if it is not supplied.
1063   -- new    is new state.
1064   -- flags
1065        NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1066                                if it is different.
1067        NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1068                                lladdr instead of overriding it
1069                                if it is different.
1070                                It also allows to retain current state
1071                                if lladdr is unchanged.
1072        NEIGH_UPDATE_F_ADMIN    means that the change is administrative.
1073
1074        NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1075                                NTF_ROUTER flag.
1076        NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1077                                a router.
1078
1079   Caller MUST hold reference count on the entry.
1080 */
1081
/*
 * Return value: 0 on success; -EPERM when a non-administrative update
 * targets a NUD_NOARP or NUD_PERMANENT entry; -EINVAL when no lladdr is
 * supplied for an entry that holds no valid address yet.
 * Locking: takes neigh->lock (write, BH disabled) itself and drops it
 * temporarily while queued packets are re-injected.
 */
1082int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1083                 u32 flags)
1084{
1085        u8 old;
1086        int err;
1087        int notify = 0;
1088        struct net_device *dev;
1089        int update_isrouter = 0;
1090
1091        write_lock_bh(&neigh->lock);
1092
1093        dev    = neigh->dev;
1094        old    = neigh->nud_state;
1095        err    = -EPERM;
1096
            /* Only administrative updates may touch NOARP/PERMANENT entries. */
1097        if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1098            (old & (NUD_NOARP | NUD_PERMANENT)))
1099                goto out;
1100
            /*
             * New state carries no valid address: stop the timer, switch the
             * state and, on INCOMPLETE/PROBE -> FAILED, flush the pending queue.
             */
1101        if (!(new & NUD_VALID)) {
1102                neigh_del_timer(neigh);
1103                if (old & NUD_CONNECTED)
1104                        neigh_suspect(neigh);
1105                neigh->nud_state = new;
1106                err = 0;
1107                notify = old & NUD_VALID;
1108                if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1109                    (new & NUD_FAILED)) {
1110                        neigh_invalidate(neigh);
1111                        notify = 1;
1112                }
1113                goto out;
1114        }
1115
            /*
             * From here on "lladdr == neigh->ha" (pointer equality) is used as
             * the marker for "address unchanged".
             */
1116        /* Compare new lladdr with cached one */
1117        if (!dev->addr_len) {
1118                /* First case: device needs no address. */
1119                lladdr = neigh->ha;
1120        } else if (lladdr) {
1121                /* The second case: if something is already cached
1122                   and a new address is proposed:
1123                   - compare new & old
1124                   - if they are different, check override flag
1125                 */
1126                if ((old & NUD_VALID) &&
1127                    !memcmp(lladdr, neigh->ha, dev->addr_len))
1128                        lladdr = neigh->ha;
1129        } else {
1130                /* No address is supplied; if we know something,
1131                   use it, otherwise discard the request.
1132                 */
1133                err = -EINVAL;
1134                if (!(old & NUD_VALID))
1135                        goto out;
1136                lladdr = neigh->ha;
1137        }
1138
1139        if (new & NUD_CONNECTED)
1140                neigh->confirmed = jiffies;
1141        neigh->updated = jiffies;
1142
1143        /* If entry was valid and address is not changed,
1144           do not change entry state, if new one is STALE.
1145         */
1146        err = 0;
1147        update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1148        if (old & NUD_VALID) {
1149                if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
                            /*
                             * Different address without permission to override:
                             * with WEAK_OVERRIDE on a connected entry keep the
                             * old address but mark the entry STALE, otherwise
                             * ignore the update.  The router flag is left alone.
                             */
1150                        update_isrouter = 0;
1151                        if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1152                            (old & NUD_CONNECTED)) {
1153                                lladdr = neigh->ha;
1154                                new = NUD_STALE;
1155                        } else
1156                                goto out;
1157                } else {
1158                        if (lladdr == neigh->ha && new == NUD_STALE &&
1159                            ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1160                             (old & NUD_CONNECTED))
1161                            )
1162                                new = old;
1163                }
1164        }
1165
1166        if (new != old) {
1167                neigh_del_timer(neigh);
                    /*
                     * Timed states get a fresh timer: reachable_time from now
                     * for REACHABLE, "now" for the other timed states.
                     */
1168                if (new & NUD_IN_TIMER)
1169                        neigh_add_timer(neigh, (jiffies +
1170                                                ((new & NUD_REACHABLE) ?
1171                                                 neigh->parms->reachable_time :
1172                                                 0)));
1173                neigh->nud_state = new;
1174        }
1175
1176        if (lladdr != neigh->ha) {
                    /* Install the new address under the ha seqlock. */
1177                write_seqlock(&neigh->ha_lock);
1178                memcpy(&neigh->ha, lladdr, dev->addr_len);
1179                write_sequnlock(&neigh->ha_lock);
1180                neigh_update_hhs(neigh);
                    /*
                     * An address installed in a non-connected state must not
                     * look recently confirmed: push 'confirmed' into the past.
                     */
1181                if (!(new & NUD_CONNECTED))
1182                        neigh->confirmed = jiffies -
1183                                      (neigh->parms->base_reachable_time << 1);
1184                notify = 1;
1185        }
1186        if (new == old)
1187                goto out;
1188        if (new & NUD_CONNECTED)
1189                neigh_connect(neigh);
1190        else
1191                neigh_suspect(neigh);
            /*
             * The entry just became valid: send out everything that was queued
             * while it was unresolved.
             */
1192        if (!(old & NUD_VALID)) {
1193                struct sk_buff *skb;
1194
1195                /* Again: avoid dead loop if something went wrong */
1196
1197                while (neigh->nud_state & NUD_VALID &&
1198                       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1199                        struct dst_entry *dst = skb_dst(skb);
1200                        struct neighbour *n2, *n1 = neigh;
                            /* The output path runs without neigh->lock held. */
1201                        write_unlock_bh(&neigh->lock);
1202
1203                        rcu_read_lock();
1204
1205                        /* Why not just use 'neigh' as-is?  The problem is that
1206                         * things such as shaper, eql, and sch_teql can end up
1207                         * using alternative, different, neigh objects to output
1208                         * the packet in the output path.  So what we need to do
1209                         * here is re-lookup the top-level neigh in the path so
1210                         * we can reinject the packet there.
1211                         */
1212                        n2 = NULL;
1213                        if (dst) {
1214                                n2 = dst_neigh_lookup_skb(dst, skb);
1215                                if (n2)
1216                                        n1 = n2;
1217                        }
1218                        n1->output(n1, skb);
1219                        if (n2)
1220                                neigh_release(n2);
1221                        rcu_read_unlock();
1222
1223                        write_lock_bh(&neigh->lock);
1224                }
1225                skb_queue_purge(&neigh->arp_queue);
1226                neigh->arp_queue_len_bytes = 0;
1227        }
1228out:
            /* Set or clear NTF_ROUTER when the caller asked to override it. */
1229        if (update_isrouter) {
1230                neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1231                        (neigh->flags | NTF_ROUTER) :
1232                        (neigh->flags & ~NTF_ROUTER);
1233        }
1234        write_unlock_bh(&neigh->lock);
1235
1236        if (notify)
1237                neigh_update_notify(neigh);
1238
1239        return err;
1240}
1241EXPORT_SYMBOL(neigh_update);
1242
1243struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1244                                 u8 *lladdr, void *saddr,
1245                                 struct net_device *dev)
1246{
1247        struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1248                                                 lladdr || !dev->addr_len);
1249        if (neigh)
1250                neigh_update(neigh, lladdr, NUD_STALE,
1251                             NEIGH_UPDATE_F_OVERRIDE);
1252        return neigh;
1253}
1254EXPORT_SYMBOL(neigh_event_ns);
1255
1256/* called with read_lock_bh(&n->lock); */
1257static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1258{
1259        struct net_device *dev = dst->dev;
1260        __be16 prot = dst->ops->protocol;
1261        struct hh_cache *hh = &n->hh;
1262
1263        write_lock_bh(&n->lock);
1264
1265        /* Only one thread can come in here and initialize the
1266         * hh_cache entry.
1267         */
1268        if (!hh->hh_len)
1269                dev->header_ops->cache(n, hh, prot);
1270
1271        write_unlock_bh(&n->lock);
1272}
1273
1274/* This function can be used in contexts, where only old dev_queue_xmit
1275 * worked, f.e. if you want to override normal output path (eql, shaper),
1276 * but resolution is not made yet.
1277 */
1278
1279int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1280{
1281        struct net_device *dev = skb->dev;
1282
1283        __skb_pull(skb, skb_network_offset(skb));
1284
1285        if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1286                            skb->len) < 0 &&
1287            dev->header_ops->rebuild(skb))
1288                return 0;
1289
1290        return dev_queue_xmit(skb);
1291}
1292EXPORT_SYMBOL(neigh_compat_output);
1293
1294/* Slow and careful. */
1295
1296int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1297{
1298        struct dst_entry *dst = skb_dst(skb);
1299        int rc = 0;
1300
1301        if (!dst)
1302                goto discard;
1303
1304        __skb_pull(skb, skb_network_offset(skb));
1305
1306        if (!neigh_event_send(neigh, skb)) {
1307                int err;
1308                struct net_device *dev = neigh->dev;
1309                unsigned int seq;
1310
1311                if (dev->header_ops->cache && !neigh->hh.hh_len)
1312                        neigh_hh_init(neigh, dst);
1313
1314                do {
1315                        seq = read_seqbegin(&neigh->ha_lock);
1316                        err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1317                                              neigh->ha, NULL, skb->len);
1318                } while (read_seqretry(&neigh->ha_lock, seq));
1319
1320                if (err >= 0)
1321                        rc = dev_queue_xmit(skb);
1322                else
1323                        goto out_kfree_skb;
1324        }
1325out:
1326        return rc;
1327discard:
1328        NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1329                      dst, neigh);
1330out_kfree_skb:
1331        rc = -EINVAL;
1332        kfree_skb(skb);
1333        goto out;
1334}
1335EXPORT_SYMBOL(neigh_resolve_output);
1336
1337/* As fast as possible without hh cache */
1338
1339int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1340{
1341        struct net_device *dev = neigh->dev;
1342        unsigned int seq;
1343        int err;
1344
1345        __skb_pull(skb, skb_network_offset(skb));
1346
1347        do {
1348                seq = read_seqbegin(&neigh->ha_lock);
1349                err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1350                                      neigh->ha, NULL, skb->len);
1351        } while (read_seqretry(&neigh->ha_lock, seq));
1352
1353        if (err >= 0)
1354                err = dev_queue_xmit(skb);
1355        else {
1356                err = -EINVAL;
1357                kfree_skb(skb);
1358        }
1359        return err;
1360}
1361EXPORT_SYMBOL(neigh_connected_output);
1362
/*
 * Output method that hands @skb straight to dev_queue_xmit() and returns
 * its result.  @neigh is not used.
 */
1363int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1364{
1365        return dev_queue_xmit(skb);
1366}
1367EXPORT_SYMBOL(neigh_direct_output);
1368
1369static void neigh_proxy_process(unsigned long arg)
1370{
1371        struct neigh_table *tbl = (struct neigh_table *)arg;
1372        long sched_next = 0;
1373        unsigned long now = jiffies;
1374        struct sk_buff *skb, *n;
1375
1376        spin_lock(&tbl->proxy_queue.lock);
1377
1378        skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1379                long tdif = NEIGH_CB(skb)->sched_next - now;
1380
1381                if (tdif <= 0) {
1382                        struct net_device *dev = skb->dev;
1383
1384                        __skb_unlink(skb, &tbl->proxy_queue);
1385                        if (tbl->proxy_redo && netif_running(dev)) {
1386                                rcu_read_lock();
1387                                tbl->proxy_redo(skb);
1388                                rcu_read_unlock();
1389                        } else {
1390                                kfree_skb(skb);
1391                        }
1392
1393                        dev_put(dev);
1394                } else if (!sched_next || tdif < sched_next)
1395                        sched_next = tdif;
1396        }
1397        del_timer(&tbl->proxy_timer);
1398        if (sched_next)
1399                mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1400        spin_unlock(&tbl->proxy_queue.lock);
1401}
1402
1403void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1404                    struct sk_buff *skb)
1405{
1406        unsigned long now = jiffies;
1407        unsigned long sched_next = now + (net_random() % p->proxy_delay);
1408
1409        if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1410                kfree_skb(skb);
1411                return;
1412        }
1413
1414        NEIGH_CB(skb)->sched_next = sched_next;
1415        NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1416
1417        spin_lock(&tbl->proxy_queue.lock);
1418        if (del_timer(&tbl->proxy_timer)) {
1419                if (time_before(tbl->proxy_timer.expires, sched_next))
1420                        sched_next = tbl->proxy_timer.expires;
1421        }
1422        skb_dst_drop(skb);
1423        dev_hold(skb->dev);
1424        __skb_queue_tail(&tbl->proxy_queue, skb);
1425        mod_timer(&tbl->proxy_timer, sched_next);
1426        spin_unlock(&tbl->proxy_queue.lock);
1427}
1428EXPORT_SYMBOL(pneigh_enqueue);
1429
1430static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1431                                                      struct net *net, int ifindex)
1432{
1433        struct neigh_parms *p;
1434
1435        for (p = &tbl->parms; p; p = p->next) {
1436                if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1437                    (!p->dev && !ifindex))
1438                        return p;
1439        }
1440
1441        return NULL;
1442}
1443
1444struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1445                                      struct neigh_table *tbl)
1446{
1447        struct neigh_parms *p, *ref;
1448        struct net *net = dev_net(dev);
1449        const struct net_device_ops *ops = dev->netdev_ops;
1450
1451        ref = lookup_neigh_parms(tbl, net, 0);
1452        if (!ref)
1453                return NULL;
1454
1455        p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1456        if (p) {
1457                p->tbl            = tbl;
1458                atomic_set(&p->refcnt, 1);
1459                p->reachable_time =
1460                                neigh_rand_reach_time(p->base_reachable_time);
1461
1462                if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1463                        kfree(p);
1464                        return NULL;
1465                }
1466
1467                dev_hold(dev);
1468                p->dev = dev;
1469                write_pnet(&p->net, hold_net(net));
1470                p->sysctl_table = NULL;
1471                write_lock_bh(&tbl->lock);
1472                p->next         = tbl->parms.next;
1473                tbl->parms.next = p;
1474                write_unlock_bh(&tbl->lock);
1475        }
1476        return p;
1477}
1478EXPORT_SYMBOL(neigh_parms_alloc);
1479
1480static void neigh_rcu_free_parms(struct rcu_head *head)
1481{
1482        struct neigh_parms *parms =
1483                container_of(head, struct neigh_parms, rcu_head);
1484
1485        neigh_parms_put(parms);
1486}
1487
1488void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1489{
1490        struct neigh_parms **p;
1491
1492        if (!parms || parms == &tbl->parms)
1493                return;
1494        write_lock_bh(&tbl->lock);
1495        for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1496                if (*p == parms) {
1497                        *p = parms->next;
1498                        parms->dead = 1;
1499                        write_unlock_bh(&tbl->lock);
1500                        if (parms->dev)
1501                                dev_put(parms->dev);
1502                        call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1503                        return;
1504                }
1505        }
1506        write_unlock_bh(&tbl->lock);
1507        NEIGH_PRINTK1("neigh_parms_release: not found\n");
1508}
1509EXPORT_SYMBOL(neigh_parms_release);
1510
/* Drop the network-namespace reference held by @parms and free it. */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
        struct net *net = neigh_parms_net(parms);

        release_net(net);
        kfree(parms);
}
1516
1517static struct lock_class_key neigh_table_proxy_queue_class;
1518
1519static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1520{
1521        unsigned long now = jiffies;
1522        unsigned long phsize;
1523
1524        write_pnet(&tbl->parms.net, &init_net);
1525        atomic_set(&tbl->parms.refcnt, 1);
1526        tbl->parms.reachable_time =
1527                          neigh_rand_reach_time(tbl->parms.base_reachable_time);
1528
1529        tbl->stats = alloc_percpu(struct neigh_statistics);
1530        if (!tbl->stats)
1531                panic("cannot create neighbour cache statistics");
1532
1533#ifdef CONFIG_PROC_FS
1534        if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1535                              &neigh_stat_seq_fops, tbl))
1536                panic("cannot create neighbour proc dir entry");
1537#endif
1538
1539        RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1540
1541        phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1542        tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1543
1544        if (!tbl->nht || !tbl->phash_buckets)
1545                panic("cannot allocate neighbour cache hashes");
1546
1547        rwlock_init(&tbl->lock);
1548        INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1549        schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1550        setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1551        skb_queue_head_init_class(&tbl->proxy_queue,
1552                        &neigh_table_proxy_queue_class);
1553
1554        tbl->last_flush = now;
1555        tbl->last_rand  = now + tbl->parms.reachable_time * 20;
1556}
1557
1558void neigh_table_init(struct neigh_table *tbl)
1559{
1560        struct neigh_table *tmp;
1561
1562        neigh_table_init_no_netlink(tbl);
1563        write_lock(&neigh_tbl_lock);
1564        for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1565                if (tmp->family == tbl->family)
1566                        break;
1567        }
1568        tbl->next       = neigh_tables;
1569        neigh_tables    = tbl;
1570        write_unlock(&neigh_tbl_lock);
1571
1572        if (unlikely(tmp)) {
1573                pr_err("Registering multiple tables for family %d\n",
1574                       tbl->family);
1575                dump_stack();
1576        }
1577}
1578EXPORT_SYMBOL(neigh_table_init);
1579
1580int neigh_table_clear(struct neigh_table *tbl)
1581{
1582        struct neigh_table **tp;
1583
1584        /* It is not clean... Fix it to unload IPv6 module safely */
1585        cancel_delayed_work_sync(&tbl->gc_work);
1586        del_timer_sync(&tbl->proxy_timer);
1587        pneigh_queue_purge(&tbl->proxy_queue);
1588        neigh_ifdown(tbl, NULL);
1589        if (atomic_read(&tbl->entries))
1590                pr_crit("neighbour leakage\n");
1591        write_lock(&neigh_tbl_lock);
1592        for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1593                if (*tp == tbl) {
1594                        *tp = tbl->next;
1595                        break;
1596                }
1597        }
1598        write_unlock(&neigh_tbl_lock);
1599
1600        call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1601                 neigh_hash_free_rcu);
1602        tbl->nht = NULL;
1603
1604        kfree(tbl->phash_buckets);
1605        tbl->phash_buckets = NULL;
1606
1607        remove_proc_entry(tbl->id, init_net.proc_net_stat);
1608
1609        free_percpu(tbl->stats);
1610        tbl->stats = NULL;
1611
1612        return 0;
1613}
1614EXPORT_SYMBOL(neigh_table_clear);
1615
1616static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1617{
1618        struct net *net = sock_net(skb->sk);
1619        struct ndmsg *ndm;
1620        struct nlattr *dst_attr;
1621        struct neigh_table *tbl;
1622        struct net_device *dev = NULL;
1623        int err = -EINVAL;
1624
1625        ASSERT_RTNL();
1626        if (nlmsg_len(nlh) < sizeof(*ndm))
1627                goto out;
1628
1629        dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1630        if (dst_attr == NULL)
1631                goto out;
1632
1633        ndm = nlmsg_data(nlh);
1634        if (ndm->ndm_ifindex) {
1635                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1636                if (dev == NULL) {
1637                        err = -ENODEV;
1638                        goto out;
1639                }
1640        }
1641
1642        read_lock(&neigh_tbl_lock);
1643        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1644                struct neighbour *neigh;
1645
1646                if (tbl->family != ndm->ndm_family)
1647                        continue;
1648                read_unlock(&neigh_tbl_lock);
1649
1650                if (nla_len(dst_attr) < tbl->key_len)
1651                        goto out;
1652
1653                if (ndm->ndm_flags & NTF_PROXY) {
1654                        err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1655                        goto out;
1656                }
1657
1658                if (dev == NULL)
1659                        goto out;
1660
1661                neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1662                if (neigh == NULL) {
1663                        err = -ENOENT;
1664                        goto out;
1665                }
1666
1667                err = neigh_update(neigh, NULL, NUD_FAILED,
1668                                   NEIGH_UPDATE_F_OVERRIDE |
1669                                   NEIGH_UPDATE_F_ADMIN);
1670                neigh_release(neigh);
1671                goto out;
1672        }
1673        read_unlock(&neigh_tbl_lock);
1674        err = -EAFNOSUPPORT;
1675
1676out:
1677        return err;
1678}
1679
1680static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1681{
1682        struct net *net = sock_net(skb->sk);
1683        struct ndmsg *ndm;
1684        struct nlattr *tb[NDA_MAX+1];
1685        struct neigh_table *tbl;
1686        struct net_device *dev = NULL;
1687        int err;
1688
1689        ASSERT_RTNL();
1690        err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1691        if (err < 0)
1692                goto out;
1693
1694        err = -EINVAL;
1695        if (tb[NDA_DST] == NULL)
1696                goto out;
1697
1698        ndm = nlmsg_data(nlh);
1699        if (ndm->ndm_ifindex) {
1700                dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1701                if (dev == NULL) {
1702                        err = -ENODEV;
1703                        goto out;
1704                }
1705
1706                if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1707                        goto out;
1708        }
1709
1710        read_lock(&neigh_tbl_lock);
1711        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1712                int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1713                struct neighbour *neigh;
1714                void *dst, *lladdr;
1715
1716                if (tbl->family != ndm->ndm_family)
1717                        continue;
1718                read_unlock(&neigh_tbl_lock);
1719
1720                if (nla_len(tb[NDA_DST]) < tbl->key_len)
1721                        goto out;
1722                dst = nla_data(tb[NDA_DST]);
1723                lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1724
1725                if (ndm->ndm_flags & NTF_PROXY) {
1726                        struct pneigh_entry *pn;
1727
1728                        err = -ENOBUFS;
1729                        pn = pneigh_lookup(tbl, net, dst, dev, 1);
1730                        if (pn) {
1731                                pn->flags = ndm->ndm_flags;
1732                                err = 0;
1733                        }
1734                        goto out;
1735                }
1736
1737                if (dev == NULL)
1738                        goto out;
1739
1740                neigh = neigh_lookup(tbl, dst, dev);
1741                if (neigh == NULL) {
1742                        if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1743                                err = -ENOENT;
1744                                goto out;
1745                        }
1746
1747                        neigh = __neigh_lookup_errno(tbl, dst, dev);
1748                        if (IS_ERR(neigh)) {
1749                                err = PTR_ERR(neigh);
1750                                goto out;
1751                        }
1752                } else {
1753                        if (nlh->nlmsg_flags & NLM_F_EXCL) {
1754                                err = -EEXIST;
1755                                neigh_release(neigh);
1756                                goto out;
1757                        }
1758
1759                        if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1760                                flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1761                }
1762
1763                if (ndm->ndm_flags & NTF_USE) {
1764                        neigh_event_send(neigh, NULL);
1765                        err = 0;
1766                } else
1767                        err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1768                neigh_release(neigh);
1769                goto out;
1770        }
1771
1772        read_unlock(&neigh_tbl_lock);
1773        err = -EAFNOSUPPORT;
1774out:
1775        return err;
1776}
1777
1778static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1779{
1780        struct nlattr *nest;
1781
1782        nest = nla_nest_start(skb, NDTA_PARMS);
1783        if (nest == NULL)
1784                return -ENOBUFS;
1785
1786        if ((parms->dev &&
1787             nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1788            nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1789            nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) ||
1790            /* approximative value for deprecated QUEUE_LEN (in packets) */
1791            nla_put_u32(skb, NDTPA_QUEUE_LEN,
1792                        DIV_ROUND_UP(parms->queue_len_bytes,
1793                                     SKB_TRUESIZE(ETH_FRAME_LEN))) ||
1794            nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) ||
1795            nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) ||
1796            nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) ||
1797            nla_put_u32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes) ||
1798            nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1799            nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1800                          parms->base_reachable_time) ||
1801            nla_put_msecs(skb, NDTPA_GC_STALETIME, parms->gc_staletime) ||
1802            nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1803                          parms->delay_probe_time) ||
1804            nla_put_msecs(skb, NDTPA_RETRANS_TIME, parms->retrans_time) ||
1805            nla_put_msecs(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay) ||
1806            nla_put_msecs(skb, NDTPA_PROXY_DELAY, parms->proxy_delay) ||
1807            nla_put_msecs(skb, NDTPA_LOCKTIME, parms->locktime))
1808                goto nla_put_failure;
1809        return nla_nest_end(skb, nest);
1810
1811nla_put_failure:
1812        nla_nest_cancel(skb, nest);
1813        return -EMSGSIZE;
1814}
1815
1816static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1817                              u32 pid, u32 seq, int type, int flags)
1818{
1819        struct nlmsghdr *nlh;
1820        struct ndtmsg *ndtmsg;
1821
1822        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1823        if (nlh == NULL)
1824                return -EMSGSIZE;
1825
1826        ndtmsg = nlmsg_data(nlh);
1827
1828        read_lock_bh(&tbl->lock);
1829        ndtmsg->ndtm_family = tbl->family;
1830        ndtmsg->ndtm_pad1   = 0;
1831        ndtmsg->ndtm_pad2   = 0;
1832
1833        if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1834            nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1835            nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1836            nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1837            nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1838                goto nla_put_failure;
1839        {
1840                unsigned long now = jiffies;
1841                unsigned int flush_delta = now - tbl->last_flush;
1842                unsigned int rand_delta = now - tbl->last_rand;
1843                struct neigh_hash_table *nht;
1844                struct ndt_config ndc = {
1845                        .ndtc_key_len           = tbl->key_len,
1846                        .ndtc_entry_size        = tbl->entry_size,
1847                        .ndtc_entries           = atomic_read(&tbl->entries),
1848                        .ndtc_last_flush        = jiffies_to_msecs(flush_delta),
1849                        .ndtc_last_rand         = jiffies_to_msecs(rand_delta),
1850                        .ndtc_proxy_qlen        = tbl->proxy_queue.qlen,
1851                };
1852
1853                rcu_read_lock_bh();
1854                nht = rcu_dereference_bh(tbl->nht);
1855                ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1856                ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1857                rcu_read_unlock_bh();
1858
1859                if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1860                        goto nla_put_failure;
1861        }
1862
1863        {
1864                int cpu;
1865                struct ndt_stats ndst;
1866
1867                memset(&ndst, 0, sizeof(ndst));
1868
1869                for_each_possible_cpu(cpu) {
1870                        struct neigh_statistics *st;
1871
1872                        st = per_cpu_ptr(tbl->stats, cpu);
1873                        ndst.ndts_allocs                += st->allocs;
1874                        ndst.ndts_destroys              += st->destroys;
1875                        ndst.ndts_hash_grows            += st->hash_grows;
1876                        ndst.ndts_res_failed            += st->res_failed;
1877                        ndst.ndts_lookups               += st->lookups;
1878                        ndst.ndts_hits                  += st->hits;
1879                        ndst.ndts_rcv_probes_mcast      += st->rcv_probes_mcast;
1880                        ndst.ndts_rcv_probes_ucast      += st->rcv_probes_ucast;
1881                        ndst.ndts_periodic_gc_runs      += st->periodic_gc_runs;
1882                        ndst.ndts_forced_gc_runs        += st->forced_gc_runs;
1883                }
1884
1885                if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1886                        goto nla_put_failure;
1887        }
1888
1889        BUG_ON(tbl->parms.dev);
1890        if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1891                goto nla_put_failure;
1892
1893        read_unlock_bh(&tbl->lock);
1894        return nlmsg_end(skb, nlh);
1895
1896nla_put_failure:
1897        read_unlock_bh(&tbl->lock);
1898        nlmsg_cancel(skb, nlh);
1899        return -EMSGSIZE;
1900}
1901
1902static int neightbl_fill_param_info(struct sk_buff *skb,
1903                                    struct neigh_table *tbl,
1904                                    struct neigh_parms *parms,
1905                                    u32 pid, u32 seq, int type,
1906                                    unsigned int flags)
1907{
1908        struct ndtmsg *ndtmsg;
1909        struct nlmsghdr *nlh;
1910
1911        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1912        if (nlh == NULL)
1913                return -EMSGSIZE;
1914
1915        ndtmsg = nlmsg_data(nlh);
1916
1917        read_lock_bh(&tbl->lock);
1918        ndtmsg->ndtm_family = tbl->family;
1919        ndtmsg->ndtm_pad1   = 0;
1920        ndtmsg->ndtm_pad2   = 0;
1921
1922        if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1923            neightbl_fill_parms(skb, parms) < 0)
1924                goto errout;
1925
1926        read_unlock_bh(&tbl->lock);
1927        return nlmsg_end(skb, nlh);
1928errout:
1929        read_unlock_bh(&tbl->lock);
1930        nlmsg_cancel(skb, nlh);
1931        return -EMSGSIZE;
1932}
1933
1934static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1935        [NDTA_NAME]             = { .type = NLA_STRING },
1936        [NDTA_THRESH1]          = { .type = NLA_U32 },
1937        [NDTA_THRESH2]          = { .type = NLA_U32 },
1938        [NDTA_THRESH3]          = { .type = NLA_U32 },
1939        [NDTA_GC_INTERVAL]      = { .type = NLA_U64 },
1940        [NDTA_PARMS]            = { .type = NLA_NESTED },
1941};
1942
1943static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1944        [NDTPA_IFINDEX]                 = { .type = NLA_U32 },
1945        [NDTPA_QUEUE_LEN]               = { .type = NLA_U32 },
1946        [NDTPA_PROXY_QLEN]              = { .type = NLA_U32 },
1947        [NDTPA_APP_PROBES]              = { .type = NLA_U32 },
1948        [NDTPA_UCAST_PROBES]            = { .type = NLA_U32 },
1949        [NDTPA_MCAST_PROBES]            = { .type = NLA_U32 },
1950        [NDTPA_BASE_REACHABLE_TIME]     = { .type = NLA_U64 },
1951        [NDTPA_GC_STALETIME]            = { .type = NLA_U64 },
1952        [NDTPA_DELAY_PROBE_TIME]        = { .type = NLA_U64 },
1953        [NDTPA_RETRANS_TIME]            = { .type = NLA_U64 },
1954        [NDTPA_ANYCAST_DELAY]           = { .type = NLA_U64 },
1955        [NDTPA_PROXY_DELAY]             = { .type = NLA_U64 },
1956        [NDTPA_LOCKTIME]                = { .type = NLA_U64 },
1957};
1958
1959static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1960{
1961        struct net *net = sock_net(skb->sk);
1962        struct neigh_table *tbl;
1963        struct ndtmsg *ndtmsg;
1964        struct nlattr *tb[NDTA_MAX+1];
1965        int err;
1966
1967        err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1968                          nl_neightbl_policy);
1969        if (err < 0)
1970                goto errout;
1971
1972        if (tb[NDTA_NAME] == NULL) {
1973                err = -EINVAL;
1974                goto errout;
1975        }
1976
1977        ndtmsg = nlmsg_data(nlh);
1978        read_lock(&neigh_tbl_lock);
1979        for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1980                if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1981                        continue;
1982
1983                if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1984                        break;
1985        }
1986
1987        if (tbl == NULL) {
1988                err = -ENOENT;
1989                goto errout_locked;
1990        }
1991
1992        /*
1993         * We acquire tbl->lock to be nice to the periodic timers and
1994         * make sure they always see a consistent set of values.
1995         */
1996        write_lock_bh(&tbl->lock);
1997
1998        if (tb[NDTA_PARMS]) {
1999                struct nlattr *tbp[NDTPA_MAX+1];
2000                struct neigh_parms *p;
2001                int i, ifindex = 0;
2002
2003                err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2004                                       nl_ntbl_parm_policy);
2005                if (err < 0)
2006                        goto errout_tbl_lock;
2007
2008                if (tbp[NDTPA_IFINDEX])
2009                        ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2010
2011                p = lookup_neigh_parms(tbl, net, ifindex);
2012                if (p == NULL) {
2013                        err = -ENOENT;
2014                        goto errout_tbl_lock;
2015                }
2016
2017                for (i = 1; i <= NDTPA_MAX; i++) {
2018                        if (tbp[i] == NULL)
2019                                continue;
2020
2021                        switch (i) {
2022                        case NDTPA_QUEUE_LEN:
2023                                p->queue_len_bytes = nla_get_u32(tbp[i]) *
2024                                                     SKB_TRUESIZE(ETH_FRAME_LEN);
2025                                break;
2026                        case NDTPA_QUEUE_LENBYTES:
2027                                p->queue_len_bytes = nla_get_u32(tbp[i]);
2028                                break;
2029                        case NDTPA_PROXY_QLEN:
2030                                p->proxy_qlen = nla_get_u32(tbp[i]);
2031                                break;
2032                        case NDTPA_APP_PROBES:
2033                                p->app_probes = nla_get_u32(tbp[i]);
2034                                break;
2035                        case NDTPA_UCAST_PROBES:
2036                                p->ucast_probes = nla_get_u32(tbp[i]);
2037                                break;
2038                        case NDTPA_MCAST_PROBES:
2039                                p->mcast_probes = nla_get_u32(tbp[i]);
2040                                break;
2041                        case NDTPA_BASE_REACHABLE_TIME:
2042                                p->base_reachable_time = nla_get_msecs(tbp[i]);
2043                                break;
2044                        case NDTPA_GC_STALETIME:
2045                                p->gc_staletime = nla_get_msecs(tbp[i]);
2046                                break;
2047                        case NDTPA_DELAY_PROBE_TIME:
2048                                p->delay_probe_time = nla_get_msecs(tbp[i]);
2049                                break;
2050                        case NDTPA_RETRANS_TIME:
2051                                p->retrans_time = nla_get_msecs(tbp[i]);
2052                                break;
2053                        case NDTPA_ANYCAST_DELAY:
2054                                p->anycast_delay = nla_get_msecs(tbp[i]);
2055                                break;
2056                        case NDTPA_PROXY_DELAY:
2057                                p->proxy_delay = nla_get_msecs(tbp[i]);
2058                                break;
2059                        case NDTPA_LOCKTIME:
2060                                p->locktime = nla_get_msecs(tbp[i]);
2061                                break;
2062                        }
2063                }
2064        }
2065
2066        if (tb[NDTA_THRESH1])
2067                tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2068
2069        if (tb[NDTA_THRESH2])
2070                tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2071
2072        if (tb[NDTA_THRESH3])
2073                tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2074
2075        if (tb[NDTA_GC_INTERVAL])
2076                tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2077
2078        err = 0;
2079
2080errout_tbl_lock:
2081        write_unlock_bh(&tbl->lock);
2082errout_locked:
2083        read_unlock(&neigh_tbl_lock);
2084errout:
2085        return err;
2086}
2087
2088static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2089{
2090        struct net *net = sock_net(skb->sk);
2091        int family, tidx, nidx = 0;
2092        int tbl_skip = cb->args[0];
2093        int neigh_skip = cb->args[1];
2094        struct neigh_table *tbl;
2095
2096        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2097
2098        read_lock(&neigh_tbl_lock);
2099        for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2100                struct neigh_parms *p;
2101
2102                if (tidx < tbl_skip || (family && tbl->family != family))
2103                        continue;
2104
2105                if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
2106                                       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
2107                                       NLM_F_MULTI) <= 0)
2108                        break;
2109
2110                for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2111                        if (!net_eq(neigh_parms_net(p), net))
2112                                continue;
2113
2114                        if (nidx < neigh_skip)
2115                                goto next;
2116
2117                        if (neightbl_fill_param_info(skb, tbl, p,
2118                                                     NETLINK_CB(cb->skb).pid,
2119                                                     cb->nlh->nlmsg_seq,
2120                                                     RTM_NEWNEIGHTBL,
2121                                                     NLM_F_MULTI) <= 0)
2122                                goto out;
2123                next:
2124                        nidx++;
2125                }
2126
2127                neigh_skip = 0;
2128        }
2129out:
2130        read_unlock(&neigh_tbl_lock);
2131        cb->args[0] = tidx;
2132        cb->args[1] = nidx;
2133
2134        return skb->len;
2135}
2136
2137static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2138                           u32 pid, u32 seq, int type, unsigned int flags)
2139{
2140        unsigned long now = jiffies;
2141        struct nda_cacheinfo ci;
2142        struct nlmsghdr *nlh;
2143        struct ndmsg *ndm;
2144
2145        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2146        if (nlh == NULL)
2147                return -EMSGSIZE;
2148
2149        ndm = nlmsg_data(nlh);
2150        ndm->ndm_family  = neigh->ops->family;
2151        ndm->ndm_pad1    = 0;
2152        ndm->ndm_pad2    = 0;
2153        ndm->ndm_flags   = neigh->flags;
2154        ndm->ndm_type    = neigh->type;
2155        ndm->ndm_ifindex = neigh->dev->ifindex;
2156
2157        if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2158                goto nla_put_failure;
2159
2160        read_lock_bh(&neigh->lock);
2161        ndm->ndm_state   = neigh->nud_state;
2162        if (neigh->nud_state & NUD_VALID) {
2163                char haddr[MAX_ADDR_LEN];
2164
2165                neigh_ha_snapshot(haddr, neigh, neigh->dev);
2166                if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2167                        read_unlock_bh(&neigh->lock);
2168                        goto nla_put_failure;
2169                }
2170        }
2171
2172        ci.ndm_used      = jiffies_to_clock_t(now - neigh->used);
2173        ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2174        ci.ndm_updated   = jiffies_to_clock_t(now - neigh->updated);
2175        ci.ndm_refcnt    = atomic_read(&neigh->refcnt) - 1;
2176        read_unlock_bh(&neigh->lock);
2177
2178        if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2179            nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2180                goto nla_put_failure;
2181
2182        return nlmsg_end(skb, nlh);
2183
2184nla_put_failure:
2185        nlmsg_cancel(skb, nlh);
2186        return -EMSGSIZE;
2187}
2188
2189static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2190                            u32 pid, u32 seq, int type, unsigned int flags,
2191                            struct neigh_table *tbl)
2192{
2193        struct nlmsghdr *nlh;
2194        struct ndmsg *ndm;
2195
2196        nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2197        if (nlh == NULL)
2198                return -EMSGSIZE;
2199
2200        ndm = nlmsg_data(nlh);
2201        ndm->ndm_family  = tbl->family;
2202        ndm->ndm_pad1    = 0;
2203        ndm->ndm_pad2    = 0;
2204        ndm->ndm_flags   = pn->flags | NTF_PROXY;
2205        ndm->ndm_type    = NDA_DST;
2206        ndm->ndm_ifindex = pn->dev->ifindex;
2207        ndm->ndm_state   = NUD_NONE;
2208
2209        if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2210                goto nla_put_failure;
2211
2212        return nlmsg_end(skb, nlh);
2213
2214nla_put_failure:
2215        nlmsg_cancel(skb, nlh);
2216        return -EMSGSIZE;
2217}
2218
2219static void neigh_update_notify(struct neighbour *neigh)
2220{
2221        call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2222        __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2223}
2224
2225static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2226                            struct netlink_callback *cb)
2227{
2228        struct net *net = sock_net(skb->sk);
2229        struct neighbour *n;
2230        int rc, h, s_h = cb->args[1];
2231        int idx, s_idx = idx = cb->args[2];
2232        struct neigh_hash_table *nht;
2233
2234        rcu_read_lock_bh();
2235        nht = rcu_dereference_bh(tbl->nht);
2236
2237        for (h = s_h; h < (1 << nht->hash_shift); h++) {
2238                if (h > s_h)
2239                        s_idx = 0;
2240                for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2241                     n != NULL;
2242                     n = rcu_dereference_bh(n->next)) {
2243                        if (!net_eq(dev_net(n->dev), net))
2244                                continue;
2245                        if (idx < s_idx)
2246                                goto next;
2247                        if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2248                                            cb->nlh->nlmsg_seq,
2249                                            RTM_NEWNEIGH,
2250                                            NLM_F_MULTI) <= 0) {
2251                                rc = -1;
2252                                goto out;
2253                        }
2254next:
2255                        idx++;
2256                }
2257        }
2258        rc = skb->len;
2259out:
2260        rcu_read_unlock_bh();
2261        cb->args[1] = h;
2262        cb->args[2] = idx;
2263        return rc;
2264}
2265
2266static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2267                             struct netlink_callback *cb)
2268{
2269        struct pneigh_entry *n;
2270        struct net *net = sock_net(skb->sk);
2271        int rc, h, s_h = cb->args[3];
2272        int idx, s_idx = idx = cb->args[4];
2273
2274        read_lock_bh(&tbl->lock);
2275
2276        for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2277                if (h > s_h)
2278                        s_idx = 0;
2279                for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2280                        if (dev_net(n->dev) != net)
2281                                continue;
2282                        if (idx < s_idx)
2283                                goto next;
2284                        if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2285                                            cb->nlh->nlmsg_seq,
2286                                            RTM_NEWNEIGH,
2287                                            NLM_F_MULTI, tbl) <= 0) {
2288                                read_unlock_bh(&tbl->lock);
2289                                rc = -1;
2290                                goto out;
2291                        }
2292                next:
2293                        idx++;
2294                }
2295        }
2296
2297        read_unlock_bh(&tbl->lock);
2298        rc = skb->len;
2299out:
2300        cb->args[3] = h;
2301        cb->args[4] = idx;
2302        return rc;
2303
2304}
2305
2306static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2307{
2308        struct neigh_table *tbl;
2309        int t, family, s_t;
2310        int proxy = 0;
2311        int err;
2312
2313        read_lock(&neigh_tbl_lock);
2314        family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2315
2316        /* check for full ndmsg structure presence, family member is
2317         * the same for both structures
2318         */
2319        if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2320            ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2321                proxy = 1;
2322
2323        s_t = cb->args[0];
2324
2325        for (tbl = neigh_tables, t = 0; tbl;
2326             tbl = tbl->next, t++) {
2327                if (t < s_t || (family && tbl->family != family))
2328                        continue;
2329                if (t > s_t)
2330                        memset(&cb->args[1], 0, sizeof(cb->args) -
2331                                                sizeof(cb->args[0]));
2332                if (proxy)
2333                        err = pneigh_dump_table(tbl, skb, cb);
2334                else
2335                        err = neigh_dump_table(tbl, skb, cb);
2336                if (err < 0)
2337                        break;
2338        }
2339        read_unlock(&neigh_tbl_lock);
2340
2341        cb->args[0] = t;
2342        return skb->len;
2343}
2344
2345void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2346{
2347        int chain;
2348        struct neigh_hash_table *nht;
2349
2350        rcu_read_lock_bh();
2351        nht = rcu_dereference_bh(tbl->nht);
2352
2353        read_lock(&tbl->lock); /* avoid resizes */
2354        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2355                struct neighbour *n;
2356
2357                for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2358                     n != NULL;
2359                     n = rcu_dereference_bh(n->next))
2360                        cb(n, cookie);
2361        }
2362        read_unlock(&tbl->lock);
2363        rcu_read_unlock_bh();
2364}
2365EXPORT_SYMBOL(neigh_for_each);
2366
2367/* The tbl->lock must be held as a writer and BH disabled. */
2368void __neigh_for_each_release(struct neigh_table *tbl,
2369                              int (*cb)(struct neighbour *))
2370{
2371        int chain;
2372        struct neigh_hash_table *nht;
2373
2374        nht = rcu_dereference_protected(tbl->nht,
2375                                        lockdep_is_held(&tbl->lock));
2376        for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2377                struct neighbour *n;
2378                struct neighbour __rcu **np;
2379
2380                np = &nht->hash_buckets[chain];
2381                while ((n = rcu_dereference_protected(*np,
2382                                        lockdep_is_held(&tbl->lock))) != NULL) {
2383                        int release;
2384
2385                        write_lock(&n->lock);
2386                        release = cb(n);
2387                        if (release) {
2388                                rcu_assign_pointer(*np,
2389                                        rcu_dereference_protected(n->next,
2390                                                lockdep_is_held(&tbl->lock)));
2391                                n->dead = 1;
2392                        } else
2393                                np = &n->next;
2394                        write_unlock(&n->lock);
2395                        if (release)
2396                                neigh_cleanup_and_release(n);
2397                }
2398        }
2399}
2400EXPORT_SYMBOL(__neigh_for_each_release);
2401
2402#ifdef CONFIG_PROC_FS
2403
2404static struct neighbour *neigh_get_first(struct seq_file *seq)
2405{
2406        struct neigh_seq_state *state = seq->private;
2407        struct net *net = seq_file_net(seq);
2408        struct neigh_hash_table *nht = state->nht;
2409        struct neighbour *n = NULL;
2410        int bucket = state->bucket;
2411
2412        state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2413        for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2414                n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2415
2416                while (n) {
2417                        if (!net_eq(dev_net(n->dev), net))
2418                                goto next;
2419                        if (state->neigh_sub_iter) {
2420                                loff_t fakep = 0;
2421                                void *v;
2422
2423                                v = state->neigh_sub_iter(state, n, &fakep);
2424                                if (!v)
2425                                        goto next;
2426                        }
2427                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2428                                break;
2429                        if (n->nud_state & ~NUD_NOARP)
2430                                break;
2431next:
2432                        n = rcu_dereference_bh(n->next);
2433                }
2434
2435                if (n)
2436                        break;
2437        }
2438        state->bucket = bucket;
2439
2440        return n;
2441}
2442
2443static struct neighbour *neigh_get_next(struct seq_file *seq,
2444                                        struct neighbour *n,
2445                                        loff_t *pos)
2446{
2447        struct neigh_seq_state *state = seq->private;
2448        struct net *net = seq_file_net(seq);
2449        struct neigh_hash_table *nht = state->nht;
2450
2451        if (state->neigh_sub_iter) {
2452                void *v = state->neigh_sub_iter(state, n, pos);
2453                if (v)
2454                        return n;
2455        }
2456        n = rcu_dereference_bh(n->next);
2457
2458        while (1) {
2459                while (n) {
2460                        if (!net_eq(dev_net(n->dev), net))
2461                                goto next;
2462                        if (state->neigh_sub_iter) {
2463                                void *v = state->neigh_sub_iter(state, n, pos);
2464                                if (v)
2465                                        return n;
2466                                goto next;
2467                        }
2468                        if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2469                                break;
2470
2471                        if (n->nud_state & ~NUD_NOARP)
2472                                break;
2473next:
2474                        n = rcu_dereference_bh(n->next);
2475                }
2476
2477                if (n)
2478                        break;
2479
2480                if (++state->bucket >= (1 << nht->hash_shift))
2481                        break;
2482
2483                n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2484        }
2485
2486        if (n && pos)
2487                --(*pos);
2488        return n;
2489}
2490
2491static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2492{
2493        struct neighbour *n = neigh_get_first(seq);
2494
2495        if (n) {
2496                --(*pos);
2497                while (*pos) {
2498                        n = neigh_get_next(seq, n, pos);
2499                        if (!n)
2500                                break;
2501                }
2502        }
2503        return *pos ? NULL : n;
2504}
2505
2506static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2507{
2508        struct neigh_seq_state *state = seq->private;
2509        struct net *net = seq_file_net(seq);
2510        struct neigh_table *tbl = state->tbl;
2511        struct pneigh_entry *pn = NULL;
2512        int bucket = state->bucket;
2513
2514        state->flags |= NEIGH_SEQ_IS_PNEIGH;
2515        for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2516                pn = tbl->phash_buckets[bucket];
2517                while (pn && !net_eq(pneigh_net(pn), net))
2518                        pn = pn->next;
2519                if (pn)
2520                        break;
2521        }
2522        state->bucket = bucket;
2523
2524        return pn;
2525}
2526
2527static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2528                                            struct pneigh_entry *pn,
2529                                            loff_t *pos)
2530{
2531        struct neigh_seq_state *state = seq->private;
2532        struct net *net = seq_file_net(seq);
2533        struct neigh_table *tbl = state->tbl;
2534
2535        do {
2536                pn = pn->next;
2537        } while (pn && !net_eq(pneigh_net(pn), net));
2538
2539        while (!pn) {
2540                if (++state->bucket > PNEIGH_HASHMASK)
2541                        break;
2542                pn = tbl->phash_buckets[state->bucket];
2543                while (pn && !net_eq(pneigh_net(pn), net))
2544                        pn = pn->next;
2545                if (pn)
2546                        break;
2547        }
2548
2549        if (pn && pos)
2550                --(*pos);
2551
2552        return pn;
2553}
2554
2555static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2556{
2557        struct pneigh_entry *pn = pneigh_get_first(seq);
2558
2559        if (pn) {
2560                --(*pos);
2561                while (*pos) {
2562                        pn = pneigh_get_next(seq, pn, pos);
2563                        if (!pn)
2564                                break;
2565                }
2566        }
2567        return *pos ? NULL : pn;
2568}
2569
2570static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2571{
2572        struct neigh_seq_state *state = seq->private;
2573        void *rc;
2574        loff_t idxpos = *pos;
2575
2576        rc = neigh_get_idx(seq, &idxpos);
2577        if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2578                rc = pneigh_get_idx(seq, &idxpos);
2579
2580        return rc;
2581}
2582
2583void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2584        __acquires(rcu_bh)
2585{
2586        struct neigh_seq_state *state = seq->private;
2587
2588        state->tbl = tbl;
2589        state->bucket = 0;
2590        state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2591
2592        rcu_read_lock_bh();
2593        state->nht = rcu_dereference_bh(tbl->nht);
2594
2595        return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2596}
2597EXPORT_SYMBOL(neigh_seq_start);
2598
2599void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2600{
2601        struct neigh_seq_state *state;
2602        void *rc;
2603
2604        if (v == SEQ_START_TOKEN) {
2605                rc = neigh_get_first(seq);
2606                goto out;
2607        }
2608
2609        state = seq->private;
2610        if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2611                rc = neigh_get_next(seq, v, NULL);
2612                if (rc)
2613                        goto out;
2614                if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2615                        rc = pneigh_get_first(seq);
2616        } else {
2617                BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2618                rc = pneigh_get_next(seq, v, NULL);
2619        }
2620out:
2621        ++(*pos);
2622        return rc;
2623}
2624EXPORT_SYMBOL(neigh_seq_next);
2625
2626void neigh_seq_stop(struct seq_file *seq, void *v)
2627        __releases(rcu_bh)
2628{
2629        rcu_read_unlock_bh();
2630}
2631EXPORT_SYMBOL(neigh_seq_stop);
2632
2633/* statistics via seq_file */
2634
2635static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2636{
2637        struct neigh_table *tbl = seq->private;
2638        int cpu;
2639
2640        if (*pos == 0)
2641                return SEQ_START_TOKEN;
2642
2643        for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2644                if (!cpu_possible(cpu))
2645                        continue;
2646                *pos = cpu+1;
2647                return per_cpu_ptr(tbl->stats, cpu);
2648        }
2649        return NULL;
2650}
2651
2652static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2653{
2654        struct neigh_table *tbl = seq->private;
2655        int cpu;
2656
2657        for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2658                if (!cpu_possible(cpu))
2659                        continue;
2660                *pos = cpu+1;
2661                return per_cpu_ptr(tbl->stats, cpu);
2662        }
2663        return NULL;
2664}
2665
/*
 * ->stop for the statistics file.  Intentionally empty: the start/next
 * callbacks in this file take no lock and allocate nothing.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2670
2671static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2672{
2673        struct neigh_table *tbl = seq->private;
2674        struct neigh_statistics *st = v;
2675
2676        if (v == SEQ_START_TOKEN) {
2677                seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
2678                return 0;
2679        }
2680
2681        seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
2682                        "%08lx %08lx  %08lx %08lx %08lx\n",
2683                   atomic_read(&tbl->entries),
2684
2685                   st->allocs,
2686                   st->destroys,
2687                   st->hash_grows,
2688
2689                   st->lookups,
2690                   st->hits,
2691
2692                   st->res_failed,
2693
2694                   st->rcv_probes_mcast,
2695                   st->rcv_probes_ucast,
2696
2697                   st->periodic_gc_runs,
2698                   st->forced_gc_runs,
2699                   st->unres_discards
2700                   );
2701
2702        return 0;
2703}
2704
2705static const struct seq_operations neigh_stat_seq_ops = {
2706        .start  = neigh_stat_seq_start,
2707        .next   = neigh_stat_seq_next,
2708        .stop   = neigh_stat_seq_stop,
2709        .show   = neigh_stat_seq_show,
2710};
2711
2712static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2713{
2714        int ret = seq_open(file, &neigh_stat_seq_ops);
2715
2716        if (!ret) {
2717                struct seq_file *sf = file->private_data;
2718                sf->private = PDE(inode)->data;
2719        }
2720        return ret;
2721};
2722
2723static const struct file_operations neigh_stat_seq_fops = {
2724        .owner   = THIS_MODULE,
2725        .open    = neigh_stat_seq_open,
2726        .read    = seq_read,
2727        .llseek  = seq_lseek,
2728        .release = seq_release,
2729};
2730
2731#endif /* CONFIG_PROC_FS */
2732
2733static inline size_t neigh_nlmsg_size(void)
2734{
2735        return NLMSG_ALIGN(sizeof(struct ndmsg))
2736               + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2737               + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2738               + nla_total_size(sizeof(struct nda_cacheinfo))
2739               + nla_total_size(4); /* NDA_PROBES */
2740}
2741
2742static void __neigh_notify(struct neighbour *n, int type, int flags)
2743{
2744        struct net *net = dev_net(n->dev);
2745        struct sk_buff *skb;
2746        int err = -ENOBUFS;
2747
2748        skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2749        if (skb == NULL)
2750                goto errout;
2751
2752        err = neigh_fill_info(skb, n, 0, 0, type, flags);
2753        if (err < 0) {
2754                /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2755                WARN_ON(err == -EMSGSIZE);
2756                kfree_skb(skb);
2757                goto errout;
2758        }
2759        rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2760        return;
2761errout:
2762        if (err < 0)
2763                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2764}
2765
2766#ifdef CONFIG_ARPD
2767void neigh_app_ns(struct neighbour *n)
2768{
2769        __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2770}
2771EXPORT_SYMBOL(neigh_app_ns);
2772#endif /* CONFIG_ARPD */
2773
2774#ifdef CONFIG_SYSCTL
2775
2776static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
2777                           size_t *lenp, loff_t *ppos)
2778{
2779        int size, ret;
2780        ctl_table tmp = *ctl;
2781
2782        tmp.data = &size;
2783        size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
2784        ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
2785        if (write && !ret)
2786                *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
2787        return ret;
2788}
2789
/*
 * Slot numbers of the neighbour sysctl table (neigh_vars[]).
 *
 * The order matters: neigh_sysctl_register() cuts the per-device table
 * off at NEIGH_VAR_GC_INTERVAL, so the GC_* slots have to stay last.
 * NEIGH_VAR_MAX is the number of real slots and the index of the empty
 * terminating entry.
 */
enum {
	NEIGH_VAR_MCAST_PROBE = 0,
	NEIGH_VAR_UCAST_PROBE,
	NEIGH_VAR_APP_PROBE,
	NEIGH_VAR_RETRANS_TIME,
	NEIGH_VAR_BASE_REACHABLE_TIME,
	NEIGH_VAR_DELAY_PROBE_TIME,
	NEIGH_VAR_GC_STALETIME,
	NEIGH_VAR_QUEUE_LEN,
	NEIGH_VAR_QUEUE_LEN_BYTES,
	NEIGH_VAR_PROXY_QLEN,
	NEIGH_VAR_ANYCAST_DELAY,
	NEIGH_VAR_PROXY_DELAY,
	NEIGH_VAR_LOCKTIME,
	NEIGH_VAR_RETRANS_TIME_MS,
	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
	/* Registered for the "default" directory only. */
	NEIGH_VAR_GC_INTERVAL,
	NEIGH_VAR_GC_THRESH1,
	NEIGH_VAR_GC_THRESH2,
	NEIGH_VAR_GC_THRESH3,
	NEIGH_VAR_MAX
};
2812
2813static struct neigh_sysctl_table {
2814        struct ctl_table_header *sysctl_header;
2815        struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2816} neigh_sysctl_template __read_mostly = {
2817        .neigh_vars = {
2818                [NEIGH_VAR_MCAST_PROBE] = {
2819                        .procname       = "mcast_solicit",
2820                        .maxlen         = sizeof(int),
2821                        .mode           = 0644,
2822                        .proc_handler   = proc_dointvec,
2823                },
2824                [NEIGH_VAR_UCAST_PROBE] = {
2825                        .procname       = "ucast_solicit",
2826                        .maxlen         = sizeof(int),
2827                        .mode           = 0644,
2828                        .proc_handler   = proc_dointvec,
2829                },
2830                [NEIGH_VAR_APP_PROBE] = {
2831                        .procname       = "app_solicit",
2832                        .maxlen         = sizeof(int),
2833                        .mode           = 0644,
2834                        .proc_handler   = proc_dointvec,
2835                },
2836                [NEIGH_VAR_RETRANS_TIME] = {
2837                        .procname       = "retrans_time",
2838                        .maxlen         = sizeof(int),
2839                        .mode           = 0644,
2840                        .proc_handler   = proc_dointvec_userhz_jiffies,
2841                },
2842                [NEIGH_VAR_BASE_REACHABLE_TIME] = {
2843                        .procname       = "base_reachable_time",
2844                        .maxlen         = sizeof(int),
2845                        .mode           = 0644,
2846                        .proc_handler   = proc_dointvec_jiffies,
2847                },
2848                [NEIGH_VAR_DELAY_PROBE_TIME] = {
2849                        .procname       = "delay_first_probe_time",
2850                        .maxlen         = sizeof(int),
2851                        .mode           = 0644,
2852                        .proc_handler   = proc_dointvec_jiffies,
2853                },
2854                [NEIGH_VAR_GC_STALETIME] = {
2855                        .procname       = "gc_stale_time",
2856                        .maxlen         = sizeof(int),
2857                        .mode           = 0644,
2858                        .proc_handler   = proc_dointvec_jiffies,
2859                },
2860                [NEIGH_VAR_QUEUE_LEN] = {
2861                        .procname       = "unres_qlen",
2862                        .maxlen         = sizeof(int),
2863                        .mode           = 0644,
2864                        .proc_handler   = proc_unres_qlen,
2865                },
2866                [NEIGH_VAR_QUEUE_LEN_BYTES] = {
2867                        .procname       = "unres_qlen_bytes",
2868                        .maxlen         = sizeof(int),
2869                        .mode           = 0644,
2870                        .proc_handler   = proc_dointvec,
2871                },
2872                [NEIGH_VAR_PROXY_QLEN] = {
2873                        .procname       = "proxy_qlen",
2874                        .maxlen         = sizeof(int),
2875                        .mode           = 0644,
2876                        .proc_handler   = proc_dointvec,
2877                },
2878                [NEIGH_VAR_ANYCAST_DELAY] = {
2879                        .procname       = "anycast_delay",
2880                        .maxlen         = sizeof(int),
2881                        .mode           = 0644,
2882                        .proc_handler   = proc_dointvec_userhz_jiffies,
2883                },
2884                [NEIGH_VAR_PROXY_DELAY] = {
2885                        .procname       = "proxy_delay",
2886                        .maxlen         = sizeof(int),
2887                        .mode           = 0644,
2888                        .proc_handler   = proc_dointvec_userhz_jiffies,
2889                },
2890                [NEIGH_VAR_LOCKTIME] = {
2891                        .procname       = "locktime",
2892                        .maxlen         = sizeof(int),
2893                        .mode           = 0644,
2894                        .proc_handler   = proc_dointvec_userhz_jiffies,
2895                },
2896                [NEIGH_VAR_RETRANS_TIME_MS] = {
2897                        .procname       = "retrans_time_ms",
2898                        .maxlen         = sizeof(int),
2899                        .mode           = 0644,
2900                        .proc_handler   = proc_dointvec_ms_jiffies,
2901                },
2902                [NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
2903                        .procname       = "base_reachable_time_ms",
2904                        .maxlen         = sizeof(int),
2905                        .mode           = 0644,
2906                        .proc_handler   = proc_dointvec_ms_jiffies,
2907                },
2908                [NEIGH_VAR_GC_INTERVAL] = {
2909                        .procname       = "gc_interval",
2910                        .maxlen         = sizeof(int),
2911                        .mode           = 0644,
2912                        .proc_handler   = proc_dointvec_jiffies,
2913                },
2914                [NEIGH_VAR_GC_THRESH1] = {
2915                        .procname       = "gc_thresh1",
2916                        .maxlen         = sizeof(int),
2917                        .mode           = 0644,
2918                        .proc_handler   = proc_dointvec,
2919                },
2920                [NEIGH_VAR_GC_THRESH2] = {
2921                        .procname       = "gc_thresh2",
2922                        .maxlen         = sizeof(int),
2923                        .mode           = 0644,
2924                        .proc_handler   = proc_dointvec,
2925                },
2926                [NEIGH_VAR_GC_THRESH3] = {
2927                        .procname       = "gc_thresh3",
2928                        .maxlen         = sizeof(int),
2929                        .mode           = 0644,
2930                        .proc_handler   = proc_dointvec,
2931                },
2932                {},
2933        },
2934};
2935
2936int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2937                          char *p_name, proc_handler *handler)
2938{
2939        struct neigh_sysctl_table *t;
2940        const char *dev_name_source = NULL;
2941        char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
2942
2943        t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2944        if (!t)
2945                goto err;
2946
2947        t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
2948        t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
2949        t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
2950        t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
2951        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
2952        t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
2953        t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
2954        t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
2955        t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
2956        t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
2957        t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
2958        t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
2959        t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
2960        t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
2961        t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
2962
2963        if (dev) {
2964                dev_name_source = dev->name;
2965                /* Terminate the table early */
2966                memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
2967                       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
2968        } else {
2969                dev_name_source = "default";
2970                t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
2971                t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
2972                t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
2973                t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
2974        }
2975
2976
2977        if (handler) {
2978                /* RetransTime */
2979                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
2980                t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
2981                /* ReachableTime */
2982                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
2983                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
2984                /* RetransTime (in milliseconds)*/
2985                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
2986                t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
2987                /* ReachableTime (in milliseconds) */
2988                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
2989                t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
2990        }
2991
2992        snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
2993                p_name, dev_name_source);
2994        t->sysctl_header =
2995                register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
2996        if (!t->sysctl_header)
2997                goto free;
2998
2999        p->sysctl_table = t;
3000        return 0;
3001
3002free:
3003        kfree(t);
3004err:
3005        return -ENOBUFS;
3006}
3007EXPORT_SYMBOL(neigh_sysctl_register);
3008
3009void neigh_sysctl_unregister(struct neigh_parms *p)
3010{
3011        if (p->sysctl_table) {
3012                struct neigh_sysctl_table *t = p->sysctl_table;
3013                p->sysctl_table = NULL;
3014                unregister_net_sysctl_table(t->sysctl_header);
3015                kfree(t);
3016        }
3017}
3018EXPORT_SYMBOL(neigh_sysctl_unregister);
3019
3020#endif  /* CONFIG_SYSCTL */
3021
3022static int __init neigh_init(void)
3023{
3024        rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3025        rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3026        rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3027
3028        rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3029                      NULL);
3030        rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3031
3032        return 0;
3033}
3034
3035subsys_initcall(neigh_init);
3036
3037
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.