linux/net/ipv4/devinet.c
<<
>>
Prefs
   1/*
   2 *      NET3    IP device support routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 *      Derived from the IP parts of dev.c 1.0.19
  10 *              Authors:        Ross Biro
  11 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *      Additional Authors:
  15 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *      Changes:
  19 *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
  20 *                                      lists.
  21 *              Cyrus Durgin:           updated for kmod
  22 *              Matthias Andree:        in devinet_ioctl, compare label and
  23 *                                      address (4.4BSD alias style support),
  24 *                                      fall back to comparing just the label
  25 *                                      if no match found.
  26 */
  27
  28
  29#include <asm/uaccess.h>
  30#include <linux/bitops.h>
  31#include <linux/capability.h>
  32#include <linux/module.h>
  33#include <linux/types.h>
  34#include <linux/kernel.h>
  35#include <linux/string.h>
  36#include <linux/mm.h>
  37#include <linux/socket.h>
  38#include <linux/sockios.h>
  39#include <linux/in.h>
  40#include <linux/errno.h>
  41#include <linux/interrupt.h>
  42#include <linux/if_addr.h>
  43#include <linux/if_ether.h>
  44#include <linux/inet.h>
  45#include <linux/netdevice.h>
  46#include <linux/etherdevice.h>
  47#include <linux/skbuff.h>
  48#include <linux/init.h>
  49#include <linux/notifier.h>
  50#include <linux/inetdevice.h>
  51#include <linux/igmp.h>
  52#include <linux/slab.h>
  53#include <linux/hash.h>
  54#ifdef CONFIG_SYSCTL
  55#include <linux/sysctl.h>
  56#endif
  57#include <linux/kmod.h>
  58
  59#include <net/arp.h>
  60#include <net/ip.h>
  61#include <net/route.h>
  62#include <net/ip_fib.h>
  63#include <net/rtnetlink.h>
  64#include <net/net_namespace.h>
  65
  66#include "fib_lookup.h"
  67
   /*
    * Statically initialised IPv4 device configuration table.  Entries are
    * indexed by (IPV4_DEVCONF_* - 1); redirect acceptance, redirect sending,
    * secure redirects and shared media are switched on, every other entry
    * starts out as zero.
    * NOTE(review): presumably this backs the "all" settings of the initial
    * namespace; the code wiring it up is outside this excerpt -- confirm.
    */
   68static struct ipv4_devconf ipv4_devconf = {
   69        .data = {
   70                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
   71                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
   72                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
   73                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
   74        },
   75};
  76
   /*
    * Statically initialised table of default IPv4 device settings.  It
    * enables the same four entries as ipv4_devconf and additionally
    * ACCEPT_SOURCE_ROUTE.
    * NOTE(review): inetdev_init() copies net->ipv4.devconf_dflt into each new
    * in_device; presumably that pointer refers to this table for the initial
    * namespace -- confirm against the namespace init code.
    */
   77static struct ipv4_devconf ipv4_devconf_dflt = {
   78        .data = {
   79                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
   80                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
   81                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
   82                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
   83                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
   84        },
   85};
  86
  87#define IPV4_DEVCONF_DFLT(net, attr) \
  88        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  89
   /*
    * Netlink attribute policy handed to nlmsg_parse() by the address
    * add/delete handlers in this file: the three address attributes are
    * 32-bit values and the label is a string of at most IFNAMSIZ - 1 bytes.
    */
   90static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
   91        [IFA_LOCAL]             = { .type = NLA_U32 },
   92        [IFA_ADDRESS]           = { .type = NLA_U32 },
   93        [IFA_BROADCAST]         = { .type = NLA_U32 },
   94        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
   95};
  96
   /*
    * Hash table of configured local addresses, keyed by inet_addr_hash().
    * It has 1 << IN4_ADDR_HSIZE_SHIFT (256) buckets.  Insertions and removals
    * in this file take inet_addr_hash_lock; __ip_dev_find() walks a bucket
    * under rcu_read_lock() only.
    */
   97#define IN4_ADDR_HSIZE_SHIFT    8
   98#define IN4_ADDR_HSIZE          (1U << IN4_ADDR_HSIZE_SHIFT)
   99
  100static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
  101static DEFINE_SPINLOCK(inet_addr_hash_lock);
 102
 103static u32 inet_addr_hash(struct net *net, __be32 addr)
 104{
 105        u32 val = (__force u32) addr ^ net_hash_mix(net);
 106
 107        return hash_32(val, IN4_ADDR_HSIZE_SHIFT);
 108}
 109
 110static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 111{
 112        u32 hash = inet_addr_hash(net, ifa->ifa_local);
 113
 114        spin_lock(&inet_addr_hash_lock);
 115        hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 116        spin_unlock(&inet_addr_hash_lock);
 117}
 118
 119static void inet_hash_remove(struct in_ifaddr *ifa)
 120{
 121        spin_lock(&inet_addr_hash_lock);
 122        hlist_del_init_rcu(&ifa->hash);
 123        spin_unlock(&inet_addr_hash_lock);
 124}
 125
 126/**
 127 * __ip_dev_find - find the first device with a given source address.
 128 * @net: the net namespace
 129 * @addr: the source address
 130 * @devref: if true, take a reference on the found device
 131 *
 132 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 133 */
 134struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 135{
 136        u32 hash = inet_addr_hash(net, addr);
 137        struct net_device *result = NULL;
 138        struct in_ifaddr *ifa;
 139        struct hlist_node *node;
 140
 141        rcu_read_lock();
 142        hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
 143                if (ifa->ifa_local == addr) {
 144                        struct net_device *dev = ifa->ifa_dev->dev;
 145
 146                        if (!net_eq(dev_net(dev), net))
 147                                continue;
 148                        result = dev;
 149                        break;
 150                }
 151        }
 152        if (!result) {
 153                struct flowi4 fl4 = { .daddr = addr };
 154                struct fib_result res = { 0 };
 155                struct fib_table *local;
 156
 157                /* Fallback to FIB local table so that communication
 158                 * over loopback subnets work.
 159                 */
 160                local = fib_get_table(net, RT_TABLE_LOCAL);
 161                if (local &&
 162                    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 163                    res.type == RTN_LOCAL)
 164                        result = FIB_RES_DEV(res);
 165        }
 166        if (result && devref)
 167                dev_hold(result);
 168        rcu_read_unlock();
 169        return result;
 170}
 171EXPORT_SYMBOL(__ip_dev_find);
 172
  /* Forward declaration: address netlink notification, defined further down. */
  173static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
  174
  /*
   * Notifier chain called from this file with NETDEV_UP / NETDEV_DOWN and the
   * affected in_ifaddr whenever an address is inserted or deleted.
   */
  175static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
  176static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
  177                         int destroy);
  /*
   * Per-device sysctl registration hooks.  With CONFIG_SYSCTL they are only
   * declared here; without it they are empty stubs so that callers need no
   * #ifdef of their own.
   */
  178#ifdef CONFIG_SYSCTL
  179static void devinet_sysctl_register(struct in_device *idev);
  180static void devinet_sysctl_unregister(struct in_device *idev);
  181#else
  182static void devinet_sysctl_register(struct in_device *idev)
  183{
  184}
  185static void devinet_sysctl_unregister(struct in_device *idev)
  186{
  187}
  188#endif
 189
 190/* Locks all the inet devices. */
 191
 192static struct in_ifaddr *inet_alloc_ifa(void)
 193{
 194        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 195}
 196
 197static void inet_rcu_free_ifa(struct rcu_head *head)
 198{
 199        struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 200        if (ifa->ifa_dev)
 201                in_dev_put(ifa->ifa_dev);
 202        kfree(ifa);
 203}
 204
 205static void inet_free_ifa(struct in_ifaddr *ifa)
 206{
 207        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 208}
 209
 210void in_dev_finish_destroy(struct in_device *idev)
 211{
 212        struct net_device *dev = idev->dev;
 213
 214        WARN_ON(idev->ifa_list);
 215        WARN_ON(idev->mc_list);
 216#ifdef NET_REFCNT_DEBUG
 217        pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 218#endif
 219        dev_put(dev);
 220        if (!idev->dead)
 221                pr_err("Freeing alive in_device %p\n", idev);
 222        else
 223                kfree(idev);
 224}
 225EXPORT_SYMBOL(in_dev_finish_destroy);
 226
 227static struct in_device *inetdev_init(struct net_device *dev)
 228{
 229        struct in_device *in_dev;
 230
 231        ASSERT_RTNL();
 232
 233        in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 234        if (!in_dev)
 235                goto out;
 236        memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 237                        sizeof(in_dev->cnf));
 238        in_dev->cnf.sysctl = NULL;
 239        in_dev->dev = dev;
 240        in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 241        if (!in_dev->arp_parms)
 242                goto out_kfree;
 243        if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 244                dev_disable_lro(dev);
 245        /* Reference in_dev->dev */
 246        dev_hold(dev);
 247        /* Account for reference dev->ip_ptr (below) */
 248        in_dev_hold(in_dev);
 249
 250        devinet_sysctl_register(in_dev);
 251        ip_mc_init_dev(in_dev);
 252        if (dev->flags & IFF_UP)
 253                ip_mc_up(in_dev);
 254
 255        /* we can receive as soon as ip_ptr is set -- do this last */
 256        rcu_assign_pointer(dev->ip_ptr, in_dev);
 257out:
 258        return in_dev;
 259out_kfree:
 260        kfree(in_dev);
 261        in_dev = NULL;
 262        goto out;
 263}
 264
 265static void in_dev_rcu_put(struct rcu_head *head)
 266{
 267        struct in_device *idev = container_of(head, struct in_device, rcu_head);
 268        in_dev_put(idev);
 269}
 270
 271static void inetdev_destroy(struct in_device *in_dev)
 272{
 273        struct in_ifaddr *ifa;
 274        struct net_device *dev;
 275
 276        ASSERT_RTNL();
 277
 278        dev = in_dev->dev;
 279
 280        in_dev->dead = 1;
 281
 282        ip_mc_destroy_dev(in_dev);
 283
 284        while ((ifa = in_dev->ifa_list) != NULL) {
 285                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 286                inet_free_ifa(ifa);
 287        }
 288
 289        RCU_INIT_POINTER(dev->ip_ptr, NULL);
 290
 291        devinet_sysctl_unregister(in_dev);
 292        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 293        arp_ifdown(dev);
 294
 295        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 296}
 297
 298int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 299{
 300        rcu_read_lock();
 301        for_primary_ifa(in_dev) {
 302                if (inet_ifa_match(a, ifa)) {
 303                        if (!b || inet_ifa_match(b, ifa)) {
 304                                rcu_read_unlock();
 305                                return 1;
 306                        }
 307                }
 308        } endfor_ifa(in_dev);
 309        rcu_read_unlock();
 310        return 0;
 311}
 312
 313static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 314                         int destroy, struct nlmsghdr *nlh, u32 portid)
 315{
 316        struct in_ifaddr *promote = NULL;
 317        struct in_ifaddr *ifa, *ifa1 = *ifap;
 318        struct in_ifaddr *last_prim = in_dev->ifa_list;
 319        struct in_ifaddr *prev_prom = NULL;
 320        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 321
 322        ASSERT_RTNL();
 323
 324        /* 1. Deleting primary ifaddr forces deletion all secondaries
 325         * unless alias promotion is set
 326         **/
 327
 328        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
 329                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
 330
 331                while ((ifa = *ifap1) != NULL) {
 332                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 333                            ifa1->ifa_scope <= ifa->ifa_scope)
 334                                last_prim = ifa;
 335
 336                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 337                            ifa1->ifa_mask != ifa->ifa_mask ||
 338                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
 339                                ifap1 = &ifa->ifa_next;
 340                                prev_prom = ifa;
 341                                continue;
 342                        }
 343
 344                        if (!do_promote) {
 345                                inet_hash_remove(ifa);
 346                                *ifap1 = ifa->ifa_next;
 347
 348                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid);
 349                                blocking_notifier_call_chain(&inetaddr_chain,
 350                                                NETDEV_DOWN, ifa);
 351                                inet_free_ifa(ifa);
 352                        } else {
 353                                promote = ifa;
 354                                break;
 355                        }
 356                }
 357        }
 358
 359        /* On promotion all secondaries from subnet are changing
 360         * the primary IP, we must remove all their routes silently
 361         * and later to add them back with new prefsrc. Do this
 362         * while all addresses are on the device list.
 363         */
 364        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
 365                if (ifa1->ifa_mask == ifa->ifa_mask &&
 366                    inet_ifa_match(ifa1->ifa_address, ifa))
 367                        fib_del_ifaddr(ifa, ifa1);
 368        }
 369
 370        /* 2. Unlink it */
 371
 372        *ifap = ifa1->ifa_next;
 373        inet_hash_remove(ifa1);
 374
 375        /* 3. Announce address deletion */
 376
 377        /* Send message first, then call notifier.
 378           At first sight, FIB update triggered by notifier
 379           will refer to already deleted ifaddr, that could confuse
 380           netlink listeners. It is not true: look, gated sees
 381           that route deleted and if it still thinks that ifaddr
 382           is valid, it will try to restore deleted routes... Grr.
 383           So that, this order is correct.
 384         */
 385        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid);
 386        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 387
 388        if (promote) {
 389                struct in_ifaddr *next_sec = promote->ifa_next;
 390
 391                if (prev_prom) {
 392                        prev_prom->ifa_next = promote->ifa_next;
 393                        promote->ifa_next = last_prim->ifa_next;
 394                        last_prim->ifa_next = promote;
 395                }
 396
 397                promote->ifa_flags &= ~IFA_F_SECONDARY;
 398                rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid);
 399                blocking_notifier_call_chain(&inetaddr_chain,
 400                                NETDEV_UP, promote);
 401                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
 402                        if (ifa1->ifa_mask != ifa->ifa_mask ||
 403                            !inet_ifa_match(ifa1->ifa_address, ifa))
 404                                        continue;
 405                        fib_add_ifaddr(ifa);
 406                }
 407
 408        }
 409        if (destroy)
 410                inet_free_ifa(ifa1);
 411}
 412
 413static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 414                         int destroy)
 415{
 416        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 417}
 418
 419static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 420                             u32 portid)
 421{
 422        struct in_device *in_dev = ifa->ifa_dev;
 423        struct in_ifaddr *ifa1, **ifap, **last_primary;
 424
 425        ASSERT_RTNL();
 426
 427        if (!ifa->ifa_local) {
 428                inet_free_ifa(ifa);
 429                return 0;
 430        }
 431
 432        ifa->ifa_flags &= ~IFA_F_SECONDARY;
 433        last_primary = &in_dev->ifa_list;
 434
 435        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 436             ifap = &ifa1->ifa_next) {
 437                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 438                    ifa->ifa_scope <= ifa1->ifa_scope)
 439                        last_primary = &ifa1->ifa_next;
 440                if (ifa1->ifa_mask == ifa->ifa_mask &&
 441                    inet_ifa_match(ifa1->ifa_address, ifa)) {
 442                        if (ifa1->ifa_local == ifa->ifa_local) {
 443                                inet_free_ifa(ifa);
 444                                return -EEXIST;
 445                        }
 446                        if (ifa1->ifa_scope != ifa->ifa_scope) {
 447                                inet_free_ifa(ifa);
 448                                return -EINVAL;
 449                        }
 450                        ifa->ifa_flags |= IFA_F_SECONDARY;
 451                }
 452        }
 453
 454        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 455                net_srandom(ifa->ifa_local);
 456                ifap = last_primary;
 457        }
 458
 459        ifa->ifa_next = *ifap;
 460        *ifap = ifa;
 461
 462        inet_hash_insert(dev_net(in_dev->dev), ifa);
 463
 464        /* Send message first, then call notifier.
 465           Notifier will trigger FIB update, so that
 466           listeners of netlink will know about new ifaddr */
 467        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid);
 468        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 469
 470        return 0;
 471}
 472
 473static int inet_insert_ifa(struct in_ifaddr *ifa)
 474{
 475        return __inet_insert_ifa(ifa, NULL, 0);
 476}
 477
 478static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 479{
 480        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 481
 482        ASSERT_RTNL();
 483
 484        if (!in_dev) {
 485                inet_free_ifa(ifa);
 486                return -ENOBUFS;
 487        }
 488        ipv4_devconf_setall(in_dev);
 489        if (ifa->ifa_dev != in_dev) {
 490                WARN_ON(ifa->ifa_dev);
 491                in_dev_hold(in_dev);
 492                ifa->ifa_dev = in_dev;
 493        }
 494        if (ipv4_is_loopback(ifa->ifa_local))
 495                ifa->ifa_scope = RT_SCOPE_HOST;
 496        return inet_insert_ifa(ifa);
 497}
 498
 499/* Caller must hold RCU or RTNL :
 500 * We dont take a reference on found in_device
 501 */
 502struct in_device *inetdev_by_index(struct net *net, int ifindex)
 503{
 504        struct net_device *dev;
 505        struct in_device *in_dev = NULL;
 506
 507        rcu_read_lock();
 508        dev = dev_get_by_index_rcu(net, ifindex);
 509        if (dev)
 510                in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 511        rcu_read_unlock();
 512        return in_dev;
 513}
 514EXPORT_SYMBOL(inetdev_by_index);
 515
 516/* Called only from RTNL semaphored context. No locks. */
 517
 518struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 519                                    __be32 mask)
 520{
 521        ASSERT_RTNL();
 522
 523        for_primary_ifa(in_dev) {
 524                if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 525                        return ifa;
 526        } endfor_ifa(in_dev);
 527        return NULL;
 528}
 529
 530static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 531{
 532        struct net *net = sock_net(skb->sk);
 533        struct nlattr *tb[IFA_MAX+1];
 534        struct in_device *in_dev;
 535        struct ifaddrmsg *ifm;
 536        struct in_ifaddr *ifa, **ifap;
 537        int err = -EINVAL;
 538
 539        ASSERT_RTNL();
 540
 541        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 542        if (err < 0)
 543                goto errout;
 544
 545        ifm = nlmsg_data(nlh);
 546        in_dev = inetdev_by_index(net, ifm->ifa_index);
 547        if (in_dev == NULL) {
 548                err = -ENODEV;
 549                goto errout;
 550        }
 551
 552        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 553             ifap = &ifa->ifa_next) {
 554                if (tb[IFA_LOCAL] &&
 555                    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
 556                        continue;
 557
 558                if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 559                        continue;
 560
 561                if (tb[IFA_ADDRESS] &&
 562                    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 563                    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
 564                        continue;
 565
 566                __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 567                return 0;
 568        }
 569
 570        err = -EADDRNOTAVAIL;
 571errout:
 572        return err;
 573}
 574
 575static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 576{
 577        struct nlattr *tb[IFA_MAX+1];
 578        struct in_ifaddr *ifa;
 579        struct ifaddrmsg *ifm;
 580        struct net_device *dev;
 581        struct in_device *in_dev;
 582        int err;
 583
 584        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 585        if (err < 0)
 586                goto errout;
 587
 588        ifm = nlmsg_data(nlh);
 589        err = -EINVAL;
 590        if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
 591                goto errout;
 592
 593        dev = __dev_get_by_index(net, ifm->ifa_index);
 594        err = -ENODEV;
 595        if (dev == NULL)
 596                goto errout;
 597
 598        in_dev = __in_dev_get_rtnl(dev);
 599        err = -ENOBUFS;
 600        if (in_dev == NULL)
 601                goto errout;
 602
 603        ifa = inet_alloc_ifa();
 604        if (ifa == NULL)
 605                /*
 606                 * A potential indev allocation can be left alive, it stays
 607                 * assigned to its device and is destroy with it.
 608                 */
 609                goto errout;
 610
 611        ipv4_devconf_setall(in_dev);
 612        in_dev_hold(in_dev);
 613
 614        if (tb[IFA_ADDRESS] == NULL)
 615                tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 616
 617        INIT_HLIST_NODE(&ifa->hash);
 618        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 619        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 620        ifa->ifa_flags = ifm->ifa_flags;
 621        ifa->ifa_scope = ifm->ifa_scope;
 622        ifa->ifa_dev = in_dev;
 623
 624        ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
 625        ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
 626
 627        if (tb[IFA_BROADCAST])
 628                ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 629
 630        if (tb[IFA_LABEL])
 631                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 632        else
 633                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 634
 635        return ifa;
 636
 637errout:
 638        return ERR_PTR(err);
 639}
 640
 641static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 642{
 643        struct net *net = sock_net(skb->sk);
 644        struct in_ifaddr *ifa;
 645
 646        ASSERT_RTNL();
 647
 648        ifa = rtm_to_ifaddr(net, nlh);
 649        if (IS_ERR(ifa))
 650                return PTR_ERR(ifa);
 651
 652        return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
 653}
 654
 655/*
 656 *      Determine a default network mask, based on the IP address.
 657 */
 658
 659static int inet_abc_len(__be32 addr)
 660{
 661        int rc = -1;    /* Something else, probably a multicast. */
 662
 663        if (ipv4_is_zeronet(addr))
 664                rc = 0;
 665        else {
 666                __u32 haddr = ntohl(addr);
 667
 668                if (IN_CLASSA(haddr))
 669                        rc = 8;
 670                else if (IN_CLASSB(haddr))
 671                        rc = 16;
 672                else if (IN_CLASSC(haddr))
 673                        rc = 24;
 674        }
 675
 676        return rc;
 677}
 678
 679
 680int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 681{
 682        struct ifreq ifr;
 683        struct sockaddr_in sin_orig;
 684        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
 685        struct in_device *in_dev;
 686        struct in_ifaddr **ifap = NULL;
 687        struct in_ifaddr *ifa = NULL;
 688        struct net_device *dev;
 689        char *colon;
 690        int ret = -EFAULT;
 691        int tryaddrmatch = 0;
 692
 693        /*
 694         *      Fetch the caller's info block into kernel space
 695         */
 696
 697        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 698                goto out;
 699        ifr.ifr_name[IFNAMSIZ - 1] = 0;
 700
 701        /* save original address for comparison */
 702        memcpy(&sin_orig, sin, sizeof(*sin));
 703
 704        colon = strchr(ifr.ifr_name, ':');
 705        if (colon)
 706                *colon = 0;
 707
 708        dev_load(net, ifr.ifr_name);
 709
 710        switch (cmd) {
 711        case SIOCGIFADDR:       /* Get interface address */
 712        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 713        case SIOCGIFDSTADDR:    /* Get the destination address */
 714        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 715                /* Note that these ioctls will not sleep,
 716                   so that we do not impose a lock.
 717                   One day we will be forced to put shlock here (I mean SMP)
 718                 */
 719                tryaddrmatch = (sin_orig.sin_family == AF_INET);
 720                memset(sin, 0, sizeof(*sin));
 721                sin->sin_family = AF_INET;
 722                break;
 723
 724        case SIOCSIFFLAGS:
 725                ret = -EPERM;
 726                if (!capable(CAP_NET_ADMIN))
 727                        goto out;
 728                break;
 729        case SIOCSIFADDR:       /* Set interface address (and family) */
 730        case SIOCSIFBRDADDR:    /* Set the broadcast address */
 731        case SIOCSIFDSTADDR:    /* Set the destination address */
 732        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
 733                ret = -EPERM;
 734                if (!capable(CAP_NET_ADMIN))
 735                        goto out;
 736                ret = -EINVAL;
 737                if (sin->sin_family != AF_INET)
 738                        goto out;
 739                break;
 740        default:
 741                ret = -EINVAL;
 742                goto out;
 743        }
 744
 745        rtnl_lock();
 746
 747        ret = -ENODEV;
 748        dev = __dev_get_by_name(net, ifr.ifr_name);
 749        if (!dev)
 750                goto done;
 751
 752        if (colon)
 753                *colon = ':';
 754
 755        in_dev = __in_dev_get_rtnl(dev);
 756        if (in_dev) {
 757                if (tryaddrmatch) {
 758                        /* Matthias Andree */
 759                        /* compare label and address (4.4BSD style) */
 760                        /* note: we only do this for a limited set of ioctls
 761                           and only if the original address family was AF_INET.
 762                           This is checked above. */
 763                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 764                             ifap = &ifa->ifa_next) {
 765                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
 766                                    sin_orig.sin_addr.s_addr ==
 767                                                        ifa->ifa_local) {
 768                                        break; /* found */
 769                                }
 770                        }
 771                }
 772                /* we didn't get a match, maybe the application is
 773                   4.3BSD-style and passed in junk so we fall back to
 774                   comparing just the label */
 775                if (!ifa) {
 776                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 777                             ifap = &ifa->ifa_next)
 778                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
 779                                        break;
 780                }
 781        }
 782
 783        ret = -EADDRNOTAVAIL;
 784        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
 785                goto done;
 786
 787        switch (cmd) {
 788        case SIOCGIFADDR:       /* Get interface address */
 789                sin->sin_addr.s_addr = ifa->ifa_local;
 790                goto rarok;
 791
 792        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 793                sin->sin_addr.s_addr = ifa->ifa_broadcast;
 794                goto rarok;
 795
 796        case SIOCGIFDSTADDR:    /* Get the destination address */
 797                sin->sin_addr.s_addr = ifa->ifa_address;
 798                goto rarok;
 799
 800        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 801                sin->sin_addr.s_addr = ifa->ifa_mask;
 802                goto rarok;
 803
 804        case SIOCSIFFLAGS:
 805                if (colon) {
 806                        ret = -EADDRNOTAVAIL;
 807                        if (!ifa)
 808                                break;
 809                        ret = 0;
 810                        if (!(ifr.ifr_flags & IFF_UP))
 811                                inet_del_ifa(in_dev, ifap, 1);
 812                        break;
 813                }
 814                ret = dev_change_flags(dev, ifr.ifr_flags);
 815                break;
 816
 817        case SIOCSIFADDR:       /* Set interface address (and family) */
 818                ret = -EINVAL;
 819                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
 820                        break;
 821
 822                if (!ifa) {
 823                        ret = -ENOBUFS;
 824                        ifa = inet_alloc_ifa();
 825                        INIT_HLIST_NODE(&ifa->hash);
 826                        if (!ifa)
 827                                break;
 828                        if (colon)
 829                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
 830                        else
 831                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 832                } else {
 833                        ret = 0;
 834                        if (ifa->ifa_local == sin->sin_addr.s_addr)
 835                                break;
 836                        inet_del_ifa(in_dev, ifap, 0);
 837                        ifa->ifa_broadcast = 0;
 838                        ifa->ifa_scope = 0;
 839                }
 840
 841                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
 842
 843                if (!(dev->flags & IFF_POINTOPOINT)) {
 844                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
 845                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
 846                        if ((dev->flags & IFF_BROADCAST) &&
 847                            ifa->ifa_prefixlen < 31)
 848                                ifa->ifa_broadcast = ifa->ifa_address |
 849                                                     ~ifa->ifa_mask;
 850                } else {
 851                        ifa->ifa_prefixlen = 32;
 852                        ifa->ifa_mask = inet_make_mask(32);
 853                }
 854                ret = inet_set_ifa(dev, ifa);
 855                break;
 856
 857        case SIOCSIFBRDADDR:    /* Set the broadcast address */
 858                ret = 0;
 859                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
 860                        inet_del_ifa(in_dev, ifap, 0);
 861                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
 862                        inet_insert_ifa(ifa);
 863                }
 864                break;
 865
 866        case SIOCSIFDSTADDR:    /* Set the destination address */
 867                ret = 0;
 868                if (ifa->ifa_address == sin->sin_addr.s_addr)
 869                        break;
 870                ret = -EINVAL;
 871                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
 872                        break;
 873                ret = 0;
 874                inet_del_ifa(in_dev, ifap, 0);
 875                ifa->ifa_address = sin->sin_addr.s_addr;
 876                inet_insert_ifa(ifa);
 877                break;
 878
 879        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
 880
 881                /*
 882                 *      The mask we set must be legal.
 883                 */
 884                ret = -EINVAL;
 885                if (bad_mask(sin->sin_addr.s_addr, 0))
 886                        break;
 887                ret = 0;
 888                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
 889                        __be32 old_mask = ifa->ifa_mask;
 890                        inet_del_ifa(in_dev, ifap, 0);
 891                        ifa->ifa_mask = sin->sin_addr.s_addr;
 892                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
 893
 894                        /* See if current broadcast address matches
 895                         * with current netmask, then recalculate
 896                         * the broadcast address. Otherwise it's a
 897                         * funny address, so don't touch it since
 898                         * the user seems to know what (s)he's doing...
 899                         */
 900                        if ((dev->flags & IFF_BROADCAST) &&
 901                            (ifa->ifa_prefixlen < 31) &&
 902                            (ifa->ifa_broadcast ==
 903                             (ifa->ifa_local|~old_mask))) {
 904                                ifa->ifa_broadcast = (ifa->ifa_local |
 905                                                      ~sin->sin_addr.s_addr);
 906                        }
 907                        inet_insert_ifa(ifa);
 908                }
 909                break;
 910        }
 911done:
 912        rtnl_unlock();
 913out:
 914        return ret;
 915rarok:
 916        rtnl_unlock();
 917        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
 918        goto out;
 919}
 920
 921static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
 922{
 923        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 924        struct in_ifaddr *ifa;
 925        struct ifreq ifr;
 926        int done = 0;
 927
 928        if (!in_dev)
 929                goto out;
 930
 931        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
 932                if (!buf) {
 933                        done += sizeof(ifr);
 934                        continue;
 935                }
 936                if (len < (int) sizeof(ifr))
 937                        break;
 938                memset(&ifr, 0, sizeof(struct ifreq));
 939                if (ifa->ifa_label)
 940                        strcpy(ifr.ifr_name, ifa->ifa_label);
 941                else
 942                        strcpy(ifr.ifr_name, dev->name);
 943
 944                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
 945                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
 946                                                                ifa->ifa_local;
 947
 948                if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
 949                        done = -EFAULT;
 950                        break;
 951                }
 952                buf  += sizeof(struct ifreq);
 953                len  -= sizeof(struct ifreq);
 954                done += sizeof(struct ifreq);
 955        }
 956out:
 957        return done;
 958}
 959
 960__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 961{
 962        __be32 addr = 0;
 963        struct in_device *in_dev;
 964        struct net *net = dev_net(dev);
 965
 966        rcu_read_lock();
 967        in_dev = __in_dev_get_rcu(dev);
 968        if (!in_dev)
 969                goto no_in_dev;
 970
 971        for_primary_ifa(in_dev) {
 972                if (ifa->ifa_scope > scope)
 973                        continue;
 974                if (!dst || inet_ifa_match(dst, ifa)) {
 975                        addr = ifa->ifa_local;
 976                        break;
 977                }
 978                if (!addr)
 979                        addr = ifa->ifa_local;
 980        } endfor_ifa(in_dev);
 981
 982        if (addr)
 983                goto out_unlock;
 984no_in_dev:
 985
 986        /* Not loopback addresses on loopback should be preferred
 987           in this case. It is importnat that lo is the first interface
 988           in dev_base list.
 989         */
 990        for_each_netdev_rcu(net, dev) {
 991                in_dev = __in_dev_get_rcu(dev);
 992                if (!in_dev)
 993                        continue;
 994
 995                for_primary_ifa(in_dev) {
 996                        if (ifa->ifa_scope != RT_SCOPE_LINK &&
 997                            ifa->ifa_scope <= scope) {
 998                                addr = ifa->ifa_local;
 999                                goto out_unlock;
1000                        }
1001                } endfor_ifa(in_dev);
1002        }
1003out_unlock:
1004        rcu_read_unlock();
1005        return addr;
1006}
1007EXPORT_SYMBOL(inet_select_addr);
1008
1009static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1010                              __be32 local, int scope)
1011{
1012        int same = 0;
1013        __be32 addr = 0;
1014
1015        for_ifa(in_dev) {
1016                if (!addr &&
1017                    (local == ifa->ifa_local || !local) &&
1018                    ifa->ifa_scope <= scope) {
1019                        addr = ifa->ifa_local;
1020                        if (same)
1021                                break;
1022                }
1023                if (!same) {
1024                        same = (!local || inet_ifa_match(local, ifa)) &&
1025                                (!dst || inet_ifa_match(dst, ifa));
1026                        if (same && addr) {
1027                                if (local || !dst)
1028                                        break;
1029                                /* Is the selected addr into dst subnet? */
1030                                if (inet_ifa_match(addr, ifa))
1031                                        break;
1032                                /* No, then can we use new local src? */
1033                                if (ifa->ifa_scope <= scope) {
1034                                        addr = ifa->ifa_local;
1035                                        break;
1036                                }
1037                                /* search for large dst subnet for addr */
1038                                same = 0;
1039                        }
1040                }
1041        } endfor_ifa(in_dev);
1042
1043        return same ? addr : 0;
1044}
1045
1046/*
1047 * Confirm that local IP address exists using wildcards:
1048 * - in_dev: only on this interface, 0=any interface
1049 * - dst: only in the same subnet as dst, 0=any dst
1050 * - local: address, 0=autoselect the local address
1051 * - scope: maximum allowed scope value for the local address
1052 */
1053__be32 inet_confirm_addr(struct in_device *in_dev,
1054                         __be32 dst, __be32 local, int scope)
1055{
1056        __be32 addr = 0;
1057        struct net_device *dev;
1058        struct net *net;
1059
1060        if (scope != RT_SCOPE_LINK)
1061                return confirm_addr_indev(in_dev, dst, local, scope);
1062
1063        net = dev_net(in_dev->dev);
1064        rcu_read_lock();
1065        for_each_netdev_rcu(net, dev) {
1066                in_dev = __in_dev_get_rcu(dev);
1067                if (in_dev) {
1068                        addr = confirm_addr_indev(in_dev, dst, local, scope);
1069                        if (addr)
1070                                break;
1071                }
1072        }
1073        rcu_read_unlock();
1074
1075        return addr;
1076}
1077EXPORT_SYMBOL(inet_confirm_addr);
1078
1079/*
1080 *      Device notifier
1081 */
1082
1083int register_inetaddr_notifier(struct notifier_block *nb)
1084{
1085        return blocking_notifier_chain_register(&inetaddr_chain, nb);
1086}
1087EXPORT_SYMBOL(register_inetaddr_notifier);
1088
1089int unregister_inetaddr_notifier(struct notifier_block *nb)
1090{
1091        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1092}
1093EXPORT_SYMBOL(unregister_inetaddr_notifier);
1094
1095/* Rename ifa_labels for a device name change. Make some effort to preserve
1096 * existing alias numbering and to create unique labels if possible.
1097*/
1098static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1099{
1100        struct in_ifaddr *ifa;
1101        int named = 0;
1102
1103        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1104                char old[IFNAMSIZ], *dot;
1105
1106                memcpy(old, ifa->ifa_label, IFNAMSIZ);
1107                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1108                if (named++ == 0)
1109                        goto skip;
1110                dot = strchr(old, ':');
1111                if (dot == NULL) {
1112                        sprintf(old, ":%d", named);
1113                        dot = old;
1114                }
1115                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1116                        strcat(ifa->ifa_label, dot);
1117                else
1118                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1119skip:
1120                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1121        }
1122}
1123
/* An MTU below 68 is treated as not enough to run IP on a device. */
static bool inetdev_valid_mtu(unsigned int mtu)
{
        if (mtu < 68)
                return false;

        return true;
}
1128
1129static void inetdev_send_gratuitous_arp(struct net_device *dev,
1130                                        struct in_device *in_dev)
1131
1132{
1133        struct in_ifaddr *ifa;
1134
1135        for (ifa = in_dev->ifa_list; ifa;
1136             ifa = ifa->ifa_next) {
1137                arp_send(ARPOP_REQUEST, ETH_P_ARP,
1138                         ifa->ifa_local, dev,
1139                         ifa->ifa_local, NULL,
1140                         dev->dev_addr, NULL);
1141        }
1142}
1143
1144/* Called only under RTNL semaphore */
1145
1146static int inetdev_event(struct notifier_block *this, unsigned long event,
1147                         void *ptr)
1148{
1149        struct net_device *dev = ptr;
1150        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1151
1152        ASSERT_RTNL();
1153
1154        if (!in_dev) {
1155                if (event == NETDEV_REGISTER) {
1156                        in_dev = inetdev_init(dev);
1157                        if (!in_dev)
1158                                return notifier_from_errno(-ENOMEM);
1159                        if (dev->flags & IFF_LOOPBACK) {
1160                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1161                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1162                        }
1163                } else if (event == NETDEV_CHANGEMTU) {
1164                        /* Re-enabling IP */
1165                        if (inetdev_valid_mtu(dev->mtu))
1166                                in_dev = inetdev_init(dev);
1167                }
1168                goto out;
1169        }
1170
1171        switch (event) {
1172        case NETDEV_REGISTER:
1173                pr_debug("%s: bug\n", __func__);
1174                RCU_INIT_POINTER(dev->ip_ptr, NULL);
1175                break;
1176        case NETDEV_UP:
1177                if (!inetdev_valid_mtu(dev->mtu))
1178                        break;
1179                if (dev->flags & IFF_LOOPBACK) {
1180                        struct in_ifaddr *ifa = inet_alloc_ifa();
1181
1182                        if (ifa) {
1183                                INIT_HLIST_NODE(&ifa->hash);
1184                                ifa->ifa_local =
1185                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1186                                ifa->ifa_prefixlen = 8;
1187                                ifa->ifa_mask = inet_make_mask(8);
1188                                in_dev_hold(in_dev);
1189                                ifa->ifa_dev = in_dev;
1190                                ifa->ifa_scope = RT_SCOPE_HOST;
1191                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1192                                inet_insert_ifa(ifa);
1193                        }
1194                }
1195                ip_mc_up(in_dev);
1196                /* fall through */
1197        case NETDEV_CHANGEADDR:
1198                if (!IN_DEV_ARP_NOTIFY(in_dev))
1199                        break;
1200                /* fall through */
1201        case NETDEV_NOTIFY_PEERS:
1202                /* Send gratuitous ARP to notify of link change */
1203                inetdev_send_gratuitous_arp(dev, in_dev);
1204                break;
1205        case NETDEV_DOWN:
1206                ip_mc_down(in_dev);
1207                break;
1208        case NETDEV_PRE_TYPE_CHANGE:
1209                ip_mc_unmap(in_dev);
1210                break;
1211        case NETDEV_POST_TYPE_CHANGE:
1212                ip_mc_remap(in_dev);
1213                break;
1214        case NETDEV_CHANGEMTU:
1215                if (inetdev_valid_mtu(dev->mtu))
1216                        break;
1217                /* disable IP when MTU is not enough */
1218        case NETDEV_UNREGISTER:
1219                inetdev_destroy(in_dev);
1220                break;
1221        case NETDEV_CHANGENAME:
1222                /* Do not notify about label change, this event is
1223                 * not interesting to applications using netlink.
1224                 */
1225                inetdev_changename(dev, in_dev);
1226
1227                devinet_sysctl_unregister(in_dev);
1228                devinet_sysctl_register(in_dev);
1229                break;
1230        }
1231out:
1232        return NOTIFY_DONE;
1233}
1234
/* Notifier block whose callback is inetdev_event(). */
static struct notifier_block ip_netdev_notifier = {
        .notifier_call = inetdev_event,
};
1238
1239static size_t inet_nlmsg_size(void)
1240{
1241        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1242               + nla_total_size(4) /* IFA_ADDRESS */
1243               + nla_total_size(4) /* IFA_LOCAL */
1244               + nla_total_size(4) /* IFA_BROADCAST */
1245               + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1246}
1247
1248static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1249                            u32 portid, u32 seq, int event, unsigned int flags)
1250{
1251        struct ifaddrmsg *ifm;
1252        struct nlmsghdr  *nlh;
1253
1254        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
1255        if (nlh == NULL)
1256                return -EMSGSIZE;
1257
1258        ifm = nlmsg_data(nlh);
1259        ifm->ifa_family = AF_INET;
1260        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1261        ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1262        ifm->ifa_scope = ifa->ifa_scope;
1263        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1264
1265        if ((ifa->ifa_address &&
1266             nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1267            (ifa->ifa_local &&
1268             nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1269            (ifa->ifa_broadcast &&
1270             nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1271            (ifa->ifa_label[0] &&
1272             nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1273                goto nla_put_failure;
1274
1275        return nlmsg_end(skb, nlh);
1276
1277nla_put_failure:
1278        nlmsg_cancel(skb, nlh);
1279        return -EMSGSIZE;
1280}
1281
1282static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1283{
1284        struct net *net = sock_net(skb->sk);
1285        int h, s_h;
1286        int idx, s_idx;
1287        int ip_idx, s_ip_idx;
1288        struct net_device *dev;
1289        struct in_device *in_dev;
1290        struct in_ifaddr *ifa;
1291        struct hlist_head *head;
1292        struct hlist_node *node;
1293
1294        s_h = cb->args[0];
1295        s_idx = idx = cb->args[1];
1296        s_ip_idx = ip_idx = cb->args[2];
1297
1298        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1299                idx = 0;
1300                head = &net->dev_index_head[h];
1301                rcu_read_lock();
1302                hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1303                        if (idx < s_idx)
1304                                goto cont;
1305                        if (h > s_h || idx > s_idx)
1306                                s_ip_idx = 0;
1307                        in_dev = __in_dev_get_rcu(dev);
1308                        if (!in_dev)
1309                                goto cont;
1310
1311                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1312                             ifa = ifa->ifa_next, ip_idx++) {
1313                                if (ip_idx < s_ip_idx)
1314                                        continue;
1315                                if (inet_fill_ifaddr(skb, ifa,
1316                                             NETLINK_CB(cb->skb).portid,
1317                                             cb->nlh->nlmsg_seq,
1318                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1319                                        rcu_read_unlock();
1320                                        goto done;
1321                                }
1322                        }
1323cont:
1324                        idx++;
1325                }
1326                rcu_read_unlock();
1327        }
1328
1329done:
1330        cb->args[0] = h;
1331        cb->args[1] = idx;
1332        cb->args[2] = ip_idx;
1333
1334        return skb->len;
1335}
1336
1337static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1338                      u32 portid)
1339{
1340        struct sk_buff *skb;
1341        u32 seq = nlh ? nlh->nlmsg_seq : 0;
1342        int err = -ENOBUFS;
1343        struct net *net;
1344
1345        net = dev_net(ifa->ifa_dev->dev);
1346        skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1347        if (skb == NULL)
1348                goto errout;
1349
1350        err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0);
1351        if (err < 0) {
1352                /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1353                WARN_ON(err == -EMSGSIZE);
1354                kfree_skb(skb);
1355                goto errout;
1356        }
1357        rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1358        return;
1359errout:
1360        if (err < 0)
1361                rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1362}
1363
1364static size_t inet_get_link_af_size(const struct net_device *dev)
1365{
1366        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1367
1368        if (!in_dev)
1369                return 0;
1370
1371        return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1372}
1373
1374static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1375{
1376        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1377        struct nlattr *nla;
1378        int i;
1379
1380        if (!in_dev)
1381                return -ENODATA;
1382
1383        nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1384        if (nla == NULL)
1385                return -EMSGSIZE;
1386
1387        for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1388                ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1389
1390        return 0;
1391}
1392
/*
 * Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF
 * must be a nested attribute.
 */
static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
};
1396
1397static int inet_validate_link_af(const struct net_device *dev,
1398                                 const struct nlattr *nla)
1399{
1400        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1401        int err, rem;
1402
1403        if (dev && !__in_dev_get_rtnl(dev))
1404                return -EAFNOSUPPORT;
1405
1406        err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1407        if (err < 0)
1408                return err;
1409
1410        if (tb[IFLA_INET_CONF]) {
1411                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1412                        int cfgid = nla_type(a);
1413
1414                        if (nla_len(a) < 4)
1415                                return -EINVAL;
1416
1417                        if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1418                                return -EINVAL;
1419                }
1420        }
1421
1422        return 0;
1423}
1424
1425static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1426{
1427        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1428        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1429        int rem;
1430
1431        if (!in_dev)
1432                return -EAFNOSUPPORT;
1433
1434        if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1435                BUG();
1436
1437        if (tb[IFLA_INET_CONF]) {
1438                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1439                        ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1440        }
1441
1442        return 0;
1443}
1444
1445#ifdef CONFIG_SYSCTL
1446
1447static void devinet_copy_dflt_conf(struct net *net, int i)
1448{
1449        struct net_device *dev;
1450
1451        rcu_read_lock();
1452        for_each_netdev_rcu(net, dev) {
1453                struct in_device *in_dev;
1454
1455                in_dev = __in_dev_get_rcu(dev);
1456                if (in_dev && !test_bit(i, in_dev->cnf.state))
1457                        in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1458        }
1459        rcu_read_unlock();
1460}
1461
1462/* called with RTNL locked */
1463static void inet_forward_change(struct net *net)
1464{
1465        struct net_device *dev;
1466        int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1467
1468        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1469        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1470
1471        for_each_netdev(net, dev) {
1472                struct in_device *in_dev;
1473                if (on)
1474                        dev_disable_lro(dev);
1475                rcu_read_lock();
1476                in_dev = __in_dev_get_rcu(dev);
1477                if (in_dev)
1478                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1479                rcu_read_unlock();
1480        }
1481}
1482
1483static int devinet_conf_proc(ctl_table *ctl, int write,
1484                             void __user *buffer,
1485                             size_t *lenp, loff_t *ppos)
1486{
1487        int old_value = *(int *)ctl->data;
1488        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1489        int new_value = *(int *)ctl->data;
1490
1491        if (write) {
1492                struct ipv4_devconf *cnf = ctl->extra1;
1493                struct net *net = ctl->extra2;
1494                int i = (int *)ctl->data - cnf->data;
1495
1496                set_bit(i, cnf->state);
1497
1498                if (cnf == net->ipv4.devconf_dflt)
1499                        devinet_copy_dflt_conf(net, i);
1500                if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1501                    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1502                        if ((new_value == 0) && (old_value != 0))
1503                                rt_cache_flush(net);
1504        }
1505
1506        return ret;
1507}
1508
1509static int devinet_sysctl_forward(ctl_table *ctl, int write,
1510                                  void __user *buffer,
1511                                  size_t *lenp, loff_t *ppos)
1512{
1513        int *valp = ctl->data;
1514        int val = *valp;
1515        loff_t pos = *ppos;
1516        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1517
1518        if (write && *valp != val) {
1519                struct net *net = ctl->extra2;
1520
1521                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1522                        if (!rtnl_trylock()) {
1523                                /* Restore the original values before restarting */
1524                                *valp = val;
1525                                *ppos = pos;
1526                                return restart_syscall();
1527                        }
1528                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1529                                inet_forward_change(net);
1530                        } else if (*valp) {
1531                                struct ipv4_devconf *cnf = ctl->extra1;
1532                                struct in_device *idev =
1533                                        container_of(cnf, struct in_device, cnf);
1534                                dev_disable_lro(idev->dev);
1535                        }
1536                        rtnl_unlock();
1537                        rt_cache_flush(net);
1538                }
1539        }
1540
1541        return ret;
1542}
1543
1544static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1545                                void __user *buffer,
1546                                size_t *lenp, loff_t *ppos)
1547{
1548        int *valp = ctl->data;
1549        int val = *valp;
1550        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1551        struct net *net = ctl->extra2;
1552
1553        if (write && *valp != val)
1554                rt_cache_flush(net);
1555
1556        return ret;
1557}
1558
/*
 * Initializer for one ctl_table entry of the devinet sysctl template.
 *  attr: devconf attribute; .data points at slot IPV4_DEVCONF_<attr> - 1
 *        of the global ipv4_devconf
 *  name: procname of the entry
 *  mval: file mode
 *  proc: proc_handler for the entry
 * __devinet_sysctl_register() rebases .data and .extra1 onto the devconf
 * being registered.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + \
                                  IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Mode 0644 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Mode 0444 entry handled by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Mode 0644 entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Mode 0644 entry handled by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1581
/*
 * Template for the per-devconf sysctl table.  __devinet_sysctl_register()
 * duplicates it with kmemdup() and points the copy's entries at the
 * devconf being registered; sysctl_header then holds the handle returned
 * by register_net_sysctl().
 */
static struct devinet_sysctl_table {
        struct ctl_table_header *sysctl_header;
        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
} devinet_sysctl = {
        .devinet_vars = {
                /* forwarding has its own handler */
                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
                                             devinet_sysctl_forward),
                /* the only read-only (0444) entry */
                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),

                /* plain read-write entries handled by devinet_conf_proc() */
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
                                        "accept_source_route"),
                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),

                /* entries whose handler is ipv4_doint_and_flush() */
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
                                              "force_igmp_version"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
                                              "promote_secondaries"),
                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
                                              "route_localnet"),
        },
};
1622
1623static int __devinet_sysctl_register(struct net *net, char *dev_name,
1624                                        struct ipv4_devconf *p)
1625{
1626        int i;
1627        struct devinet_sysctl_table *t;
1628        char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1629
1630        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1631        if (!t)
1632                goto out;
1633
1634        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1635                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1636                t->devinet_vars[i].extra1 = p;
1637                t->devinet_vars[i].extra2 = net;
1638        }
1639
1640        snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1641
1642        t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1643        if (!t->sysctl_header)
1644                goto free;
1645
1646        p->sysctl = t;
1647        return 0;
1648
1649free:
1650        kfree(t);
1651out:
1652        return -ENOBUFS;
1653}
1654
1655static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1656{
1657        struct devinet_sysctl_table *t = cnf->sysctl;
1658
1659        if (t == NULL)
1660                return;
1661
1662        cnf->sysctl = NULL;
1663        unregister_net_sysctl_table(t->sysctl_header);
1664        kfree(t);
1665}
1666
1667static void devinet_sysctl_register(struct in_device *idev)
1668{
1669        neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1670        __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1671                                        &idev->cnf);
1672}
1673
1674static void devinet_sysctl_unregister(struct in_device *idev)
1675{
1676        __devinet_sysctl_unregister(&idev->cnf);
1677        neigh_sysctl_unregister(idev->arp_parms);
1678}
1679
/*
 * Table for the "ip_forward" sysctl, handled by devinet_sysctl_forward().
 * It points at the FORWARDING slot of the global ipv4_devconf and at
 * init_net; the array ends with an empty entry.
 */
static struct ctl_table ctl_forward_entry[] = {
        {
                .procname       = "ip_forward",
                .data           = &ipv4_devconf.data[
                                        IPV4_DEVCONF_FORWARDING - 1],
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = devinet_sysctl_forward,
                .extra1         = &ipv4_devconf,
                .extra2         = &init_net,
        },
        { },
};
1693#endif
1694
1695static __net_init int devinet_init_net(struct net *net)
1696{
1697        int err;
1698        struct ipv4_devconf *all, *dflt;
1699#ifdef CONFIG_SYSCTL
1700        struct ctl_table *tbl = ctl_forward_entry;
1701        struct ctl_table_header *forw_hdr;
1702#endif
1703
1704        err = -ENOMEM;
1705        all = &ipv4_devconf;
1706        dflt = &ipv4_devconf_dflt;
1707
1708        if (!net_eq(net, &init_net)) {
1709                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1710                if (all == NULL)
1711                        goto err_alloc_all;
1712
1713                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1714                if (dflt == NULL)
1715                        goto err_alloc_dflt;
1716
1717#ifdef CONFIG_SYSCTL
1718                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1719                if (tbl == NULL)
1720                        goto err_alloc_ctl;
1721
1722                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1723                tbl[0].extra1 = all;
1724                tbl[0].extra2 = net;
1725#endif
1726        }
1727
1728#ifdef CONFIG_SYSCTL
1729        err = __devinet_sysctl_register(net, "all", all);
1730        if (err < 0)
1731                goto err_reg_all;
1732
1733        err = __devinet_sysctl_register(net, "default", dflt);
1734        if (err < 0)
1735                goto err_reg_dflt;
1736
1737        err = -ENOMEM;
1738        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1739        if (forw_hdr == NULL)
1740                goto err_reg_ctl;
1741        net->ipv4.forw_hdr = forw_hdr;
1742#endif
1743
1744        net->ipv4.devconf_all = all;
1745        net->ipv4.devconf_dflt = dflt;
1746        return 0;
1747
1748#ifdef CONFIG_SYSCTL
1749err_reg_ctl:
1750        __devinet_sysctl_unregister(dflt);
1751err_reg_dflt:
1752        __devinet_sysctl_unregister(all);
1753err_reg_all:
1754        if (tbl != ctl_forward_entry)
1755                kfree(tbl);
1756err_alloc_ctl:
1757#endif
1758        if (dflt != &ipv4_devconf_dflt)
1759                kfree(dflt);
1760err_alloc_dflt:
1761        if (all != &ipv4_devconf)
1762                kfree(all);
1763err_alloc_all:
1764        return err;
1765}
1766
1767static __net_exit void devinet_exit_net(struct net *net)
1768{
1769#ifdef CONFIG_SYSCTL
1770        struct ctl_table *tbl;
1771
1772        tbl = net->ipv4.forw_hdr->ctl_table_arg;
1773        unregister_net_sysctl_table(net->ipv4.forw_hdr);
1774        __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1775        __devinet_sysctl_unregister(net->ipv4.devconf_all);
1776        kfree(tbl);
1777#endif
1778        kfree(net->ipv4.devconf_dflt);
1779        kfree(net->ipv4.devconf_all);
1780}
1781
1782static __net_initdata struct pernet_operations devinet_ops = {
1783        .init = devinet_init_net,
1784        .exit = devinet_exit_net,
1785};
1786
1787static struct rtnl_af_ops inet_af_ops = {
1788        .family           = AF_INET,
1789        .fill_link_af     = inet_fill_link_af,
1790        .get_link_af_size = inet_get_link_af_size,
1791        .validate_link_af = inet_validate_link_af,
1792        .set_link_af      = inet_set_link_af,
1793};
1794
1795void __init devinet_init(void)
1796{
1797        int i;
1798
1799        for (i = 0; i < IN4_ADDR_HSIZE; i++)
1800                INIT_HLIST_HEAD(&inet_addr_lst[i]);
1801
1802        register_pernet_subsys(&devinet_ops);
1803
1804        register_gifconf(PF_INET, inet_gifconf);
1805        register_netdevice_notifier(&ip_netdev_notifier);
1806
1807        rtnl_af_register(&inet_af_ops);
1808
1809        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1810        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1811        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1812}
1813
1814
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.