linux/net/ipv4/devinet.c
<<
>>
Prefs
   1/*
   2 *      NET3    IP device support routines.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 *      Derived from the IP parts of dev.c 1.0.19
  10 *              Authors:        Ross Biro
  11 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13 *
  14 *      Additional Authors:
  15 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  16 *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  17 *
  18 *      Changes:
  19 *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
  20 *                                      lists.
  21 *              Cyrus Durgin:           updated for kmod
  22 *              Matthias Andree:        in devinet_ioctl, compare label and
  23 *                                      address (4.4BSD alias style support),
  24 *                                      fall back to comparing just the label
  25 *                                      if no match found.
  26 */
  27
  28
  29#include <asm/uaccess.h>
  30#include <linux/bitops.h>
  31#include <linux/capability.h>
  32#include <linux/module.h>
  33#include <linux/types.h>
  34#include <linux/kernel.h>
  35#include <linux/string.h>
  36#include <linux/mm.h>
  37#include <linux/socket.h>
  38#include <linux/sockios.h>
  39#include <linux/in.h>
  40#include <linux/errno.h>
  41#include <linux/interrupt.h>
  42#include <linux/if_addr.h>
  43#include <linux/if_ether.h>
  44#include <linux/inet.h>
  45#include <linux/netdevice.h>
  46#include <linux/etherdevice.h>
  47#include <linux/skbuff.h>
  48#include <linux/init.h>
  49#include <linux/notifier.h>
  50#include <linux/inetdevice.h>
  51#include <linux/igmp.h>
  52#include <linux/slab.h>
  53#include <linux/hash.h>
  54#ifdef CONFIG_SYSCTL
  55#include <linux/sysctl.h>
  56#endif
  57#include <linux/kmod.h>
  58
  59#include <net/arp.h>
  60#include <net/ip.h>
  61#include <net/route.h>
  62#include <net/ip_fib.h>
  63#include <net/rtnetlink.h>
  64#include <net/net_namespace.h>
  65
  66#include "fib_lookup.h"
  67
  68static struct ipv4_devconf ipv4_devconf = {
  69        .data = {
  70                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  71                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  72                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  73                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  74        },
  75};
  76
  77static struct ipv4_devconf ipv4_devconf_dflt = {
  78        .data = {
  79                [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
  80                [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
  81                [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
  82                [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
  83                [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
  84        },
  85};
  86
  87#define IPV4_DEVCONF_DFLT(net, attr) \
  88        IPV4_DEVCONF((*net->ipv4.devconf_dflt), attr)
  89
  90static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
  91        [IFA_LOCAL]             = { .type = NLA_U32 },
  92        [IFA_ADDRESS]           = { .type = NLA_U32 },
  93        [IFA_BROADCAST]         = { .type = NLA_U32 },
  94        [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
  95};
  96
  97/* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
  98 * value.  So if you change this define, make appropriate changes to
  99 * inet_addr_hash as well.
 100 */
 101#define IN4_ADDR_HSIZE  256
 102static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
 103static DEFINE_SPINLOCK(inet_addr_hash_lock);
 104
 105static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
 106{
 107        u32 val = (__force u32) addr ^ hash_ptr(net, 8);
 108
 109        return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
 110                (IN4_ADDR_HSIZE - 1));
 111}
 112
 113static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
 114{
 115        unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
 116
 117        spin_lock(&inet_addr_hash_lock);
 118        hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
 119        spin_unlock(&inet_addr_hash_lock);
 120}
 121
 122static void inet_hash_remove(struct in_ifaddr *ifa)
 123{
 124        spin_lock(&inet_addr_hash_lock);
 125        hlist_del_init_rcu(&ifa->hash);
 126        spin_unlock(&inet_addr_hash_lock);
 127}
 128
 129/**
 130 * __ip_dev_find - find the first device with a given source address.
 131 * @net: the net namespace
 132 * @addr: the source address
 133 * @devref: if true, take a reference on the found device
 134 *
 135 * If a caller uses devref=false, it should be protected by RCU, or RTNL
 136 */
 137struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
 138{
 139        unsigned int hash = inet_addr_hash(net, addr);
 140        struct net_device *result = NULL;
 141        struct in_ifaddr *ifa;
 142        struct hlist_node *node;
 143
 144        rcu_read_lock();
 145        hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
 146                struct net_device *dev = ifa->ifa_dev->dev;
 147
 148                if (!net_eq(dev_net(dev), net))
 149                        continue;
 150                if (ifa->ifa_local == addr) {
 151                        result = dev;
 152                        break;
 153                }
 154        }
 155        if (!result) {
 156                struct flowi4 fl4 = { .daddr = addr };
 157                struct fib_result res = { 0 };
 158                struct fib_table *local;
 159
 160                /* Fallback to FIB local table so that communication
 161                 * over loopback subnets work.
 162                 */
 163                local = fib_get_table(net, RT_TABLE_LOCAL);
 164                if (local &&
 165                    !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
 166                    res.type == RTN_LOCAL)
 167                        result = FIB_RES_DEV(res);
 168        }
 169        if (result && devref)
 170                dev_hold(result);
 171        rcu_read_unlock();
 172        return result;
 173}
 174EXPORT_SYMBOL(__ip_dev_find);
 175
 176static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
 177
 178static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
 179static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 180                         int destroy);
 181#ifdef CONFIG_SYSCTL
 182static void devinet_sysctl_register(struct in_device *idev);
 183static void devinet_sysctl_unregister(struct in_device *idev);
 184#else
 185static inline void devinet_sysctl_register(struct in_device *idev)
 186{
 187}
 188static inline void devinet_sysctl_unregister(struct in_device *idev)
 189{
 190}
 191#endif
 192
 193/* Locks all the inet devices. */
 194
 195static struct in_ifaddr *inet_alloc_ifa(void)
 196{
 197        return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
 198}
 199
 200static void inet_rcu_free_ifa(struct rcu_head *head)
 201{
 202        struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
 203        if (ifa->ifa_dev)
 204                in_dev_put(ifa->ifa_dev);
 205        kfree(ifa);
 206}
 207
 208static inline void inet_free_ifa(struct in_ifaddr *ifa)
 209{
 210        call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
 211}
 212
 213void in_dev_finish_destroy(struct in_device *idev)
 214{
 215        struct net_device *dev = idev->dev;
 216
 217        WARN_ON(idev->ifa_list);
 218        WARN_ON(idev->mc_list);
 219#ifdef NET_REFCNT_DEBUG
 220        pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL");
 221#endif
 222        dev_put(dev);
 223        if (!idev->dead)
 224                pr_err("Freeing alive in_device %p\n", idev);
 225        else
 226                kfree(idev);
 227}
 228EXPORT_SYMBOL(in_dev_finish_destroy);
 229
 230static struct in_device *inetdev_init(struct net_device *dev)
 231{
 232        struct in_device *in_dev;
 233
 234        ASSERT_RTNL();
 235
 236        in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
 237        if (!in_dev)
 238                goto out;
 239        memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
 240                        sizeof(in_dev->cnf));
 241        in_dev->cnf.sysctl = NULL;
 242        in_dev->dev = dev;
 243        in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
 244        if (!in_dev->arp_parms)
 245                goto out_kfree;
 246        if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
 247                dev_disable_lro(dev);
 248        /* Reference in_dev->dev */
 249        dev_hold(dev);
 250        /* Account for reference dev->ip_ptr (below) */
 251        in_dev_hold(in_dev);
 252
 253        devinet_sysctl_register(in_dev);
 254        ip_mc_init_dev(in_dev);
 255        if (dev->flags & IFF_UP)
 256                ip_mc_up(in_dev);
 257
 258        /* we can receive as soon as ip_ptr is set -- do this last */
 259        rcu_assign_pointer(dev->ip_ptr, in_dev);
 260out:
 261        return in_dev;
 262out_kfree:
 263        kfree(in_dev);
 264        in_dev = NULL;
 265        goto out;
 266}
 267
 268static void in_dev_rcu_put(struct rcu_head *head)
 269{
 270        struct in_device *idev = container_of(head, struct in_device, rcu_head);
 271        in_dev_put(idev);
 272}
 273
 274static void inetdev_destroy(struct in_device *in_dev)
 275{
 276        struct in_ifaddr *ifa;
 277        struct net_device *dev;
 278
 279        ASSERT_RTNL();
 280
 281        dev = in_dev->dev;
 282
 283        in_dev->dead = 1;
 284
 285        ip_mc_destroy_dev(in_dev);
 286
 287        while ((ifa = in_dev->ifa_list) != NULL) {
 288                inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
 289                inet_free_ifa(ifa);
 290        }
 291
 292        RCU_INIT_POINTER(dev->ip_ptr, NULL);
 293
 294        devinet_sysctl_unregister(in_dev);
 295        neigh_parms_release(&arp_tbl, in_dev->arp_parms);
 296        arp_ifdown(dev);
 297
 298        call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
 299}
 300
 301int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
 302{
 303        rcu_read_lock();
 304        for_primary_ifa(in_dev) {
 305                if (inet_ifa_match(a, ifa)) {
 306                        if (!b || inet_ifa_match(b, ifa)) {
 307                                rcu_read_unlock();
 308                                return 1;
 309                        }
 310                }
 311        } endfor_ifa(in_dev);
 312        rcu_read_unlock();
 313        return 0;
 314}
 315
 316static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 317                         int destroy, struct nlmsghdr *nlh, u32 pid)
 318{
 319        struct in_ifaddr *promote = NULL;
 320        struct in_ifaddr *ifa, *ifa1 = *ifap;
 321        struct in_ifaddr *last_prim = in_dev->ifa_list;
 322        struct in_ifaddr *prev_prom = NULL;
 323        int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
 324
 325        ASSERT_RTNL();
 326
 327        /* 1. Deleting primary ifaddr forces deletion all secondaries
 328         * unless alias promotion is set
 329         **/
 330
 331        if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
 332                struct in_ifaddr **ifap1 = &ifa1->ifa_next;
 333
 334                while ((ifa = *ifap1) != NULL) {
 335                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
 336                            ifa1->ifa_scope <= ifa->ifa_scope)
 337                                last_prim = ifa;
 338
 339                        if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
 340                            ifa1->ifa_mask != ifa->ifa_mask ||
 341                            !inet_ifa_match(ifa1->ifa_address, ifa)) {
 342                                ifap1 = &ifa->ifa_next;
 343                                prev_prom = ifa;
 344                                continue;
 345                        }
 346
 347                        if (!do_promote) {
 348                                inet_hash_remove(ifa);
 349                                *ifap1 = ifa->ifa_next;
 350
 351                                rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
 352                                blocking_notifier_call_chain(&inetaddr_chain,
 353                                                NETDEV_DOWN, ifa);
 354                                inet_free_ifa(ifa);
 355                        } else {
 356                                promote = ifa;
 357                                break;
 358                        }
 359                }
 360        }
 361
 362        /* On promotion all secondaries from subnet are changing
 363         * the primary IP, we must remove all their routes silently
 364         * and later to add them back with new prefsrc. Do this
 365         * while all addresses are on the device list.
 366         */
 367        for (ifa = promote; ifa; ifa = ifa->ifa_next) {
 368                if (ifa1->ifa_mask == ifa->ifa_mask &&
 369                    inet_ifa_match(ifa1->ifa_address, ifa))
 370                        fib_del_ifaddr(ifa, ifa1);
 371        }
 372
 373        /* 2. Unlink it */
 374
 375        *ifap = ifa1->ifa_next;
 376        inet_hash_remove(ifa1);
 377
 378        /* 3. Announce address deletion */
 379
 380        /* Send message first, then call notifier.
 381           At first sight, FIB update triggered by notifier
 382           will refer to already deleted ifaddr, that could confuse
 383           netlink listeners. It is not true: look, gated sees
 384           that route deleted and if it still thinks that ifaddr
 385           is valid, it will try to restore deleted routes... Grr.
 386           So that, this order is correct.
 387         */
 388        rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
 389        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
 390
 391        if (promote) {
 392                struct in_ifaddr *next_sec = promote->ifa_next;
 393
 394                if (prev_prom) {
 395                        prev_prom->ifa_next = promote->ifa_next;
 396                        promote->ifa_next = last_prim->ifa_next;
 397                        last_prim->ifa_next = promote;
 398                }
 399
 400                promote->ifa_flags &= ~IFA_F_SECONDARY;
 401                rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
 402                blocking_notifier_call_chain(&inetaddr_chain,
 403                                NETDEV_UP, promote);
 404                for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
 405                        if (ifa1->ifa_mask != ifa->ifa_mask ||
 406                            !inet_ifa_match(ifa1->ifa_address, ifa))
 407                                        continue;
 408                        fib_add_ifaddr(ifa);
 409                }
 410
 411        }
 412        if (destroy)
 413                inet_free_ifa(ifa1);
 414}
 415
 416static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
 417                         int destroy)
 418{
 419        __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
 420}
 421
 422static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
 423                             u32 pid)
 424{
 425        struct in_device *in_dev = ifa->ifa_dev;
 426        struct in_ifaddr *ifa1, **ifap, **last_primary;
 427
 428        ASSERT_RTNL();
 429
 430        if (!ifa->ifa_local) {
 431                inet_free_ifa(ifa);
 432                return 0;
 433        }
 434
 435        ifa->ifa_flags &= ~IFA_F_SECONDARY;
 436        last_primary = &in_dev->ifa_list;
 437
 438        for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
 439             ifap = &ifa1->ifa_next) {
 440                if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
 441                    ifa->ifa_scope <= ifa1->ifa_scope)
 442                        last_primary = &ifa1->ifa_next;
 443                if (ifa1->ifa_mask == ifa->ifa_mask &&
 444                    inet_ifa_match(ifa1->ifa_address, ifa)) {
 445                        if (ifa1->ifa_local == ifa->ifa_local) {
 446                                inet_free_ifa(ifa);
 447                                return -EEXIST;
 448                        }
 449                        if (ifa1->ifa_scope != ifa->ifa_scope) {
 450                                inet_free_ifa(ifa);
 451                                return -EINVAL;
 452                        }
 453                        ifa->ifa_flags |= IFA_F_SECONDARY;
 454                }
 455        }
 456
 457        if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
 458                net_srandom(ifa->ifa_local);
 459                ifap = last_primary;
 460        }
 461
 462        ifa->ifa_next = *ifap;
 463        *ifap = ifa;
 464
 465        inet_hash_insert(dev_net(in_dev->dev), ifa);
 466
 467        /* Send message first, then call notifier.
 468           Notifier will trigger FIB update, so that
 469           listeners of netlink will know about new ifaddr */
 470        rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
 471        blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
 472
 473        return 0;
 474}
 475
 476static int inet_insert_ifa(struct in_ifaddr *ifa)
 477{
 478        return __inet_insert_ifa(ifa, NULL, 0);
 479}
 480
 481static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
 482{
 483        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 484
 485        ASSERT_RTNL();
 486
 487        if (!in_dev) {
 488                inet_free_ifa(ifa);
 489                return -ENOBUFS;
 490        }
 491        ipv4_devconf_setall(in_dev);
 492        if (ifa->ifa_dev != in_dev) {
 493                WARN_ON(ifa->ifa_dev);
 494                in_dev_hold(in_dev);
 495                ifa->ifa_dev = in_dev;
 496        }
 497        if (ipv4_is_loopback(ifa->ifa_local))
 498                ifa->ifa_scope = RT_SCOPE_HOST;
 499        return inet_insert_ifa(ifa);
 500}
 501
 502/* Caller must hold RCU or RTNL :
 503 * We dont take a reference on found in_device
 504 */
 505struct in_device *inetdev_by_index(struct net *net, int ifindex)
 506{
 507        struct net_device *dev;
 508        struct in_device *in_dev = NULL;
 509
 510        rcu_read_lock();
 511        dev = dev_get_by_index_rcu(net, ifindex);
 512        if (dev)
 513                in_dev = rcu_dereference_rtnl(dev->ip_ptr);
 514        rcu_read_unlock();
 515        return in_dev;
 516}
 517EXPORT_SYMBOL(inetdev_by_index);
 518
 519/* Called only from RTNL semaphored context. No locks. */
 520
 521struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 522                                    __be32 mask)
 523{
 524        ASSERT_RTNL();
 525
 526        for_primary_ifa(in_dev) {
 527                if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
 528                        return ifa;
 529        } endfor_ifa(in_dev);
 530        return NULL;
 531}
 532
 533static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 534{
 535        struct net *net = sock_net(skb->sk);
 536        struct nlattr *tb[IFA_MAX+1];
 537        struct in_device *in_dev;
 538        struct ifaddrmsg *ifm;
 539        struct in_ifaddr *ifa, **ifap;
 540        int err = -EINVAL;
 541
 542        ASSERT_RTNL();
 543
 544        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 545        if (err < 0)
 546                goto errout;
 547
 548        ifm = nlmsg_data(nlh);
 549        in_dev = inetdev_by_index(net, ifm->ifa_index);
 550        if (in_dev == NULL) {
 551                err = -ENODEV;
 552                goto errout;
 553        }
 554
 555        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 556             ifap = &ifa->ifa_next) {
 557                if (tb[IFA_LOCAL] &&
 558                    ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
 559                        continue;
 560
 561                if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
 562                        continue;
 563
 564                if (tb[IFA_ADDRESS] &&
 565                    (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
 566                    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
 567                        continue;
 568
 569                __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
 570                return 0;
 571        }
 572
 573        err = -EADDRNOTAVAIL;
 574errout:
 575        return err;
 576}
 577
 578static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
 579{
 580        struct nlattr *tb[IFA_MAX+1];
 581        struct in_ifaddr *ifa;
 582        struct ifaddrmsg *ifm;
 583        struct net_device *dev;
 584        struct in_device *in_dev;
 585        int err;
 586
 587        err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
 588        if (err < 0)
 589                goto errout;
 590
 591        ifm = nlmsg_data(nlh);
 592        err = -EINVAL;
 593        if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
 594                goto errout;
 595
 596        dev = __dev_get_by_index(net, ifm->ifa_index);
 597        err = -ENODEV;
 598        if (dev == NULL)
 599                goto errout;
 600
 601        in_dev = __in_dev_get_rtnl(dev);
 602        err = -ENOBUFS;
 603        if (in_dev == NULL)
 604                goto errout;
 605
 606        ifa = inet_alloc_ifa();
 607        if (ifa == NULL)
 608                /*
 609                 * A potential indev allocation can be left alive, it stays
 610                 * assigned to its device and is destroy with it.
 611                 */
 612                goto errout;
 613
 614        ipv4_devconf_setall(in_dev);
 615        in_dev_hold(in_dev);
 616
 617        if (tb[IFA_ADDRESS] == NULL)
 618                tb[IFA_ADDRESS] = tb[IFA_LOCAL];
 619
 620        INIT_HLIST_NODE(&ifa->hash);
 621        ifa->ifa_prefixlen = ifm->ifa_prefixlen;
 622        ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
 623        ifa->ifa_flags = ifm->ifa_flags;
 624        ifa->ifa_scope = ifm->ifa_scope;
 625        ifa->ifa_dev = in_dev;
 626
 627        ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
 628        ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
 629
 630        if (tb[IFA_BROADCAST])
 631                ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
 632
 633        if (tb[IFA_LABEL])
 634                nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
 635        else
 636                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 637
 638        return ifa;
 639
 640errout:
 641        return ERR_PTR(err);
 642}
 643
 644static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 645{
 646        struct net *net = sock_net(skb->sk);
 647        struct in_ifaddr *ifa;
 648
 649        ASSERT_RTNL();
 650
 651        ifa = rtm_to_ifaddr(net, nlh);
 652        if (IS_ERR(ifa))
 653                return PTR_ERR(ifa);
 654
 655        return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
 656}
 657
 658/*
 659 *      Determine a default network mask, based on the IP address.
 660 */
 661
 662static inline int inet_abc_len(__be32 addr)
 663{
 664        int rc = -1;    /* Something else, probably a multicast. */
 665
 666        if (ipv4_is_zeronet(addr))
 667                rc = 0;
 668        else {
 669                __u32 haddr = ntohl(addr);
 670
 671                if (IN_CLASSA(haddr))
 672                        rc = 8;
 673                else if (IN_CLASSB(haddr))
 674                        rc = 16;
 675                else if (IN_CLASSC(haddr))
 676                        rc = 24;
 677        }
 678
 679        return rc;
 680}
 681
 682
 683int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
 684{
 685        struct ifreq ifr;
 686        struct sockaddr_in sin_orig;
 687        struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
 688        struct in_device *in_dev;
 689        struct in_ifaddr **ifap = NULL;
 690        struct in_ifaddr *ifa = NULL;
 691        struct net_device *dev;
 692        char *colon;
 693        int ret = -EFAULT;
 694        int tryaddrmatch = 0;
 695
 696        /*
 697         *      Fetch the caller's info block into kernel space
 698         */
 699
 700        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
 701                goto out;
 702        ifr.ifr_name[IFNAMSIZ - 1] = 0;
 703
 704        /* save original address for comparison */
 705        memcpy(&sin_orig, sin, sizeof(*sin));
 706
 707        colon = strchr(ifr.ifr_name, ':');
 708        if (colon)
 709                *colon = 0;
 710
 711        dev_load(net, ifr.ifr_name);
 712
 713        switch (cmd) {
 714        case SIOCGIFADDR:       /* Get interface address */
 715        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 716        case SIOCGIFDSTADDR:    /* Get the destination address */
 717        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 718                /* Note that these ioctls will not sleep,
 719                   so that we do not impose a lock.
 720                   One day we will be forced to put shlock here (I mean SMP)
 721                 */
 722                tryaddrmatch = (sin_orig.sin_family == AF_INET);
 723                memset(sin, 0, sizeof(*sin));
 724                sin->sin_family = AF_INET;
 725                break;
 726
 727        case SIOCSIFFLAGS:
 728                ret = -EPERM;
 729                if (!capable(CAP_NET_ADMIN))
 730                        goto out;
 731                break;
 732        case SIOCSIFADDR:       /* Set interface address (and family) */
 733        case SIOCSIFBRDADDR:    /* Set the broadcast address */
 734        case SIOCSIFDSTADDR:    /* Set the destination address */
 735        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
 736                ret = -EPERM;
 737                if (!capable(CAP_NET_ADMIN))
 738                        goto out;
 739                ret = -EINVAL;
 740                if (sin->sin_family != AF_INET)
 741                        goto out;
 742                break;
 743        default:
 744                ret = -EINVAL;
 745                goto out;
 746        }
 747
 748        rtnl_lock();
 749
 750        ret = -ENODEV;
 751        dev = __dev_get_by_name(net, ifr.ifr_name);
 752        if (!dev)
 753                goto done;
 754
 755        if (colon)
 756                *colon = ':';
 757
 758        in_dev = __in_dev_get_rtnl(dev);
 759        if (in_dev) {
 760                if (tryaddrmatch) {
 761                        /* Matthias Andree */
 762                        /* compare label and address (4.4BSD style) */
 763                        /* note: we only do this for a limited set of ioctls
 764                           and only if the original address family was AF_INET.
 765                           This is checked above. */
 766                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 767                             ifap = &ifa->ifa_next) {
 768                                if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
 769                                    sin_orig.sin_addr.s_addr ==
 770                                                        ifa->ifa_local) {
 771                                        break; /* found */
 772                                }
 773                        }
 774                }
 775                /* we didn't get a match, maybe the application is
 776                   4.3BSD-style and passed in junk so we fall back to
 777                   comparing just the label */
 778                if (!ifa) {
 779                        for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
 780                             ifap = &ifa->ifa_next)
 781                                if (!strcmp(ifr.ifr_name, ifa->ifa_label))
 782                                        break;
 783                }
 784        }
 785
 786        ret = -EADDRNOTAVAIL;
 787        if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
 788                goto done;
 789
 790        switch (cmd) {
 791        case SIOCGIFADDR:       /* Get interface address */
 792                sin->sin_addr.s_addr = ifa->ifa_local;
 793                goto rarok;
 794
 795        case SIOCGIFBRDADDR:    /* Get the broadcast address */
 796                sin->sin_addr.s_addr = ifa->ifa_broadcast;
 797                goto rarok;
 798
 799        case SIOCGIFDSTADDR:    /* Get the destination address */
 800                sin->sin_addr.s_addr = ifa->ifa_address;
 801                goto rarok;
 802
 803        case SIOCGIFNETMASK:    /* Get the netmask for the interface */
 804                sin->sin_addr.s_addr = ifa->ifa_mask;
 805                goto rarok;
 806
 807        case SIOCSIFFLAGS:
 808                if (colon) {
 809                        ret = -EADDRNOTAVAIL;
 810                        if (!ifa)
 811                                break;
 812                        ret = 0;
 813                        if (!(ifr.ifr_flags & IFF_UP))
 814                                inet_del_ifa(in_dev, ifap, 1);
 815                        break;
 816                }
 817                ret = dev_change_flags(dev, ifr.ifr_flags);
 818                break;
 819
 820        case SIOCSIFADDR:       /* Set interface address (and family) */
 821                ret = -EINVAL;
 822                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
 823                        break;
 824
 825                if (!ifa) {
 826                        ret = -ENOBUFS;
 827                        ifa = inet_alloc_ifa();
 828                        INIT_HLIST_NODE(&ifa->hash);
 829                        if (!ifa)
 830                                break;
 831                        if (colon)
 832                                memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
 833                        else
 834                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 835                } else {
 836                        ret = 0;
 837                        if (ifa->ifa_local == sin->sin_addr.s_addr)
 838                                break;
 839                        inet_del_ifa(in_dev, ifap, 0);
 840                        ifa->ifa_broadcast = 0;
 841                        ifa->ifa_scope = 0;
 842                }
 843
 844                ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
 845
 846                if (!(dev->flags & IFF_POINTOPOINT)) {
 847                        ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
 848                        ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
 849                        if ((dev->flags & IFF_BROADCAST) &&
 850                            ifa->ifa_prefixlen < 31)
 851                                ifa->ifa_broadcast = ifa->ifa_address |
 852                                                     ~ifa->ifa_mask;
 853                } else {
 854                        ifa->ifa_prefixlen = 32;
 855                        ifa->ifa_mask = inet_make_mask(32);
 856                }
 857                ret = inet_set_ifa(dev, ifa);
 858                break;
 859
 860        case SIOCSIFBRDADDR:    /* Set the broadcast address */
 861                ret = 0;
 862                if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
 863                        inet_del_ifa(in_dev, ifap, 0);
 864                        ifa->ifa_broadcast = sin->sin_addr.s_addr;
 865                        inet_insert_ifa(ifa);
 866                }
 867                break;
 868
 869        case SIOCSIFDSTADDR:    /* Set the destination address */
 870                ret = 0;
 871                if (ifa->ifa_address == sin->sin_addr.s_addr)
 872                        break;
 873                ret = -EINVAL;
 874                if (inet_abc_len(sin->sin_addr.s_addr) < 0)
 875                        break;
 876                ret = 0;
 877                inet_del_ifa(in_dev, ifap, 0);
 878                ifa->ifa_address = sin->sin_addr.s_addr;
 879                inet_insert_ifa(ifa);
 880                break;
 881
 882        case SIOCSIFNETMASK:    /* Set the netmask for the interface */
 883
 884                /*
 885                 *      The mask we set must be legal.
 886                 */
 887                ret = -EINVAL;
 888                if (bad_mask(sin->sin_addr.s_addr, 0))
 889                        break;
 890                ret = 0;
 891                if (ifa->ifa_mask != sin->sin_addr.s_addr) {
 892                        __be32 old_mask = ifa->ifa_mask;
 893                        inet_del_ifa(in_dev, ifap, 0);
 894                        ifa->ifa_mask = sin->sin_addr.s_addr;
 895                        ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
 896
 897                        /* See if current broadcast address matches
 898                         * with current netmask, then recalculate
 899                         * the broadcast address. Otherwise it's a
 900                         * funny address, so don't touch it since
 901                         * the user seems to know what (s)he's doing...
 902                         */
 903                        if ((dev->flags & IFF_BROADCAST) &&
 904                            (ifa->ifa_prefixlen < 31) &&
 905                            (ifa->ifa_broadcast ==
 906                             (ifa->ifa_local|~old_mask))) {
 907                                ifa->ifa_broadcast = (ifa->ifa_local |
 908                                                      ~sin->sin_addr.s_addr);
 909                        }
 910                        inet_insert_ifa(ifa);
 911                }
 912                break;
 913        }
 914done:
 915        rtnl_unlock();
 916out:
 917        return ret;
 918rarok:
 919        rtnl_unlock();
 920        ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
 921        goto out;
 922}
 923
 924static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
 925{
 926        struct in_device *in_dev = __in_dev_get_rtnl(dev);
 927        struct in_ifaddr *ifa;
 928        struct ifreq ifr;
 929        int done = 0;
 930
 931        if (!in_dev)
 932                goto out;
 933
 934        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
 935                if (!buf) {
 936                        done += sizeof(ifr);
 937                        continue;
 938                }
 939                if (len < (int) sizeof(ifr))
 940                        break;
 941                memset(&ifr, 0, sizeof(struct ifreq));
 942                if (ifa->ifa_label)
 943                        strcpy(ifr.ifr_name, ifa->ifa_label);
 944                else
 945                        strcpy(ifr.ifr_name, dev->name);
 946
 947                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
 948                (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
 949                                                                ifa->ifa_local;
 950
 951                if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
 952                        done = -EFAULT;
 953                        break;
 954                }
 955                buf  += sizeof(struct ifreq);
 956                len  -= sizeof(struct ifreq);
 957                done += sizeof(struct ifreq);
 958        }
 959out:
 960        return done;
 961}
 962
 963__be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
 964{
 965        __be32 addr = 0;
 966        struct in_device *in_dev;
 967        struct net *net = dev_net(dev);
 968
 969        rcu_read_lock();
 970        in_dev = __in_dev_get_rcu(dev);
 971        if (!in_dev)
 972                goto no_in_dev;
 973
 974        for_primary_ifa(in_dev) {
 975                if (ifa->ifa_scope > scope)
 976                        continue;
 977                if (!dst || inet_ifa_match(dst, ifa)) {
 978                        addr = ifa->ifa_local;
 979                        break;
 980                }
 981                if (!addr)
 982                        addr = ifa->ifa_local;
 983        } endfor_ifa(in_dev);
 984
 985        if (addr)
 986                goto out_unlock;
 987no_in_dev:
 988
 989        /* Not loopback addresses on loopback should be preferred
 990           in this case. It is importnat that lo is the first interface
 991           in dev_base list.
 992         */
 993        for_each_netdev_rcu(net, dev) {
 994                in_dev = __in_dev_get_rcu(dev);
 995                if (!in_dev)
 996                        continue;
 997
 998                for_primary_ifa(in_dev) {
 999                        if (ifa->ifa_scope != RT_SCOPE_LINK &&
1000                            ifa->ifa_scope <= scope) {
1001                                addr = ifa->ifa_local;
1002                                goto out_unlock;
1003                        }
1004                } endfor_ifa(in_dev);
1005        }
1006out_unlock:
1007        rcu_read_unlock();
1008        return addr;
1009}
1010EXPORT_SYMBOL(inet_select_addr);
1011
1012static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1013                              __be32 local, int scope)
1014{
1015        int same = 0;
1016        __be32 addr = 0;
1017
1018        for_ifa(in_dev) {
1019                if (!addr &&
1020                    (local == ifa->ifa_local || !local) &&
1021                    ifa->ifa_scope <= scope) {
1022                        addr = ifa->ifa_local;
1023                        if (same)
1024                                break;
1025                }
1026                if (!same) {
1027                        same = (!local || inet_ifa_match(local, ifa)) &&
1028                                (!dst || inet_ifa_match(dst, ifa));
1029                        if (same && addr) {
1030                                if (local || !dst)
1031                                        break;
1032                                /* Is the selected addr into dst subnet? */
1033                                if (inet_ifa_match(addr, ifa))
1034                                        break;
1035                                /* No, then can we use new local src? */
1036                                if (ifa->ifa_scope <= scope) {
1037                                        addr = ifa->ifa_local;
1038                                        break;
1039                                }
1040                                /* search for large dst subnet for addr */
1041                                same = 0;
1042                        }
1043                }
1044        } endfor_ifa(in_dev);
1045
1046        return same ? addr : 0;
1047}
1048
1049/*
1050 * Confirm that local IP address exists using wildcards:
1051 * - in_dev: only on this interface, 0=any interface
1052 * - dst: only in the same subnet as dst, 0=any dst
1053 * - local: address, 0=autoselect the local address
1054 * - scope: maximum allowed scope value for the local address
1055 */
1056__be32 inet_confirm_addr(struct in_device *in_dev,
1057                         __be32 dst, __be32 local, int scope)
1058{
1059        __be32 addr = 0;
1060        struct net_device *dev;
1061        struct net *net;
1062
1063        if (scope != RT_SCOPE_LINK)
1064                return confirm_addr_indev(in_dev, dst, local, scope);
1065
1066        net = dev_net(in_dev->dev);
1067        rcu_read_lock();
1068        for_each_netdev_rcu(net, dev) {
1069                in_dev = __in_dev_get_rcu(dev);
1070                if (in_dev) {
1071                        addr = confirm_addr_indev(in_dev, dst, local, scope);
1072                        if (addr)
1073                                break;
1074                }
1075        }
1076        rcu_read_unlock();
1077
1078        return addr;
1079}
1080EXPORT_SYMBOL(inet_confirm_addr);
1081
1082/*
1083 *      Device notifier
1084 */
1085
1086int register_inetaddr_notifier(struct notifier_block *nb)
1087{
1088        return blocking_notifier_chain_register(&inetaddr_chain, nb);
1089}
1090EXPORT_SYMBOL(register_inetaddr_notifier);
1091
1092int unregister_inetaddr_notifier(struct notifier_block *nb)
1093{
1094        return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1095}
1096EXPORT_SYMBOL(unregister_inetaddr_notifier);
1097
1098/* Rename ifa_labels for a device name change. Make some effort to preserve
1099 * existing alias numbering and to create unique labels if possible.
1100*/
1101static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1102{
1103        struct in_ifaddr *ifa;
1104        int named = 0;
1105
1106        for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1107                char old[IFNAMSIZ], *dot;
1108
1109                memcpy(old, ifa->ifa_label, IFNAMSIZ);
1110                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1111                if (named++ == 0)
1112                        goto skip;
1113                dot = strchr(old, ':');
1114                if (dot == NULL) {
1115                        sprintf(old, ":%d", named);
1116                        dot = old;
1117                }
1118                if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1119                        strcat(ifa->ifa_label, dot);
1120                else
1121                        strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1122skip:
1123                rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1124        }
1125}
1126
/*
 * Tell whether @mtu is large enough for IPv4 to run on a device.
 * The threshold is 68 octets, the datagram size RFC 791 requires every
 * link to carry without fragmentation.
 */
static inline bool inetdev_valid_mtu(unsigned int mtu)
{
        if (mtu < 68)
                return false;

        return true;
}
1131
1132static void inetdev_send_gratuitous_arp(struct net_device *dev,
1133                                        struct in_device *in_dev)
1134
1135{
1136        struct in_ifaddr *ifa;
1137
1138        for (ifa = in_dev->ifa_list; ifa;
1139             ifa = ifa->ifa_next) {
1140                arp_send(ARPOP_REQUEST, ETH_P_ARP,
1141                         ifa->ifa_local, dev,
1142                         ifa->ifa_local, NULL,
1143                         dev->dev_addr, NULL);
1144        }
1145}
1146
1147/* Called only under RTNL semaphore */
1148
1149static int inetdev_event(struct notifier_block *this, unsigned long event,
1150                         void *ptr)
1151{
1152        struct net_device *dev = ptr;
1153        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1154
1155        ASSERT_RTNL();
1156
1157        if (!in_dev) {
1158                if (event == NETDEV_REGISTER) {
1159                        in_dev = inetdev_init(dev);
1160                        if (!in_dev)
1161                                return notifier_from_errno(-ENOMEM);
1162                        if (dev->flags & IFF_LOOPBACK) {
1163                                IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1164                                IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1165                        }
1166                } else if (event == NETDEV_CHANGEMTU) {
1167                        /* Re-enabling IP */
1168                        if (inetdev_valid_mtu(dev->mtu))
1169                                in_dev = inetdev_init(dev);
1170                }
1171                goto out;
1172        }
1173
1174        switch (event) {
1175        case NETDEV_REGISTER:
1176                pr_debug("%s: bug\n", __func__);
1177                RCU_INIT_POINTER(dev->ip_ptr, NULL);
1178                break;
1179        case NETDEV_UP:
1180                if (!inetdev_valid_mtu(dev->mtu))
1181                        break;
1182                if (dev->flags & IFF_LOOPBACK) {
1183                        struct in_ifaddr *ifa = inet_alloc_ifa();
1184
1185                        if (ifa) {
1186                                INIT_HLIST_NODE(&ifa->hash);
1187                                ifa->ifa_local =
1188                                  ifa->ifa_address = htonl(INADDR_LOOPBACK);
1189                                ifa->ifa_prefixlen = 8;
1190                                ifa->ifa_mask = inet_make_mask(8);
1191                                in_dev_hold(in_dev);
1192                                ifa->ifa_dev = in_dev;
1193                                ifa->ifa_scope = RT_SCOPE_HOST;
1194                                memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1195                                inet_insert_ifa(ifa);
1196                        }
1197                }
1198                ip_mc_up(in_dev);
1199                /* fall through */
1200        case NETDEV_CHANGEADDR:
1201                if (!IN_DEV_ARP_NOTIFY(in_dev))
1202                        break;
1203                /* fall through */
1204        case NETDEV_NOTIFY_PEERS:
1205                /* Send gratuitous ARP to notify of link change */
1206                inetdev_send_gratuitous_arp(dev, in_dev);
1207                break;
1208        case NETDEV_DOWN:
1209                ip_mc_down(in_dev);
1210                break;
1211        case NETDEV_PRE_TYPE_CHANGE:
1212                ip_mc_unmap(in_dev);
1213                break;
1214        case NETDEV_POST_TYPE_CHANGE:
1215                ip_mc_remap(in_dev);
1216                break;
1217        case NETDEV_CHANGEMTU:
1218                if (inetdev_valid_mtu(dev->mtu))
1219                        break;
1220                /* disable IP when MTU is not enough */
1221        case NETDEV_UNREGISTER:
1222                inetdev_destroy(in_dev);
1223                break;
1224        case NETDEV_CHANGENAME:
1225                /* Do not notify about label change, this event is
1226                 * not interesting to applications using netlink.
1227                 */
1228                inetdev_changename(dev, in_dev);
1229
1230                devinet_sysctl_unregister(in_dev);
1231                devinet_sysctl_register(in_dev);
1232                break;
1233        }
1234out:
1235        return NOTIFY_DONE;
1236}
1237
1238static struct notifier_block ip_netdev_notifier = {
1239        .notifier_call = inetdev_event,
1240};
1241
1242static inline size_t inet_nlmsg_size(void)
1243{
1244        return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1245               + nla_total_size(4) /* IFA_ADDRESS */
1246               + nla_total_size(4) /* IFA_LOCAL */
1247               + nla_total_size(4) /* IFA_BROADCAST */
1248               + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1249}
1250
1251static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1252                            u32 pid, u32 seq, int event, unsigned int flags)
1253{
1254        struct ifaddrmsg *ifm;
1255        struct nlmsghdr  *nlh;
1256
1257        nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1258        if (nlh == NULL)
1259                return -EMSGSIZE;
1260
1261        ifm = nlmsg_data(nlh);
1262        ifm->ifa_family = AF_INET;
1263        ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1264        ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1265        ifm->ifa_scope = ifa->ifa_scope;
1266        ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1267
1268        if ((ifa->ifa_address &&
1269             nla_put_be32(skb, IFA_ADDRESS, ifa->ifa_address)) ||
1270            (ifa->ifa_local &&
1271             nla_put_be32(skb, IFA_LOCAL, ifa->ifa_local)) ||
1272            (ifa->ifa_broadcast &&
1273             nla_put_be32(skb, IFA_BROADCAST, ifa->ifa_broadcast)) ||
1274            (ifa->ifa_label[0] &&
1275             nla_put_string(skb, IFA_LABEL, ifa->ifa_label)))
1276                goto nla_put_failure;
1277
1278        return nlmsg_end(skb, nlh);
1279
1280nla_put_failure:
1281        nlmsg_cancel(skb, nlh);
1282        return -EMSGSIZE;
1283}
1284
1285static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1286{
1287        struct net *net = sock_net(skb->sk);
1288        int h, s_h;
1289        int idx, s_idx;
1290        int ip_idx, s_ip_idx;
1291        struct net_device *dev;
1292        struct in_device *in_dev;
1293        struct in_ifaddr *ifa;
1294        struct hlist_head *head;
1295        struct hlist_node *node;
1296
1297        s_h = cb->args[0];
1298        s_idx = idx = cb->args[1];
1299        s_ip_idx = ip_idx = cb->args[2];
1300
1301        for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1302                idx = 0;
1303                head = &net->dev_index_head[h];
1304                rcu_read_lock();
1305                hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1306                        if (idx < s_idx)
1307                                goto cont;
1308                        if (h > s_h || idx > s_idx)
1309                                s_ip_idx = 0;
1310                        in_dev = __in_dev_get_rcu(dev);
1311                        if (!in_dev)
1312                                goto cont;
1313
1314                        for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1315                             ifa = ifa->ifa_next, ip_idx++) {
1316                                if (ip_idx < s_ip_idx)
1317                                        continue;
1318                                if (inet_fill_ifaddr(skb, ifa,
1319                                             NETLINK_CB(cb->skb).pid,
1320                                             cb->nlh->nlmsg_seq,
1321                                             RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1322                                        rcu_read_unlock();
1323                                        goto done;
1324                                }
1325                        }
1326cont:
1327                        idx++;
1328                }
1329                rcu_read_unlock();
1330        }
1331
1332done:
1333        cb->args[0] = h;
1334        cb->args[1] = idx;
1335        cb->args[2] = ip_idx;
1336
1337        return skb->len;
1338}
1339
1340static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1341                      u32 pid)
1342{
1343        struct sk_buff *skb;
1344        u32 seq = nlh ? nlh->nlmsg_seq : 0;
1345        int err = -ENOBUFS;
1346        struct net *net;
1347
1348        net = dev_net(ifa->ifa_dev->dev);
1349        skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1350        if (skb == NULL)
1351                goto errout;
1352
1353        err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1354        if (err < 0) {
1355                /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1356                WARN_ON(err == -EMSGSIZE);
1357                kfree_skb(skb);
1358                goto errout;
1359        }
1360        rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1361        return;
1362errout:
1363        if (err < 0)
1364                rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1365}
1366
1367static size_t inet_get_link_af_size(const struct net_device *dev)
1368{
1369        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1370
1371        if (!in_dev)
1372                return 0;
1373
1374        return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1375}
1376
1377static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1378{
1379        struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1380        struct nlattr *nla;
1381        int i;
1382
1383        if (!in_dev)
1384                return -ENODATA;
1385
1386        nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1387        if (nla == NULL)
1388                return -EMSGSIZE;
1389
1390        for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1391                ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1392
1393        return 0;
1394}
1395
1396static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1397        [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1398};
1399
1400static int inet_validate_link_af(const struct net_device *dev,
1401                                 const struct nlattr *nla)
1402{
1403        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1404        int err, rem;
1405
1406        if (dev && !__in_dev_get_rtnl(dev))
1407                return -EAFNOSUPPORT;
1408
1409        err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1410        if (err < 0)
1411                return err;
1412
1413        if (tb[IFLA_INET_CONF]) {
1414                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1415                        int cfgid = nla_type(a);
1416
1417                        if (nla_len(a) < 4)
1418                                return -EINVAL;
1419
1420                        if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1421                                return -EINVAL;
1422                }
1423        }
1424
1425        return 0;
1426}
1427
1428static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1429{
1430        struct in_device *in_dev = __in_dev_get_rtnl(dev);
1431        struct nlattr *a, *tb[IFLA_INET_MAX+1];
1432        int rem;
1433
1434        if (!in_dev)
1435                return -EAFNOSUPPORT;
1436
1437        if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1438                BUG();
1439
1440        if (tb[IFLA_INET_CONF]) {
1441                nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1442                        ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1443        }
1444
1445        return 0;
1446}
1447
1448#ifdef CONFIG_SYSCTL
1449
1450static void devinet_copy_dflt_conf(struct net *net, int i)
1451{
1452        struct net_device *dev;
1453
1454        rcu_read_lock();
1455        for_each_netdev_rcu(net, dev) {
1456                struct in_device *in_dev;
1457
1458                in_dev = __in_dev_get_rcu(dev);
1459                if (in_dev && !test_bit(i, in_dev->cnf.state))
1460                        in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1461        }
1462        rcu_read_unlock();
1463}
1464
1465/* called with RTNL locked */
1466static void inet_forward_change(struct net *net)
1467{
1468        struct net_device *dev;
1469        int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1470
1471        IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1472        IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1473
1474        for_each_netdev(net, dev) {
1475                struct in_device *in_dev;
1476                if (on)
1477                        dev_disable_lro(dev);
1478                rcu_read_lock();
1479                in_dev = __in_dev_get_rcu(dev);
1480                if (in_dev)
1481                        IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1482                rcu_read_unlock();
1483        }
1484}
1485
1486static int devinet_conf_proc(ctl_table *ctl, int write,
1487                             void __user *buffer,
1488                             size_t *lenp, loff_t *ppos)
1489{
1490        int old_value = *(int *)ctl->data;
1491        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1492        int new_value = *(int *)ctl->data;
1493
1494        if (write) {
1495                struct ipv4_devconf *cnf = ctl->extra1;
1496                struct net *net = ctl->extra2;
1497                int i = (int *)ctl->data - cnf->data;
1498
1499                set_bit(i, cnf->state);
1500
1501                if (cnf == net->ipv4.devconf_dflt)
1502                        devinet_copy_dflt_conf(net, i);
1503                if (i == IPV4_DEVCONF_ACCEPT_LOCAL - 1 ||
1504                    i == IPV4_DEVCONF_ROUTE_LOCALNET - 1)
1505                        if ((new_value == 0) && (old_value != 0))
1506                                rt_cache_flush(net);
1507        }
1508
1509        return ret;
1510}
1511
1512static int devinet_sysctl_forward(ctl_table *ctl, int write,
1513                                  void __user *buffer,
1514                                  size_t *lenp, loff_t *ppos)
1515{
1516        int *valp = ctl->data;
1517        int val = *valp;
1518        loff_t pos = *ppos;
1519        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1520
1521        if (write && *valp != val) {
1522                struct net *net = ctl->extra2;
1523
1524                if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1525                        if (!rtnl_trylock()) {
1526                                /* Restore the original values before restarting */
1527                                *valp = val;
1528                                *ppos = pos;
1529                                return restart_syscall();
1530                        }
1531                        if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1532                                inet_forward_change(net);
1533                        } else if (*valp) {
1534                                struct ipv4_devconf *cnf = ctl->extra1;
1535                                struct in_device *idev =
1536                                        container_of(cnf, struct in_device, cnf);
1537                                dev_disable_lro(idev->dev);
1538                        }
1539                        rtnl_unlock();
1540                        rt_cache_flush(net);
1541                }
1542        }
1543
1544        return ret;
1545}
1546
1547static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1548                                void __user *buffer,
1549                                size_t *lenp, loff_t *ppos)
1550{
1551        int *valp = ctl->data;
1552        int val = *valp;
1553        int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1554        struct net *net = ctl->extra2;
1555
1556        if (write && *valp != val)
1557                rt_cache_flush(net);
1558
1559        return ret;
1560}
1561
/*
 * Initialiser for one ctl_table entry of the devinet sysctl template.
 *   attr - IPV4_DEVCONF_ suffix selecting the slot in ipv4_devconf.data
 *          (slot index is IPV4_DEVCONF_<attr> - 1)
 *   name - file name of the entry
 *   mval - file mode
 *   proc - handler invoked on access
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
        { \
                .procname       = name, \
                .data           = ipv4_devconf.data + IPV4_DEVCONF_ ## attr - 1, \
                .maxlen         = sizeof(int), \
                .mode           = mval, \
                .proc_handler   = proc, \
                .extra1         = &ipv4_devconf, \
        }

/* Read-write entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)

/* Read-only entry served by devinet_conf_proc(). */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)

/* Read-write entry with a caller-supplied handler. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
        DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)

/* Read-write entry served by ipv4_doint_and_flush(). */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
        DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1584
1585static struct devinet_sysctl_table {
1586        struct ctl_table_header *sysctl_header;
1587        struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1588} devinet_sysctl = {
1589        .devinet_vars = {
1590                DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1591                                             devinet_sysctl_forward),
1592                DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1593
1594                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1595                DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1596                DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1597                DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1598                DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1599                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1600                                        "accept_source_route"),
1601                DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1602                DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1603                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1604                DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1605                DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1606                DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1607                DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1608                DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1609                DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1610                DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1611                DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1612                DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1613                DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1614
1615                DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1616                DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1617                DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1618                                              "force_igmp_version"),
1619                DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1620                                              "promote_secondaries"),
1621                DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET,
1622                                              "route_localnet"),
1623        },
1624};
1625
1626static int __devinet_sysctl_register(struct net *net, char *dev_name,
1627                                        struct ipv4_devconf *p)
1628{
1629        int i;
1630        struct devinet_sysctl_table *t;
1631        char path[sizeof("net/ipv4/conf/") + IFNAMSIZ];
1632
1633        t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1634        if (!t)
1635                goto out;
1636
1637        for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1638                t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1639                t->devinet_vars[i].extra1 = p;
1640                t->devinet_vars[i].extra2 = net;
1641        }
1642
1643        snprintf(path, sizeof(path), "net/ipv4/conf/%s", dev_name);
1644
1645        t->sysctl_header = register_net_sysctl(net, path, t->devinet_vars);
1646        if (!t->sysctl_header)
1647                goto free;
1648
1649        p->sysctl = t;
1650        return 0;
1651
1652free:
1653        kfree(t);
1654out:
1655        return -ENOBUFS;
1656}
1657
1658static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1659{
1660        struct devinet_sysctl_table *t = cnf->sysctl;
1661
1662        if (t == NULL)
1663                return;
1664
1665        cnf->sysctl = NULL;
1666        unregister_net_sysctl_table(t->sysctl_header);
1667        kfree(t);
1668}
1669
1670static void devinet_sysctl_register(struct in_device *idev)
1671{
1672        neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1673        __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1674                                        &idev->cnf);
1675}
1676
1677static void devinet_sysctl_unregister(struct in_device *idev)
1678{
1679        __devinet_sysctl_unregister(&idev->cnf);
1680        neigh_sysctl_unregister(idev->arp_parms);
1681}
1682
1683static struct ctl_table ctl_forward_entry[] = {
1684        {
1685                .procname       = "ip_forward",
1686                .data           = &ipv4_devconf.data[
1687                                        IPV4_DEVCONF_FORWARDING - 1],
1688                .maxlen         = sizeof(int),
1689                .mode           = 0644,
1690                .proc_handler   = devinet_sysctl_forward,
1691                .extra1         = &ipv4_devconf,
1692                .extra2         = &init_net,
1693        },
1694        { },
1695};
1696#endif
1697
1698static __net_init int devinet_init_net(struct net *net)
1699{
1700        int err;
1701        struct ipv4_devconf *all, *dflt;
1702#ifdef CONFIG_SYSCTL
1703        struct ctl_table *tbl = ctl_forward_entry;
1704        struct ctl_table_header *forw_hdr;
1705#endif
1706
1707        err = -ENOMEM;
1708        all = &ipv4_devconf;
1709        dflt = &ipv4_devconf_dflt;
1710
1711        if (!net_eq(net, &init_net)) {
1712                all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1713                if (all == NULL)
1714                        goto err_alloc_all;
1715
1716                dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1717                if (dflt == NULL)
1718                        goto err_alloc_dflt;
1719
1720#ifdef CONFIG_SYSCTL
1721                tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1722                if (tbl == NULL)
1723                        goto err_alloc_ctl;
1724
1725                tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1726                tbl[0].extra1 = all;
1727                tbl[0].extra2 = net;
1728#endif
1729        }
1730
1731#ifdef CONFIG_SYSCTL
1732        err = __devinet_sysctl_register(net, "all", all);
1733        if (err < 0)
1734                goto err_reg_all;
1735
1736        err = __devinet_sysctl_register(net, "default", dflt);
1737        if (err < 0)
1738                goto err_reg_dflt;
1739
1740        err = -ENOMEM;
1741        forw_hdr = register_net_sysctl(net, "net/ipv4", tbl);
1742        if (forw_hdr == NULL)
1743                goto err_reg_ctl;
1744        net->ipv4.forw_hdr = forw_hdr;
1745#endif
1746
1747        net->ipv4.devconf_all = all;
1748        net->ipv4.devconf_dflt = dflt;
1749        return 0;
1750
1751#ifdef CONFIG_SYSCTL
1752err_reg_ctl:
1753        __devinet_sysctl_unregister(dflt);
1754err_reg_dflt:
1755        __devinet_sysctl_unregister(all);
1756err_reg_all:
1757        if (tbl != ctl_forward_entry)
1758                kfree(tbl);
1759err_alloc_ctl:
1760#endif
1761        if (dflt != &ipv4_devconf_dflt)
1762                kfree(dflt);
1763err_alloc_dflt:
1764        if (all != &ipv4_devconf)
1765                kfree(all);
1766err_alloc_all:
1767        return err;
1768}
1769
1770static __net_exit void devinet_exit_net(struct net *net)
1771{
1772#ifdef CONFIG_SYSCTL
1773        struct ctl_table *tbl;
1774
1775        tbl = net->ipv4.forw_hdr->ctl_table_arg;
1776        unregister_net_sysctl_table(net->ipv4.forw_hdr);
1777        __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1778        __devinet_sysctl_unregister(net->ipv4.devconf_all);
1779        kfree(tbl);
1780#endif
1781        kfree(net->ipv4.devconf_dflt);
1782        kfree(net->ipv4.devconf_all);
1783}
1784
1785static __net_initdata struct pernet_operations devinet_ops = {
1786        .init = devinet_init_net,
1787        .exit = devinet_exit_net,
1788};
1789
1790static struct rtnl_af_ops inet_af_ops = {
1791        .family           = AF_INET,
1792        .fill_link_af     = inet_fill_link_af,
1793        .get_link_af_size = inet_get_link_af_size,
1794        .validate_link_af = inet_validate_link_af,
1795        .set_link_af      = inet_set_link_af,
1796};
1797
1798void __init devinet_init(void)
1799{
1800        int i;
1801
1802        for (i = 0; i < IN4_ADDR_HSIZE; i++)
1803                INIT_HLIST_HEAD(&inet_addr_lst[i]);
1804
1805        register_pernet_subsys(&devinet_ops);
1806
1807        register_gifconf(PF_INET, inet_gifconf);
1808        register_netdevice_notifier(&ip_netdev_notifier);
1809
1810        rtnl_af_register(&inet_af_ops);
1811
1812        rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1813        rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1814        rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1815}
1816
1817
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.