linux/net/openvswitch/datapath.c
/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <linux/workqueue.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#include "datapath.h"
#include "flow.h"
#include "vport-internal_dev.h"

/**
 * struct ovs_net - Per net-namespace data for ovs.
 * @dps: List of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
struct ovs_net {
        struct list_head dps;
};

static int ovs_net_id __read_mostly;

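/*
 * Editorial note added for this excerpt: the delayed work declared below
 * drives periodic rehashing of the flow tables; once scheduled,
 * rehash_flow_table() is expected to run roughly every
 * REHASH_FLOW_INTERVAL (ten minutes), so a table's hash layout does not
 * stay fixed for the lifetime of a datapath.
 */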
#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
 * genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 */
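
/*
 * Illustrative sketch (added for this excerpt, not in the original source):
 * a writer that needs both locks follows the nesting described above,
 *
 *      genl_mutex              taken first (by the Generic Netlink core)
 *          rtnl_lock()         nests inside genl_mutex
 *          ... mutate device state ...
 *          rtnl_unlock()
 *
 * while readers only need rcu_read_lock()/rcu_read_unlock().
 */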

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct net *, int dp_ifindex, struct sk_buff *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(struct net *, int dp_ifindex,
                                  struct sk_buff *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock.  The
 * caller's lock is what keeps the returned datapath alive: the internal
 * RCU read section below only covers the ifindex-to-vport lookup itself.
 */
static struct datapath *get_dp(struct net *net, int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(net, dp_ifindex);
        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_rtnl_rcu(dp, OVSP_LOCAL);
        return vport->ops->get_name(vport);
}

static int get_dpifindex(struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = local->ops->get_ifindex(local);
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
        free_percpu(dp->stats_percpu);
        release_net(ovs_dp_get_net(dp));
        kfree(dp->ports);
        kfree(dp);
}

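/*
 * Note added for this excerpt: the '&' mask below assumes
 * DP_VPORT_HASH_BUCKETS is a power of two; only then does
 * (port_no & (DP_VPORT_HASH_BUCKETS - 1)) reach every bucket.
 */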
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_node *n;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, n, head, dp_hash_node) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }

        return vport;
}

/* Called with RTNL lock. */
void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_RTNL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock.
 *
 * This is the per-packet receive path: extract a flow key from 'skb', look
 * the key up in the flow table, then either run the matching flow's actions
 * or hand the packet to userspace as an OVS_PACKET_CMD_MISS upcall.
 */
void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct dp_stats_percpu *stats;
        struct sw_flow_key key;
        u64 *stats_counter;
        int error;
        int key_len;

        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        /* Extract flow from 'skb' into 'key'. */
        error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
        if (unlikely(error)) {
                kfree_skb(skb);
                return;
        }

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;

                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.key = &key;
                upcall.userdata = NULL;
                upcall.portid = p->upcall_portid;
                ovs_dp_upcall(dp, skb, &upcall);
                consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        OVS_CB(skb)->flow = flow;

        stats_counter = &stats->n_hit;
        ovs_flow_used(OVS_CB(skb)->flow, skb);
        ovs_execute_actions(dp, skb);

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->sync);
        (*stats_counter)++;
        u64_stats_update_end(&stats->sync);
}

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .netnsok = true
};

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int dp_ifindex;
        int err;

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex) {
                err = -ENODEV;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
        else
                err = queue_gso_packets(ovs_dp_get_net(dp), dp_ifindex, skb, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        u64_stats_update_begin(&stats->sync);
        stats->n_lost++;
        u64_stats_update_end(&stats->sync);

        return err;
}

static int queue_gso_packets(struct net *net, int dp_ifindex,
                             struct sk_buff *skb,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct dp_upcall_info later_info;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
        if (IS_ERR(segs))
                return PTR_ERR(segs);

        /* Queue all of the segments. */
        skb = segs;
        do {
                err = queue_userspace_packet(net, dp_ifindex, skb, upcall_info);
                if (err)
                        break;

                if (skb == segs && gso_type & SKB_GSO_UDP) {
                        /* The initial flow key extracted by ovs_flow_extract()
                         * in this case is for a first fragment, so we need to
                         * properly mark later fragments.
                         */
                        later_key = *upcall_info->key;
                        later_key.ip.frag = OVS_FRAG_TYPE_LATER;

                        later_info = *upcall_info;
                        later_info.key = &later_key;
                        upcall_info = &later_info;
                }
        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

static int queue_userspace_packet(struct net *net, int dp_ifindex,
                                  struct sk_buff *skb,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb; /* to be queued to userspace */
        struct nlattr *nla;
        unsigned int len;
        int err;

        if (vlan_tx_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
                if (!nskb)
                        return -ENOMEM;

                nskb->vlan_tci = 0;
                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        len = sizeof(struct ovs_header);
        len += nla_total_size(skb->len);
        len += nla_total_size(FLOW_BUFSIZE);
        if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
                len += nla_total_size(8);

        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;

        nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
        ovs_flow_to_nlattrs(upcall_info->key, user_skb);
        nla_nest_end(user_skb, nla);

        if (upcall_info->userdata)
                nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
                            nla_get_u64(upcall_info->userdata));

        nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);

        skb_copy_and_csum_dev(skb, nla_data(nla));

        err = genlmsg_unicast(net, user_skb, upcall_info->portid);

out:
        kfree_skb(nskb);
        return err;
}

/* Called with genl_mutex. */
static int flush_flows(struct datapath *dp)
{
        struct flow_table *old_table;
        struct flow_table *new_table;

        old_table = genl_dereference(dp->table);
        new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

        ovs_flow_tbl_deferred_destroy(old_table);
        return 0;
}

static int validate_actions(const struct nlattr *attr,
                                const struct sw_flow_key *key, int depth);

static int validate_sample(const struct nlattr *attr,
                                const struct sw_flow_key *key, int depth)
{
        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
        const struct nlattr *probability, *actions;
        const struct nlattr *a;
        int rem;

        memset(attrs, 0, sizeof(attrs));
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
                if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
                        return -EINVAL;
                attrs[type] = a;
        }
        if (rem)
                return -EINVAL;

        probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
        if (!probability || nla_len(probability) != sizeof(u32))
                return -EINVAL;

        actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
        if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
                return -EINVAL;
        return validate_actions(actions, key, depth + 1);
}

static int validate_tp_port(const struct sw_flow_key *flow_key)
{
        if (flow_key->eth.type == htons(ETH_P_IP)) {
                if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
                        return 0;
        } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
                if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
                        return 0;
        }

        return -EINVAL;
}

static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key)
{
        const struct nlattr *ovs_key = nla_data(a);
        int key_type = nla_type(ovs_key);

        /* There can be only one key in an action. */
        if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
                return -EINVAL;

        if (key_type > OVS_KEY_ATTR_MAX ||
            nla_len(ovs_key) != ovs_key_lens[key_type])
                return -EINVAL;

        switch (key_type) {
        const struct ovs_key_ipv4 *ipv4_key;

        case OVS_KEY_ATTR_PRIORITY:
        case OVS_KEY_ATTR_ETHERNET:
                break;

        case OVS_KEY_ATTR_IPV4:
                if (flow_key->eth.type != htons(ETH_P_IP))
                        return -EINVAL;

                if (!flow_key->ip.proto)
                        return -EINVAL;

                ipv4_key = nla_data(ovs_key);
                if (ipv4_key->ipv4_proto != flow_key->ip.proto)
                        return -EINVAL;

                if (ipv4_key->ipv4_frag != flow_key->ip.frag)
                        return -EINVAL;

                break;

        case OVS_KEY_ATTR_TCP:
                if (flow_key->ip.proto != IPPROTO_TCP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        case OVS_KEY_ATTR_UDP:
                if (flow_key->ip.proto != IPPROTO_UDP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        default:
                return -EINVAL;
        }

        return 0;
}

static int validate_userspace(const struct nlattr *attr)
{
        static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
                [OVS_USERSPACE_ATTR_PID] = { .type = NLA_U32 },
                [OVS_USERSPACE_ATTR_USERDATA] = { .type = NLA_U64 },
        };
        struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
        int error;

        error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
                                 attr, userspace_policy);
        if (error)
                return error;

        if (!a[OVS_USERSPACE_ATTR_PID] ||
            !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
                return -EINVAL;

        return 0;
}

static int validate_actions(const struct nlattr *attr,
                                const struct sw_flow_key *key, int depth)
{
        const struct nlattr *a;
        int rem, err;

        if (depth >= SAMPLE_ACTION_DEPTH)
                return -EOVERFLOW;

        nla_for_each_nested(a, attr, rem) {
                /* Expected argument lengths, (u32)-1 for variable length. */
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
                        [OVS_ACTION_ATTR_SET] = (u32)-1,
                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
                };
                const struct ovs_action_push_vlan *vlan;
                int type = nla_type(a);

                if (type > OVS_ACTION_ATTR_MAX ||
                    (action_lens[type] != nla_len(a) &&
                     action_lens[type] != (u32)-1))
                        return -EINVAL;

                switch (type) {
                case OVS_ACTION_ATTR_UNSPEC:
                        return -EINVAL;

                case OVS_ACTION_ATTR_USERSPACE:
                        err = validate_userspace(a);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_POP_VLAN:
                        break;

                case OVS_ACTION_ATTR_PUSH_VLAN:
                        vlan = nla_data(a);
                        if (vlan->vlan_tpid != htons(ETH_P_8021Q))
                                return -EINVAL;
                        if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_SET:
                        err = validate_set(a, key);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_SAMPLE:
                        err = validate_sample(a, key, depth);
                        if (err)
                                return err;
                        break;

                default:
                        return -EINVAL;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}

static void clear_stats(struct sw_flow *flow)
{
        flow->used = 0;
        flow->tcp_flags = 0;
        flow->packet_count = 0;
        flow->byte_count = 0;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct datapath *dp;
        struct ethhdr *eth;
        int len;
        int err;
        int key_len;

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS] ||
            nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (ntohs(eth->h_proto) >= 1536)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
        if (err)
                goto err_flow_free;

        err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
                                             &flow->key.phy.in_port,
                                             a[OVS_PACKET_ATTR_KEY]);
        if (err)
                goto err_flow_free;

        err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
        if (err)
                goto err_flow_free;

        flow->hash = ovs_flow_hash(&flow->key, key_len);

        acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_flow_free;
        rcu_assign_pointer(flow->sf_acts, acts);

        OVS_CB(packet)->flow = flow;
        packet->priority = flow->key.phy.priority;

        rcu_read_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        local_bh_disable();
        err = ovs_execute_actions(dp, packet);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
        int i;
        struct flow_table *table = genl_dereference(dp->table);

        stats->n_flows = ovs_flow_tbl_count(table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;
        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
        }
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .netnsok = true
};

static struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd)
{
        const int skb_orig_len = skb->len;
        const struct sw_flow_actions *sf_acts;
        struct ovs_flow_stats stats;
        struct ovs_header *ovs_header;
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
        int err;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
        if (!nla)
                goto nla_put_failure;
        err = ovs_flow_to_nlattrs(&flow->key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);

        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
        stats.n_bytes = flow->byte_count;
        tcp_flags = flow->tcp_flags;
        spin_unlock_bh(&flow->lock);

        if (used &&
            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                goto nla_put_failure;

        if (stats.n_packets &&
            nla_put(skb, OVS_FLOW_ATTR_STATS,
                    sizeof(struct ovs_flow_stats), &stats))
                goto nla_put_failure;

        if (tcp_flags &&
            nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
                goto nla_put_failure;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
                      sf_acts->actions);
        if (err < 0 && skb_orig_len)
                goto error;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
{
        const struct sw_flow_actions *sf_acts;
        int len;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        /* OVS_FLOW_ATTR_KEY */
        len = nla_total_size(FLOW_BUFSIZE);
        /* OVS_FLOW_ATTR_ACTIONS */
        len += nla_total_size(sf_acts->actions_len);
        /* OVS_FLOW_ATTR_STATS */
        len += nla_total_size(sizeof(struct ovs_flow_stats));
        /* OVS_FLOW_ATTR_TCP_FLAGS */
        len += nla_total_size(1);
        /* OVS_FLOW_ATTR_USED */
        len += nla_total_size(8);

        len += NLMSG_ALIGN(sizeof(struct ovs_header));

        return genlmsg_new(len, GFP_KERNEL);
}

static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
                                               struct datapath *dp,
                                               u32 portid, u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(flow);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd);
        BUG_ON(retval < 0);
        return skb;
}

static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply;
        struct datapath *dp;
        struct flow_table *table;
        int error;
        int key_len;

        /* Extract key. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY])
                goto error;
        error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
                if (error)
                        goto error;
        } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
                error = -EINVAL;
                goto error;
        }

        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        error = -ENODEV;
        if (!dp)
                goto error;

        table = genl_dereference(dp->table);
        flow = ovs_flow_tbl_lookup(table, &key, key_len);
        if (!flow) {
                struct sw_flow_actions *acts;

                /* Bail out if we're not allowed to create a new flow. */
                error = -ENOENT;
                if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
                        goto error;

                /* Expand table, if necessary, to make room. */
                if (ovs_flow_tbl_need_to_expand(table)) {
                        struct flow_table *new_table;

                        new_table = ovs_flow_tbl_expand(table);
                        if (!IS_ERR(new_table)) {
                                rcu_assign_pointer(dp->table, new_table);
                                ovs_flow_tbl_deferred_destroy(table);
                                table = genl_dereference(dp->table);
                        }
                }

                /* Allocate flow. */
                flow = ovs_flow_alloc();
                if (IS_ERR(flow)) {
                        error = PTR_ERR(flow);
                        goto error;
                }
                flow->key = key;
                clear_stats(flow);

                /* Obtain actions. */
                acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error_free_flow;
                rcu_assign_pointer(flow->sf_acts, acts);

                /* Put flow in bucket. */
                flow->hash = ovs_flow_hash(&key, key_len);
                ovs_flow_tbl_insert(table, flow);

                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                                info->snd_seq,
                                                OVS_FLOW_CMD_NEW);
        } else {
                /* We found a matching flow. */
                struct sw_flow_actions *old_acts;
                struct nlattr *acts_attrs;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                error = -EEXIST;
                if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
                    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
                        goto error;

                /* Update actions. */
                old_acts = rcu_dereference_protected(flow->sf_acts,
                                                     lockdep_genl_is_held());
                acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
                if (acts_attrs &&
                    (old_acts->actions_len != nla_len(acts_attrs) ||
                     memcmp(old_acts->actions, nla_data(acts_attrs),
                            old_acts->actions_len))) {
                        struct sw_flow_actions *new_acts;

                        new_acts = ovs_flow_actions_alloc(acts_attrs);
                        error = PTR_ERR(new_acts);
                        if (IS_ERR(new_acts))
                                goto error;

                        rcu_assign_pointer(flow->sf_acts, new_acts);
                        ovs_flow_deferred_free_acts(old_acts);
                }

                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                                info->snd_seq, OVS_FLOW_CMD_NEW);

                /* Clear stats. */
                if (a[OVS_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
                        clear_stats(flow);
                        spin_unlock_bh(&flow->lock);
                }
        }

        if (!IS_ERR(reply))
                genl_notify(reply, genl_info_net(info), info->snd_portid,
                            ovs_dp_flow_multicast_group.id, info->nlhdr,
                            GFP_KERNEL);
        else
                netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
                                ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
        return 0;

error_free_flow:
        ovs_flow_free(flow);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct flow_table *table;
        int err;
        int key_len;

        if (!a[OVS_FLOW_ATTR_KEY])
                return -EINVAL;
        err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = genl_dereference(dp->table);
        flow = ovs_flow_tbl_lookup(table, &key, key_len);
        if (!flow)
                return -ENOENT;

        reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid,
                                        info->snd_seq, OVS_FLOW_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct flow_table *table;
        int err;
        int key_len;

        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        if (!a[OVS_FLOW_ATTR_KEY])
                return flush_flows(dp);

        err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
        if (err)
                return err;

        table = genl_dereference(dp->table);
        flow = ovs_flow_tbl_lookup(table, &key, key_len);
        if (!flow)
                return -ENOENT;

        reply = ovs_flow_cmd_alloc_info(flow);
        if (!reply)
                return -ENOMEM;

        ovs_flow_tbl_remove(table, flow);

        err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid,
                                     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
        BUG_ON(err < 0);

        ovs_flow_deferred_free(flow);

        genl_notify(reply, genl_info_net(info), info->snd_portid,
                    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        struct flow_table *table;

        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = genl_dereference(dp->table);

        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_next(table, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, dp, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_NEW) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new_or_set
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new_or_set,
        },
};

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_DATAPATH_FAMILY,
        .version = OVS_DATAPATH_VERSION,
        .maxattr = OVS_DP_ATTR_MAX,
        .netnsok = true
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP
};

static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_dp_stats dp_stats;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                goto error;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        rcu_read_lock();
        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
        rcu_read_unlock();
        if (err)
                goto nla_put_failure;

        get_dp_stats(dp, &dp_stats);
        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
                goto nla_put_failure;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        genlmsg_cancel(skb, ovs_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid,
                                             u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct net *net,
                                        struct ovs_header *ovs_header,
                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[OVS_DP_ATTR_NAME])
                dp = get_dp(net, ovs_header->dp_ifindex);
        else {
                struct vport *vport;

                rcu_read_lock();
                vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
                rcu_read_unlock();
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        struct ovs_net *ovs_net;
        int err, i;

        err = -EINVAL;
        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
                goto err;

        rtnl_lock();

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_unlock_rtnl;

        ovs_dp_set_net(dp, hold_net(sock_net(skb->sk)));

        /* Allocate table. */
        err = -ENOMEM;
        rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
        if (!dp->table)
                goto err_free_dp;

        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
        if (!dp->stats_percpu) {
                err = -ENOMEM;
                goto err_destroy_table;
        }

        dp->ports = kmalloc(DP_VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
                            GFP_KERNEL);
        if (!dp->ports) {
                err = -ENOMEM;
                goto err_destroy_percpu;
        }

        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
                INIT_HLIST_HEAD(&dp->ports[i]);

        /* Set up our datapath device. */
        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
        parms.type = OVS_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = OVSP_LOCAL;
        parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);

        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                goto err_destroy_ports_array;
        }

        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
                                      info->snd_seq, OVS_DP_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto err_destroy_local_port;

        ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
        list_add_tail(&dp->list_node, &ovs_net->dps);
        rtnl_unlock();

        genl_notify(reply, genl_info_net(info), info->snd_portid,
                    ovs_dp_datapath_multicast_group.id, info->nlhdr,
                    GFP_KERNEL);
        return 0;

err_destroy_local_port:
        ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));
err_destroy_ports_array:
        kfree(dp->ports);
err_destroy_percpu:
        free_percpu(dp->stats_percpu);
err_destroy_table:
        ovs_flow_tbl_destroy(genl_dereference(dp->table));
err_free_dp:
        release_net(ovs_dp_get_net(dp));
        kfree(dp);
err_unlock_rtnl:
        rtnl_unlock();
err:
        return err;
}

/* Called with genl_mutex. */
static void __dp_destroy(struct datapath *dp)
{
        int i;

        rtnl_lock();

        for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
                struct vport *vport;
                struct hlist_node *node, *n;

                hlist_for_each_entry_safe(vport, node, n, &dp->ports[i], dp_hash_node)
                        if (vport->port_no != OVSP_LOCAL)
                                ovs_dp_detach_port(vport);
        }

        list_del(&dp->list_node);
        ovs_dp_detach_port(ovs_vport_rtnl(dp, OVSP_LOCAL));

        /* rtnl_unlock() will wait until all the references to devices that
         * are pending unregistration have been dropped.  We do it here to
         * ensure that any internal devices (which contain DP pointers) are
         * fully destroyed before freeing the datapath.
         */
        rtnl_unlock();

        call_rcu(&dp->rcu, destroy_dp_rcu);
}

static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        err = PTR_ERR(dp);
        if (IS_ERR(dp))
                return err;

        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
                                      info->snd_seq, OVS_DP_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                return err;

        __dp_destroy(dp);

        genl_notify(reply, genl_info_net(info), info->snd_portid,
                    ovs_dp_datapath_multicast_group.id, info->nlhdr,
                    GFP_KERNEL);

        return 0;
}

static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;
        int err;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
                                      info->snd_seq, OVS_DP_CMD_NEW);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
                                ovs_dp_datapath_multicast_group.id, err);
                return 0;
        }

        genl_notify(reply, genl_info_net(info), info->snd_portid,
                    ovs_dp_datapath_multicast_group.id, info->nlhdr,
                    GFP_KERNEL);

        return 0;
}

static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct sk_buff *reply;
        struct datapath *dp;

        dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
        if (IS_ERR(dp))
                return PTR_ERR(dp);

        reply = ovs_dp_cmd_build_info(dp, info->snd_portid,
                                      info->snd_seq, OVS_DP_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
        struct datapath *dp;
        int skip = cb->args[0];
        int i = 0;

        list_for_each_entry(dp, &ovs_net->dps, list_node) {
                if (i >= skip &&
                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         OVS_DP_CMD_NEW) < 0)
                        break;
                i++;
        }

        cb->args[0] = i;

        return skb->len;
}

static struct genl_ops dp_datapath_genl_ops[] = {
        { .cmd = OVS_DP_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_new
        },
        { .cmd = OVS_DP_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_del
        },
        { .cmd = OVS_DP_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_get,
          .dumpit = ovs_dp_cmd_dump
        },
        { .cmd = OVS_DP_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = datapath_policy,
          .doit = ovs_dp_cmd_set,
        },
};

static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
};

static struct genl_family dp_vport_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_VPORT_FAMILY,
        .version = OVS_VPORT_VERSION,
        .maxattr = OVS_VPORT_ATTR_MAX,
        .netnsok = true
};

struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP
};

/* Called with RTNL lock or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
                                   u32 portid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_vport_stats vport_stats;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = get_dpifindex(vport->dp);

        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
            nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
            nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid))
                goto nla_put_failure;

        ovs_vport_get_stats(vport, &vport_stats);
        if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
                    &vport_stats))
                goto nla_put_failure;

        err = ovs_vport_get_options(vport, skb);
        if (err == -EMSGSIZE)
                goto error;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}
1560
1561/* Called with RTNL lock or RCU read lock. */
1562struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid,
1563                                         u32 seq, u8 cmd)
1564{
1565        struct sk_buff *skb;
1566        int retval;
1567
1568        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1569        if (!skb)
1570                return ERR_PTR(-ENOMEM);
1571
1572        retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd);
1573        if (retval < 0) {
1574                kfree_skb(skb);
1575                return ERR_PTR(retval);
1576        }
1577        return skb;
1578}
1579
1580/* Called with RTNL lock or RCU read lock. */
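/* A vport can be addressed two ways: by OVS_VPORT_ATTR_NAME, in which case
 * a nonzero ovs_header->dp_ifindex is only cross-checked against the
 * vport's datapath, or by OVS_VPORT_ATTR_PORT_NO within the datapath that
 * ovs_header->dp_ifindex identifies.
 */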
1581static struct vport *lookup_vport(struct net *net,
1582                                  struct ovs_header *ovs_header,
1583                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1584{
1585        struct datapath *dp;
1586        struct vport *vport;
1587
1588        if (a[OVS_VPORT_ATTR_NAME]) {
1589                vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
1590                if (!vport)
1591                        return ERR_PTR(-ENODEV);
1592                if (ovs_header->dp_ifindex &&
1593                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1594                        return ERR_PTR(-ENODEV);
1595                return vport;
1596        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1597                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1598
1599                if (port_no >= DP_MAX_PORTS)
1600                        return ERR_PTR(-EFBIG);
1601
1602                dp = get_dp(net, ovs_header->dp_ifindex);
1603                if (!dp)
1604                        return ERR_PTR(-ENODEV);
1605
1606                vport = ovs_vport_rtnl_rcu(dp, port_no);
1607                if (!vport)
1608                        return ERR_PTR(-ENOENT);
1609                return vport;
1610        } else
1611                return ERR_PTR(-EINVAL);
1612}
1613
1614static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
1615{
1616        struct nlattr **a = info->attrs;
1617        struct ovs_header *ovs_header = info->userhdr;
1618        struct vport_parms parms;
1619        struct sk_buff *reply;
1620        struct vport *vport;
1621        struct datapath *dp;
1622        u32 port_no;
1623        int err;
1624
1625        err = -EINVAL;
1626        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
1627            !a[OVS_VPORT_ATTR_UPCALL_PID])
1628                goto exit;
1629
1630        rtnl_lock();
1631        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1632        err = -ENODEV;
1633        if (!dp)
1634                goto exit_unlock;
1635
1636        if (a[OVS_VPORT_ATTR_PORT_NO]) {
1637                port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1638
1639                err = -EFBIG;
1640                if (port_no >= DP_MAX_PORTS)
1641                        goto exit_unlock;
1642
1643                vport = ovs_vport_rtnl_rcu(dp, port_no);
1644                err = -EBUSY;
1645                if (vport)
1646                        goto exit_unlock;
1647        } else {
1648                for (port_no = 1; ; port_no++) {
1649                        if (port_no >= DP_MAX_PORTS) {
1650                                err = -EFBIG;
1651                                goto exit_unlock;
1652                        }
1653                        vport = ovs_vport_rtnl(dp, port_no);
1654                        if (!vport)
1655                                break;
1656                }
1657        }
1658
1659        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
1660        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
1661        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
1662        parms.dp = dp;
1663        parms.port_no = port_no;
1664        parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1665
1666        vport = new_vport(&parms);
1667        err = PTR_ERR(vport);
1668        if (IS_ERR(vport))
1669                goto exit_unlock;
1670        err = 0;
1671        reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1672                                         OVS_VPORT_CMD_NEW);
1673        if (IS_ERR(reply)) {
1674                err = PTR_ERR(reply);
1675                ovs_dp_detach_port(vport);
1676                goto exit_unlock;
1677        }
1678        genl_notify(reply, genl_info_net(info), info->snd_portid,
1679                    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1680
1681exit_unlock:
1682        rtnl_unlock();
1683exit:
1684        return err;
1685}
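/*
 * Illustration only, not part of this file: a minimal userspace sketch
 * (libnl-3) that exercises ovs_vport_cmd_new() above by attaching an
 * existing network device to a datapath.  "eth1", the helper name, and
 * the elided error handling are all placeholders.  The caller needs
 * CAP_NET_ADMIN, matching GENL_ADMIN_PERM on the op.
 */
#include <stdint.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/openvswitch.h>

static int example_add_netdev_vport(int dp_ifindex, uint32_t upcall_portid)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct ovs_header *hdr;
	struct nl_msg *msg;
	int family, err;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, OVS_VPORT_FAMILY);

	msg = nlmsg_alloc();
	hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family,
			  sizeof(*hdr), NLM_F_REQUEST | NLM_F_ACK,
			  OVS_VPORT_CMD_NEW, OVS_VPORT_VERSION);
	hdr->dp_ifindex = dp_ifindex;	/* which datapath */

	/* The three attributes ovs_vport_cmd_new() insists on ... */
	nla_put_string(msg, OVS_VPORT_ATTR_NAME, "eth1");
	nla_put_u32(msg, OVS_VPORT_ATTR_TYPE, OVS_VPORT_TYPE_NETDEV);
	nla_put_u32(msg, OVS_VPORT_ATTR_UPCALL_PID, upcall_portid);
	/* ... OVS_VPORT_ATTR_PORT_NO is left out, so the kernel scans
	 * for the lowest free port number.
	 */

	err = nl_send_auto(sk, msg);
	if (err >= 0)
		err = nl_wait_for_ack(sk);

	nlmsg_free(msg);
	nl_socket_free(sk);
	return err;
}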
1686
1687static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
1688{
1689        struct nlattr **a = info->attrs;
1690        struct sk_buff *reply;
1691        struct vport *vport;
1692        int err;
1693
1694        rtnl_lock();
1695        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1696        err = PTR_ERR(vport);
1697        if (IS_ERR(vport))
1698                goto exit_unlock;
1699
1700        err = 0;
1701        if (a[OVS_VPORT_ATTR_TYPE] &&
1702            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
1703                err = -EINVAL;
1704
1705        if (!err && a[OVS_VPORT_ATTR_OPTIONS])
1706                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
1707        if (err)
1708                goto exit_unlock;
1709        if (a[OVS_VPORT_ATTR_UPCALL_PID])
1710                vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
1711
1712        reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1713                                         OVS_VPORT_CMD_NEW);
1714        if (IS_ERR(reply)) {
1715                netlink_set_err(sock_net(skb->sk)->genl_sock, 0,
1716                                ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
1717                goto exit_unlock;
1718        }
1719
1720        genl_notify(reply, genl_info_net(info), info->snd_portid,
1721                    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1722
1723exit_unlock:
1724        rtnl_unlock();
1725        return err;
1726}
1727
1728static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
1729{
1730        struct nlattr **a = info->attrs;
1731        struct sk_buff *reply;
1732        struct vport *vport;
1733        int err;
1734
1735        rtnl_lock();
1736        vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
1737        err = PTR_ERR(vport);
1738        if (IS_ERR(vport))
1739                goto exit_unlock;
1740
1741        if (vport->port_no == OVSP_LOCAL) {
1742                err = -EINVAL;
1743                goto exit_unlock;
1744        }
1745
1746        reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1747                                         OVS_VPORT_CMD_DEL);
1748        err = PTR_ERR(reply);
1749        if (IS_ERR(reply))
1750                goto exit_unlock;
1751
1752        ovs_dp_detach_port(vport);
1753
1754        genl_notify(reply, genl_info_net(info), info->snd_portid,
1755                    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
1756
1757exit_unlock:
1758        rtnl_unlock();
1759        return err;
1760}
1761
1762static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
1763{
1764        struct nlattr **a = info->attrs;
1765        struct ovs_header *ovs_header = info->userhdr;
1766        struct sk_buff *reply;
1767        struct vport *vport;
1768        int err;
1769
1770        rcu_read_lock();
1771        vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
1772        err = PTR_ERR(vport);
1773        if (IS_ERR(vport))
1774                goto exit_unlock;
1775
1776        reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq,
1777                                         OVS_VPORT_CMD_NEW);
1778        err = PTR_ERR(reply);
1779        if (IS_ERR(reply))
1780                goto exit_unlock;
1781
1782        rcu_read_unlock();
1783
1784        return genlmsg_reply(reply, info);
1785
1786exit_unlock:
1787        rcu_read_unlock();
1788        return err;
1789}
1790
1791static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1792{
1793        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
1794        struct datapath *dp;
1795        int bucket = cb->args[0], skip = cb->args[1];
1796        int i, j = 0;
1797
1798        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
1799        if (!dp)
1800                return -ENODEV;
1801
1802        rcu_read_lock();
1803        for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
1804                struct vport *vport;
1805                struct hlist_node *n;
1806
1807                j = 0;
1808                hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) {
1809                        if (j >= skip &&
1810                            ovs_vport_cmd_fill_info(vport, skb,
1811                                                    NETLINK_CB(cb->skb).portid,
1812                                                    cb->nlh->nlmsg_seq,
1813                                                    NLM_F_MULTI,
1814                                                    OVS_VPORT_CMD_NEW) < 0)
1815                                goto out;
1816
1817                        j++;
1818                }
1819                skip = 0;
1820        }
1821out:
1822        rcu_read_unlock();
1823
1824        cb->args[0] = i;
1825        cb->args[1] = j;
1826
1827        return skb->len;
1828}
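/*
 * Dump state is carried in the callback: cb->args[0] is the next hash
 * bucket to visit and cb->args[1] the number of entries already emitted
 * from it, so a dump that fills its skb mid-bucket resumes at the same
 * entry on the next read.  Userspace triggers this path by sending
 * OVS_VPORT_CMD_GET with NLM_F_DUMP set.
 */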
1829
1830static struct genl_ops dp_vport_genl_ops[] = {
1831        { .cmd = OVS_VPORT_CMD_NEW,
1832          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1833          .policy = vport_policy,
1834          .doit = ovs_vport_cmd_new
1835        },
1836        { .cmd = OVS_VPORT_CMD_DEL,
1837          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1838          .policy = vport_policy,
1839          .doit = ovs_vport_cmd_del
1840        },
1841        { .cmd = OVS_VPORT_CMD_GET,
1842          .flags = 0,               /* OK for unprivileged users. */
1843          .policy = vport_policy,
1844          .doit = ovs_vport_cmd_get,
1845          .dumpit = ovs_vport_cmd_dump
1846        },
1847        { .cmd = OVS_VPORT_CMD_SET,
1848          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1849          .policy = vport_policy,
1850          .doit = ovs_vport_cmd_set,
1851        },
1852};
1853
1854struct genl_family_and_ops {
1855        struct genl_family *family;
1856        struct genl_ops *ops;
1857        int n_ops;
1858        struct genl_multicast_group *group;
1859};
1860
1861static const struct genl_family_and_ops dp_genl_families[] = {
1862        { &dp_datapath_genl_family,
1863          dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
1864          &ovs_dp_datapath_multicast_group },
1865        { &dp_vport_genl_family,
1866          dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
1867          &ovs_dp_vport_multicast_group },
1868        { &dp_flow_genl_family,
1869          dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
1870          &ovs_dp_flow_multicast_group },
1871        { &dp_packet_genl_family,
1872          dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
1873          NULL },
1874};
1875
1876static void dp_unregister_genl(int n_families)
1877{
1878        int i;
1879
1880        for (i = 0; i < n_families; i++)
1881                genl_unregister_family(dp_genl_families[i].family);
1882}
1883
1884static int dp_register_genl(void)
1885{
1886        int n_registered;
1887        int err;
1888        int i;
1889
1890        n_registered = 0;
1891        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
1892                const struct genl_family_and_ops *f = &dp_genl_families[i];
1893
1894                err = genl_register_family_with_ops(f->family, f->ops,
1895                                                    f->n_ops);
1896                if (err)
1897                        goto error;
1898                n_registered++;
1899
1900                if (f->group) {
1901                        err = genl_register_mc_group(f->family, f->group);
1902                        if (err)
1903                                goto error;
1904                }
1905        }
1906
1907        return 0;
1908
1909error:
1910        dp_unregister_genl(n_registered);
1911        return err;
1912}
1913
1914static void rehash_flow_table(struct work_struct *work)
1915{
1916        struct datapath *dp;
1917        struct net *net;
1918
1919        genl_lock();
1920        rtnl_lock();
1921        for_each_net(net) {
1922                struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1923
1924                list_for_each_entry(dp, &ovs_net->dps, list_node) {
1925                        struct flow_table *old_table = genl_dereference(dp->table);
1926                        struct flow_table *new_table;
1927
1928                        new_table = ovs_flow_tbl_rehash(old_table);
1929                        if (!IS_ERR(new_table)) {
1930                                rcu_assign_pointer(dp->table, new_table);
1931                                ovs_flow_tbl_deferred_destroy(old_table);
1932                        }
1933                }
1934        }
1935        rtnl_unlock();
1936        genl_unlock();
1937
1938        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1939}
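/*
 * Note that rehash_flow_table() is best-effort: if ovs_flow_tbl_rehash()
 * fails, the old table simply stays in place until the next interval.
 * Illustration only: the reader side that makes the deferred destroy
 * above safe.  A hypothetical lookup helper; the real ones live in
 * flow.c.
 */
static struct sw_flow *example_flow_lookup(struct datapath *dp,
					   const struct sw_flow_key *key)
{
	struct flow_table *table;
	struct sw_flow *flow = NULL;

	rcu_read_lock();
	/* Pairs with rcu_assign_pointer() above: a reader either sees
	 * the old table or the new one, and the old table is only freed
	 * once every such reader has left its read-side section.
	 */
	table = rcu_dereference(dp->table);
	/* ... hash 'key' into 'table' and set 'flow' ... */
	rcu_read_unlock();
	return flow;
}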
1940
1941static int __net_init ovs_init_net(struct net *net)
1942{
1943        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1944
1945        INIT_LIST_HEAD(&ovs_net->dps);
1946        return 0;
1947}
1948
1949static void __net_exit ovs_exit_net(struct net *net)
1950{
1951        struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
1952        struct datapath *dp, *dp_next;
1953
1954        genl_lock();
1955        list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
1956                __dp_destroy(dp);
1957        genl_unlock();
1958}
1959
1960static struct pernet_operations ovs_net_ops = {
1961        .init = ovs_init_net,
1962        .exit = ovs_exit_net,
1963        .id   = &ovs_net_id,
1964        .size = sizeof(struct ovs_net),
1965};
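/* register_pernet_device() below allocates sizeof(struct ovs_net) bytes
 * for every network namespace (current and future), stores the slot
 * index in ovs_net_id, and runs .init/.exit as namespaces come and go;
 * net_generic(net, ovs_net_id) then returns that per-namespace block.
 */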
1966
1967static int __init dp_init(void)
1968{
1969        struct sk_buff *dummy_skb;
1970        int err;
1971
1972        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
1973
1974        pr_info("Open vSwitch switching datapath\n");
1975
1976        err = ovs_flow_init();
1977        if (err)
1978                goto error;
1979
1980        err = ovs_vport_init();
1981        if (err)
1982                goto error_flow_exit;
1983
1984        err = register_pernet_device(&ovs_net_ops);
1985        if (err)
1986                goto error_vport_exit;
1987
1988        err = register_netdevice_notifier(&ovs_dp_device_notifier);
1989        if (err)
1990                goto error_netns_exit;
1991
1992        err = dp_register_genl();
1993        if (err < 0)
1994                goto error_unreg_notifier;
1995
1996        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
1997
1998        return 0;
1999
2000error_unreg_notifier:
2001        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2002error_netns_exit:
2003        unregister_pernet_device(&ovs_net_ops);
2004error_vport_exit:
2005        ovs_vport_exit();
2006error_flow_exit:
2007        ovs_flow_exit();
2008error:
2009        return err;
2010}
2011
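/* Tear down in the reverse order of dp_init().  The rcu_barrier() waits
 * for outstanding RCU callbacks (e.g. deferred flow-table destruction)
 * to finish before the code they reference is unloaded.
 */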
2012static void dp_cleanup(void)
2013{
2014        cancel_delayed_work_sync(&rehash_flow_wq);
2015        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2016        unregister_netdevice_notifier(&ovs_dp_device_notifier);
2017        unregister_pernet_device(&ovs_net_ops);
2018        rcu_barrier();
2019        ovs_vport_exit();
2020        ovs_flow_exit();
2021}
2022
2023module_init(dp_init);
2024module_exit(dp_cleanup);
2025
2026MODULE_DESCRIPTION("Open vSwitch switching datapath");
2027MODULE_LICENSE("GPL");
2028