linux/net/openvswitch/datapath.c
/*
 * Copyright (c) 2007-2012 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <linux/workqueue.h>
#include <net/genetlink.h>

#include "datapath.h"
#include "flow.h"
#include "vport-internal_dev.h"

/**
 * DOC: Locking:
 *
 * Writes to device state (add/remove datapath, port, set operations on vports,
 * etc.) are protected by RTNL.
 *
 * Writes to other state (flow table modifications, set miscellaneous datapath
 * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
 * genl_mutex.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization, but they nest under all of the above and don't interact
 * with each other.
 */
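
/*
 * A minimal sketch of the read side of the rules above (illustrative only,
 * not code from this file; use(vport) is a hypothetical consumer of the
 * pointer):
 *
 *        rcu_read_lock();
 *        vport = rcu_dereference(dp->ports[port_no]);
 *        if (vport)
 *                use(vport);     (valid only inside the read-side section)
 *        rcu_read_unlock();
 *
 * Writers instead hold RTNL (or genl_mutex) and publish updates with
 * rcu_assign_pointer(), as new_vport() and ovs_dp_detach_port() do below.
 */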

/* Global list of datapaths to enable dumping them all out.
 * Protected by genl_mutex.
 */
static LIST_HEAD(dps);

#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
static void rehash_flow_table(struct work_struct *work);
static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
                             const struct dp_upcall_info *);
static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
                                  const struct dp_upcall_info *);

/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
static struct datapath *get_dp(int dp_ifindex)
{
        struct datapath *dp = NULL;
        struct net_device *dev;

        rcu_read_lock();
        dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
        if (dev) {
                struct vport *vport = ovs_internal_dev_get_vport(dev);
                if (vport)
                        dp = vport->dp;
        }
        rcu_read_unlock();

        return dp;
}

/* Must be called with rcu_read_lock or RTNL lock. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]);
        return vport->ops->get_name(vport);
}

static int get_dpifindex(struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = rcu_dereference(dp->ports[OVSP_LOCAL]);
        if (local)
                ifindex = local->ops->get_ifindex(local);
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
        free_percpu(dp->stats_percpu);
        kfree(dp);
}

/* Called with RTNL lock and genl_lock. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;

                rcu_assign_pointer(dp->ports[parms->port_no], vport);
                list_add(&vport->node, &dp->port_list);
        }

        return vport;
}

/* Called with RTNL lock. */
void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_RTNL();

        /* First drop references to device. */
        list_del(&p->node);
        rcu_assign_pointer(p->dp->ports[p->port_no], NULL);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Must be called with rcu_read_lock. */
void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
{
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct dp_stats_percpu *stats;
        struct sw_flow_key key;
        u64 *stats_counter;
        int error;
        int key_len;

        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        /* Extract flow from 'skb' into 'key'. */
        error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
        if (unlikely(error)) {
                kfree_skb(skb);
                return;
        }

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;

                upcall.cmd = OVS_PACKET_CMD_MISS;
                upcall.key = &key;
                upcall.userdata = NULL;
                upcall.pid = p->upcall_pid;
                ovs_dp_upcall(dp, skb, &upcall);
                consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        OVS_CB(skb)->flow = flow;

        stats_counter = &stats->n_hit;
        ovs_flow_used(OVS_CB(skb)->flow, skb);
        ovs_execute_actions(dp, skb);

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->sync);
        (*stats_counter)++;
        u64_stats_update_end(&stats->sync);
}

static struct genl_family dp_packet_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX
};

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
              const struct dp_upcall_info *upcall_info)
{
        struct dp_stats_percpu *stats;
        int dp_ifindex;
        int err;

        if (upcall_info->pid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex) {
                err = -ENODEV;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
        else
                err = queue_gso_packets(dp_ifindex, skb, upcall_info);
        if (err)
                goto err;

        return 0;

err:
        stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());

        u64_stats_update_begin(&stats->sync);
        stats->n_lost++;
        u64_stats_update_end(&stats->sync);

        return err;
}

static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
                             const struct dp_upcall_info *upcall_info)
{
        unsigned short gso_type = skb_shinfo(skb)->gso_type;
        struct dp_upcall_info later_info;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
        if (IS_ERR(segs))
                return PTR_ERR(segs);

        /* Queue all of the segments. */
        skb = segs;
        do {
                err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
                if (err)
                        break;

                if (skb == segs && gso_type & SKB_GSO_UDP) {
                        /* The initial flow key extracted by ovs_flow_extract()
                         * in this case is for a first fragment, so we need to
                         * properly mark later fragments.
                         */
                        later_key = *upcall_info->key;
                        later_key.ip.frag = OVS_FRAG_TYPE_LATER;

                        later_info = *upcall_info;
                        later_info.key = &later_key;
                        upcall_info = &later_info;
                }
        } while ((skb = skb->next));

        /* Free all of the segments. */
        skb = segs;
        do {
                nskb = skb->next;
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        } while ((skb = nskb));
        return err;
}

static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
                                  const struct dp_upcall_info *upcall_info)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb; /* to be queued to userspace */
        struct nlattr *nla;
        unsigned int len;
        int err;

        if (vlan_tx_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
                if (!nskb)
                        return -ENOMEM;

                nskb->vlan_tci = 0;
                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        len = sizeof(struct ovs_header);
        len += nla_total_size(skb->len);
        len += nla_total_size(FLOW_BUFSIZE);
        if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
                len += nla_total_size(8);

        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        upcall->dp_ifindex = dp_ifindex;

        nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
        ovs_flow_to_nlattrs(upcall_info->key, user_skb);
        nla_nest_end(user_skb, nla);

        if (upcall_info->userdata)
                nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
                            nla_get_u64(upcall_info->userdata));

        nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);

        skb_copy_and_csum_dev(skb, nla_data(nla));

        err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);

out:
        kfree_skb(nskb);
        return err;
}

/* Called with genl_mutex. */
static int flush_flows(int dp_ifindex)
{
        struct flow_table *old_table;
        struct flow_table *new_table;
        struct datapath *dp;

        dp = get_dp(dp_ifindex);
        if (!dp)
                return -ENODEV;

        old_table = genl_dereference(dp->table);
        new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
        if (!new_table)
                return -ENOMEM;

        rcu_assign_pointer(dp->table, new_table);

        ovs_flow_tbl_deferred_destroy(old_table);
        return 0;
}

static int validate_actions(const struct nlattr *attr,
                                const struct sw_flow_key *key, int depth);

static int validate_sample(const struct nlattr *attr,
                                const struct sw_flow_key *key, int depth)
{
        const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
        const struct nlattr *probability, *actions;
        const struct nlattr *a;
        int rem;

        memset(attrs, 0, sizeof(attrs));
        nla_for_each_nested(a, attr, rem) {
                int type = nla_type(a);
                if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
                        return -EINVAL;
                attrs[type] = a;
        }
        if (rem)
                return -EINVAL;

        probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
        if (!probability || nla_len(probability) != sizeof(u32))
                return -EINVAL;

        actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
        if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
                return -EINVAL;
        return validate_actions(actions, key, depth + 1);
}

static int validate_tp_port(const struct sw_flow_key *flow_key)
{
        if (flow_key->eth.type == htons(ETH_P_IP)) {
                if (flow_key->ipv4.tp.src || flow_key->ipv4.tp.dst)
                        return 0;
        } else if (flow_key->eth.type == htons(ETH_P_IPV6)) {
                if (flow_key->ipv6.tp.src || flow_key->ipv6.tp.dst)
                        return 0;
        }

        return -EINVAL;
}

static int validate_set(const struct nlattr *a,
                        const struct sw_flow_key *flow_key)
{
        const struct nlattr *ovs_key = nla_data(a);
        int key_type = nla_type(ovs_key);

        /* There can be only one key in an action */
        if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
                return -EINVAL;

        if (key_type > OVS_KEY_ATTR_MAX ||
            nla_len(ovs_key) != ovs_key_lens[key_type])
                return -EINVAL;

        switch (key_type) {
        const struct ovs_key_ipv4 *ipv4_key;

        case OVS_KEY_ATTR_PRIORITY:
        case OVS_KEY_ATTR_ETHERNET:
                break;

        case OVS_KEY_ATTR_IPV4:
                if (flow_key->eth.type != htons(ETH_P_IP))
                        return -EINVAL;

                if (!flow_key->ip.proto)
                        return -EINVAL;

                ipv4_key = nla_data(ovs_key);
                if (ipv4_key->ipv4_proto != flow_key->ip.proto)
                        return -EINVAL;

                if (ipv4_key->ipv4_frag != flow_key->ip.frag)
                        return -EINVAL;

                break;

        case OVS_KEY_ATTR_TCP:
                if (flow_key->ip.proto != IPPROTO_TCP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        case OVS_KEY_ATTR_UDP:
                if (flow_key->ip.proto != IPPROTO_UDP)
                        return -EINVAL;

                return validate_tp_port(flow_key);

        default:
                return -EINVAL;
        }

        return 0;
}

static int validate_userspace(const struct nlattr *attr)
{
        static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
                [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
                [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
        };
        struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
        int error;

        error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
                                 attr, userspace_policy);
        if (error)
                return error;

        if (!a[OVS_USERSPACE_ATTR_PID] ||
            !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
                return -EINVAL;

        return 0;
}

static int validate_actions(const struct nlattr *attr,
                                const struct sw_flow_key *key, int depth)
{
        const struct nlattr *a;
        int rem, err;

        if (depth >= SAMPLE_ACTION_DEPTH)
                return -EOVERFLOW;

        nla_for_each_nested(a, attr, rem) {
                /* Expected argument lengths, (u32)-1 for variable length. */
                static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
                        [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
                        [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
                        [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
                        [OVS_ACTION_ATTR_POP_VLAN] = 0,
                        [OVS_ACTION_ATTR_SET] = (u32)-1,
                        [OVS_ACTION_ATTR_SAMPLE] = (u32)-1
                };
                const struct ovs_action_push_vlan *vlan;
                int type = nla_type(a);

                if (type > OVS_ACTION_ATTR_MAX ||
                    (action_lens[type] != nla_len(a) &&
                     action_lens[type] != (u32)-1))
                        return -EINVAL;

                switch (type) {
                case OVS_ACTION_ATTR_UNSPEC:
                        return -EINVAL;

                case OVS_ACTION_ATTR_USERSPACE:
                        err = validate_userspace(a);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_OUTPUT:
                        if (nla_get_u32(a) >= DP_MAX_PORTS)
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_POP_VLAN:
                        break;

                case OVS_ACTION_ATTR_PUSH_VLAN:
                        vlan = nla_data(a);
                        if (vlan->vlan_tpid != htons(ETH_P_8021Q))
                                return -EINVAL;
                        if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
                                return -EINVAL;
                        break;

                case OVS_ACTION_ATTR_SET:
                        err = validate_set(a, key);
                        if (err)
                                return err;
                        break;

                case OVS_ACTION_ATTR_SAMPLE:
                        err = validate_sample(a, key, depth);
                        if (err)
                                return err;
                        break;

                default:
                        return -EINVAL;
                }
        }

        if (rem > 0)
                return -EINVAL;

        return 0;
}

static void clear_stats(struct sw_flow *flow)
{
        flow->used = 0;
        flow->tcp_flags = 0;
        flow->packet_count = 0;
        flow->byte_count = 0;
}

static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct datapath *dp;
        struct ethhdr *eth;
        int len;
        int err;
        int key_len;

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS] ||
            nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);

        skb_reset_mac_header(packet);
        eth = eth_hdr(packet);

        /* Normally, setting the skb 'protocol' field would be handled by a
         * call to eth_type_trans(), but it assumes there's a sending
         * device, which we may not have. */
        if (ntohs(eth->h_proto) >= 1536)
                packet->protocol = eth->h_proto;
        else
                packet->protocol = htons(ETH_P_802_2);

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
        if (err)
                goto err_flow_free;

        err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
                                             &flow->key.phy.in_port,
                                             a[OVS_PACKET_ATTR_KEY]);
        if (err)
                goto err_flow_free;

        err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
        if (err)
                goto err_flow_free;

        flow->hash = ovs_flow_hash(&flow->key, key_len);

        acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
        err = PTR_ERR(acts);
        if (IS_ERR(acts))
                goto err_flow_free;
        rcu_assign_pointer(flow->sf_acts, acts);

        OVS_CB(packet)->flow = flow;
        packet->priority = flow->key.phy.priority;

        rcu_read_lock();
        dp = get_dp(ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        local_bh_disable();
        err = ovs_execute_actions(dp, packet);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
};

static struct genl_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = packet_policy,
          .doit = ovs_packet_cmd_execute
        }
};

static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
{
        int i;
        struct flow_table *table = genl_dereference(dp->table);

        stats->n_flows = ovs_flow_tbl_count(table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;
        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
        }
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
};

static struct genl_family dp_flow_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX
};

static struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP
};

/* Called with genl_lock. */
static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
                                  struct sk_buff *skb, u32 pid,
                                  u32 seq, u32 flags, u8 cmd)
{
        const int skb_orig_len = skb->len;
        const struct sw_flow_actions *sf_acts;
        struct ovs_flow_stats stats;
        struct ovs_header *ovs_header;
        struct nlattr *nla;
        unsigned long used;
        u8 tcp_flags;
        int err;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
        if (!nla)
                goto nla_put_failure;
        err = ovs_flow_to_nlattrs(&flow->key, skb);
        if (err)
                goto error;
        nla_nest_end(skb, nla);

        spin_lock_bh(&flow->lock);
        used = flow->used;
        stats.n_packets = flow->packet_count;
        stats.n_bytes = flow->byte_count;
        tcp_flags = flow->tcp_flags;
        spin_unlock_bh(&flow->lock);

        if (used &&
            nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used)))
                goto nla_put_failure;

        if (stats.n_packets &&
            nla_put(skb, OVS_FLOW_ATTR_STATS,
                    sizeof(struct ovs_flow_stats), &stats))
                goto nla_put_failure;

        if (tcp_flags &&
            nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags))
                goto nla_put_failure;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
                      sf_acts->actions);
        if (err < 0 && skb_orig_len)
                goto error;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        err = -EMSGSIZE;
error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
{
        const struct sw_flow_actions *sf_acts;
        int len;

        sf_acts = rcu_dereference_protected(flow->sf_acts,
                                            lockdep_genl_is_held());

        /* OVS_FLOW_ATTR_KEY */
        len = nla_total_size(FLOW_BUFSIZE);
        /* OVS_FLOW_ATTR_ACTIONS */
        len += nla_total_size(sf_acts->actions_len);
        /* OVS_FLOW_ATTR_STATS */
        len += nla_total_size(sizeof(struct ovs_flow_stats));
        /* OVS_FLOW_ATTR_TCP_FLAGS */
        len += nla_total_size(1);
        /* OVS_FLOW_ATTR_USED */
        len += nla_total_size(8);

        len += NLMSG_ALIGN(sizeof(struct ovs_header));

        return genlmsg_new(len, GFP_KERNEL);
}

static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
                                               struct datapath *dp,
                                               u32 pid, u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(flow);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
        BUG_ON(retval < 0);
        return skb;
}

static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply;
        struct datapath *dp;
        struct flow_table *table;
        int error;
        int key_len;

        /* Extract key. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY])
                goto error;
        error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
        if (error)
                goto error;

        /* Validate actions. */
        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, 0);
                if (error)
                        goto error;
        } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
                error = -EINVAL;
                goto error;
        }

        dp = get_dp(ovs_header->dp_ifindex);
        error = -ENODEV;
        if (!dp)
                goto error;

        table = genl_dereference(dp->table);
        flow = ovs_flow_tbl_lookup(table, &key, key_len);
        if (!flow) {
                struct sw_flow_actions *acts;

                /* Bail out if we're not allowed to create a new flow. */
                error = -ENOENT;
                if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
                        goto error;

                /* Expand table, if necessary, to make room. */
                if (ovs_flow_tbl_need_to_expand(table)) {
                        struct flow_table *new_table;

                        new_table = ovs_flow_tbl_expand(table);
                        if (!IS_ERR(new_table)) {
                                rcu_assign_pointer(dp->table, new_table);
                                ovs_flow_tbl_deferred_destroy(table);
                                table = genl_dereference(dp->table);
                        }
                }

                /* Allocate flow. */
                flow = ovs_flow_alloc();
                if (IS_ERR(flow)) {
                        error = PTR_ERR(flow);
                        goto error;
                }
                flow->key = key;
                clear_stats(flow);

                /* Obtain actions. */
                acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
                error = PTR_ERR(acts);
                if (IS_ERR(acts))
                        goto error_free_flow;
                rcu_assign_pointer(flow->sf_acts, acts);

                /* Put flow in bucket. */
                flow->hash = ovs_flow_hash(&key, key_len);
                ovs_flow_tbl_insert(table, flow);

                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
                                                info->snd_seq,
                                                OVS_FLOW_CMD_NEW);
        } else {
                /* We found a matching flow. */
                struct sw_flow_actions *old_acts;
                struct nlattr *acts_attrs;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                error = -EEXIST;
                if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
                    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
                        goto error;

                /* Update actions. */
                old_acts = rcu_dereference_protected(flow->sf_acts,
                                                     lockdep_genl_is_held());
                acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
                if (acts_attrs &&
                   (old_acts->actions_len != nla_len(acts_attrs) ||
                   memcmp(old_acts->actions, nla_data(acts_attrs),
                          old_acts->actions_len))) {
                        struct sw_flow_actions *new_acts;

                        new_acts = ovs_flow_actions_alloc(acts_attrs);
                        error = PTR_ERR(new_acts);
                        if (IS_ERR(new_acts))
                                goto error;

                        rcu_assign_pointer(flow->sf_acts, new_acts);
                        ovs_flow_deferred_free_acts(old_acts);
                }

                reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
                                               info->snd_seq, OVS_FLOW_CMD_NEW);

                /* Clear stats. */
                if (a[OVS_FLOW_ATTR_CLEAR]) {
                        spin_lock_bh(&flow->lock);
                        clear_stats(flow);
                        spin_unlock_bh(&flow->lock);
                }
        }

        if (!IS_ERR(reply))
                genl_notify(reply, genl_info_net(info), info->snd_pid,
                           ovs_dp_flow_multicast_group.id, info->nlhdr,
                           GFP_KERNEL);
        else
                netlink_set_err(init_net.genl_sock, 0,
                                ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
        return 0;

error_free_flow:
        ovs_flow_free(flow);
error:
        return error;
}

static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct flow_table *table;
        int err;
        int key_len;

        if (!a[OVS_FLOW_ATTR_KEY])
                return -EINVAL;
        err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = genl_dereference(dp->table);
        flow = ovs_flow_tbl_lookup(table, &key, key_len);
        if (!flow)
                return -ENOENT;

        reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
                                        info->snd_seq, OVS_FLOW_CMD_NEW);
        if (IS_ERR(reply))
                return PTR_ERR(reply);

        return genlmsg_reply(reply, info);
}

static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct flow_table *table;
        int err;
        int key_len;

        if (!a[OVS_FLOW_ATTR_KEY])
                return flush_flows(ovs_header->dp_ifindex);
        err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
        if (err)
                return err;

        dp = get_dp(ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = genl_dereference(dp->table);
        flow = ovs_flow_tbl_lookup(table, &key, key_len);
        if (!flow)
                return -ENOENT;

        reply = ovs_flow_cmd_alloc_info(flow);
        if (!reply)
                return -ENOMEM;

        ovs_flow_tbl_remove(table, flow);

        err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
                                     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
        BUG_ON(err < 0);

        ovs_flow_deferred_free(flow);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
        return 0;
}

static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        struct flow_table *table;

        dp = get_dp(ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        table = genl_dereference(dp->table);

        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_next(table, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, dp, skb,
                                           NETLINK_CB(cb->skb).pid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_NEW) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        return skb->len;
}

static struct genl_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new_or_set
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = flow_policy,
          .doit = ovs_flow_cmd_new_or_set,
        },
};

static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
        [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
        [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
};

static struct genl_family dp_datapath_genl_family = {
        .id = GENL_ID_GENERATE,
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_DATAPATH_FAMILY,
        .version = OVS_DATAPATH_VERSION,
        .maxattr = OVS_DP_ATTR_MAX
};

static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP
};

static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
                                u32 pid, u32 seq, u32 flags, u8 cmd)
{
        struct ovs_header *ovs_header;
        struct ovs_dp_stats dp_stats;
        int err;

        ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                goto error;

        ovs_header->dp_ifindex = get_dpifindex(dp);

        rcu_read_lock();
        err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
        rcu_read_unlock();
        if (err)
                goto nla_put_failure;

        get_dp_stats(dp, &dp_stats);
        if (nla_put(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats))
                goto nla_put_failure;

        return genlmsg_end(skb, ovs_header);

nla_put_failure:
        genlmsg_cancel(skb, ovs_header);
error:
        return -EMSGSIZE;
}

static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
                                             u32 seq, u8 cmd)
{
        struct sk_buff *skb;
        int retval;

        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
        if (retval < 0) {
                kfree_skb(skb);
                return ERR_PTR(retval);
        }
        return skb;
}

/* Called with genl_mutex and optionally with RTNL lock also. */
static struct datapath *lookup_datapath(struct ovs_header *ovs_header,
                                        struct nlattr *a[OVS_DP_ATTR_MAX + 1])
{
        struct datapath *dp;

        if (!a[OVS_DP_ATTR_NAME])
                dp = get_dp(ovs_header->dp_ifindex);
        else {
                struct vport *vport;

                rcu_read_lock();
                vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
                dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
                rcu_read_unlock();
        }
        return dp ? dp : ERR_PTR(-ENODEV);
}

static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct datapath *dp;
        struct vport *vport;
        int err;

        err = -EINVAL;
        if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
                goto err;

        rtnl_lock();
        err = -ENODEV;
        if (!try_module_get(THIS_MODULE))
                goto err_unlock_rtnl;

        err = -ENOMEM;
        dp = kzalloc(sizeof(*dp), GFP_KERNEL);
        if (dp == NULL)
                goto err_put_module;
        INIT_LIST_HEAD(&dp->port_list);

        /* Allocate table. */
        err = -ENOMEM;
        rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
        if (!dp->table)
                goto err_free_dp;

        dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
        if (!dp->stats_percpu) {
                err = -ENOMEM;
                goto err_destroy_table;
        }

        /* Set up our datapath device. */
        parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
        parms.type = OVS_VPORT_TYPE_INTERNAL;
        parms.options = NULL;
        parms.dp = dp;
        parms.port_no = OVSP_LOCAL;
        parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);

        vport = new_vport(&parms);
        if (IS_ERR(vport)) {
                err = PTR_ERR(vport);
                if (err == -EBUSY)
                        err = -EEXIST;

                goto err_destroy_percpu;
        }

        reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
                                      info->snd_seq, OVS_DP_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto err_destroy_local_port;

        list_add_tail(&dp->list_node, &dps);
        rtnl_unlock();

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    ovs_dp_datapath_multicast_group.id, info->nlhdr,
                    GFP_KERNEL);
        return 0;

err_destroy_local_port:
        ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
err_destroy_percpu:
        free_percpu(dp->stats_percpu);
err_destroy_table:
        ovs_flow_tbl_destroy(genl_dereference(dp->table));
err_free_dp:
        kfree(dp);
err_put_module:
        module_put(THIS_MODULE);
err_unlock_rtnl:
        rtnl_unlock();
err:
        return err;
}
1313
1314static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1315{
1316        struct vport *vport, *next_vport;
1317        struct sk_buff *reply;
1318        struct datapath *dp;
1319        int err;
1320
1321        rtnl_lock();
1322        dp = lookup_datapath(info->userhdr, info->attrs);
1323        err = PTR_ERR(dp);
1324        if (IS_ERR(dp))
1325                goto exit_unlock;
1326
1327        reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1328                                      info->snd_seq, OVS_DP_CMD_DEL);
1329        err = PTR_ERR(reply);
1330        if (IS_ERR(reply))
1331                goto exit_unlock;
1332
1333        list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
1334                if (vport->port_no != OVSP_LOCAL)
1335                        ovs_dp_detach_port(vport);
1336
1337        list_del(&dp->list_node);
1338        ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
1339
1340        /* rtnl_unlock() will wait until all the references to devices that
1341         * are pending unregistration have been dropped.  We do it here to
1342         * ensure that any internal devices (which contain DP pointers) are
1343         * fully destroyed before freeing the datapath.
1344         */
1345        rtnl_unlock();
1346
1347        call_rcu(&dp->rcu, destroy_dp_rcu);
1348        module_put(THIS_MODULE);
1349
1350        genl_notify(reply, genl_info_net(info), info->snd_pid,
1351                    ovs_dp_datapath_multicast_group.id, info->nlhdr,
1352                    GFP_KERNEL);
1353
1354        return 0;
1355
1356exit_unlock:
1357        rtnl_unlock();
1358        return err;
1359}
1360
1361static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1362{
1363        struct sk_buff *reply;
1364        struct datapath *dp;
1365        int err;
1366
1367        dp = lookup_datapath(info->userhdr, info->attrs);
1368        if (IS_ERR(dp))
1369                return PTR_ERR(dp);
1370
1371        reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1372                                      info->snd_seq, OVS_DP_CMD_NEW);
1373        if (IS_ERR(reply)) {
1374                err = PTR_ERR(reply);
1375                netlink_set_err(init_net.genl_sock, 0,
1376                                ovs_dp_datapath_multicast_group.id, err);
1377                return 0;
1378        }
1379
1380        genl_notify(reply, genl_info_net(info), info->snd_pid,
1381                    ovs_dp_datapath_multicast_group.id, info->nlhdr,
1382                    GFP_KERNEL);
1383
1384        return 0;
1385}
1386
1387static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1388{
1389        struct sk_buff *reply;
1390        struct datapath *dp;
1391
1392        dp = lookup_datapath(info->userhdr, info->attrs);
1393        if (IS_ERR(dp))
1394                return PTR_ERR(dp);
1395
1396        reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
1397                                      info->snd_seq, OVS_DP_CMD_NEW);
1398        if (IS_ERR(reply))
1399                return PTR_ERR(reply);
1400
1401        return genlmsg_reply(reply, info);
1402}
1403
1404static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1405{
1406        struct datapath *dp;
1407        int skip = cb->args[0];
1408        int i = 0;
1409
1410        list_for_each_entry(dp, &dps, list_node) {
1411                if (i >= skip &&
1412                    ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
1413                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
1414                                         OVS_DP_CMD_NEW) < 0)
1415                        break;
1416                i++;
1417        }
1418
1419        cb->args[0] = i;
1420
1421        return skb->len;
1422}
1423
1424static struct genl_ops dp_datapath_genl_ops[] = {
1425        { .cmd = OVS_DP_CMD_NEW,
1426          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1427          .policy = datapath_policy,
1428          .doit = ovs_dp_cmd_new
1429        },
1430        { .cmd = OVS_DP_CMD_DEL,
1431          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1432          .policy = datapath_policy,
1433          .doit = ovs_dp_cmd_del
1434        },
1435        { .cmd = OVS_DP_CMD_GET,
1436          .flags = 0,               /* OK for unprivileged users. */
1437          .policy = datapath_policy,
1438          .doit = ovs_dp_cmd_get,
1439          .dumpit = ovs_dp_cmd_dump
1440        },
1441        { .cmd = OVS_DP_CMD_SET,
1442          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
1443          .policy = datapath_policy,
1444          .doit = ovs_dp_cmd_set,
1445        },
1446};
1447
1448static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
1449        [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1450        [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
1451        [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
1452        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
1453        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1454        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
1455};
1456
1457static struct genl_family dp_vport_genl_family = {
1458        .id = GENL_ID_GENERATE,
1459        .hdrsize = sizeof(struct ovs_header),
1460        .name = OVS_VPORT_FAMILY,
1461        .version = OVS_VPORT_VERSION,
1462        .maxattr = OVS_VPORT_ATTR_MAX
1463};
1464
1465struct genl_multicast_group ovs_dp_vport_multicast_group = {
1466        .name = OVS_VPORT_MCGROUP
1467};
1468
1469/* Called with RTNL lock or RCU read lock. */
1470static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
1471                                   u32 pid, u32 seq, u32 flags, u8 cmd)
1472{
1473        struct ovs_header *ovs_header;
1474        struct ovs_vport_stats vport_stats;
1475        int err;
1476
1477        ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
1478                                 flags, cmd);
1479        if (!ovs_header)
1480                return -EMSGSIZE;
1481
1482        ovs_header->dp_ifindex = get_dpifindex(vport->dp);
1483
1484        if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
1485            nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
1486            nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) ||
1487            nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid))
1488                goto nla_put_failure;
1489
1490        ovs_vport_get_stats(vport, &vport_stats);
1491        if (nla_put(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
1492                    &vport_stats))
1493                goto nla_put_failure;
1494
1495        err = ovs_vport_get_options(vport, skb);
1496        if (err == -EMSGSIZE)
1497                goto error;
1498
1499        return genlmsg_end(skb, ovs_header);
1500
1501nla_put_failure:
1502        err = -EMSGSIZE;
1503error:
1504        genlmsg_cancel(skb, ovs_header);
1505        return err;
1506}
1507
1508/* Called with RTNL lock or RCU read lock. */
1509struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
1510                                         u32 seq, u8 cmd)
1511{
1512        struct sk_buff *skb;
1513        int retval;
1514
1515        skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
1516        if (!skb)
1517                return ERR_PTR(-ENOMEM);
1518
1519        retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
1520        if (retval < 0) {
1521                kfree_skb(skb);
1522                return ERR_PTR(retval);
1523        }
1524        return skb;
1525}
1526
1527/* Called with RTNL lock or RCU read lock. */
1528static struct vport *lookup_vport(struct ovs_header *ovs_header,
1529                                  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
1530{
1531        struct datapath *dp;
1532        struct vport *vport;
1533
1534        if (a[OVS_VPORT_ATTR_NAME]) {
1535                vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
1536                if (!vport)
1537                        return ERR_PTR(-ENODEV);
1538                if (ovs_header->dp_ifindex &&
1539                    ovs_header->dp_ifindex != get_dpifindex(vport->dp))
1540                        return ERR_PTR(-ENODEV);
1541                return vport;
1542        } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
1543                u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
1544
1545                if (port_no >= DP_MAX_PORTS)
1546                        return ERR_PTR(-EFBIG);
1547
1548                dp = get_dp(ovs_header->dp_ifindex);
1549                if (!dp)
1550                        return ERR_PTR(-ENODEV);
1551
1552                vport = rcu_dereference_rtnl(dp->ports[port_no]);
1553                if (!vport)
1554                        return ERR_PTR(-ENOENT);
1555                return vport;
1556        } else
1557                return ERR_PTR(-EINVAL);
1558}
1559
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct vport_parms parms;
        struct sk_buff *reply;
        struct vport *vport;
        struct datapath *dp;
        u32 port_no;
        int err;

        err = -EINVAL;
        if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
            !a[OVS_VPORT_ATTR_UPCALL_PID])
                goto exit;

        rtnl_lock();
        dp = get_dp(ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto exit_unlock;

        if (a[OVS_VPORT_ATTR_PORT_NO]) {
                port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);

                err = -EFBIG;
                if (port_no >= DP_MAX_PORTS)
                        goto exit_unlock;

                vport = rtnl_dereference(dp->ports[port_no]);
                err = -EBUSY;
                if (vport)
                        goto exit_unlock;
        } else {
                for (port_no = 1; ; port_no++) {
                        if (port_no >= DP_MAX_PORTS) {
                                err = -EFBIG;
                                goto exit_unlock;
                        }
                        vport = rtnl_dereference(dp->ports[port_no]);
                        if (!vport)
                                break;
                }
        }

        parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
        parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
        parms.options = a[OVS_VPORT_ATTR_OPTIONS];
        parms.dp = dp;
        parms.port_no = port_no;
        parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

        vport = new_vport(&parms);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        /* Clear the stale PTR_ERR() value so that the success path below
         * returns 0 rather than the vport pointer truncated to an int.
         */
        err = 0;

        reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         OVS_VPORT_CMD_NEW);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                ovs_dp_detach_port(vport);
                goto exit_unlock;
        }
        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
exit:
        return err;
}

static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        err = 0;
        if (a[OVS_VPORT_ATTR_TYPE] &&
            nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
                err = -EINVAL;

        if (!err && a[OVS_VPORT_ATTR_OPTIONS])
                err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
        if (err)
                goto exit_unlock;
        if (a[OVS_VPORT_ATTR_UPCALL_PID])
                vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);

        reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         OVS_VPORT_CMD_NEW);
        if (IS_ERR(reply)) {
                /* The changes have already been applied and cannot be
                 * rolled back, so report the failure to multicast
                 * listeners rather than failing the request.
                 */
                netlink_set_err(init_net.genl_sock, 0,
                                ovs_dp_vport_multicast_group.id, PTR_ERR(reply));
                goto exit_unlock;
        }

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
        return err;
}

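/* The local port (OVSP_LOCAL) stands for the datapath device itself; it is
 * created and destroyed together with the datapath, so deleting it on its
 * own is rejected below with -EINVAL.
 */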
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        rtnl_lock();
        vport = lookup_vport(info->userhdr, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        if (vport->port_no == OVSP_LOCAL) {
                err = -EINVAL;
                goto exit_unlock;
        }

        reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         OVS_VPORT_CMD_DEL);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        /* Clear the stale PTR_ERR() value so that a successful deletion
         * returns 0 rather than the reply pointer truncated to an int.
         */
        err = 0;
        ovs_dp_detach_port(vport);

        genl_notify(reply, genl_info_net(info), info->snd_pid,
                    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);

exit_unlock:
        rtnl_unlock();
        return err;
}

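/* Unlike the commands above, GET does not modify any state, so it runs under
 * the RCU read lock alone instead of taking RTNL.
 */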
static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sk_buff *reply;
        struct vport *vport;
        int err;

        rcu_read_lock();
        vport = lookup_vport(ovs_header, a);
        err = PTR_ERR(vport);
        if (IS_ERR(vport))
                goto exit_unlock;

        reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
                                         OVS_VPORT_CMD_NEW);
        err = PTR_ERR(reply);
        if (IS_ERR(reply))
                goto exit_unlock;

        rcu_read_unlock();

        return genlmsg_reply(reply, info);

exit_unlock:
        rcu_read_unlock();
        return err;
}

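/* Dumps are restartable: cb->args[0] holds the next port number to visit, so
 * a dump that fills its skb picks up where it left off on the following call
 * rather than starting over.
 */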
static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct datapath *dp;
        u32 port_no;
        int retval;

        dp = get_dp(ovs_header->dp_ifindex);
        if (!dp)
                return -ENODEV;

        rcu_read_lock();
        for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
                struct vport *vport;

                vport = rcu_dereference(dp->ports[port_no]);
                if (!vport)
                        continue;

                if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
                                            cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                            OVS_VPORT_CMD_NEW) < 0)
                        break;
        }
        rcu_read_unlock();

        cb->args[0] = port_no;
        retval = skb->len;

        return retval;
}

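/* Periodically move every datapath's flows into a freshly allocated flow
 * table.  This makes it harder for an attacker to find and exploit hash
 * collisions that would pile flows onto a single bucket; the old table is
 * destroyed only after an RCU grace period, so concurrent readers are safe.
 */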
static void rehash_flow_table(struct work_struct *work)
{
        struct datapath *dp;

        genl_lock();

        list_for_each_entry(dp, &dps, list_node) {
                struct flow_table *old_table = genl_dereference(dp->table);
                struct flow_table *new_table;

                new_table = ovs_flow_tbl_rehash(old_table);
                if (!IS_ERR(new_table)) {
                        rcu_assign_pointer(dp->table, new_table);
                        ovs_flow_tbl_deferred_destroy(old_table);
                }
        }

        genl_unlock();

        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
}

static struct genl_ops dp_vport_genl_ops[] = {
        { .cmd = OVS_VPORT_CMD_NEW,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_new
        },
        { .cmd = OVS_VPORT_CMD_DEL,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_del
        },
        { .cmd = OVS_VPORT_CMD_GET,
          .flags = 0,               /* OK for unprivileged users. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_get,
          .dumpit = ovs_vport_cmd_dump
        },
        { .cmd = OVS_VPORT_CMD_SET,
          .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .policy = vport_policy,
          .doit = ovs_vport_cmd_set
        },
};

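/* Ties each Generic Netlink family to its ops and optional multicast group
 * so that registration, and the unwinding of a partial registration, can
 * stay table-driven.
 */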
struct genl_family_and_ops {
        struct genl_family *family;
        struct genl_ops *ops;
        int n_ops;
        struct genl_multicast_group *group;
};

static const struct genl_family_and_ops dp_genl_families[] = {
        { &dp_datapath_genl_family,
          dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
          &ovs_dp_datapath_multicast_group },
        { &dp_vport_genl_family,
          dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
          &ovs_dp_vport_multicast_group },
        { &dp_flow_genl_family,
          dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
          &ovs_dp_flow_multicast_group },
        { &dp_packet_genl_family,
          dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
          NULL },
};

static void dp_unregister_genl(int n_families)
{
        int i;

        for (i = 0; i < n_families; i++)
                genl_unregister_family(dp_genl_families[i].family);
}

static int dp_register_genl(void)
{
        int n_registered;
        int err;
        int i;

        n_registered = 0;
        for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
                const struct genl_family_and_ops *f = &dp_genl_families[i];

                err = genl_register_family_with_ops(f->family, f->ops,
                                                    f->n_ops);
                if (err)
                        goto error;
                n_registered++;

                if (f->group) {
                        err = genl_register_mc_group(f->family, f->group);
                        if (err)
                                goto error;
                }
        }

        return 0;

error:
        /* Unwind only the families registered so far; unregistering a
         * family also tears down any multicast groups attached to it.
         */
        dp_unregister_genl(n_registered);
        return err;
}

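/* Bring the subsystems up in dependency order: flow-table caches, the vport
 * layer, the netdevice notifier, and finally the Generic Netlink families
 * that expose the datapath to userspace.  Each error label unwinds exactly
 * the steps that have already succeeded, in reverse order.
 */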
static int __init dp_init(void)
{
        struct sk_buff *dummy_skb;
        int err;

        BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));

        pr_info("Open vSwitch switching datapath\n");

        err = ovs_flow_init();
        if (err)
                goto error;

        err = ovs_vport_init();
        if (err)
                goto error_flow_exit;

        err = register_netdevice_notifier(&ovs_dp_device_notifier);
        if (err)
                goto error_vport_exit;

        err = dp_register_genl();
        if (err < 0)
                goto error_unreg_notifier;

        schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);

        return 0;

error_unreg_notifier:
        unregister_netdevice_notifier(&ovs_dp_device_notifier);
error_vport_exit:
        ovs_vport_exit();
error_flow_exit:
        ovs_flow_exit();
error:
        return err;
}

static void dp_cleanup(void)
{
        cancel_delayed_work_sync(&rehash_flow_wq);
        /* Wait for outstanding call_rcu() callbacks (e.g. deferred
         * flow-table destruction) to finish before the module text and
         * data can go away.
         */
        rcu_barrier();
        dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
        unregister_netdevice_notifier(&ovs_dp_device_notifier);
        ovs_vport_exit();
        ovs_flow_exit();
}

module_init(dp_init);
module_exit(dp_cleanup);

MODULE_DESCRIPTION("Open vSwitch switching datapath");
MODULE_LICENSE("GPL");