linux/net/sched/cls_rsvp.h
<<
>>
Prefs
   1/*
   2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 */
  11
  12/*
   13   Compared to the general packet classification problem,
   14   RSVP needs only several relatively simple rules:
  15
  16   * (dst, protocol) are always specified,
  17     so that we are able to hash them.
  18   * src may be exact, or may be wildcard, so that
  19     we can keep a hash table plus one wildcard entry.
  20   * source port (or flow label) is important only if src is given.
  21
  22   IMPLEMENTATION.
  23
  24   We use a two level hash table: The top level is keyed by
  25   destination address and protocol ID, every bucket contains a list
  26   of "rsvp sessions", identified by destination address, protocol and
  27   DPI(="Destination Port ID"): triple (key, mask, offset).
  28
  29   Every bucket has a smaller hash table keyed by source address
  30   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
  31   Every bucket is again a list of "RSVP flows", selected by
  32   source address and SPI(="Source Port ID" here rather than
  33   "security parameter index"): triple (key, mask, offset).
  34
  35
  36   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
  37   and all fragmented packets go to the best-effort traffic class.
  38
  39
   40   NOTE 2. Two "port id"s seem to be redundant; RFC 2207 requires
  41   only one "Generalized Port Identifier". So that for classic
  42   ah, esp (and udp,tcp) both *pi should coincide or one of them
  43   should be wildcard.
  44
  45   At first sight, this redundancy is just a waste of CPU
  46   resources. But DPI and SPI add the possibility to assign different
  47   priorities to GPIs. Look also at note 4 about tunnels below.
  48
  49
  50   NOTE 3. One complication is the case of tunneled packets.
   51   We implement it as follows: if the first lookup
  52   matches a special session with "tunnelhdr" value not zero,
  53   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
  54   In this case, we pull tunnelhdr bytes and restart lookup
  55   with tunnel ID added to the list of keys. Simple and stupid 8)8)
  56   It's enough for PIMREG and IPIP.
  57
  58
  59   NOTE 4. Two GPIs make it possible to parse even GRE packets.
  60   F.e. DPI can select ETH_P_IP (and necessary flags to make
  61   tunnelhdr correct) in GRE protocol field and SPI matches
  62   GRE key. Is it not nice? 8)8)
  63
  64
   65   Well, as a result, despite its simplicity, we get a pretty
  66   powerful classification engine.  */
  67
  68
  69struct rsvp_head
  70{
  71        u32                     tmap[256/32];
  72        u32                     hgenerator;
  73        u8                      tgenerator;
  74        struct rsvp_session     *ht[256];
  75};
  76
  77struct rsvp_session
  78{
  79        struct rsvp_session     *next;
  80        __be32                  dst[RSVP_DST_LEN];
  81        struct tc_rsvp_gpi      dpi;
  82        u8                      protocol;
  83        u8                      tunnelid;
  84        /* 16 (src,sport) hash slots, and one wildcard source slot */
  85        struct rsvp_filter      *ht[16+1];
  86};
  87
  88
  89struct rsvp_filter
  90{
  91        struct rsvp_filter      *next;
  92        __be32                  src[RSVP_DST_LEN];
  93        struct tc_rsvp_gpi      spi;
  94        u8                      tunnelhdr;
  95
  96        struct tcf_result       res;
  97        struct tcf_exts         exts;
  98
  99        u32                     handle;
 100        struct rsvp_session     *sess;
 101};
 102
 103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
 104{
 105        unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
 106        h ^= h>>16;
 107        h ^= h>>8;
 108        return (h ^ protocol ^ tunnelid) & 0xFF;
 109}
 110
 111static __inline__ unsigned hash_src(__be32 *src)
 112{
 113        unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
 114        h ^= h>>16;
 115        h ^= h>>8;
 116        h ^= h>>4;
 117        return h & 0xF;
 118}
 119
 120static struct tcf_ext_map rsvp_ext_map = {
 121        .police = TCA_RSVP_POLICE,
 122        .action = TCA_RSVP_ACT
 123};
 124
 125#define RSVP_APPLY_RESULT()                             \
 126{                                                       \
 127        int r = tcf_exts_exec(skb, &f->exts, res);      \
 128        if (r < 0)                                      \
 129                continue;                               \
 130        else if (r > 0)                                 \
 131                return r;                               \
 132}
 133
 134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
 135                         struct tcf_result *res)
 136{
 137        struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
 138        struct rsvp_session *s;
 139        struct rsvp_filter *f;
 140        unsigned h1, h2;
 141        __be32 *dst, *src;
 142        u8 protocol;
 143        u8 tunnelid = 0;
 144        u8 *xprt;
 145#if RSVP_DST_LEN == 4
 146        struct ipv6hdr *nhptr;
 147
 148        if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
 149                return -1;
 150        nhptr = ipv6_hdr(skb);
 151#else
 152        struct iphdr *nhptr;
 153
 154        if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
 155                return -1;
 156        nhptr = ip_hdr(skb);
 157#endif
 158
 159restart:
 160
 161#if RSVP_DST_LEN == 4
 162        src = &nhptr->saddr.s6_addr32[0];
 163        dst = &nhptr->daddr.s6_addr32[0];
 164        protocol = nhptr->nexthdr;
 165        xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
 166#else
 167        src = &nhptr->saddr;
 168        dst = &nhptr->daddr;
 169        protocol = nhptr->protocol;
 170        xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
 171        if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
 172                return -1;
 173#endif
 174
 175        h1 = hash_dst(dst, protocol, tunnelid);
 176        h2 = hash_src(src);
 177
 178        for (s = sht[h1]; s; s = s->next) {
 179                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 180                    protocol == s->protocol &&
 181                    !(s->dpi.mask &
 182                      (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
 183#if RSVP_DST_LEN == 4
 184                    dst[0] == s->dst[0] &&
 185                    dst[1] == s->dst[1] &&
 186                    dst[2] == s->dst[2] &&
 187#endif
 188                    tunnelid == s->tunnelid) {
 189
 190                        for (f = s->ht[h2]; f; f = f->next) {
 191                                if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
 192                                    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
 193#if RSVP_DST_LEN == 4
 194                                    &&
 195                                    src[0] == f->src[0] &&
 196                                    src[1] == f->src[1] &&
 197                                    src[2] == f->src[2]
 198#endif
 199                                    ) {
 200                                        *res = f->res;
 201                                        RSVP_APPLY_RESULT();
 202
 203matched:
 204                                        if (f->tunnelhdr == 0)
 205                                                return 0;
 206
 207                                        tunnelid = f->res.classid;
 208                                        nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
 209                                        goto restart;
 210                                }
 211                        }
 212
 213                        /* And wildcard bucket... */
 214                        for (f = s->ht[16]; f; f = f->next) {
 215                                *res = f->res;
 216                                RSVP_APPLY_RESULT();
 217                                goto matched;
 218                        }
 219                        return -1;
 220                }
 221        }
 222        return -1;
 223}
 224
 225static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 226{
 227        struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
 228        struct rsvp_session *s;
 229        struct rsvp_filter *f;
 230        unsigned h1 = handle&0xFF;
 231        unsigned h2 = (handle>>8)&0xFF;
 232
 233        if (h2 > 16)
 234                return 0;
 235
 236        for (s = sht[h1]; s; s = s->next) {
 237                for (f = s->ht[h2]; f; f = f->next) {
 238                        if (f->handle == handle)
 239                                return (unsigned long)f;
 240                }
 241        }
 242        return 0;
 243}
 244
 245static void rsvp_put(struct tcf_proto *tp, unsigned long f)
 246{
 247}
 248
 249static int rsvp_init(struct tcf_proto *tp)
 250{
 251        struct rsvp_head *data;
 252
 253        data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
 254        if (data) {
 255                tp->root = data;
 256                return 0;
 257        }
 258        return -ENOBUFS;
 259}
 260
 261static inline void
 262rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 263{
 264        tcf_unbind_filter(tp, &f->res);
 265        tcf_exts_destroy(tp, &f->exts);
 266        kfree(f);
 267}
 268
 269static void rsvp_destroy(struct tcf_proto *tp)
 270{
 271        struct rsvp_head *data = xchg(&tp->root, NULL);
 272        struct rsvp_session **sht;
 273        int h1, h2;
 274
 275        if (data == NULL)
 276                return;
 277
 278        sht = data->ht;
 279
 280        for (h1=0; h1<256; h1++) {
 281                struct rsvp_session *s;
 282
 283                while ((s = sht[h1]) != NULL) {
 284                        sht[h1] = s->next;
 285
 286                        for (h2=0; h2<=16; h2++) {
 287                                struct rsvp_filter *f;
 288
 289                                while ((f = s->ht[h2]) != NULL) {
 290                                        s->ht[h2] = f->next;
 291                                        rsvp_delete_filter(tp, f);
 292                                }
 293                        }
 294                        kfree(s);
 295                }
 296        }
 297        kfree(data);
 298}
 299
 300static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 301{
 302        struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
 303        unsigned h = f->handle;
 304        struct rsvp_session **sp;
 305        struct rsvp_session *s = f->sess;
 306        int i;
 307
 308        for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
 309                if (*fp == f) {
 310                        tcf_tree_lock(tp);
 311                        *fp = f->next;
 312                        tcf_tree_unlock(tp);
 313                        rsvp_delete_filter(tp, f);
 314
 315                        /* Strip tree */
 316
 317                        for (i=0; i<=16; i++)
 318                                if (s->ht[i])
 319                                        return 0;
 320
 321                        /* OK, session has no flows */
 322                        for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
 323                             *sp; sp = &(*sp)->next) {
 324                                if (*sp == s) {
 325                                        tcf_tree_lock(tp);
 326                                        *sp = s->next;
 327                                        tcf_tree_unlock(tp);
 328
 329                                        kfree(s);
 330                                        return 0;
 331                                }
 332                        }
 333
 334                        return 0;
 335                }
 336        }
 337        return 0;
 338}
 339
 340static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
 341{
 342        struct rsvp_head *data = tp->root;
 343        int i = 0xFFFF;
 344
 345        while (i-- > 0) {
 346                u32 h;
 347                if ((data->hgenerator += 0x10000) == 0)
 348                        data->hgenerator = 0x10000;
 349                h = data->hgenerator|salt;
 350                if (rsvp_get(tp, h) == 0)
 351                        return h;
 352        }
 353        return 0;
 354}
 355
 356static int tunnel_bts(struct rsvp_head *data)
 357{
 358        int n = data->tgenerator>>5;
 359        u32 b = 1<<(data->tgenerator&0x1F);
 360
 361        if (data->tmap[n]&b)
 362                return 0;
 363        data->tmap[n] |= b;
 364        return 1;
 365}
 366
 367static void tunnel_recycle(struct rsvp_head *data)
 368{
 369        struct rsvp_session **sht = data->ht;
 370        u32 tmap[256/32];
 371        int h1, h2;
 372
 373        memset(tmap, 0, sizeof(tmap));
 374
 375        for (h1=0; h1<256; h1++) {
 376                struct rsvp_session *s;
 377                for (s = sht[h1]; s; s = s->next) {
 378                        for (h2=0; h2<=16; h2++) {
 379                                struct rsvp_filter *f;
 380
 381                                for (f = s->ht[h2]; f; f = f->next) {
 382                                        if (f->tunnelhdr == 0)
 383                                                continue;
 384                                        data->tgenerator = f->res.classid;
 385                                        tunnel_bts(data);
 386                                }
 387                        }
 388                }
 389        }
 390
 391        memcpy(data->tmap, tmap, sizeof(tmap));
 392}
 393
 394static u32 gen_tunnel(struct rsvp_head *data)
 395{
 396        int i, k;
 397
 398        for (k=0; k<2; k++) {
 399                for (i=255; i>0; i--) {
 400                        if (++data->tgenerator == 0)
 401                                data->tgenerator = 1;
 402                        if (tunnel_bts(data))
 403                                return data->tgenerator;
 404                }
 405                tunnel_recycle(data);
 406        }
 407        return 0;
 408}
 409
 410static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
 411        [TCA_RSVP_CLASSID]      = { .type = NLA_U32 },
 412        [TCA_RSVP_DST]          = { .type = NLA_BINARY,
 413                                    .len = RSVP_DST_LEN * sizeof(u32) },
 414        [TCA_RSVP_SRC]          = { .type = NLA_BINARY,
 415                                    .len = RSVP_DST_LEN * sizeof(u32) },
 416        [TCA_RSVP_PINFO]        = { .len = sizeof(struct tc_rsvp_pinfo) },
 417};
 418
 419static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 420                       u32 handle,
 421                       struct nlattr **tca,
 422                       unsigned long *arg)
 423{
 424        struct rsvp_head *data = tp->root;
 425        struct rsvp_filter *f, **fp;
 426        struct rsvp_session *s, **sp;
 427        struct tc_rsvp_pinfo *pinfo = NULL;
 428        struct nlattr *opt = tca[TCA_OPTIONS-1];
 429        struct nlattr *tb[TCA_RSVP_MAX + 1];
 430        struct tcf_exts e;
 431        unsigned h1, h2;
 432        __be32 *dst;
 433        int err;
 434
 435        if (opt == NULL)
 436                return handle ? -EINVAL : 0;
 437
 438        err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
 439        if (err < 0)
 440                return err;
 441
 442        err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
 443        if (err < 0)
 444                return err;
 445
 446        if ((f = (struct rsvp_filter*)*arg) != NULL) {
 447                /* Node exists: adjust only classid */
 448
 449                if (f->handle != handle && handle)
 450                        goto errout2;
 451                if (tb[TCA_RSVP_CLASSID-1]) {
 452                        f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
 453                        tcf_bind_filter(tp, &f->res, base);
 454                }
 455
 456                tcf_exts_change(tp, &f->exts, &e);
 457                return 0;
 458        }
 459
 460        /* Now more serious part... */
 461        err = -EINVAL;
 462        if (handle)
 463                goto errout2;
 464        if (tb[TCA_RSVP_DST-1] == NULL)
 465                goto errout2;
 466
 467        err = -ENOBUFS;
 468        f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
 469        if (f == NULL)
 470                goto errout2;
 471
 472        h2 = 16;
 473        if (tb[TCA_RSVP_SRC-1]) {
 474                memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
 475                h2 = hash_src(f->src);
 476        }
 477        if (tb[TCA_RSVP_PINFO-1]) {
 478                pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
 479                f->spi = pinfo->spi;
 480                f->tunnelhdr = pinfo->tunnelhdr;
 481        }
 482        if (tb[TCA_RSVP_CLASSID-1])
 483                f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
 484
 485        dst = nla_data(tb[TCA_RSVP_DST-1]);
 486        h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
 487
 488        err = -ENOMEM;
 489        if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
 490                goto errout;
 491
 492        if (f->tunnelhdr) {
 493                err = -EINVAL;
 494                if (f->res.classid > 255)
 495                        goto errout;
 496
 497                err = -ENOMEM;
 498                if (f->res.classid == 0 &&
 499                    (f->res.classid = gen_tunnel(data)) == 0)
 500                        goto errout;
 501        }
 502
 503        for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
 504                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 505                    pinfo && pinfo->protocol == s->protocol &&
 506                    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
 507#if RSVP_DST_LEN == 4
 508                    dst[0] == s->dst[0] &&
 509                    dst[1] == s->dst[1] &&
 510                    dst[2] == s->dst[2] &&
 511#endif
 512                    pinfo->tunnelid == s->tunnelid) {
 513
 514insert:
 515                        /* OK, we found appropriate session */
 516
 517                        fp = &s->ht[h2];
 518
 519                        f->sess = s;
 520                        if (f->tunnelhdr == 0)
 521                                tcf_bind_filter(tp, &f->res, base);
 522
 523                        tcf_exts_change(tp, &f->exts, &e);
 524
 525                        for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
 526                                if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
 527                                        break;
 528                        f->next = *fp;
 529                        wmb();
 530                        *fp = f;
 531
 532                        *arg = (unsigned long)f;
 533                        return 0;
 534                }
 535        }
 536
 537        /* No session found. Create new one. */
 538
 539        err = -ENOBUFS;
 540        s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
 541        if (s == NULL)
 542                goto errout;
 543        memcpy(s->dst, dst, sizeof(s->dst));
 544
 545        if (pinfo) {
 546                s->dpi = pinfo->dpi;
 547                s->protocol = pinfo->protocol;
 548                s->tunnelid = pinfo->tunnelid;
 549        }
 550        for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
 551                if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
 552                        break;
 553        }
 554        s->next = *sp;
 555        wmb();
 556        *sp = s;
 557
 558        goto insert;
 559
 560errout:
 561        kfree(f);
 562errout2:
 563        tcf_exts_destroy(tp, &e);
 564        return err;
 565}
 566
 567static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 568{
 569        struct rsvp_head *head = tp->root;
 570        unsigned h, h1;
 571
 572        if (arg->stop)
 573                return;
 574
 575        for (h = 0; h < 256; h++) {
 576                struct rsvp_session *s;
 577
 578                for (s = head->ht[h]; s; s = s->next) {
 579                        for (h1 = 0; h1 <= 16; h1++) {
 580                                struct rsvp_filter *f;
 581
 582                                for (f = s->ht[h1]; f; f = f->next) {
 583                                        if (arg->count < arg->skip) {
 584                                                arg->count++;
 585                                                continue;
 586                                        }
 587                                        if (arg->fn(tp, (unsigned long)f, arg) < 0) {
 588                                                arg->stop = 1;
 589                                                return;
 590                                        }
 591                                        arg->count++;
 592                                }
 593                        }
 594                }
 595        }
 596}
 597
 598static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 599                     struct sk_buff *skb, struct tcmsg *t)
 600{
 601        struct rsvp_filter *f = (struct rsvp_filter*)fh;
 602        struct rsvp_session *s;
 603        unsigned char *b = skb_tail_pointer(skb);
 604        struct nlattr *nest;
 605        struct tc_rsvp_pinfo pinfo;
 606
 607        if (f == NULL)
 608                return skb->len;
 609        s = f->sess;
 610
 611        t->tcm_handle = f->handle;
 612
 613        nest = nla_nest_start(skb, TCA_OPTIONS);
 614        if (nest == NULL)
 615                goto nla_put_failure;
 616
 617        NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
 618        pinfo.dpi = s->dpi;
 619        pinfo.spi = f->spi;
 620        pinfo.protocol = s->protocol;
 621        pinfo.tunnelid = s->tunnelid;
 622        pinfo.tunnelhdr = f->tunnelhdr;
 623        pinfo.pad = 0;
 624        NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
 625        if (f->res.classid)
 626                NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
 627        if (((f->handle>>8)&0xFF) != 16)
 628                NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
 629
 630        if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
 631                goto nla_put_failure;
 632
 633        nla_nest_end(skb, nest);
 634
 635        if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
 636                goto nla_put_failure;
 637        return skb->len;
 638
 639nla_put_failure:
 640        nlmsg_trim(skb, b);
 641        return -1;
 642}
 643
 644static struct tcf_proto_ops RSVP_OPS = {
 645        .next           =       NULL,
 646        .kind           =       RSVP_ID,
 647        .classify       =       rsvp_classify,
 648        .init           =       rsvp_init,
 649        .destroy        =       rsvp_destroy,
 650        .get            =       rsvp_get,
 651        .put            =       rsvp_put,
 652        .change         =       rsvp_change,
 653        .delete         =       rsvp_delete,
 654        .walk           =       rsvp_walk,
 655        .dump           =       rsvp_dump,
 656        .owner          =       THIS_MODULE,
 657};
 658
 659static int __init init_rsvp(void)
 660{
 661        return register_tcf_proto_ops(&RSVP_OPS);
 662}
 663
 664static void __exit exit_rsvp(void)
 665{
 666        unregister_tcf_proto_ops(&RSVP_OPS);
 667}
 668
 669module_init(init_rsvp)
 670module_exit(exit_rsvp)
 671