linux/net/sched/cls_rsvp.h
<<
>>
Prefs
   1/*
   2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 */
  11
  12/*
  13   Compared to the general packet classification problem,
  14   RSVP needs only several relatively simple rules:
  15
  16   * (dst, protocol) are always specified,
  17     so that we are able to hash them.
  18   * src may be exact, or may be wildcard, so that
  19     we can keep a hash table plus one wildcard entry.
  20   * source port (or flow label) is important only if src is given.
  21
  22   IMPLEMENTATION.
  23
  24   We use a two level hash table: The top level is keyed by
  25   destination address and protocol ID, every bucket contains a list
  26   of "rsvp sessions", identified by destination address, protocol and
  27   DPI(="Destination Port ID"): triple (key, mask, offset).
  28
  29   Every bucket has a smaller hash table keyed by source address
  30   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
  31   Every bucket is again a list of "RSVP flows", selected by
  32   source address and SPI(="Source Port ID" here rather than
  33   "security parameter index"): triple (key, mask, offset).
  34
  35
  36   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
  37   and all fragmented packets go to the best-effort traffic class.
  38
  39
  40   NOTE 2. Two "port id"s seem to be redundant; RFC 2207 requires
  41   only one "Generalized Port Identifier". So that for classic
  42   ah, esp (and udp,tcp) both *pi should coincide or one of them
  43   should be wildcard.
  44
  45   At first sight, this redundancy is just a waste of CPU
  46   resources. But DPI and SPI add the possibility to assign different
  47   priorities to GPIs. Look also at note 4 about tunnels below.
  48
  49
  50   NOTE 3. One complication is the case of tunneled packets.
  51   We implement it as follows: if the first lookup
  52   matches a special session with "tunnelhdr" value not zero,
  53   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
  54   In this case, we pull tunnelhdr bytes and restart lookup
  55   with tunnel ID added to the list of keys. Simple and stupid 8)8)
  56   It's enough for PIMREG and IPIP.
  57
  58
  59   NOTE 4. Two GPIs make it possible to parse even GRE packets.
  60   F.e. DPI can select ETH_P_IP (and necessary flags to make
  61   tunnelhdr correct) in GRE protocol field and SPI matches
  62   GRE key. Is it not nice? 8)8)
  63
  64
  65   Well, as a result, despite its simplicity, we get a pretty
  66   powerful classification engine.  */
  67
  68
  69struct rsvp_head {
  70        u32                     tmap[256/32];
  71        u32                     hgenerator;
  72        u8                      tgenerator;
  73        struct rsvp_session     *ht[256];
  74};
  75
  76struct rsvp_session {
  77        struct rsvp_session     *next;
  78        __be32                  dst[RSVP_DST_LEN];
  79        struct tc_rsvp_gpi      dpi;
  80        u8                      protocol;
  81        u8                      tunnelid;
  82        /* 16 (src,sport) hash slots, and one wildcard source slot */
  83        struct rsvp_filter      *ht[16 + 1];
  84};
  85
  86
  87struct rsvp_filter {
  88        struct rsvp_filter      *next;
  89        __be32                  src[RSVP_DST_LEN];
  90        struct tc_rsvp_gpi      spi;
  91        u8                      tunnelhdr;
  92
  93        struct tcf_result       res;
  94        struct tcf_exts         exts;
  95
  96        u32                     handle;
  97        struct rsvp_session     *sess;
  98};
  99
 100static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
 101{
 102        unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
 103
 104        h ^= h>>16;
 105        h ^= h>>8;
 106        return (h ^ protocol ^ tunnelid) & 0xFF;
 107}
 108
 109static inline unsigned int hash_src(__be32 *src)
 110{
 111        unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
 112
 113        h ^= h>>16;
 114        h ^= h>>8;
 115        h ^= h>>4;
 116        return h & 0xF;
 117}
 118
 119static struct tcf_ext_map rsvp_ext_map = {
 120        .police = TCA_RSVP_POLICE,
 121        .action = TCA_RSVP_ACT
 122};
 123
 124#define RSVP_APPLY_RESULT()                             \
 125{                                                       \
 126        int r = tcf_exts_exec(skb, &f->exts, res);      \
 127        if (r < 0)                                      \
 128                continue;                               \
 129        else if (r > 0)                                 \
 130                return r;                               \
 131}
 132
 133static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 134                         struct tcf_result *res)
 135{
 136        struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
 137        struct rsvp_session *s;
 138        struct rsvp_filter *f;
 139        unsigned int h1, h2;
 140        __be32 *dst, *src;
 141        u8 protocol;
 142        u8 tunnelid = 0;
 143        u8 *xprt;
 144#if RSVP_DST_LEN == 4
 145        struct ipv6hdr *nhptr;
 146
 147        if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
 148                return -1;
 149        nhptr = ipv6_hdr(skb);
 150#else
 151        struct iphdr *nhptr;
 152
 153        if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
 154                return -1;
 155        nhptr = ip_hdr(skb);
 156#endif
 157
 158restart:
 159
 160#if RSVP_DST_LEN == 4
 161        src = &nhptr->saddr.s6_addr32[0];
 162        dst = &nhptr->daddr.s6_addr32[0];
 163        protocol = nhptr->nexthdr;
 164        xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
 165#else
 166        src = &nhptr->saddr;
 167        dst = &nhptr->daddr;
 168        protocol = nhptr->protocol;
 169        xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
 170        if (ip_is_fragment(nhptr))
 171                return -1;
 172#endif
 173
 174        h1 = hash_dst(dst, protocol, tunnelid);
 175        h2 = hash_src(src);
 176
 177        for (s = sht[h1]; s; s = s->next) {
 178                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
 179                    protocol == s->protocol &&
 180                    !(s->dpi.mask &
 181                      (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
 182#if RSVP_DST_LEN == 4
 183                    dst[0] == s->dst[0] &&
 184                    dst[1] == s->dst[1] &&
 185                    dst[2] == s->dst[2] &&
 186#endif
 187                    tunnelid == s->tunnelid) {
 188
 189                        for (f = s->ht[h2]; f; f = f->next) {
 190                                if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
 191                                    !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
 192#if RSVP_DST_LEN == 4
 193                                    &&
 194                                    src[0] == f->src[0] &&
 195                                    src[1] == f->src[1] &&
 196                                    src[2] == f->src[2]
 197#endif
 198                                    ) {
 199                                        *res = f->res;
 200                                        RSVP_APPLY_RESULT();
 201
 202matched:
 203                                        if (f->tunnelhdr == 0)
 204                                                return 0;
 205
 206                                        tunnelid = f->res.classid;
 207                                        nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
 208                                        goto restart;
 209                                }
 210                        }
 211
 212                        /* And wildcard bucket... */
 213                        for (f = s->ht[16]; f; f = f->next) {
 214                                *res = f->res;
 215                                RSVP_APPLY_RESULT();
 216                                goto matched;
 217                        }
 218                        return -1;
 219                }
 220        }
 221        return -1;
 222}
 223
 224static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 225{
 226        struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
 227        struct rsvp_session *s;
 228        struct rsvp_filter *f;
 229        unsigned int h1 = handle & 0xFF;
 230        unsigned int h2 = (handle >> 8) & 0xFF;
 231
 232        if (h2 > 16)
 233                return 0;
 234
 235        for (s = sht[h1]; s; s = s->next) {
 236                for (f = s->ht[h2]; f; f = f->next) {
 237                        if (f->handle == handle)
 238                                return (unsigned long)f;
 239                }
 240        }
 241        return 0;
 242}
 243
 244static void rsvp_put(struct tcf_proto *tp, unsigned long f)
 245{
 246}
 247
 248static int rsvp_init(struct tcf_proto *tp)
 249{
 250        struct rsvp_head *data;
 251
 252        data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
 253        if (data) {
 254                tp->root = data;
 255                return 0;
 256        }
 257        return -ENOBUFS;
 258}
 259
 260static void
 261rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 262{
 263        tcf_unbind_filter(tp, &f->res);
 264        tcf_exts_destroy(tp, &f->exts);
 265        kfree(f);
 266}
 267
 268static void rsvp_destroy(struct tcf_proto *tp)
 269{
 270        struct rsvp_head *data = xchg(&tp->root, NULL);
 271        struct rsvp_session **sht;
 272        int h1, h2;
 273
 274        if (data == NULL)
 275                return;
 276
 277        sht = data->ht;
 278
 279        for (h1 = 0; h1 < 256; h1++) {
 280                struct rsvp_session *s;
 281
 282                while ((s = sht[h1]) != NULL) {
 283                        sht[h1] = s->next;
 284
 285                        for (h2 = 0; h2 <= 16; h2++) {
 286                                struct rsvp_filter *f;
 287
 288                                while ((f = s->ht[h2]) != NULL) {
 289                                        s->ht[h2] = f->next;
 290                                        rsvp_delete_filter(tp, f);
 291                                }
 292                        }
 293                        kfree(s);
 294                }
 295        }
 296        kfree(data);
 297}
 298
 299static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 300{
 301        struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
 302        unsigned int h = f->handle;
 303        struct rsvp_session **sp;
 304        struct rsvp_session *s = f->sess;
 305        int i;
 306
 307        for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
 308                if (*fp == f) {
 309                        tcf_tree_lock(tp);
 310                        *fp = f->next;
 311                        tcf_tree_unlock(tp);
 312                        rsvp_delete_filter(tp, f);
 313
 314                        /* Strip tree */
 315
 316                        for (i = 0; i <= 16; i++)
 317                                if (s->ht[i])
 318                                        return 0;
 319
 320                        /* OK, session has no flows */
 321                        for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
 322                             *sp; sp = &(*sp)->next) {
 323                                if (*sp == s) {
 324                                        tcf_tree_lock(tp);
 325                                        *sp = s->next;
 326                                        tcf_tree_unlock(tp);
 327
 328                                        kfree(s);
 329                                        return 0;
 330                                }
 331                        }
 332
 333                        return 0;
 334                }
 335        }
 336        return 0;
 337}
 338
 339static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
 340{
 341        struct rsvp_head *data = tp->root;
 342        int i = 0xFFFF;
 343
 344        while (i-- > 0) {
 345                u32 h;
 346
 347                if ((data->hgenerator += 0x10000) == 0)
 348                        data->hgenerator = 0x10000;
 349                h = data->hgenerator|salt;
 350                if (rsvp_get(tp, h) == 0)
 351                        return h;
 352        }
 353        return 0;
 354}
 355
 356static int tunnel_bts(struct rsvp_head *data)
 357{
 358        int n = data->tgenerator >> 5;
 359        u32 b = 1 << (data->tgenerator & 0x1F);
 360
 361        if (data->tmap[n] & b)
 362                return 0;
 363        data->tmap[n] |= b;
 364        return 1;
 365}
 366
 367static void tunnel_recycle(struct rsvp_head *data)
 368{
 369        struct rsvp_session **sht = data->ht;
 370        u32 tmap[256/32];
 371        int h1, h2;
 372
 373        memset(tmap, 0, sizeof(tmap));
 374
 375        for (h1 = 0; h1 < 256; h1++) {
 376                struct rsvp_session *s;
 377                for (s = sht[h1]; s; s = s->next) {
 378                        for (h2 = 0; h2 <= 16; h2++) {
 379                                struct rsvp_filter *f;
 380
 381                                for (f = s->ht[h2]; f; f = f->next) {
 382                                        if (f->tunnelhdr == 0)
 383                                                continue;
 384                                        data->tgenerator = f->res.classid;
 385                                        tunnel_bts(data);
 386                                }
 387                        }
 388                }
 389        }
 390
 391        memcpy(data->tmap, tmap, sizeof(tmap));
 392}
 393
 394static u32 gen_tunnel(struct rsvp_head *data)
 395{
 396        int i, k;
 397
 398        for (k = 0; k < 2; k++) {
 399                for (i = 255; i > 0; i--) {
 400                        if (++data->tgenerator == 0)
 401                                data->tgenerator = 1;
 402                        if (tunnel_bts(data))
 403                                return data->tgenerator;
 404                }
 405                tunnel_recycle(data);
 406        }
 407        return 0;
 408}
 409
 410static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
 411        [TCA_RSVP_CLASSID]      = { .type = NLA_U32 },
 412        [TCA_RSVP_DST]          = { .type = NLA_BINARY,
 413                                    .len = RSVP_DST_LEN * sizeof(u32) },
 414        [TCA_RSVP_SRC]          = { .type = NLA_BINARY,
 415                                    .len = RSVP_DST_LEN * sizeof(u32) },
 416        [TCA_RSVP_PINFO]        = { .len = sizeof(struct tc_rsvp_pinfo) },
 417};
 418
 419static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 420                       u32 handle,
 421                       struct nlattr **tca,
 422                       unsigned long *arg)
 423{
 424        struct rsvp_head *data = tp->root;
 425        struct rsvp_filter *f, **fp;
 426        struct rsvp_session *s, **sp;
 427        struct tc_rsvp_pinfo *pinfo = NULL;
 428        struct nlattr *opt = tca[TCA_OPTIONS];
 429        struct nlattr *tb[TCA_RSVP_MAX + 1];
 430        struct tcf_exts e;
 431        unsigned int h1, h2;
 432        __be32 *dst;
 433        int err;
 434
 435        if (opt == NULL)
 436                return handle ? -EINVAL : 0;
 437
 438        err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
 439        if (err < 0)
 440                return err;
 441
 442        err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &rsvp_ext_map);
 443        if (err < 0)
 444                return err;
 445
 446        f = (struct rsvp_filter *)*arg;
 447        if (f) {
 448                /* Node exists: adjust only classid */
 449
 450                if (f->handle != handle && handle)
 451                        goto errout2;
 452                if (tb[TCA_RSVP_CLASSID]) {
 453                        f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 454                        tcf_bind_filter(tp, &f->res, base);
 455                }
 456
 457                tcf_exts_change(tp, &f->exts, &e);
 458                return 0;
 459        }
 460
 461        /* Now more serious part... */
 462        err = -EINVAL;
 463        if (handle)
 464                goto errout2;
 465        if (tb[TCA_RSVP_DST] == NULL)
 466                goto errout2;
 467
 468        err = -ENOBUFS;
 469        f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
 470        if (f == NULL)
 471                goto errout2;
 472
 473        h2 = 16;
 474        if (tb[TCA_RSVP_SRC]) {
 475                memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
 476                h2 = hash_src(f->src);
 477        }
 478        if (tb[TCA_RSVP_PINFO]) {
 479                pinfo = nla_data(tb[TCA_RSVP_PINFO]);
 480                f->spi = pinfo->spi;
 481                f->tunnelhdr = pinfo->tunnelhdr;
 482        }
 483        if (tb[TCA_RSVP_CLASSID])
 484                f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
 485
 486        dst = nla_data(tb[TCA_RSVP_DST]);
 487        h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
 488
 489        err = -ENOMEM;
 490        if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
 491                goto errout;
 492
 493        if (f->tunnelhdr) {
 494                err = -EINVAL;
 495                if (f->res.classid > 255)
 496                        goto errout;
 497
 498                err = -ENOMEM;
 499                if (f->res.classid == 0 &&
 500                    (f->res.classid = gen_tunnel(data)) == 0)
 501                        goto errout;
 502        }
 503
 504        for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
 505                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 506                    pinfo && pinfo->protocol == s->protocol &&
 507                    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
 508#if RSVP_DST_LEN == 4
 509                    dst[0] == s->dst[0] &&
 510                    dst[1] == s->dst[1] &&
 511                    dst[2] == s->dst[2] &&
 512#endif
 513                    pinfo->tunnelid == s->tunnelid) {
 514
 515insert:
 516                        /* OK, we found appropriate session */
 517
 518                        fp = &s->ht[h2];
 519
 520                        f->sess = s;
 521                        if (f->tunnelhdr == 0)
 522                                tcf_bind_filter(tp, &f->res, base);
 523
 524                        tcf_exts_change(tp, &f->exts, &e);
 525
 526                        for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
 527                                if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
 528                                        break;
 529                        f->next = *fp;
 530                        wmb();
 531                        *fp = f;
 532
 533                        *arg = (unsigned long)f;
 534                        return 0;
 535                }
 536        }
 537
 538        /* No session found. Create new one. */
 539
 540        err = -ENOBUFS;
 541        s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
 542        if (s == NULL)
 543                goto errout;
 544        memcpy(s->dst, dst, sizeof(s->dst));
 545
 546        if (pinfo) {
 547                s->dpi = pinfo->dpi;
 548                s->protocol = pinfo->protocol;
 549                s->tunnelid = pinfo->tunnelid;
 550        }
 551        for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
 552                if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
 553                        break;
 554        }
 555        s->next = *sp;
 556        wmb();
 557        *sp = s;
 558
 559        goto insert;
 560
 561errout:
 562        kfree(f);
 563errout2:
 564        tcf_exts_destroy(tp, &e);
 565        return err;
 566}
 567
 568static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 569{
 570        struct rsvp_head *head = tp->root;
 571        unsigned int h, h1;
 572
 573        if (arg->stop)
 574                return;
 575
 576        for (h = 0; h < 256; h++) {
 577                struct rsvp_session *s;
 578
 579                for (s = head->ht[h]; s; s = s->next) {
 580                        for (h1 = 0; h1 <= 16; h1++) {
 581                                struct rsvp_filter *f;
 582
 583                                for (f = s->ht[h1]; f; f = f->next) {
 584                                        if (arg->count < arg->skip) {
 585                                                arg->count++;
 586                                                continue;
 587                                        }
 588                                        if (arg->fn(tp, (unsigned long)f, arg) < 0) {
 589                                                arg->stop = 1;
 590                                                return;
 591                                        }
 592                                        arg->count++;
 593                                }
 594                        }
 595                }
 596        }
 597}
 598
 599static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 600                     struct sk_buff *skb, struct tcmsg *t)
 601{
 602        struct rsvp_filter *f = (struct rsvp_filter *)fh;
 603        struct rsvp_session *s;
 604        unsigned char *b = skb_tail_pointer(skb);
 605        struct nlattr *nest;
 606        struct tc_rsvp_pinfo pinfo;
 607
 608        if (f == NULL)
 609                return skb->len;
 610        s = f->sess;
 611
 612        t->tcm_handle = f->handle;
 613
 614        nest = nla_nest_start(skb, TCA_OPTIONS);
 615        if (nest == NULL)
 616                goto nla_put_failure;
 617
 618        if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
 619                goto nla_put_failure;
 620        pinfo.dpi = s->dpi;
 621        pinfo.spi = f->spi;
 622        pinfo.protocol = s->protocol;
 623        pinfo.tunnelid = s->tunnelid;
 624        pinfo.tunnelhdr = f->tunnelhdr;
 625        pinfo.pad = 0;
 626        if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
 627                goto nla_put_failure;
 628        if (f->res.classid &&
 629            nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
 630                goto nla_put_failure;
 631        if (((f->handle >> 8) & 0xFF) != 16 &&
 632            nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
 633                goto nla_put_failure;
 634
 635        if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
 636                goto nla_put_failure;
 637
 638        nla_nest_end(skb, nest);
 639
 640        if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
 641                goto nla_put_failure;
 642        return skb->len;
 643
 644nla_put_failure:
 645        nlmsg_trim(skb, b);
 646        return -1;
 647}
 648
 649static struct tcf_proto_ops RSVP_OPS __read_mostly = {
 650        .kind           =       RSVP_ID,
 651        .classify       =       rsvp_classify,
 652        .init           =       rsvp_init,
 653        .destroy        =       rsvp_destroy,
 654        .get            =       rsvp_get,
 655        .put            =       rsvp_put,
 656        .change         =       rsvp_change,
 657        .delete         =       rsvp_delete,
 658        .walk           =       rsvp_walk,
 659        .dump           =       rsvp_dump,
 660        .owner          =       THIS_MODULE,
 661};
 662
 663static int __init init_rsvp(void)
 664{
 665        return register_tcf_proto_ops(&RSVP_OPS);
 666}
 667
 668static void __exit exit_rsvp(void)
 669{
 670        unregister_tcf_proto_ops(&RSVP_OPS);
 671}
 672
 673module_init(init_rsvp)
 674module_exit(exit_rsvp)
 675
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.