linux/net/sched/cls_rsvp.h
<<
>>
Prefs
   1/*
   2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
   3 *
   4 *              This program is free software; you can redistribute it and/or
   5 *              modify it under the terms of the GNU General Public License
   6 *              as published by the Free Software Foundation; either version
   7 *              2 of the License, or (at your option) any later version.
   8 *
   9 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10 */
  11
  12/*
  13   Compared to the general packet classification problem,
  14   RSVP needs only several relatively simple rules:
  15
  16   * (dst, protocol) are always specified,
  17     so that we are able to hash them.
  18   * src may be exact, or may be wildcard, so that
  19     we can keep a hash table plus one wildcard entry.
  20   * source port (or flow label) is important only if src is given.
  21
  22   IMPLEMENTATION.
  23
  24   We use a two level hash table: The top level is keyed by
  25   destination address and protocol ID, every bucket contains a list
  26   of "rsvp sessions", identified by destination address, protocol and
  27   DPI(="Destination Port ID"): triple (key, mask, offset).
  28
  29   Every bucket has a smaller hash table keyed by source address
  30   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
  31   Every bucket is again a list of "RSVP flows", selected by
  32   source address and SPI(="Source Port ID" here rather than
  33   "security parameter index"): triple (key, mask, offset).
  34
  35
  36   NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
  37   and all fragmented packets go to the best-effort traffic class.
  38
  39
  40   NOTE 2. Two "port id"'s seem to be redundant, rfc2207 requires
  41   only one "Generalized Port Identifier". So that for classic
  42   ah, esp (and udp,tcp) both *pi should coincide or one of them
  43   should be wildcard.
  44
  45   At first sight, this redundancy is just a waste of CPU
  46   resources. But DPI and SPI add the possibility to assign different
  47   priorities to GPIs. Look also at note 4 about tunnels below.
  48
  49
  50   NOTE 3. One complication is the case of tunneled packets.
  51   We implement it as follows: if the first lookup
  52   matches a special session with "tunnelhdr" value not zero,
  53   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
  54   In this case, we pull tunnelhdr bytes and restart lookup
  55   with tunnel ID added to the list of keys. Simple and stupid 8)8)
  56   It's enough for PIMREG and IPIP.
  57
  58
  59   NOTE 4. Two GPIs make it possible to parse even GRE packets.
  60   F.e. DPI can select ETH_P_IP (and necessary flags to make
  61   tunnelhdr correct) in GRE protocol field and SPI matches
  62   GRE key. Is it not nice? 8)8)
  63
  64
  65   Well, as a result, despite its simplicity, we get a pretty
  66   powerful classification engine.  */
  67
  68
  69struct rsvp_head
  70{
  71        u32                     tmap[256/32];
  72        u32                     hgenerator;
  73        u8                      tgenerator;
  74        struct rsvp_session     *ht[256];
  75};
  76
  77struct rsvp_session
  78{
  79        struct rsvp_session     *next;
  80        __be32                  dst[RSVP_DST_LEN];
  81        struct tc_rsvp_gpi      dpi;
  82        u8                      protocol;
  83        u8                      tunnelid;
  84        /* 16 (src,sport) hash slots, and one wildcard source slot */
  85        struct rsvp_filter      *ht[16+1];
  86};
  87
  88
  89struct rsvp_filter
  90{
  91        struct rsvp_filter      *next;
  92        __be32                  src[RSVP_DST_LEN];
  93        struct tc_rsvp_gpi      spi;
  94        u8                      tunnelhdr;
  95
  96        struct tcf_result       res;
  97        struct tcf_exts         exts;
  98
  99        u32                     handle;
 100        struct rsvp_session     *sess;
 101};
 102
 103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
 104{
 105        unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
 106        h ^= h>>16;
 107        h ^= h>>8;
 108        return (h ^ protocol ^ tunnelid) & 0xFF;
 109}
 110
 111static __inline__ unsigned hash_src(__be32 *src)
 112{
 113        unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
 114        h ^= h>>16;
 115        h ^= h>>8;
 116        h ^= h>>4;
 117        return h & 0xF;
 118}
 119
 120static struct tcf_ext_map rsvp_ext_map = {
 121        .police = TCA_RSVP_POLICE,
 122        .action = TCA_RSVP_ACT
 123};
 124
 125#define RSVP_APPLY_RESULT()                             \
 126{                                                       \
 127        int r = tcf_exts_exec(skb, &f->exts, res);      \
 128        if (r < 0)                                      \
 129                continue;                               \
 130        else if (r > 0)                                 \
 131                return r;                               \
 132}
 133
 134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
 135                         struct tcf_result *res)
 136{
 137        struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
 138        struct rsvp_session *s;
 139        struct rsvp_filter *f;
 140        unsigned h1, h2;
 141        __be32 *dst, *src;
 142        u8 protocol;
 143        u8 tunnelid = 0;
 144        u8 *xprt;
 145#if RSVP_DST_LEN == 4
 146        struct ipv6hdr *nhptr = ipv6_hdr(skb);
 147#else
 148        struct iphdr *nhptr = ip_hdr(skb);
 149#endif
 150
 151restart:
 152
 153#if RSVP_DST_LEN == 4
 154        src = &nhptr->saddr.s6_addr32[0];
 155        dst = &nhptr->daddr.s6_addr32[0];
 156        protocol = nhptr->nexthdr;
 157        xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
 158#else
 159        src = &nhptr->saddr;
 160        dst = &nhptr->daddr;
 161        protocol = nhptr->protocol;
 162        xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
 163        if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
 164                return -1;
 165#endif
 166
 167        h1 = hash_dst(dst, protocol, tunnelid);
 168        h2 = hash_src(src);
 169
 170        for (s = sht[h1]; s; s = s->next) {
 171                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 172                    protocol == s->protocol &&
 173                    !(s->dpi.mask &
 174                      (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
 175#if RSVP_DST_LEN == 4
 176                    dst[0] == s->dst[0] &&
 177                    dst[1] == s->dst[1] &&
 178                    dst[2] == s->dst[2] &&
 179#endif
 180                    tunnelid == s->tunnelid) {
 181
 182                        for (f = s->ht[h2]; f; f = f->next) {
 183                                if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
 184                                    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
 185#if RSVP_DST_LEN == 4
 186                                    &&
 187                                    src[0] == f->src[0] &&
 188                                    src[1] == f->src[1] &&
 189                                    src[2] == f->src[2]
 190#endif
 191                                    ) {
 192                                        *res = f->res;
 193                                        RSVP_APPLY_RESULT();
 194
 195matched:
 196                                        if (f->tunnelhdr == 0)
 197                                                return 0;
 198
 199                                        tunnelid = f->res.classid;
 200                                        nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
 201                                        goto restart;
 202                                }
 203                        }
 204
 205                        /* And wildcard bucket... */
 206                        for (f = s->ht[16]; f; f = f->next) {
 207                                *res = f->res;
 208                                RSVP_APPLY_RESULT();
 209                                goto matched;
 210                        }
 211                        return -1;
 212                }
 213        }
 214        return -1;
 215}
 216
 217static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 218{
 219        struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
 220        struct rsvp_session *s;
 221        struct rsvp_filter *f;
 222        unsigned h1 = handle&0xFF;
 223        unsigned h2 = (handle>>8)&0xFF;
 224
 225        if (h2 > 16)
 226                return 0;
 227
 228        for (s = sht[h1]; s; s = s->next) {
 229                for (f = s->ht[h2]; f; f = f->next) {
 230                        if (f->handle == handle)
 231                                return (unsigned long)f;
 232                }
 233        }
 234        return 0;
 235}
 236
 237static void rsvp_put(struct tcf_proto *tp, unsigned long f)
 238{
 239}
 240
 241static int rsvp_init(struct tcf_proto *tp)
 242{
 243        struct rsvp_head *data;
 244
 245        data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
 246        if (data) {
 247                tp->root = data;
 248                return 0;
 249        }
 250        return -ENOBUFS;
 251}
 252
 253static inline void
 254rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 255{
 256        tcf_unbind_filter(tp, &f->res);
 257        tcf_exts_destroy(tp, &f->exts);
 258        kfree(f);
 259}
 260
 261static void rsvp_destroy(struct tcf_proto *tp)
 262{
 263        struct rsvp_head *data = xchg(&tp->root, NULL);
 264        struct rsvp_session **sht;
 265        int h1, h2;
 266
 267        if (data == NULL)
 268                return;
 269
 270        sht = data->ht;
 271
 272        for (h1=0; h1<256; h1++) {
 273                struct rsvp_session *s;
 274
 275                while ((s = sht[h1]) != NULL) {
 276                        sht[h1] = s->next;
 277
 278                        for (h2=0; h2<=16; h2++) {
 279                                struct rsvp_filter *f;
 280
 281                                while ((f = s->ht[h2]) != NULL) {
 282                                        s->ht[h2] = f->next;
 283                                        rsvp_delete_filter(tp, f);
 284                                }
 285                        }
 286                        kfree(s);
 287                }
 288        }
 289        kfree(data);
 290}
 291
 292static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 293{
 294        struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
 295        unsigned h = f->handle;
 296        struct rsvp_session **sp;
 297        struct rsvp_session *s = f->sess;
 298        int i;
 299
 300        for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
 301                if (*fp == f) {
 302                        tcf_tree_lock(tp);
 303                        *fp = f->next;
 304                        tcf_tree_unlock(tp);
 305                        rsvp_delete_filter(tp, f);
 306
 307                        /* Strip tree */
 308
 309                        for (i=0; i<=16; i++)
 310                                if (s->ht[i])
 311                                        return 0;
 312
 313                        /* OK, session has no flows */
 314                        for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
 315                             *sp; sp = &(*sp)->next) {
 316                                if (*sp == s) {
 317                                        tcf_tree_lock(tp);
 318                                        *sp = s->next;
 319                                        tcf_tree_unlock(tp);
 320
 321                                        kfree(s);
 322                                        return 0;
 323                                }
 324                        }
 325
 326                        return 0;
 327                }
 328        }
 329        return 0;
 330}
 331
 332static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
 333{
 334        struct rsvp_head *data = tp->root;
 335        int i = 0xFFFF;
 336
 337        while (i-- > 0) {
 338                u32 h;
 339                if ((data->hgenerator += 0x10000) == 0)
 340                        data->hgenerator = 0x10000;
 341                h = data->hgenerator|salt;
 342                if (rsvp_get(tp, h) == 0)
 343                        return h;
 344        }
 345        return 0;
 346}
 347
 348static int tunnel_bts(struct rsvp_head *data)
 349{
 350        int n = data->tgenerator>>5;
 351        u32 b = 1<<(data->tgenerator&0x1F);
 352
 353        if (data->tmap[n]&b)
 354                return 0;
 355        data->tmap[n] |= b;
 356        return 1;
 357}
 358
 359static void tunnel_recycle(struct rsvp_head *data)
 360{
 361        struct rsvp_session **sht = data->ht;
 362        u32 tmap[256/32];
 363        int h1, h2;
 364
 365        memset(tmap, 0, sizeof(tmap));
 366
 367        for (h1=0; h1<256; h1++) {
 368                struct rsvp_session *s;
 369                for (s = sht[h1]; s; s = s->next) {
 370                        for (h2=0; h2<=16; h2++) {
 371                                struct rsvp_filter *f;
 372
 373                                for (f = s->ht[h2]; f; f = f->next) {
 374                                        if (f->tunnelhdr == 0)
 375                                                continue;
 376                                        data->tgenerator = f->res.classid;
 377                                        tunnel_bts(data);
 378                                }
 379                        }
 380                }
 381        }
 382
 383        memcpy(data->tmap, tmap, sizeof(tmap));
 384}
 385
 386static u32 gen_tunnel(struct rsvp_head *data)
 387{
 388        int i, k;
 389
 390        for (k=0; k<2; k++) {
 391                for (i=255; i>0; i--) {
 392                        if (++data->tgenerator == 0)
 393                                data->tgenerator = 1;
 394                        if (tunnel_bts(data))
 395                                return data->tgenerator;
 396                }
 397                tunnel_recycle(data);
 398        }
 399        return 0;
 400}
 401
 402static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
 403        [TCA_RSVP_CLASSID]      = { .type = NLA_U32 },
 404        [TCA_RSVP_DST]          = { .type = NLA_BINARY,
 405                                    .len = RSVP_DST_LEN * sizeof(u32) },
 406        [TCA_RSVP_SRC]          = { .type = NLA_BINARY,
 407                                    .len = RSVP_DST_LEN * sizeof(u32) },
 408        [TCA_RSVP_PINFO]        = { .len = sizeof(struct tc_rsvp_pinfo) },
 409};
 410
 411static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 412                       u32 handle,
 413                       struct nlattr **tca,
 414                       unsigned long *arg)
 415{
 416        struct rsvp_head *data = tp->root;
 417        struct rsvp_filter *f, **fp;
 418        struct rsvp_session *s, **sp;
 419        struct tc_rsvp_pinfo *pinfo = NULL;
 420        struct nlattr *opt = tca[TCA_OPTIONS-1];
 421        struct nlattr *tb[TCA_RSVP_MAX + 1];
 422        struct tcf_exts e;
 423        unsigned h1, h2;
 424        __be32 *dst;
 425        int err;
 426
 427        if (opt == NULL)
 428                return handle ? -EINVAL : 0;
 429
 430        err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
 431        if (err < 0)
 432                return err;
 433
 434        err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
 435        if (err < 0)
 436                return err;
 437
 438        if ((f = (struct rsvp_filter*)*arg) != NULL) {
 439                /* Node exists: adjust only classid */
 440
 441                if (f->handle != handle && handle)
 442                        goto errout2;
 443                if (tb[TCA_RSVP_CLASSID-1]) {
 444                        f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
 445                        tcf_bind_filter(tp, &f->res, base);
 446                }
 447
 448                tcf_exts_change(tp, &f->exts, &e);
 449                return 0;
 450        }
 451
 452        /* Now more serious part... */
 453        err = -EINVAL;
 454        if (handle)
 455                goto errout2;
 456        if (tb[TCA_RSVP_DST-1] == NULL)
 457                goto errout2;
 458
 459        err = -ENOBUFS;
 460        f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
 461        if (f == NULL)
 462                goto errout2;
 463
 464        h2 = 16;
 465        if (tb[TCA_RSVP_SRC-1]) {
 466                memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
 467                h2 = hash_src(f->src);
 468        }
 469        if (tb[TCA_RSVP_PINFO-1]) {
 470                pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
 471                f->spi = pinfo->spi;
 472                f->tunnelhdr = pinfo->tunnelhdr;
 473        }
 474        if (tb[TCA_RSVP_CLASSID-1])
 475                f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
 476
 477        dst = nla_data(tb[TCA_RSVP_DST-1]);
 478        h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
 479
 480        err = -ENOMEM;
 481        if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
 482                goto errout;
 483
 484        if (f->tunnelhdr) {
 485                err = -EINVAL;
 486                if (f->res.classid > 255)
 487                        goto errout;
 488
 489                err = -ENOMEM;
 490                if (f->res.classid == 0 &&
 491                    (f->res.classid = gen_tunnel(data)) == 0)
 492                        goto errout;
 493        }
 494
 495        for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
 496                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 497                    pinfo && pinfo->protocol == s->protocol &&
 498                    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
 499#if RSVP_DST_LEN == 4
 500                    dst[0] == s->dst[0] &&
 501                    dst[1] == s->dst[1] &&
 502                    dst[2] == s->dst[2] &&
 503#endif
 504                    pinfo->tunnelid == s->tunnelid) {
 505
 506insert:
 507                        /* OK, we found appropriate session */
 508
 509                        fp = &s->ht[h2];
 510
 511                        f->sess = s;
 512                        if (f->tunnelhdr == 0)
 513                                tcf_bind_filter(tp, &f->res, base);
 514
 515                        tcf_exts_change(tp, &f->exts, &e);
 516
 517                        for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
 518                                if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
 519                                        break;
 520                        f->next = *fp;
 521                        wmb();
 522                        *fp = f;
 523
 524                        *arg = (unsigned long)f;
 525                        return 0;
 526                }
 527        }
 528
 529        /* No session found. Create new one. */
 530
 531        err = -ENOBUFS;
 532        s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
 533        if (s == NULL)
 534                goto errout;
 535        memcpy(s->dst, dst, sizeof(s->dst));
 536
 537        if (pinfo) {
 538                s->dpi = pinfo->dpi;
 539                s->protocol = pinfo->protocol;
 540                s->tunnelid = pinfo->tunnelid;
 541        }
 542        for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
 543                if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
 544                        break;
 545        }
 546        s->next = *sp;
 547        wmb();
 548        *sp = s;
 549
 550        goto insert;
 551
 552errout:
 553        kfree(f);
 554errout2:
 555        tcf_exts_destroy(tp, &e);
 556        return err;
 557}
 558
 559static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 560{
 561        struct rsvp_head *head = tp->root;
 562        unsigned h, h1;
 563
 564        if (arg->stop)
 565                return;
 566
 567        for (h = 0; h < 256; h++) {
 568                struct rsvp_session *s;
 569
 570                for (s = head->ht[h]; s; s = s->next) {
 571                        for (h1 = 0; h1 <= 16; h1++) {
 572                                struct rsvp_filter *f;
 573
 574                                for (f = s->ht[h1]; f; f = f->next) {
 575                                        if (arg->count < arg->skip) {
 576                                                arg->count++;
 577                                                continue;
 578                                        }
 579                                        if (arg->fn(tp, (unsigned long)f, arg) < 0) {
 580                                                arg->stop = 1;
 581                                                return;
 582                                        }
 583                                        arg->count++;
 584                                }
 585                        }
 586                }
 587        }
 588}
 589
 590static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 591                     struct sk_buff *skb, struct tcmsg *t)
 592{
 593        struct rsvp_filter *f = (struct rsvp_filter*)fh;
 594        struct rsvp_session *s;
 595        unsigned char *b = skb_tail_pointer(skb);
 596        struct nlattr *nest;
 597        struct tc_rsvp_pinfo pinfo;
 598
 599        if (f == NULL)
 600                return skb->len;
 601        s = f->sess;
 602
 603        t->tcm_handle = f->handle;
 604
 605        nest = nla_nest_start(skb, TCA_OPTIONS);
 606        if (nest == NULL)
 607                goto nla_put_failure;
 608
 609        NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
 610        pinfo.dpi = s->dpi;
 611        pinfo.spi = f->spi;
 612        pinfo.protocol = s->protocol;
 613        pinfo.tunnelid = s->tunnelid;
 614        pinfo.tunnelhdr = f->tunnelhdr;
 615        pinfo.pad = 0;
 616        NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
 617        if (f->res.classid)
 618                NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
 619        if (((f->handle>>8)&0xFF) != 16)
 620                NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
 621
 622        if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
 623                goto nla_put_failure;
 624
 625        nla_nest_end(skb, nest);
 626
 627        if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
 628                goto nla_put_failure;
 629        return skb->len;
 630
 631nla_put_failure:
 632        nlmsg_trim(skb, b);
 633        return -1;
 634}
 635
 636static struct tcf_proto_ops RSVP_OPS = {
 637        .next           =       NULL,
 638        .kind           =       RSVP_ID,
 639        .classify       =       rsvp_classify,
 640        .init           =       rsvp_init,
 641        .destroy        =       rsvp_destroy,
 642        .get            =       rsvp_get,
 643        .put            =       rsvp_put,
 644        .change         =       rsvp_change,
 645        .delete         =       rsvp_delete,
 646        .walk           =       rsvp_walk,
 647        .dump           =       rsvp_dump,
 648        .owner          =       THIS_MODULE,
 649};
 650
 651static int __init init_rsvp(void)
 652{
 653        return register_tcf_proto_ops(&RSVP_OPS);
 654}
 655
 656static void __exit exit_rsvp(void)
 657{
 658        unregister_tcf_proto_ops(&RSVP_OPS);
 659}
 660
 661module_init(init_rsvp)
 662module_exit(exit_rsvp)
 663
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.