linux/net/sched/cls_rsvp.h
<<
>>
Prefs
/*
 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
  11
/*
   Compared to the general packet classification problem,
   RSVP needs only several relatively simple rules:

   * (dst, protocol) are always specified,
     so that we are able to hash them.
   * src may be exact, or may be wildcard, so that
     we can keep a hash table plus one wildcard entry.
   * source port (or flow label) is important only if src is given.
  21
   IMPLEMENTATION.

   We use a two level hash table: The top level is keyed by
   destination address and protocol ID, every bucket contains a list
   of "rsvp sessions", identified by destination address, protocol and
   DPI(="Destination Port ID"): triple (key, mask, offset).

   Every bucket has a smaller hash table keyed by source address
   (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
   Every bucket is again a list of "RSVP flows", selected by
   source address and SPI(="Source Port ID" here rather than
   "security parameter index"): triple (key, mask, offset).
  34
  35
   NOTE 1. All the packets with IPv6 extension headers (except AH and ESP)
   and all fragmented packets go to the best-effort traffic class.
  38
  39
   NOTE 2. Two "port id"s seem to be redundant, rfc2207 requires
   only one "Generalized Port Identifier". So that for classic
   ah, esp (and udp,tcp) both *pi should coincide or one of them
   should be wildcard.

   At first sight, this redundancy is just a waste of CPU
   resources. But DPI and SPI add the possibility to assign different
   priorities to GPIs. Look also at note 4 about tunnels below.
  48
  49
   NOTE 3. One complication is the case of tunneled packets.
   We implement it as follows: if the first lookup
   matches a special session with "tunnelhdr" value not zero,
   flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
   In this case, we pull tunnelhdr bytes and restart lookup
   with tunnel ID added to the list of keys. Simple and stupid 8)8)
   It's enough for PIMREG and IPIP.
  57
  58
   NOTE 4. Two GPIs make it possible to parse even GRE packets.
   For example, DPI can select ETH_P_IP (and necessary flags to make
   tunnelhdr correct) in GRE protocol field and SPI matches
   GRE key. Is it not nice? 8)8)
  63
  64
   Well, as a result, despite its simplicity, we get a pretty
   powerful classification engine.  */
  67
  68
  69struct rsvp_head
  70{
  71        u32                     tmap[256/32];
  72        u32                     hgenerator;
  73        u8                      tgenerator;
  74        struct rsvp_session     *ht[256];
  75};
  76
  77struct rsvp_session
  78{
  79        struct rsvp_session     *next;
  80        __be32                  dst[RSVP_DST_LEN];
  81        struct tc_rsvp_gpi      dpi;
  82        u8                      protocol;
  83        u8                      tunnelid;
  84        /* 16 (src,sport) hash slots, and one wildcard source slot */
  85        struct rsvp_filter      *ht[16+1];
  86};
  87
  88
  89struct rsvp_filter
  90{
  91        struct rsvp_filter      *next;
  92        __be32                  src[RSVP_DST_LEN];
  93        struct tc_rsvp_gpi      spi;
  94        u8                      tunnelhdr;
  95
  96        struct tcf_result       res;
  97        struct tcf_exts         exts;
  98
  99        u32                     handle;
 100        struct rsvp_session     *sess;
 101};
 102
 103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
 104{
 105        unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
 106        h ^= h>>16;
 107        h ^= h>>8;
 108        return (h ^ protocol ^ tunnelid) & 0xFF;
 109}
 110
 111static __inline__ unsigned hash_src(__be32 *src)
 112{
 113        unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
 114        h ^= h>>16;
 115        h ^= h>>8;
 116        h ^= h>>4;
 117        return h & 0xF;
 118}
 119
 120static struct tcf_ext_map rsvp_ext_map = {
 121        .police = TCA_RSVP_POLICE,
 122        .action = TCA_RSVP_ACT
 123};
 124
 125#define RSVP_APPLY_RESULT()                             \
 126{                                                       \
 127        int r = tcf_exts_exec(skb, &f->exts, res);      \
 128        if (r < 0)                                      \
 129                continue;                               \
 130        else if (r > 0)                                 \
 131                return r;                               \
 132}
 133
 134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
 135                         struct tcf_result *res)
 136{
 137        struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
 138        struct rsvp_session *s;
 139        struct rsvp_filter *f;
 140        unsigned h1, h2;
 141        __be32 *dst, *src;
 142        u8 protocol;
 143        u8 tunnelid = 0;
 144        u8 *xprt;
 145#if RSVP_DST_LEN == 4
 146        struct ipv6hdr *nhptr = ipv6_hdr(skb);
 147#else
 148        struct iphdr *nhptr = ip_hdr(skb);
 149#endif
 150
 151restart:
 152
 153#if RSVP_DST_LEN == 4
 154        src = &nhptr->saddr.s6_addr32[0];
 155        dst = &nhptr->daddr.s6_addr32[0];
 156        protocol = nhptr->nexthdr;
 157        xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
 158#else
 159        src = &nhptr->saddr;
 160        dst = &nhptr->daddr;
 161        protocol = nhptr->protocol;
 162        xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
 163        if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
 164                return -1;
 165#endif
 166
 167        h1 = hash_dst(dst, protocol, tunnelid);
 168        h2 = hash_src(src);
 169
 170        for (s = sht[h1]; s; s = s->next) {
 171                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 172                    protocol == s->protocol &&
 173                    !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
 174#if RSVP_DST_LEN == 4
 175                    && dst[0] == s->dst[0]
 176                    && dst[1] == s->dst[1]
 177                    && dst[2] == s->dst[2]
 178#endif
 179                    && tunnelid == s->tunnelid) {
 180
 181                        for (f = s->ht[h2]; f; f = f->next) {
 182                                if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
 183                                    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
 184#if RSVP_DST_LEN == 4
 185                                    && src[0] == f->src[0]
 186                                    && src[1] == f->src[1]
 187                                    && src[2] == f->src[2]
 188#endif
 189                                    ) {
 190                                        *res = f->res;
 191                                        RSVP_APPLY_RESULT();
 192
 193matched:
 194                                        if (f->tunnelhdr == 0)
 195                                                return 0;
 196
 197                                        tunnelid = f->res.classid;
 198                                        nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
 199                                        goto restart;
 200                                }
 201                        }
 202
 203                        /* And wildcard bucket... */
 204                        for (f = s->ht[16]; f; f = f->next) {
 205                                *res = f->res;
 206                                RSVP_APPLY_RESULT();
 207                                goto matched;
 208                        }
 209                        return -1;
 210                }
 211        }
 212        return -1;
 213}
 214
 215static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
 216{
 217        struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
 218        struct rsvp_session *s;
 219        struct rsvp_filter *f;
 220        unsigned h1 = handle&0xFF;
 221        unsigned h2 = (handle>>8)&0xFF;
 222
 223        if (h2 > 16)
 224                return 0;
 225
 226        for (s = sht[h1]; s; s = s->next) {
 227                for (f = s->ht[h2]; f; f = f->next) {
 228                        if (f->handle == handle)
 229                                return (unsigned long)f;
 230                }
 231        }
 232        return 0;
 233}
 234
 235static void rsvp_put(struct tcf_proto *tp, unsigned long f)
 236{
 237}
 238
 239static int rsvp_init(struct tcf_proto *tp)
 240{
 241        struct rsvp_head *data;
 242
 243        data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
 244        if (data) {
 245                tp->root = data;
 246                return 0;
 247        }
 248        return -ENOBUFS;
 249}
 250
 251static inline void
 252rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
 253{
 254        tcf_unbind_filter(tp, &f->res);
 255        tcf_exts_destroy(tp, &f->exts);
 256        kfree(f);
 257}
 258
 259static void rsvp_destroy(struct tcf_proto *tp)
 260{
 261        struct rsvp_head *data = xchg(&tp->root, NULL);
 262        struct rsvp_session **sht;
 263        int h1, h2;
 264
 265        if (data == NULL)
 266                return;
 267
 268        sht = data->ht;
 269
 270        for (h1=0; h1<256; h1++) {
 271                struct rsvp_session *s;
 272
 273                while ((s = sht[h1]) != NULL) {
 274                        sht[h1] = s->next;
 275
 276                        for (h2=0; h2<=16; h2++) {
 277                                struct rsvp_filter *f;
 278
 279                                while ((f = s->ht[h2]) != NULL) {
 280                                        s->ht[h2] = f->next;
 281                                        rsvp_delete_filter(tp, f);
 282                                }
 283                        }
 284                        kfree(s);
 285                }
 286        }
 287        kfree(data);
 288}
 289
 290static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
 291{
 292        struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
 293        unsigned h = f->handle;
 294        struct rsvp_session **sp;
 295        struct rsvp_session *s = f->sess;
 296        int i;
 297
 298        for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
 299                if (*fp == f) {
 300                        tcf_tree_lock(tp);
 301                        *fp = f->next;
 302                        tcf_tree_unlock(tp);
 303                        rsvp_delete_filter(tp, f);
 304
 305                        /* Strip tree */
 306
 307                        for (i=0; i<=16; i++)
 308                                if (s->ht[i])
 309                                        return 0;
 310
 311                        /* OK, session has no flows */
 312                        for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
 313                             *sp; sp = &(*sp)->next) {
 314                                if (*sp == s) {
 315                                        tcf_tree_lock(tp);
 316                                        *sp = s->next;
 317                                        tcf_tree_unlock(tp);
 318
 319                                        kfree(s);
 320                                        return 0;
 321                                }
 322                        }
 323
 324                        return 0;
 325                }
 326        }
 327        return 0;
 328}
 329
 330static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
 331{
 332        struct rsvp_head *data = tp->root;
 333        int i = 0xFFFF;
 334
 335        while (i-- > 0) {
 336                u32 h;
 337                if ((data->hgenerator += 0x10000) == 0)
 338                        data->hgenerator = 0x10000;
 339                h = data->hgenerator|salt;
 340                if (rsvp_get(tp, h) == 0)
 341                        return h;
 342        }
 343        return 0;
 344}
 345
 346static int tunnel_bts(struct rsvp_head *data)
 347{
 348        int n = data->tgenerator>>5;
 349        u32 b = 1<<(data->tgenerator&0x1F);
 350
 351        if (data->tmap[n]&b)
 352                return 0;
 353        data->tmap[n] |= b;
 354        return 1;
 355}
 356
 357static void tunnel_recycle(struct rsvp_head *data)
 358{
 359        struct rsvp_session **sht = data->ht;
 360        u32 tmap[256/32];
 361        int h1, h2;
 362
 363        memset(tmap, 0, sizeof(tmap));
 364
 365        for (h1=0; h1<256; h1++) {
 366                struct rsvp_session *s;
 367                for (s = sht[h1]; s; s = s->next) {
 368                        for (h2=0; h2<=16; h2++) {
 369                                struct rsvp_filter *f;
 370
 371                                for (f = s->ht[h2]; f; f = f->next) {
 372                                        if (f->tunnelhdr == 0)
 373                                                continue;
 374                                        data->tgenerator = f->res.classid;
 375                                        tunnel_bts(data);
 376                                }
 377                        }
 378                }
 379        }
 380
 381        memcpy(data->tmap, tmap, sizeof(tmap));
 382}
 383
 384static u32 gen_tunnel(struct rsvp_head *data)
 385{
 386        int i, k;
 387
 388        for (k=0; k<2; k++) {
 389                for (i=255; i>0; i--) {
 390                        if (++data->tgenerator == 0)
 391                                data->tgenerator = 1;
 392                        if (tunnel_bts(data))
 393                                return data->tgenerator;
 394                }
 395                tunnel_recycle(data);
 396        }
 397        return 0;
 398}
 399
 400static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
 401        [TCA_RSVP_CLASSID]      = { .type = NLA_U32 },
 402        [TCA_RSVP_DST]          = { .type = NLA_BINARY,
 403                                    .len = RSVP_DST_LEN * sizeof(u32) },
 404        [TCA_RSVP_SRC]          = { .type = NLA_BINARY,
 405                                    .len = RSVP_DST_LEN * sizeof(u32) },
 406        [TCA_RSVP_PINFO]        = { .len = sizeof(struct tc_rsvp_pinfo) },
 407};
 408
 409static int rsvp_change(struct tcf_proto *tp, unsigned long base,
 410                       u32 handle,
 411                       struct nlattr **tca,
 412                       unsigned long *arg)
 413{
 414        struct rsvp_head *data = tp->root;
 415        struct rsvp_filter *f, **fp;
 416        struct rsvp_session *s, **sp;
 417        struct tc_rsvp_pinfo *pinfo = NULL;
 418        struct nlattr *opt = tca[TCA_OPTIONS-1];
 419        struct nlattr *tb[TCA_RSVP_MAX + 1];
 420        struct tcf_exts e;
 421        unsigned h1, h2;
 422        __be32 *dst;
 423        int err;
 424
 425        if (opt == NULL)
 426                return handle ? -EINVAL : 0;
 427
 428        err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
 429        if (err < 0)
 430                return err;
 431
 432        err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
 433        if (err < 0)
 434                return err;
 435
 436        if ((f = (struct rsvp_filter*)*arg) != NULL) {
 437                /* Node exists: adjust only classid */
 438
 439                if (f->handle != handle && handle)
 440                        goto errout2;
 441                if (tb[TCA_RSVP_CLASSID-1]) {
 442                        f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
 443                        tcf_bind_filter(tp, &f->res, base);
 444                }
 445
 446                tcf_exts_change(tp, &f->exts, &e);
 447                return 0;
 448        }
 449
 450        /* Now more serious part... */
 451        err = -EINVAL;
 452        if (handle)
 453                goto errout2;
 454        if (tb[TCA_RSVP_DST-1] == NULL)
 455                goto errout2;
 456
 457        err = -ENOBUFS;
 458        f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
 459        if (f == NULL)
 460                goto errout2;
 461
 462        h2 = 16;
 463        if (tb[TCA_RSVP_SRC-1]) {
 464                memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
 465                h2 = hash_src(f->src);
 466        }
 467        if (tb[TCA_RSVP_PINFO-1]) {
 468                pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
 469                f->spi = pinfo->spi;
 470                f->tunnelhdr = pinfo->tunnelhdr;
 471        }
 472        if (tb[TCA_RSVP_CLASSID-1])
 473                f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
 474
 475        dst = nla_data(tb[TCA_RSVP_DST-1]);
 476        h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
 477
 478        err = -ENOMEM;
 479        if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
 480                goto errout;
 481
 482        if (f->tunnelhdr) {
 483                err = -EINVAL;
 484                if (f->res.classid > 255)
 485                        goto errout;
 486
 487                err = -ENOMEM;
 488                if (f->res.classid == 0 &&
 489                    (f->res.classid = gen_tunnel(data)) == 0)
 490                        goto errout;
 491        }
 492
 493        for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
 494                if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
 495                    pinfo && pinfo->protocol == s->protocol &&
 496                    memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
 497#if RSVP_DST_LEN == 4
 498                    && dst[0] == s->dst[0]
 499                    && dst[1] == s->dst[1]
 500                    && dst[2] == s->dst[2]
 501#endif
 502                    && pinfo->tunnelid == s->tunnelid) {
 503
 504insert:
 505                        /* OK, we found appropriate session */
 506
 507                        fp = &s->ht[h2];
 508
 509                        f->sess = s;
 510                        if (f->tunnelhdr == 0)
 511                                tcf_bind_filter(tp, &f->res, base);
 512
 513                        tcf_exts_change(tp, &f->exts, &e);
 514
 515                        for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
 516                                if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
 517                                        break;
 518                        f->next = *fp;
 519                        wmb();
 520                        *fp = f;
 521
 522                        *arg = (unsigned long)f;
 523                        return 0;
 524                }
 525        }
 526
 527        /* No session found. Create new one. */
 528
 529        err = -ENOBUFS;
 530        s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
 531        if (s == NULL)
 532                goto errout;
 533        memcpy(s->dst, dst, sizeof(s->dst));
 534
 535        if (pinfo) {
 536                s->dpi = pinfo->dpi;
 537                s->protocol = pinfo->protocol;
 538                s->tunnelid = pinfo->tunnelid;
 539        }
 540        for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
 541                if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
 542                        break;
 543        }
 544        s->next = *sp;
 545        wmb();
 546        *sp = s;
 547
 548        goto insert;
 549
 550errout:
 551        kfree(f);
 552errout2:
 553        tcf_exts_destroy(tp, &e);
 554        return err;
 555}
 556
 557static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
 558{
 559        struct rsvp_head *head = tp->root;
 560        unsigned h, h1;
 561
 562        if (arg->stop)
 563                return;
 564
 565        for (h = 0; h < 256; h++) {
 566                struct rsvp_session *s;
 567
 568                for (s = head->ht[h]; s; s = s->next) {
 569                        for (h1 = 0; h1 <= 16; h1++) {
 570                                struct rsvp_filter *f;
 571
 572                                for (f = s->ht[h1]; f; f = f->next) {
 573                                        if (arg->count < arg->skip) {
 574                                                arg->count++;
 575                                                continue;
 576                                        }
 577                                        if (arg->fn(tp, (unsigned long)f, arg) < 0) {
 578                                                arg->stop = 1;
 579                                                return;
 580                                        }
 581                                        arg->count++;
 582                                }
 583                        }
 584                }
 585        }
 586}
 587
 588static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
 589                     struct sk_buff *skb, struct tcmsg *t)
 590{
 591        struct rsvp_filter *f = (struct rsvp_filter*)fh;
 592        struct rsvp_session *s;
 593        unsigned char *b = skb_tail_pointer(skb);
 594        struct nlattr *nest;
 595        struct tc_rsvp_pinfo pinfo;
 596
 597        if (f == NULL)
 598                return skb->len;
 599        s = f->sess;
 600
 601        t->tcm_handle = f->handle;
 602
 603        nest = nla_nest_start(skb, TCA_OPTIONS);
 604        if (nest == NULL)
 605                goto nla_put_failure;
 606
 607        NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
 608        pinfo.dpi = s->dpi;
 609        pinfo.spi = f->spi;
 610        pinfo.protocol = s->protocol;
 611        pinfo.tunnelid = s->tunnelid;
 612        pinfo.tunnelhdr = f->tunnelhdr;
 613        pinfo.pad = 0;
 614        NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
 615        if (f->res.classid)
 616                NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
 617        if (((f->handle>>8)&0xFF) != 16)
 618                NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
 619
 620        if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
 621                goto nla_put_failure;
 622
 623        nla_nest_end(skb, nest);
 624
 625        if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
 626                goto nla_put_failure;
 627        return skb->len;
 628
 629nla_put_failure:
 630        nlmsg_trim(skb, b);
 631        return -1;
 632}
 633
 634static struct tcf_proto_ops RSVP_OPS = {
 635        .next           =       NULL,
 636        .kind           =       RSVP_ID,
 637        .classify       =       rsvp_classify,
 638        .init           =       rsvp_init,
 639        .destroy        =       rsvp_destroy,
 640        .get            =       rsvp_get,
 641        .put            =       rsvp_put,
 642        .change         =       rsvp_change,
 643        .delete         =       rsvp_delete,
 644        .walk           =       rsvp_walk,
 645        .dump           =       rsvp_dump,
 646        .owner          =       THIS_MODULE,
 647};
 648
 649static int __init init_rsvp(void)
 650{
 651        return register_tcf_proto_ops(&RSVP_OPS);
 652}
 653
 654static void __exit exit_rsvp(void)
 655{
 656        unregister_tcf_proto_ops(&RSVP_OPS);
 657}
 658
 659module_init(init_rsvp)
 660module_exit(exit_rsvp)
 661