linux/net/netfilter/nf_conntrack_expect.c
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

static unsigned int nf_ct_expect_hash_rnd __read_mostly;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
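/* Unlink an expectation from the global hash and from its master's
 * list and drop the list's reference.  The caller must hold
 * nf_conntrack_lock and must already have stopped the timer. */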
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);

        NF_CT_ASSERT(master_help);
        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        hlist_del_rcu(&exp->hnode);
        net->ct.expect_count--;

        hlist_del(&exp->lnode);
        master_help->expecting[exp->class]--;
        nf_ct_expect_put(exp);

        NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);

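/* Timer callback: the expectation was never matched before its timeout
 * expired, so unlink it and drop the reference held by the timer. */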
static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        spin_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_put(exp);
}

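/* Hash an expectation by the destination part of its tuple.  The mask
 * only ever covers the source part, so the destination can be hashed
 * exactly. */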
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
        unsigned int hash;

        if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
                get_random_bytes(&nf_ct_expect_hash_rnd, 4);
                nf_ct_expect_hash_rnd_initted = 1;
        }

        hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
                      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
                       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
        return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

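/* Find an expectation matching @tuple without taking a reference.
 * The caller must hold rcu_read_lock() for as long as the result is
 * used. */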
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
                        return i;
        }
        return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Find an expectation corresponding to a tuple and take a reference to
 * it, skipping expectations whose refcount has already dropped to zero. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        rcu_read_lock();
        i = __nf_ct_expect_find(net, tuple);
        if (i && !atomic_inc_not_zero(&i->use))
                i = NULL;
        rcu_read_unlock();

        return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

/* If an expectation for this connection is found, it is deleted from
 * the global list and then returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i, *exp = NULL;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
                    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
                        exp = i;
                        break;
                }
        }
        if (!exp)
                return NULL;

        /* If the master is not in the hash table yet (i.e. the packet
           hasn't left this machine yet), how can the other end know
           about the expected connection?  Hence these are not the
           droids you are looking for (if the master ct never got
           confirmed, we'd hold a reference to it and weird things
           would happen to future packets). */
        if (!nf_ct_is_confirmed(exp->master))
                return NULL;

        if (exp->flags & NF_CT_EXPECT_PERMANENT) {
                atomic_inc(&exp->use);
                return exp;
        } else if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                return exp;
        }

        return NULL;
}

/* Delete all expectations for this conntrack. */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conn_help *help = nfct_help(ct);
        struct nf_conntrack_expect *exp;
        struct hlist_node *n, *next;

        /* Optimization: most connections never expect any others. */
        if (!help)
                return;

        hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
                if (del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_ct_expect_put(exp);
                }
        }
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* The parts covered by the intersection of the two masks must
           be unequal, otherwise the expectations clash. */
        struct nf_conntrack_tuple_mask intersect_mask;
        int count;

        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

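/* Two expectations are identical when master, class, tuple and mask
 * all match; registering such a duplicate merely refreshes the
 * existing timer. */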
static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master && a->class == b->class
                && nf_ct_tuple_equal(&a->tuple, &b->tuple)
                && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
        spin_lock_bh(&nf_conntrack_lock);
        if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                nf_ct_expect_put(exp);
        }
        spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations.  During conntrack destruction the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
        if (!new)
                return NULL;

        new->master = me;
        atomic_set(&new->use, 1);
        INIT_RCU_HEAD(&new->rcu);
        return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

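/* Fill in the tuple and mask of an expectation.  A NULL @saddr or @src
 * leaves that part of the source wildcarded (its mask is cleared); the
 * destination side must always be fully specified.
 *
 * A typical helper sequence looks roughly like this (a sketch modelled
 * on the FTP helper; "dir" stands for the direction of the packet
 * being parsed and "port" for the port number extracted from the
 * payload):
 *
 *      exp = nf_ct_expect_alloc(ct);
 *      if (exp == NULL)
 *              return NF_DROP;
 *      nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
 *                        &ct->tuplehash[!dir].tuple.src.u3,
 *                        &ct->tuplehash[!dir].tuple.dst.u3,
 *                        IPPROTO_TCP, NULL, &port);
 *      if (nf_ct_expect_related(exp) != 0)
 *              ret = NF_DROP;
 *      nf_ct_expect_put(exp);
 */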
void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
                       u_int8_t family,
                       const union nf_inet_addr *saddr,
                       const union nf_inet_addr *daddr,
                       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
        int len;

        if (family == AF_INET)
                len = 4;
        else
                len = 16;

        exp->flags = 0;
        exp->class = class;
        exp->expectfn = NULL;
        exp->helper = NULL;
        exp->tuple.src.l3num = family;
        exp->tuple.dst.protonum = proto;

        if (saddr) {
                memcpy(&exp->tuple.src.u3, saddr, len);
                if (sizeof(exp->tuple.src.u3) > len)
                        /* address needs to be cleared for nf_ct_tuple_equal */
                        memset((void *)&exp->tuple.src.u3 + len, 0x00,
                               sizeof(exp->tuple.src.u3) - len);
                memset(&exp->mask.src.u3, 0xFF, len);
                if (sizeof(exp->mask.src.u3) > len)
                        memset((void *)&exp->mask.src.u3 + len, 0x00,
                               sizeof(exp->mask.src.u3) - len);
        } else {
                memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
                memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
        }

        if (src) {
                exp->tuple.src.u.all = *src;
                exp->mask.src.u.all = htons(0xFFFF);
        } else {
                exp->tuple.src.u.all = 0;
                exp->mask.src.u.all = 0;
        }

        memcpy(&exp->tuple.dst.u3, daddr, len);
        if (sizeof(exp->tuple.dst.u3) > len)
                /* address needs to be cleared for nf_ct_tuple_equal */
                memset((void *)&exp->tuple.dst.u3 + len, 0x00,
                       sizeof(exp->tuple.dst.u3) - len);

        exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
        struct nf_conntrack_expect *exp;

        exp = container_of(head, struct nf_conntrack_expect, rcu);
        kmem_cache_free(nf_ct_expect_cachep, exp);
}

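/* Drop a reference; the final put frees the expectation after an RCU
 * grace period, so lockless readers never see freed memory. */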
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

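/* Link an expectation into its master's list and the global hash and
 * start its timer.  Two references are taken: one for the hash table
 * and one for the running timer.  The caller must hold
 * nf_conntrack_lock. */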
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);
        const struct nf_conntrack_expect_policy *p;
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

        atomic_inc(&exp->use);

        hlist_add_head(&exp->lnode, &master_help->expectations);
        master_help->expecting[exp->class]++;

        hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
        net->ct.expect_count++;

        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        p = &master_help->helper->expect_policy[exp->class];
        exp->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&exp->timeout);

        atomic_inc(&exp->use);
        NF_CT_STAT_INC(net, expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
                                struct nf_conntrack_expect *new)
{
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_expect *exp, *last = NULL;
        struct hlist_node *n;

        hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
                if (exp->class == new->class)
                        last = exp;
        }

        if (last && del_timer(&last->timeout)) {
                nf_ct_unlink_expect(last);
                nf_ct_expect_put(last);
        }
}

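/* Restart the timeout of an existing expectation.  Returns 0 if the
 * timer had already fired (the expectation is dying), 1 otherwise. */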
static inline int refresh_timer(struct nf_conntrack_expect *i)
{
        struct nf_conn_help *master_help = nfct_help(i->master);
        const struct nf_conntrack_expect_policy *p;

        if (!del_timer(&i->timeout))
                return 0;

        p = &master_help->helper->expect_policy[i->class];
        i->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&i->timeout);
        return 1;
}

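/* Register an expectation.  A duplicate only refreshes the existing
 * timer, a clash with a different expectation fails with -EBUSY, and
 * both the per-helper and the global limits are enforced, evicting the
 * oldest expectation of the same class when the helper limit is hit. */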
int nf_ct_expect_related(struct nf_conntrack_expect *expect)
{
        const struct nf_conntrack_expect_policy *p;
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        struct net *net = nf_ct_exp_net(expect);
        struct hlist_node *n;
        unsigned int h;
        int ret;

        NF_CT_ASSERT(master_help);

        spin_lock_bh(&nf_conntrack_lock);
        if (!master_help->helper) {
                ret = -ESHUTDOWN;
                goto out;
        }
        h = nf_ct_expect_dst_hash(&expect->tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (expect_matches(i, expect)) {
                        /* Refresh the timer: if it's dying, ignore it. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Will we be over the limit? */
        p = &master_help->helper->expect_policy[expect->class];
        if (p->max_expected &&
            master_help->expecting[expect->class] >= p->max_expected) {
                evict_oldest_expect(master, expect);
                if (master_help->expecting[expect->class] >= p->max_expected) {
                        ret = -EMFILE;
                        goto out;
                }
        }

        if (net->ct.expect_count >= nf_ct_expect_max) {
                if (net_ratelimit())
                        printk(KERN_WARNING
                               "nf_conntrack: expectation table full\n");
                ret = -EMFILE;
                goto out;
        }

        nf_ct_expect_insert(expect);
        nf_ct_expect_event(IPEXP_NEW, expect);
        ret = 0;
out:
        spin_unlock_bh(&nf_conntrack_lock);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related);

#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
        struct seq_net_private p;
        unsigned int bucket;
};

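/* seq_file iteration over the expectation hash under rcu_read_lock():
 * walk each bucket's chain in turn, remembering the current bucket in
 * the iterator state. */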
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
        struct hlist_node *n;

        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
                n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
                if (n)
                        return n;
        }
        return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
                                             struct hlist_node *head)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;

        head = rcu_dereference(head->next);
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
                head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
        }
        return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
        struct hlist_node *head = ct_expect_get_first(seq);

        if (head)
                while (pos && (head = ct_expect_get_next(seq, head)))
                        pos--;
        return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
{
        rcu_read_lock();
        return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
        __releases(RCU)
{
        rcu_read_unlock();
}

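/* Print one expectation.  For an IPv4/TCP expectation a line looks
 * roughly like this (illustrative only; wildcarded fields print as 0,
 * and the tuple format depends on the l3/l4 protocol handlers):
 *
 *      299 l3proto = 2 proto=6 src=10.0.0.1 dst=10.0.0.2 sport=0 dport=41225
 */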
static int exp_seq_show(struct seq_file *s, void *v)
{
        struct nf_conntrack_expect *expect;
        struct hlist_node *n = v;
        char *delim = "";

        expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

        if (expect->timeout.function)
                seq_printf(s, "%ld ", timer_pending(&expect->timeout)
                           ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
        else
                seq_printf(s, "- ");
        seq_printf(s, "l3proto = %u proto=%u ",
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
                    __nf_ct_l3proto_find(expect->tuple.src.l3num),
                    __nf_ct_l4proto_find(expect->tuple.src.l3num,
                                         expect->tuple.dst.protonum));

        if (expect->flags & NF_CT_EXPECT_PERMANENT) {
                seq_printf(s, "PERMANENT");
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_INACTIVE)
                seq_printf(s, "%sINACTIVE", delim);

        return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
        .start = exp_seq_start,
        .next = exp_seq_next,
        .stop = exp_seq_stop,
        .show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &exp_seq_ops,
                            sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
        .owner   = THIS_MODULE,
        .open    = exp_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
        struct proc_dir_entry *proc;

        proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
        if (!proc)
                return -ENOMEM;
#endif /* CONFIG_PROC_FS */
        return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);

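/* Per-namespace initialisation.  The hash size, the global limit and
 * the kmem cache are set up once, for the initial namespace; every
 * namespace then gets its own expectation hash table and proc entry. */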
int nf_conntrack_expect_init(struct net *net)
{
        int err = -ENOMEM;

        if (net_eq(net, &init_net)) {
                if (!nf_ct_expect_hsize) {
                        nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
                        if (!nf_ct_expect_hsize)
                                nf_ct_expect_hsize = 1;
                }
                nf_ct_expect_max = nf_ct_expect_hsize * 4;
        }

        net->ct.expect_count = 0;
        net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
                                                    &net->ct.expect_vmalloc);
        if (net->ct.expect_hash == NULL)
                goto err1;

        if (net_eq(net, &init_net)) {
                nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL);
                if (!nf_ct_expect_cachep)
                        goto err2;
        }

        err = exp_proc_init(net);
        if (err < 0)
                goto err3;

        return 0;

err3:
        if (net_eq(net, &init_net))
                kmem_cache_destroy(nf_ct_expect_cachep);
err2:
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
err1:
        return err;
}

void nf_conntrack_expect_fini(struct net *net)
{
        exp_proc_remove(net);
        if (net_eq(net, &init_net)) {
                /* Wait for pending nf_ct_expect_free_rcu() callbacks
                 * before destroying the cache they free into. */
                rcu_barrier();
                kmem_cache_destroy(nf_ct_expect_cachep);
        }
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
}