linux/net/netfilter/nf_conntrack_expect.c
/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

static unsigned int nf_ct_expect_hash_rnd __read_mostly;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);

        NF_CT_ASSERT(master_help);
        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        hlist_del_rcu(&exp->hnode);
        net->ct.expect_count--;

        hlist_del(&exp->lnode);
        master_help->expecting[exp->class]--;
        nf_ct_expect_put(exp);

        NF_CT_STAT_INC(net, expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);

static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        spin_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_put(exp);
}

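/*
 * Hash the expectation's destination tuple into the expectation table:
 * jhash2() runs over the destination address, keyed with the l3/l4
 * protocol numbers and the destination port, and the 32-bit result is
 * scaled into [0, nf_ct_expect_hsize) by a multiply-shift, which avoids
 * a modulo on table sizes that are not powers of two.
 */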
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
        unsigned int hash;

        if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
                get_random_bytes(&nf_ct_expect_hash_rnd,
                                 sizeof(nf_ct_expect_hash_rnd));
                nf_ct_expect_hash_rnd_initted = 1;
        }

        hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
                      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
                       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
        return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
                        return i;
        }
        return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        rcu_read_lock();
        i = __nf_ct_expect_find(net, tuple);
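        /* A lockless lookup races with nf_ct_expect_put(): if the
         * refcount already dropped to zero, atomic_inc_not_zero()
         * refuses to resurrect the entry and we report no match. */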
        if (i && !atomic_inc_not_zero(&i->use))
                i = NULL;
        rcu_read_unlock();

        return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

/* If an expectation for this connection is found, it is unlinked from
 * the global list and then returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i, *exp = NULL;
        struct hlist_node *n;
        unsigned int h;

        if (!net->ct.expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
                    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
                        exp = i;
                        break;
                }
        }
        if (!exp)
                return NULL;

        /* If master is not in hash table yet (ie. packet hasn't left
           this machine yet), how can other end know about expected?
           Hence these are not the droids you are looking for (if
           master ct never got confirmed, we'd hold a reference to it
           and weird things would happen to future packets). */
        if (!nf_ct_is_confirmed(exp->master))
                return NULL;

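        /* A permanent expectation stays in the table and only gains a
         * reference; a one-shot expectation is unlinked once its timer
         * has been stopped.  If del_timer() loses the race, the timeout
         * handler already owns the expectation and we must not touch it. */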
        if (exp->flags & NF_CT_EXPECT_PERMANENT) {
                atomic_inc(&exp->use);
                return exp;
        } else if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                return exp;
        }

        return NULL;
}

/* delete all expectations for this conntrack */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conn_help *help = nfct_help(ct);
        struct nf_conntrack_expect *exp;
        struct hlist_node *n, *next;

        /* Optimization: most connections never expect any others. */
        if (!help)
                return;

        hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
                if (del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_ct_expect_put(exp);
                }
        }
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* If the parts of the tuples covered by the intersection of
           both masks are equal, the two expectations clash. */
        struct nf_conntrack_tuple_mask intersect_mask;
        int count;

        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master && a->class == b->class
                && nf_ct_tuple_equal(&a->tuple, &b->tuple)
                && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
        spin_lock_bh(&nf_conntrack_lock);
        if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                nf_ct_expect_put(exp);
        }
        spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations. During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
        if (!new)
                return NULL;

        new->master = me;
        atomic_set(&new->use, 1);
        INIT_RCU_HEAD(&new->rcu);
        return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
                       u_int8_t family,
                       const union nf_inet_addr *saddr,
                       const union nf_inet_addr *daddr,
                       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
        int len;

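        /* Number of address bytes actually used by this family:
         * 4 for IPv4, 16 for IPv6. */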
        if (family == AF_INET)
                len = 4;
        else
                len = 16;

        exp->flags = 0;
        exp->class = class;
        exp->expectfn = NULL;
        exp->helper = NULL;
        exp->tuple.src.l3num = family;
        exp->tuple.dst.protonum = proto;

        if (saddr) {
                memcpy(&exp->tuple.src.u3, saddr, len);
                if (sizeof(exp->tuple.src.u3) > len)
                        /* address needs to be cleared for nf_ct_tuple_equal */
                        memset((void *)&exp->tuple.src.u3 + len, 0x00,
                               sizeof(exp->tuple.src.u3) - len);
                memset(&exp->mask.src.u3, 0xFF, len);
                if (sizeof(exp->mask.src.u3) > len)
                        memset((void *)&exp->mask.src.u3 + len, 0x00,
                               sizeof(exp->mask.src.u3) - len);
        } else {
                memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
                memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
        }

        if (src) {
                exp->tuple.src.u.all = *src;
                exp->mask.src.u.all = htons(0xFFFF);
        } else {
                exp->tuple.src.u.all = 0;
                exp->mask.src.u.all = 0;
        }

        memcpy(&exp->tuple.dst.u3, daddr, len);
        if (sizeof(exp->tuple.dst.u3) > len)
                /* address needs to be cleared for nf_ct_tuple_equal */
                memset((void *)&exp->tuple.dst.u3 + len, 0x00,
                       sizeof(exp->tuple.dst.u3) - len);

        exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);

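/* Expectations are freed via RCU: lockless readers walking the hash in
 * __nf_ct_expect_find() may still hold a pointer when the last reference
 * is dropped, so the actual kmem_cache_free() is deferred until a grace
 * period has elapsed. */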
static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
        struct nf_conntrack_expect *exp;

        exp = container_of(head, struct nf_conntrack_expect, rcu);
        kmem_cache_free(nf_ct_expect_cachep, exp);
}

void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        struct net *net = nf_ct_exp_net(exp);
        const struct nf_conntrack_expect_policy *p;
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

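        /* First reference: the expectation is reachable from the
         * per-master list and the global hash table; it is dropped
         * by nf_ct_unlink_expect(). */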
        atomic_inc(&exp->use);

        hlist_add_head(&exp->lnode, &master_help->expectations);
        master_help->expecting[exp->class]++;

        hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
        net->ct.expect_count++;

        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        p = &master_help->helper->expect_policy[exp->class];
        exp->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&exp->timeout);

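        /* Second reference: held by the running timer and dropped in
         * nf_ct_expectation_timed_out(). */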
        atomic_inc(&exp->use);
        NF_CT_STAT_INC(net, expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
                                struct nf_conntrack_expect *new)
{
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_expect *exp, *last = NULL;
        struct hlist_node *n;

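        /* hlist_add_head() puts new expectations at the front of the
         * per-master list, so the last entry of the requested class is
         * the oldest one. */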
        hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
                if (exp->class == new->class)
                        last = exp;
        }

        if (last && del_timer(&last->timeout)) {
                nf_ct_unlink_expect(last);
                nf_ct_expect_put(last);
        }
}

static inline int refresh_timer(struct nf_conntrack_expect *i)
{
        struct nf_conn_help *master_help = nfct_help(i->master);
        const struct nf_conntrack_expect_policy *p;

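        /* If del_timer() fails, the timeout handler is already running
         * and the expectation is on its way out; don't resurrect it. */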
        if (!del_timer(&i->timeout))
                return 0;

        p = &master_help->helper->expect_policy[i->class];
        i->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&i->timeout);
        return 1;
}

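/* Returns 1 if the caller may insert the expectation, 0 if an identical
 * expectation already existed and only had its timer refreshed, or a
 * negative error code. */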
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
{
        const struct nf_conntrack_expect_policy *p;
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        struct net *net = nf_ct_exp_net(expect);
        struct hlist_node *n;
        unsigned int h;
        int ret = 1;

        if (!master_help->helper) {
                ret = -ESHUTDOWN;
                goto out;
        }
        h = nf_ct_expect_dst_hash(&expect->tuple);
        hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore it. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Will be over limit? */
        p = &master_help->helper->expect_policy[expect->class];
        if (p->max_expected &&
            master_help->expecting[expect->class] >= p->max_expected) {
                evict_oldest_expect(master, expect);
                if (master_help->expecting[expect->class] >= p->max_expected) {
                        ret = -EMFILE;
                        goto out;
                }
        }

        if (net->ct.expect_count >= nf_ct_expect_max) {
                if (net_ratelimit())
                        printk(KERN_WARNING
                               "nf_conntrack: expectation table full\n");
                ret = -EMFILE;
        }
out:
        return ret;
}

int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
                                u32 pid, int report)
{
        int ret;

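        /* Hold nf_conntrack_lock across both the clash/limit checks and
         * the insertion so the checks cannot be invalidated in between;
         * the new-expectation event is reported after the lock is dropped. */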
        spin_lock_bh(&nf_conntrack_lock);
        ret = __nf_ct_expect_check(expect);
        if (ret <= 0)
                goto out;

        ret = 0;
        nf_ct_expect_insert(expect);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
        return ret;
out:
        spin_unlock_bh(&nf_conntrack_lock);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);

#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
        struct seq_net_private p;
        unsigned int bucket;
};

static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;
        struct hlist_node *n;

        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
                n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
                if (n)
                        return n;
        }
        return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
                                             struct hlist_node *head)
{
        struct net *net = seq_file_net(seq);
        struct ct_expect_iter_state *st = seq->private;

        head = rcu_dereference(head->next);
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
                head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
        }
        return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
        struct hlist_node *head = ct_expect_get_first(seq);

        if (head)
                while (pos && (head = ct_expect_get_next(seq, head)))
                        pos--;
        return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
{
        rcu_read_lock();
        return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
        __releases(RCU)
{
        rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
        struct nf_conntrack_expect *expect;
        struct hlist_node *n = v;
        char *delim = "";

        expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

        if (expect->timeout.function)
                seq_printf(s, "%ld ", timer_pending(&expect->timeout)
                           ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
        else
                seq_printf(s, "- ");
        seq_printf(s, "l3proto = %u proto=%u ",
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
                    __nf_ct_l3proto_find(expect->tuple.src.l3num),
                    __nf_ct_l4proto_find(expect->tuple.src.l3num,
                                         expect->tuple.dst.protonum));

        if (expect->flags & NF_CT_EXPECT_PERMANENT) {
                seq_printf(s, "PERMANENT");
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_INACTIVE)
                seq_printf(s, "%sINACTIVE", delim);

        return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
        .start = exp_seq_start,
        .next = exp_seq_next,
        .stop = exp_seq_stop,
        .show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &exp_seq_ops,
                            sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
        .owner   = THIS_MODULE,
        .open    = exp_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
#endif /* CONFIG_PROC_FS */

static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
        struct proc_dir_entry *proc;

        proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
        if (!proc)
                return -ENOMEM;
#endif /* CONFIG_PROC_FS */
        return 0;
}

static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}

module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);

int nf_conntrack_expect_init(struct net *net)
{
        int err = -ENOMEM;

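        /* Global tuning happens once, for the initial namespace: default
         * the expectation hash to 1/256th of the conntrack hash (at least
         * one bucket) and allow at most four expectations per bucket on
         * average. */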
        if (net_eq(net, &init_net)) {
                if (!nf_ct_expect_hsize) {
                        nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
                        if (!nf_ct_expect_hsize)
                                nf_ct_expect_hsize = 1;
                }
                nf_ct_expect_max = nf_ct_expect_hsize * 4;
        }

        net->ct.expect_count = 0;
        net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
                                                    &net->ct.expect_vmalloc, 0);
        if (net->ct.expect_hash == NULL)
                goto err1;

        if (net_eq(net, &init_net)) {
                nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL);
                if (!nf_ct_expect_cachep)
                        goto err2;
        }

        err = exp_proc_init(net);
        if (err < 0)
                goto err3;

        return 0;

err3:
        if (net_eq(net, &init_net))
                kmem_cache_destroy(nf_ct_expect_cachep);
err2:
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
err1:
        return err;
}

void nf_conntrack_expect_fini(struct net *net)
{
        exp_proc_remove(net);
        if (net_eq(net, &init_net))
                kmem_cache_destroy(nf_ct_expect_cachep);
        nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
                             nf_ct_expect_hsize);
}