linux/net/netfilter/core.c
<<
>>
Prefs
   1/* netfilter.c: look after the filters for various protocols.
   2 * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
   3 *
   4 * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
   5 * way.
   6 *
   7 * Rusty Russell (C)2000 -- This code is GPL.
   8 * Patrick McHardy (c) 2006-2012
   9 */
  10#include <linux/kernel.h>
  11#include <linux/netfilter.h>
  12#include <net/protocol.h>
  13#include <linux/init.h>
  14#include <linux/skbuff.h>
  15#include <linux/wait.h>
  16#include <linux/module.h>
  17#include <linux/interrupt.h>
  18#include <linux/if.h>
  19#include <linux/netdevice.h>
  20#include <linux/netfilter_ipv6.h>
  21#include <linux/inetdevice.h>
  22#include <linux/proc_fs.h>
  23#include <linux/mutex.h>
  24#include <linux/mm.h>
  25#include <linux/rcupdate.h>
  26#include <net/net_namespace.h>
  27#include <net/sock.h>
  28
  29#include "nf_internals.h"
  30
  31static DEFINE_MUTEX(afinfo_mutex);
  32
  33const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
  34EXPORT_SYMBOL(nf_afinfo);
  35const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
  36EXPORT_SYMBOL_GPL(nf_ipv6_ops);
  37
  38DEFINE_PER_CPU(bool, nf_skb_duplicated);
  39EXPORT_SYMBOL_GPL(nf_skb_duplicated);
  40
  41int nf_register_afinfo(const struct nf_afinfo *afinfo)
  42{
  43        mutex_lock(&afinfo_mutex);
  44        RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
  45        mutex_unlock(&afinfo_mutex);
  46        return 0;
  47}
  48EXPORT_SYMBOL_GPL(nf_register_afinfo);
  49
  50void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
  51{
  52        mutex_lock(&afinfo_mutex);
  53        RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
  54        mutex_unlock(&afinfo_mutex);
  55        synchronize_rcu();
  56}
  57EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
  58
  59#ifdef HAVE_JUMP_LABEL
  60struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
  61EXPORT_SYMBOL(nf_hooks_needed);
  62#endif
  63
  64static DEFINE_MUTEX(nf_hook_mutex);
  65
  66/* max hooks per family/hooknum */
  67#define MAX_HOOK_COUNT          1024
  68
  69#define nf_entry_dereference(e) \
  70        rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
  71
  72static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
  73{
  74        struct nf_hook_entries *e;
  75        size_t alloc = sizeof(*e) +
  76                       sizeof(struct nf_hook_entry) * num +
  77                       sizeof(struct nf_hook_ops *) * num;
  78
  79        if (num == 0)
  80                return NULL;
  81
  82        e = kvzalloc(alloc, GFP_KERNEL);
  83        if (e)
  84                e->num_hook_entries = num;
  85        return e;
  86}
  87
  88static unsigned int accept_all(void *priv,
  89                               struct sk_buff *skb,
  90                               const struct nf_hook_state *state)
  91{
  92        return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */
  93}
  94
  95static const struct nf_hook_ops dummy_ops = {
  96        .hook = accept_all,
  97        .priority = INT_MIN,
  98};
  99
 100static struct nf_hook_entries *
 101nf_hook_entries_grow(const struct nf_hook_entries *old,
 102                     const struct nf_hook_ops *reg)
 103{
 104        unsigned int i, alloc_entries, nhooks, old_entries;
 105        struct nf_hook_ops **orig_ops = NULL;
 106        struct nf_hook_ops **new_ops;
 107        struct nf_hook_entries *new;
 108        bool inserted = false;
 109
 110        alloc_entries = 1;
 111        old_entries = old ? old->num_hook_entries : 0;
 112
 113        if (old) {
 114                orig_ops = nf_hook_entries_get_hook_ops(old);
 115
 116                for (i = 0; i < old_entries; i++) {
 117                        if (orig_ops[i] != &dummy_ops)
 118                                alloc_entries++;
 119                }
 120        }
 121
 122        if (alloc_entries > MAX_HOOK_COUNT)
 123                return ERR_PTR(-E2BIG);
 124
 125        new = allocate_hook_entries_size(alloc_entries);
 126        if (!new)
 127                return ERR_PTR(-ENOMEM);
 128
 129        new_ops = nf_hook_entries_get_hook_ops(new);
 130
 131        i = 0;
 132        nhooks = 0;
 133        while (i < old_entries) {
 134                if (orig_ops[i] == &dummy_ops) {
 135                        ++i;
 136                        continue;
 137                }
 138                if (inserted || reg->priority > orig_ops[i]->priority) {
 139                        new_ops[nhooks] = (void *)orig_ops[i];
 140                        new->hooks[nhooks] = old->hooks[i];
 141                        i++;
 142                } else {
 143                        new_ops[nhooks] = (void *)reg;
 144                        new->hooks[nhooks].hook = reg->hook;
 145                        new->hooks[nhooks].priv = reg->priv;
 146                        inserted = true;
 147                }
 148                nhooks++;
 149        }
 150
 151        if (!inserted) {
 152                new_ops[nhooks] = (void *)reg;
 153                new->hooks[nhooks].hook = reg->hook;
 154                new->hooks[nhooks].priv = reg->priv;
 155        }
 156
 157        return new;
 158}
 159
 160static void hooks_validate(const struct nf_hook_entries *hooks)
 161{
 162#ifdef CONFIG_DEBUG_KERNEL
 163        struct nf_hook_ops **orig_ops;
 164        int prio = INT_MIN;
 165        size_t i = 0;
 166
 167        orig_ops = nf_hook_entries_get_hook_ops(hooks);
 168
 169        for (i = 0; i < hooks->num_hook_entries; i++) {
 170                if (orig_ops[i] == &dummy_ops)
 171                        continue;
 172
 173                WARN_ON(orig_ops[i]->priority < prio);
 174
 175                if (orig_ops[i]->priority > prio)
 176                        prio = orig_ops[i]->priority;
 177        }
 178#endif
 179}
 180
 181/*
 182 * __nf_hook_entries_try_shrink - try to shrink hook array
 183 *
 184 * @pp -- location of hook blob
 185 *
 186 * Hook unregistration must always succeed, so to-be-removed hooks
 187 * are replaced by a dummy one that will just move to next hook.
 188 *
 189 * This counts the current dummy hooks, attempts to allocate new blob,
 190 * copies the live hooks, then replaces and discards old one.
 191 *
 192 * return values:
 193 *
 194 * Returns address to free, or NULL.
 195 */
 196static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp)
 197{
 198        struct nf_hook_entries *old, *new = NULL;
 199        unsigned int i, j, skip = 0, hook_entries;
 200        struct nf_hook_ops **orig_ops;
 201        struct nf_hook_ops **new_ops;
 202
 203        old = nf_entry_dereference(*pp);
 204        if (WARN_ON_ONCE(!old))
 205                return NULL;
 206
 207        orig_ops = nf_hook_entries_get_hook_ops(old);
 208        for (i = 0; i < old->num_hook_entries; i++) {
 209                if (orig_ops[i] == &dummy_ops)
 210                        skip++;
 211        }
 212
 213        /* if skip == hook_entries all hooks have been removed */
 214        hook_entries = old->num_hook_entries;
 215        if (skip == hook_entries)
 216                goto out_assign;
 217
 218        if (skip == 0)
 219                return NULL;
 220
 221        hook_entries -= skip;
 222        new = allocate_hook_entries_size(hook_entries);
 223        if (!new)
 224                return NULL;
 225
 226        new_ops = nf_hook_entries_get_hook_ops(new);
 227        for (i = 0, j = 0; i < old->num_hook_entries; i++) {
 228                if (orig_ops[i] == &dummy_ops)
 229                        continue;
 230                new->hooks[j] = old->hooks[i];
 231                new_ops[j] = (void *)orig_ops[i];
 232                j++;
 233        }
 234        hooks_validate(new);
 235out_assign:
 236        rcu_assign_pointer(*pp, new);
 237        return old;
 238}
 239
 240static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
 241{
 242        if (reg->pf != NFPROTO_NETDEV)
 243                return net->nf.hooks[reg->pf]+reg->hooknum;
 244
 245#ifdef CONFIG_NETFILTER_INGRESS
 246        if (reg->hooknum == NF_NETDEV_INGRESS) {
 247                if (reg->dev && dev_net(reg->dev) == net)
 248                        return &reg->dev->nf_hooks_ingress;
 249        }
 250#endif
 251        WARN_ON_ONCE(1);
 252        return NULL;
 253}
 254
 255int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
 256{
 257        struct nf_hook_entries *p, *new_hooks;
 258        struct nf_hook_entries __rcu **pp;
 259
 260        if (reg->pf == NFPROTO_NETDEV) {
 261#ifndef CONFIG_NETFILTER_INGRESS
 262                if (reg->hooknum == NF_NETDEV_INGRESS)
 263                        return -EOPNOTSUPP;
 264#endif
 265                if (reg->hooknum != NF_NETDEV_INGRESS ||
 266                    !reg->dev || dev_net(reg->dev) != net)
 267                        return -EINVAL;
 268        }
 269
 270        pp = nf_hook_entry_head(net, reg);
 271        if (!pp)
 272                return -EINVAL;
 273
 274        mutex_lock(&nf_hook_mutex);
 275
 276        p = nf_entry_dereference(*pp);
 277        new_hooks = nf_hook_entries_grow(p, reg);
 278
 279        if (!IS_ERR(new_hooks))
 280                rcu_assign_pointer(*pp, new_hooks);
 281
 282        mutex_unlock(&nf_hook_mutex);
 283        if (IS_ERR(new_hooks))
 284                return PTR_ERR(new_hooks);
 285
 286        hooks_validate(new_hooks);
 287#ifdef CONFIG_NETFILTER_INGRESS
 288        if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
 289                net_inc_ingress_queue();
 290#endif
 291#ifdef HAVE_JUMP_LABEL
 292        static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
 293#endif
 294        synchronize_net();
 295        BUG_ON(p == new_hooks);
 296        kvfree(p);
 297        return 0;
 298}
 299EXPORT_SYMBOL(nf_register_net_hook);
 300
 301/*
 302 * __nf_unregister_net_hook - remove a hook from blob
 303 *
 304 * @oldp: current address of hook blob
 305 * @unreg: hook to unregister
 306 *
 307 * This cannot fail, hook unregistration must always succeed.
 308 * Therefore replace the to-be-removed hook with a dummy hook.
 309 */
 310static void __nf_unregister_net_hook(struct nf_hook_entries *old,
 311                                     const struct nf_hook_ops *unreg)
 312{
 313        struct nf_hook_ops **orig_ops;
 314        bool found = false;
 315        unsigned int i;
 316
 317        orig_ops = nf_hook_entries_get_hook_ops(old);
 318        for (i = 0; i < old->num_hook_entries; i++) {
 319                if (orig_ops[i] != unreg)
 320                        continue;
 321                WRITE_ONCE(old->hooks[i].hook, accept_all);
 322                WRITE_ONCE(orig_ops[i], &dummy_ops);
 323                found = true;
 324                break;
 325        }
 326
 327        if (found) {
 328#ifdef CONFIG_NETFILTER_INGRESS
 329                if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
 330                        net_dec_ingress_queue();
 331#endif
 332#ifdef HAVE_JUMP_LABEL
 333                static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
 334#endif
 335        } else {
 336                WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
 337        }
 338}
 339
 340void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
 341{
 342        struct nf_hook_entries __rcu **pp;
 343        struct nf_hook_entries *p;
 344        unsigned int nfq;
 345
 346        pp = nf_hook_entry_head(net, reg);
 347        if (!pp)
 348                return;
 349
 350        mutex_lock(&nf_hook_mutex);
 351
 352        p = nf_entry_dereference(*pp);
 353        if (WARN_ON_ONCE(!p)) {
 354                mutex_unlock(&nf_hook_mutex);
 355                return;
 356        }
 357
 358        __nf_unregister_net_hook(p, reg);
 359
 360        p = __nf_hook_entries_try_shrink(pp);
 361        mutex_unlock(&nf_hook_mutex);
 362        if (!p)
 363                return;
 364
 365        synchronize_net();
 366
 367        /* other cpu might still process nfqueue verdict that used reg */
 368        nfq = nf_queue_nf_hook_drop(net);
 369        if (nfq)
 370                synchronize_net();
 371        kvfree(p);
 372}
 373EXPORT_SYMBOL(nf_unregister_net_hook);
 374
 375int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 376                          unsigned int n)
 377{
 378        unsigned int i;
 379        int err = 0;
 380
 381        for (i = 0; i < n; i++) {
 382                err = nf_register_net_hook(net, &reg[i]);
 383                if (err)
 384                        goto err;
 385        }
 386        return err;
 387
 388err:
 389        if (i > 0)
 390                nf_unregister_net_hooks(net, reg, i);
 391        return err;
 392}
 393EXPORT_SYMBOL(nf_register_net_hooks);
 394
 395void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
 396                             unsigned int hookcount)
 397{
 398        struct nf_hook_entries *to_free[16], *p;
 399        struct nf_hook_entries __rcu **pp;
 400        unsigned int i, j, n;
 401
 402        mutex_lock(&nf_hook_mutex);
 403        for (i = 0; i < hookcount; i++) {
 404                pp = nf_hook_entry_head(net, &reg[i]);
 405                if (!pp)
 406                        continue;
 407
 408                p = nf_entry_dereference(*pp);
 409                if (WARN_ON_ONCE(!p))
 410                        continue;
 411                __nf_unregister_net_hook(p, &reg[i]);
 412        }
 413        mutex_unlock(&nf_hook_mutex);
 414
 415        do {
 416                n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
 417
 418                mutex_lock(&nf_hook_mutex);
 419
 420                for (i = 0, j = 0; i < hookcount && j < n; i++) {
 421                        pp = nf_hook_entry_head(net, &reg[i]);
 422                        if (!pp)
 423                                continue;
 424
 425                        p = nf_entry_dereference(*pp);
 426                        if (!p)
 427                                continue;
 428
 429                        to_free[j] = __nf_hook_entries_try_shrink(pp);
 430                        if (to_free[j])
 431                                ++j;
 432                }
 433
 434                mutex_unlock(&nf_hook_mutex);
 435
 436                if (j) {
 437                        unsigned int nfq;
 438
 439                        synchronize_net();
 440
 441                        /* need 2nd synchronize_net() if nfqueue is used, skb
 442                         * can get reinjected right before nf_queue_hook_drop()
 443                         */
 444                        nfq = nf_queue_nf_hook_drop(net);
 445                        if (nfq)
 446                                synchronize_net();
 447
 448                        for (i = 0; i < j; i++)
 449                                kvfree(to_free[i]);
 450                }
 451
 452                reg += n;
 453                hookcount -= n;
 454        } while (hookcount > 0);
 455}
 456EXPORT_SYMBOL(nf_unregister_net_hooks);
 457
 458/* Returns 1 if okfn() needs to be executed by the caller,
 459 * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
 460int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 461                 const struct nf_hook_entries *e, unsigned int s)
 462{
 463        unsigned int verdict;
 464        int ret;
 465
 466        for (; s < e->num_hook_entries; s++) {
 467                verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
 468                switch (verdict & NF_VERDICT_MASK) {
 469                case NF_ACCEPT:
 470                        break;
 471                case NF_DROP:
 472                        kfree_skb(skb);
 473                        ret = NF_DROP_GETERR(verdict);
 474                        if (ret == 0)
 475                                ret = -EPERM;
 476                        return ret;
 477                case NF_QUEUE:
 478                        ret = nf_queue(skb, state, e, s, verdict);
 479                        if (ret == 1)
 480                                continue;
 481                        return ret;
 482                default:
 483                        /* Implicit handling for NF_STOLEN, as well as any other
 484                         * non conventional verdicts.
 485                         */
 486                        return 0;
 487                }
 488        }
 489
 490        return 1;
 491}
 492EXPORT_SYMBOL(nf_hook_slow);
 493
 494
 495int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
 496{
 497        if (writable_len > skb->len)
 498                return 0;
 499
 500        /* Not exclusive use of packet?  Must copy. */
 501        if (!skb_cloned(skb)) {
 502                if (writable_len <= skb_headlen(skb))
 503                        return 1;
 504        } else if (skb_clone_writable(skb, writable_len))
 505                return 1;
 506
 507        if (writable_len <= skb_headlen(skb))
 508                writable_len = 0;
 509        else
 510                writable_len -= skb_headlen(skb);
 511
 512        return !!__pskb_pull_tail(skb, writable_len);
 513}
 514EXPORT_SYMBOL(skb_make_writable);
 515
 516/* This needs to be compiled in any case to avoid dependencies between the
 517 * nfnetlink_queue code and nf_conntrack.
 518 */
 519struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
 520EXPORT_SYMBOL_GPL(nfnl_ct_hook);
 521
 522#if IS_ENABLED(CONFIG_NF_CONNTRACK)
 523/* This does not belong here, but locally generated errors need it if connection
 524   tracking in use: without this, connection may not be in hash table, and hence
 525   manufactured ICMP or RST packets will not be associated with it. */
 526void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
 527                __rcu __read_mostly;
 528EXPORT_SYMBOL(ip_ct_attach);
 529
 530void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
 531{
 532        void (*attach)(struct sk_buff *, const struct sk_buff *);
 533
 534        if (skb->_nfct) {
 535                rcu_read_lock();
 536                attach = rcu_dereference(ip_ct_attach);
 537                if (attach)
 538                        attach(new, skb);
 539                rcu_read_unlock();
 540        }
 541}
 542EXPORT_SYMBOL(nf_ct_attach);
 543
 544void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
 545EXPORT_SYMBOL(nf_ct_destroy);
 546
 547void nf_conntrack_destroy(struct nf_conntrack *nfct)
 548{
 549        void (*destroy)(struct nf_conntrack *);
 550
 551        rcu_read_lock();
 552        destroy = rcu_dereference(nf_ct_destroy);
 553        BUG_ON(destroy == NULL);
 554        destroy(nfct);
 555        rcu_read_unlock();
 556}
 557EXPORT_SYMBOL(nf_conntrack_destroy);
 558
 559/* Built-in default zone used e.g. by modules. */
 560const struct nf_conntrack_zone nf_ct_zone_dflt = {
 561        .id     = NF_CT_DEFAULT_ZONE_ID,
 562        .dir    = NF_CT_DEFAULT_ZONE_DIR,
 563};
 564EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
 565#endif /* CONFIG_NF_CONNTRACK */
 566
 567#ifdef CONFIG_NF_NAT_NEEDED
 568void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
 569EXPORT_SYMBOL(nf_nat_decode_session_hook);
 570#endif
 571
 572static int __net_init netfilter_net_init(struct net *net)
 573{
 574        int i, h;
 575
 576        for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
 577                for (h = 0; h < NF_MAX_HOOKS; h++)
 578                        RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
 579        }
 580
 581#ifdef CONFIG_PROC_FS
 582        net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
 583                                                net->proc_net);
 584        if (!net->nf.proc_netfilter) {
 585                if (!net_eq(net, &init_net))
 586                        pr_err("cannot create netfilter proc entry");
 587
 588                return -ENOMEM;
 589        }
 590#endif
 591
 592        return 0;
 593}
 594
 595static void __net_exit netfilter_net_exit(struct net *net)
 596{
 597        remove_proc_entry("netfilter", net->proc_net);
 598}
 599
 600static struct pernet_operations netfilter_net_ops = {
 601        .init = netfilter_net_init,
 602        .exit = netfilter_net_exit,
 603};
 604
 605int __init netfilter_init(void)
 606{
 607        int ret;
 608
 609        ret = register_pernet_subsys(&netfilter_net_ops);
 610        if (ret < 0)
 611                goto err;
 612
 613        ret = netfilter_log_init();
 614        if (ret < 0)
 615                goto err_pernet;
 616
 617        return 0;
 618err_pernet:
 619        unregister_pernet_subsys(&netfilter_net_ops);
 620err:
 621        return ret;
 622}
 623
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.