linux/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
<<
>>
Prefs
   1/* (C) 1999-2001 Paul `Rusty' Russell
   2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
   3 *
   4 * This program is free software; you can redistribute it and/or modify
   5 * it under the terms of the GNU General Public License version 2 as
   6 * published by the Free Software Foundation.
   7 */
   8
   9#include <linux/types.h>
  10#include <linux/ip.h>
  11#include <linux/netfilter.h>
  12#include <linux/module.h>
  13#include <linux/skbuff.h>
  14#include <linux/icmp.h>
  15#include <linux/sysctl.h>
  16#include <net/route.h>
  17#include <net/ip.h>
  18
  19#include <linux/netfilter_ipv4.h>
  20#include <net/netfilter/nf_conntrack.h>
  21#include <net/netfilter/nf_conntrack_helper.h>
  22#include <net/netfilter/nf_conntrack_l4proto.h>
  23#include <net/netfilter/nf_conntrack_l3proto.h>
  24#include <net/netfilter/nf_conntrack_core.h>
  25#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
  26#include <net/netfilter/nf_nat_helper.h>
  27
  28int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
  29                              struct nf_conn *ct,
  30                              enum ip_conntrack_info ctinfo);
  31EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
  32
  33static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
  34                              struct nf_conntrack_tuple *tuple)
  35{
  36        const __be32 *ap;
  37        __be32 _addrs[2];
  38        ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
  39                                sizeof(u_int32_t) * 2, _addrs);
  40        if (ap == NULL)
  41                return false;
  42
  43        tuple->src.u3.ip = ap[0];
  44        tuple->dst.u3.ip = ap[1];
  45
  46        return true;
  47}
  48
  49static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
  50                              const struct nf_conntrack_tuple *orig)
  51{
  52        tuple->src.u3.ip = orig->dst.u3.ip;
  53        tuple->dst.u3.ip = orig->src.u3.ip;
  54
  55        return true;
  56}
  57
  58static int ipv4_print_tuple(struct seq_file *s,
  59                            const struct nf_conntrack_tuple *tuple)
  60{
  61        return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
  62                          NIPQUAD(tuple->src.u3.ip),
  63                          NIPQUAD(tuple->dst.u3.ip));
  64}
  65
  66/* Returns new sk_buff, or NULL */
  67static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
  68{
  69        int err;
  70
  71        skb_orphan(skb);
  72
  73        local_bh_disable();
  74        err = ip_defrag(skb, user);
  75        local_bh_enable();
  76
  77        if (!err)
  78                ip_send_check(ip_hdr(skb));
  79
  80        return err;
  81}
  82
  83static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
  84                            unsigned int *dataoff, u_int8_t *protonum)
  85{
  86        const struct iphdr *iph;
  87        struct iphdr _iph;
  88
  89        iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
  90        if (iph == NULL)
  91                return -NF_DROP;
  92
  93        /* Conntrack defragments packets, we might still see fragments
  94         * inside ICMP packets though. */
  95        if (iph->frag_off & htons(IP_OFFSET))
  96                return -NF_DROP;
  97
  98        *dataoff = nhoff + (iph->ihl << 2);
  99        *protonum = iph->protocol;
 100
 101        return NF_ACCEPT;
 102}
 103
 104static unsigned int ipv4_confirm(unsigned int hooknum,
 105                                 struct sk_buff *skb,
 106                                 const struct net_device *in,
 107                                 const struct net_device *out,
 108                                 int (*okfn)(struct sk_buff *))
 109{
 110        struct nf_conn *ct;
 111        enum ip_conntrack_info ctinfo;
 112        const struct nf_conn_help *help;
 113        const struct nf_conntrack_helper *helper;
 114        unsigned int ret;
 115
 116        /* This is where we call the helper: as the packet goes out. */
 117        ct = nf_ct_get(skb, &ctinfo);
 118        if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
 119                goto out;
 120
 121        help = nfct_help(ct);
 122        if (!help)
 123                goto out;
 124
 125        /* rcu_read_lock()ed by nf_hook_slow */
 126        helper = rcu_dereference(help->helper);
 127        if (!helper)
 128                goto out;
 129
 130        ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
 131                           ct, ctinfo);
 132        if (ret != NF_ACCEPT)
 133                return ret;
 134
 135        if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
 136                typeof(nf_nat_seq_adjust_hook) seq_adjust;
 137
 138                seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
 139                if (!seq_adjust || !seq_adjust(skb, ct, ctinfo))
 140                        return NF_DROP;
 141        }
 142out:
 143        /* We've seen it coming out the other side: confirm it */
 144        return nf_conntrack_confirm(skb);
 145}
 146
 147static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
 148                                          struct sk_buff *skb,
 149                                          const struct net_device *in,
 150                                          const struct net_device *out,
 151                                          int (*okfn)(struct sk_buff *))
 152{
 153#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
 154        /* Previously seen (loopback)?  Ignore.  Do this before
 155           fragment check. */
 156        if (skb->nfct)
 157                return NF_ACCEPT;
 158#endif
 159
 160        /* Gather fragments. */
 161        if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
 162                if (nf_ct_ipv4_gather_frags(skb,
 163                                            hooknum == NF_INET_PRE_ROUTING ?
 164                                            IP_DEFRAG_CONNTRACK_IN :
 165                                            IP_DEFRAG_CONNTRACK_OUT))
 166                        return NF_STOLEN;
 167        }
 168        return NF_ACCEPT;
 169}
 170
 171static unsigned int ipv4_conntrack_in(unsigned int hooknum,
 172                                      struct sk_buff *skb,
 173                                      const struct net_device *in,
 174                                      const struct net_device *out,
 175                                      int (*okfn)(struct sk_buff *))
 176{
 177        return nf_conntrack_in(PF_INET, hooknum, skb);
 178}
 179
 180static unsigned int ipv4_conntrack_local(unsigned int hooknum,
 181                                         struct sk_buff *skb,
 182                                         const struct net_device *in,
 183                                         const struct net_device *out,
 184                                         int (*okfn)(struct sk_buff *))
 185{
 186        /* root is playing with raw sockets. */
 187        if (skb->len < sizeof(struct iphdr) ||
 188            ip_hdrlen(skb) < sizeof(struct iphdr)) {
 189                if (net_ratelimit())
 190                        printk("ipt_hook: happy cracking.\n");
 191                return NF_ACCEPT;
 192        }
 193        return nf_conntrack_in(PF_INET, hooknum, skb);
 194}
 195
 196/* Connection tracking may drop packets, but never alters them, so
 197   make it the first hook. */
 198static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
 199        {
 200                .hook           = ipv4_conntrack_defrag,
 201                .owner          = THIS_MODULE,
 202                .pf             = PF_INET,
 203                .hooknum        = NF_INET_PRE_ROUTING,
 204                .priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
 205        },
 206        {
 207                .hook           = ipv4_conntrack_in,
 208                .owner          = THIS_MODULE,
 209                .pf             = PF_INET,
 210                .hooknum        = NF_INET_PRE_ROUTING,
 211                .priority       = NF_IP_PRI_CONNTRACK,
 212        },
 213        {
 214                .hook           = ipv4_conntrack_defrag,
 215                .owner          = THIS_MODULE,
 216                .pf             = PF_INET,
 217                .hooknum        = NF_INET_LOCAL_OUT,
 218                .priority       = NF_IP_PRI_CONNTRACK_DEFRAG,
 219        },
 220        {
 221                .hook           = ipv4_conntrack_local,
 222                .owner          = THIS_MODULE,
 223                .pf             = PF_INET,
 224                .hooknum        = NF_INET_LOCAL_OUT,
 225                .priority       = NF_IP_PRI_CONNTRACK,
 226        },
 227        {
 228                .hook           = ipv4_confirm,
 229                .owner          = THIS_MODULE,
 230                .pf             = PF_INET,
 231                .hooknum        = NF_INET_POST_ROUTING,
 232                .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
 233        },
 234        {
 235                .hook           = ipv4_confirm,
 236                .owner          = THIS_MODULE,
 237                .pf             = PF_INET,
 238                .hooknum        = NF_INET_LOCAL_IN,
 239                .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
 240        },
 241};
 242
 243#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 244static int log_invalid_proto_min = 0;
 245static int log_invalid_proto_max = 255;
 246
 247static ctl_table ip_ct_sysctl_table[] = {
 248        {
 249                .ctl_name       = NET_IPV4_NF_CONNTRACK_MAX,
 250                .procname       = "ip_conntrack_max",
 251                .data           = &nf_conntrack_max,
 252                .maxlen         = sizeof(int),
 253                .mode           = 0644,
 254                .proc_handler   = &proc_dointvec,
 255        },
 256        {
 257                .ctl_name       = NET_IPV4_NF_CONNTRACK_COUNT,
 258                .procname       = "ip_conntrack_count",
 259                .data           = &nf_conntrack_count,
 260                .maxlen         = sizeof(int),
 261                .mode           = 0444,
 262                .proc_handler   = &proc_dointvec,
 263        },
 264        {
 265                .ctl_name       = NET_IPV4_NF_CONNTRACK_BUCKETS,
 266                .procname       = "ip_conntrack_buckets",
 267                .data           = &nf_conntrack_htable_size,
 268                .maxlen         = sizeof(unsigned int),
 269                .mode           = 0444,
 270                .proc_handler   = &proc_dointvec,
 271        },
 272        {
 273                .ctl_name       = NET_IPV4_NF_CONNTRACK_CHECKSUM,
 274                .procname       = "ip_conntrack_checksum",
 275                .data           = &nf_conntrack_checksum,
 276                .maxlen         = sizeof(int),
 277                .mode           = 0644,
 278                .proc_handler   = &proc_dointvec,
 279        },
 280        {
 281                .ctl_name       = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
 282                .procname       = "ip_conntrack_log_invalid",
 283                .data           = &nf_ct_log_invalid,
 284                .maxlen         = sizeof(unsigned int),
 285                .mode           = 0644,
 286                .proc_handler   = &proc_dointvec_minmax,
 287                .strategy       = &sysctl_intvec,
 288                .extra1         = &log_invalid_proto_min,
 289                .extra2         = &log_invalid_proto_max,
 290        },
 291        {
 292                .ctl_name       = 0
 293        }
 294};
 295#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
 296
 297/* Fast function for those who don't want to parse /proc (and I don't
 298   blame them). */
 299/* Reversing the socket's dst/src point of view gives us the reply
 300   mapping. */
 301static int
 302getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 303{
 304        const struct inet_sock *inet = inet_sk(sk);
 305        const struct nf_conntrack_tuple_hash *h;
 306        struct nf_conntrack_tuple tuple;
 307
 308        memset(&tuple, 0, sizeof(tuple));
 309        tuple.src.u3.ip = inet->rcv_saddr;
 310        tuple.src.u.tcp.port = inet->sport;
 311        tuple.dst.u3.ip = inet->daddr;
 312        tuple.dst.u.tcp.port = inet->dport;
 313        tuple.src.l3num = PF_INET;
 314        tuple.dst.protonum = IPPROTO_TCP;
 315
 316        /* We only do TCP at the moment: is there a better way? */
 317        if (strcmp(sk->sk_prot->name, "TCP")) {
 318                pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n");
 319                return -ENOPROTOOPT;
 320        }
 321
 322        if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
 323                pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n",
 324                         *len, sizeof(struct sockaddr_in));
 325                return -EINVAL;
 326        }
 327
 328        h = nf_conntrack_find_get(&tuple);
 329        if (h) {
 330                struct sockaddr_in sin;
 331                struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
 332
 333                sin.sin_family = AF_INET;
 334                sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
 335                        .tuple.dst.u.tcp.port;
 336                sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
 337                        .tuple.dst.u3.ip;
 338                memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
 339
 340                pr_debug("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
 341                         NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
 342                nf_ct_put(ct);
 343                if (copy_to_user(user, &sin, sizeof(sin)) != 0)
 344                        return -EFAULT;
 345                else
 346                        return 0;
 347        }
 348        pr_debug("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
 349                 NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
 350                 NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
 351        return -ENOENT;
 352}
 353
 354#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 355
 356#include <linux/netfilter/nfnetlink.h>
 357#include <linux/netfilter/nfnetlink_conntrack.h>
 358
 359static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
 360                                const struct nf_conntrack_tuple *tuple)
 361{
 362        NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip);
 363        NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip);
 364        return 0;
 365
 366nla_put_failure:
 367        return -1;
 368}
 369
 370static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
 371        [CTA_IP_V4_SRC] = { .type = NLA_U32 },
 372        [CTA_IP_V4_DST] = { .type = NLA_U32 },
 373};
 374
 375static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
 376                                struct nf_conntrack_tuple *t)
 377{
 378        if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
 379                return -EINVAL;
 380
 381        t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]);
 382        t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]);
 383
 384        return 0;
 385}
 386#endif
 387
 388static struct nf_sockopt_ops so_getorigdst = {
 389        .pf             = PF_INET,
 390        .get_optmin     = SO_ORIGINAL_DST,
 391        .get_optmax     = SO_ORIGINAL_DST+1,
 392        .get            = &getorigdst,
 393        .owner          = THIS_MODULE,
 394};
 395
 396struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
 397        .l3proto         = PF_INET,
 398        .name            = "ipv4",
 399        .pkt_to_tuple    = ipv4_pkt_to_tuple,
 400        .invert_tuple    = ipv4_invert_tuple,
 401        .print_tuple     = ipv4_print_tuple,
 402        .get_l4proto     = ipv4_get_l4proto,
 403#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 404        .tuple_to_nlattr = ipv4_tuple_to_nlattr,
 405        .nlattr_to_tuple = ipv4_nlattr_to_tuple,
 406        .nla_policy      = ipv4_nla_policy,
 407#endif
 408#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 409        .ctl_table_path  = nf_net_ipv4_netfilter_sysctl_path,
 410        .ctl_table       = ip_ct_sysctl_table,
 411#endif
 412        .me              = THIS_MODULE,
 413};
 414
 415module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
 416                  &nf_conntrack_htable_size, 0600);
 417
 418MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
 419MODULE_ALIAS("ip_conntrack");
 420MODULE_LICENSE("GPL");
 421
 422static int __init nf_conntrack_l3proto_ipv4_init(void)
 423{
 424        int ret = 0;
 425
 426        need_conntrack();
 427
 428        ret = nf_register_sockopt(&so_getorigdst);
 429        if (ret < 0) {
 430                printk(KERN_ERR "Unable to register netfilter socket option\n");
 431                return ret;
 432        }
 433
 434        ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
 435        if (ret < 0) {
 436                printk("nf_conntrack_ipv4: can't register tcp.\n");
 437                goto cleanup_sockopt;
 438        }
 439
 440        ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
 441        if (ret < 0) {
 442                printk("nf_conntrack_ipv4: can't register udp.\n");
 443                goto cleanup_tcp;
 444        }
 445
 446        ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
 447        if (ret < 0) {
 448                printk("nf_conntrack_ipv4: can't register icmp.\n");
 449                goto cleanup_udp;
 450        }
 451
 452        ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
 453        if (ret < 0) {
 454                printk("nf_conntrack_ipv4: can't register ipv4\n");
 455                goto cleanup_icmp;
 456        }
 457
 458        ret = nf_register_hooks(ipv4_conntrack_ops,
 459                                ARRAY_SIZE(ipv4_conntrack_ops));
 460        if (ret < 0) {
 461                printk("nf_conntrack_ipv4: can't register hooks.\n");
 462                goto cleanup_ipv4;
 463        }
 464#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 465        ret = nf_conntrack_ipv4_compat_init();
 466        if (ret < 0)
 467                goto cleanup_hooks;
 468#endif
 469        return ret;
 470#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 471 cleanup_hooks:
 472        nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
 473#endif
 474 cleanup_ipv4:
 475        nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 476 cleanup_icmp:
 477        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
 478 cleanup_udp:
 479        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
 480 cleanup_tcp:
 481        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
 482 cleanup_sockopt:
 483        nf_unregister_sockopt(&so_getorigdst);
 484        return ret;
 485}
 486
 487static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 488{
 489        synchronize_net();
 490#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
 491        nf_conntrack_ipv4_compat_fini();
 492#endif
 493        nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
 494        nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
 495        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
 496        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
 497        nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
 498        nf_unregister_sockopt(&so_getorigdst);
 499}
 500
 501module_init(nf_conntrack_l3proto_ipv4_init);
 502module_exit(nf_conntrack_l3proto_ipv4_fini);
 503
 504void need_ipv4_conntrack(void)
 505{
 506        return;
 507}
 508EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
 509
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.