linux/net/ipv4/xfrm4_policy.c
<<
>>
Prefs
   1/*
   2 * xfrm4_policy.c
   3 *
   4 * Changes:
   5 *      Kazunori MIYAZAWA @USAGI
   6 *      YOSHIFUJI Hideaki @USAGI
   7 *              Split up af-specific portion
   8 *
   9 */
  10
  11#include <linux/err.h>
  12#include <linux/kernel.h>
  13#include <linux/inetdevice.h>
  14#include <net/dst.h>
  15#include <net/xfrm.h>
  16#include <net/ip.h>
  17
  /*
   * Forward declaration: xfrm4_garbage_collect() references this object
   * before its initializer, which appears further down in this file.
   */
  18static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
  19
  20static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
  21                                          xfrm_address_t *saddr,
  22                                          xfrm_address_t *daddr)
  23{
  24        struct flowi fl = {
  25                .nl_u = {
  26                        .ip4_u = {
  27                                .tos = tos,
  28                                .daddr = daddr->a4,
  29                        },
  30                },
  31        };
  32        struct dst_entry *dst;
  33        struct rtable *rt;
  34        int err;
  35
  36        if (saddr)
  37                fl.fl4_src = saddr->a4;
  38
  39        err = __ip_route_output_key(net, &rt, &fl);
  40        dst = &rt->u.dst;
  41        if (err)
  42                dst = ERR_PTR(err);
  43        return dst;
  44}
  45
  46static int xfrm4_get_saddr(struct net *net,
  47                           xfrm_address_t *saddr, xfrm_address_t *daddr)
  48{
  49        struct dst_entry *dst;
  50        struct rtable *rt;
  51
  52        dst = xfrm4_dst_lookup(net, 0, NULL, daddr);
  53        if (IS_ERR(dst))
  54                return -EHOSTUNREACH;
  55
  56        rt = (struct rtable *)dst;
  57        saddr->a4 = rt->rt_src;
  58        dst_release(dst);
  59        return 0;
  60}
  61
  62static int xfrm4_get_tos(struct flowi *fl)
  63{
  64        return fl->fl4_tos;
  65}
  66
  /*
   * xfrm4_init_path - init_path hook of xfrm4_policy_afinfo.
   *
   * Does nothing with its arguments and always reports success (0).
   */
  static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
                             int nfheader_len)
  {
          const int status = 0;

          return status;
  }
  72
  73static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
  74                          struct flowi *fl)
  75{
  76        struct rtable *rt = (struct rtable *)xdst->route;
  77
  78        xdst->u.rt.fl = *fl;
  79
  80        xdst->u.dst.dev = dev;
  81        dev_hold(dev);
  82
  83        xdst->u.rt.idev = in_dev_get(dev);
  84        if (!xdst->u.rt.idev)
  85                return -ENODEV;
  86
  87        xdst->u.rt.peer = rt->peer;
  88        if (rt->peer)
  89                atomic_inc(&rt->peer->refcnt);
  90
  91        /* Sheit... I remember I did this right. Apparently,
  92         * it was magically lost, so this code needs audit */
  93        xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
  94                                              RTCF_LOCAL);
  95        xdst->u.rt.rt_type = rt->rt_type;
  96        xdst->u.rt.rt_src = rt->rt_src;
  97        xdst->u.rt.rt_dst = rt->rt_dst;
  98        xdst->u.rt.rt_gateway = rt->rt_gateway;
  99        xdst->u.rt.rt_spec_dst = rt->rt_spec_dst;
 100
 101        return 0;
 102}
 103
 104static void
 105_decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 106{
 107        struct iphdr *iph = ip_hdr(skb);
 108        u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
 109
 110        memset(fl, 0, sizeof(struct flowi));
 111        fl->mark = skb->mark;
 112
 113        if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
 114                switch (iph->protocol) {
 115                case IPPROTO_UDP:
 116                case IPPROTO_UDPLITE:
 117                case IPPROTO_TCP:
 118                case IPPROTO_SCTP:
 119                case IPPROTO_DCCP:
 120                        if (xprth + 4 < skb->data ||
 121                            pskb_may_pull(skb, xprth + 4 - skb->data)) {
 122                                __be16 *ports = (__be16 *)xprth;
 123
 124                                fl->fl_ip_sport = ports[!!reverse];
 125                                fl->fl_ip_dport = ports[!reverse];
 126                        }
 127                        break;
 128
 129                case IPPROTO_ICMP:
 130                        if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
 131                                u8 *icmp = xprth;
 132
 133                                fl->fl_icmp_type = icmp[0];
 134                                fl->fl_icmp_code = icmp[1];
 135                        }
 136                        break;
 137
 138                case IPPROTO_ESP:
 139                        if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 140                                __be32 *ehdr = (__be32 *)xprth;
 141
 142                                fl->fl_ipsec_spi = ehdr[0];
 143                        }
 144                        break;
 145
 146                case IPPROTO_AH:
 147                        if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
 148                                __be32 *ah_hdr = (__be32*)xprth;
 149
 150                                fl->fl_ipsec_spi = ah_hdr[1];
 151                        }
 152                        break;
 153
 154                case IPPROTO_COMP:
 155                        if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
 156                                __be16 *ipcomp_hdr = (__be16 *)xprth;
 157
 158                                fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
 159                        }
 160                        break;
 161                default:
 162                        fl->fl_ipsec_spi = 0;
 163                        break;
 164                }
 165        }
 166        fl->proto = iph->protocol;
 167        fl->fl4_dst = reverse ? iph->saddr : iph->daddr;
 168        fl->fl4_src = reverse ? iph->daddr : iph->saddr;
 169        fl->fl4_tos = iph->tos;
 170}
 171
 172static inline int xfrm4_garbage_collect(struct dst_ops *ops)
 173{
 174        struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);
 175
 176        xfrm4_policy_afinfo.garbage_collect(net);
 177        return (atomic_read(&ops->entries) > ops->gc_thresh * 2);
 178}
 179
 180static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
 181{
 182        struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 183        struct dst_entry *path = xdst->route;
 184
 185        path->ops->update_pmtu(path, mtu);
 186}
 187
 188static void xfrm4_dst_destroy(struct dst_entry *dst)
 189{
 190        struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 191
 192        if (likely(xdst->u.rt.idev))
 193                in_dev_put(xdst->u.rt.idev);
 194        if (likely(xdst->u.rt.peer))
 195                inet_putpeer(xdst->u.rt.peer);
 196        xfrm_dst_destroy(xdst);
 197}
 198
 199static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 200                             int unregister)
 201{
 202        struct xfrm_dst *xdst;
 203
 204        if (!unregister)
 205                return;
 206
 207        xdst = (struct xfrm_dst *)dst;
 208        if (xdst->u.rt.idev->dev == dev) {
 209                struct in_device *loopback_idev =
 210                        in_dev_get(dev_net(dev)->loopback_dev);
 211                BUG_ON(!loopback_idev);
 212
 213                do {
 214                        in_dev_put(xdst->u.rt.idev);
 215                        xdst->u.rt.idev = loopback_idev;
 216                        in_dev_hold(loopback_idev);
 217                        xdst = (struct xfrm_dst *)xdst->u.dst.child;
 218                } while (xdst->u.dst.xfrm);
 219
 220                __in_dev_put(loopback_idev);
 221        }
 222
 223        xfrm_dst_ifdown(dst, dev);
 224}
 225
 226static struct dst_ops xfrm4_dst_ops = {
 227        .family =               AF_INET,
 228        .protocol =             cpu_to_be16(ETH_P_IP),
 229        .gc =                   xfrm4_garbage_collect,
 230        .update_pmtu =          xfrm4_update_pmtu,
 231        .destroy =              xfrm4_dst_destroy,
 232        .ifdown =               xfrm4_dst_ifdown,
 233        .local_out =            __ip_local_out,
 234        .gc_thresh =            1024,
 235        .entries =              ATOMIC_INIT(0),
 236};
 237
 238static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 239        .family =               AF_INET,
 240        .dst_ops =              &xfrm4_dst_ops,
 241        .dst_lookup =           xfrm4_dst_lookup,
 242        .get_saddr =            xfrm4_get_saddr,
 243        .decode_session =       _decode_session4,
 244        .get_tos =              xfrm4_get_tos,
 245        .init_path =            xfrm4_init_path,
 246        .fill_dst =             xfrm4_fill_dst,
 247};
 248
 #ifdef CONFIG_SYSCTL
 /* Handle returned by register_net_sysctl_table() in xfrm4_init(). */
 static struct ctl_table_header *sysctl_hdr;

 /*
  * Sysctl table with a single integer entry, "xfrm4_gc_thresh", backed
  * by init_net's xfrm4 dst_ops gc_thresh.  The empty entry terminates
  * the table.
  */
 static struct ctl_table xfrm4_policy_table[] = {
         {
                 .procname       = "xfrm4_gc_thresh",
                 .mode           = 0644,
                 .maxlen         = sizeof(int),
                 .data           = &init_net.xfrm.xfrm4_dst_ops.gc_thresh,
                 .proc_handler   = proc_dointvec,
         },
         { }
 };
 #endif
 263
 264static void __init xfrm4_policy_init(void)
 265{
 266        xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
 267}
 268
 269static void __exit xfrm4_policy_fini(void)
 270{
 271#ifdef CONFIG_SYSCTL
 272        if (sysctl_hdr)
 273                unregister_net_sysctl_table(sysctl_hdr);
 274#endif
 275        xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
 276}
 277
 278void __init xfrm4_init(int rt_max_size)
 279{
 280        /*
 281         * Select a default value for the gc_thresh based on the main route
 282         * table hash size.  It seems to me the worst case scenario is when
 283         * we have ipsec operating in transport mode, in which we create a
 284         * dst_entry per socket.  The xfrm gc algorithm starts trying to remove
 285         * entries at gc_thresh, and prevents new allocations as 2*gc_thresh
 286         * so lets set an initial xfrm gc_thresh value at the rt_max_size/2.
 287         * That will let us store an ipsec connection per route table entry,
 288         * and start cleaning when were 1/2 full
 289         */
 290        xfrm4_dst_ops.gc_thresh = rt_max_size/2;
 291
 292        xfrm4_state_init();
 293        xfrm4_policy_init();
 294#ifdef CONFIG_SYSCTL
 295        sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path,
 296                                                xfrm4_policy_table);
 297#endif
 298}
 299
 300