linux/net/xfrm/xfrm_policy.c
<<
>>
Prefs
   1/*
   2 * xfrm_policy.c
   3 *
   4 * Changes:
   5 *      Mitsuru KANDA @USAGI
   6 *      Kazunori MIYAZAWA @USAGI
   7 *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
   8 *              IPv6 support
   9 *      Kazunori MIYAZAWA @USAGI
  10 *      YOSHIFUJI Hideaki
  11 *              Split up af-specific portion
  12 *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
  13 *
  14 */
  15
  16#include <linux/err.h>
  17#include <linux/slab.h>
  18#include <linux/kmod.h>
  19#include <linux/list.h>
  20#include <linux/spinlock.h>
  21#include <linux/workqueue.h>
  22#include <linux/notifier.h>
  23#include <linux/netdevice.h>
  24#include <linux/netfilter.h>
  25#include <linux/module.h>
  26#include <linux/cache.h>
  27#include <linux/audit.h>
  28#include <net/dst.h>
  29#include <net/xfrm.h>
  30#include <net/ip.h>
  31#ifdef CONFIG_XFRM_STATISTICS
  32#include <net/snmp.h>
  33#endif
  34
  35#include "xfrm_hash.h"
  36
   /*
    * File-scope locks, tables and forward declarations.
    *
    * xfrm_cfg_mutex is exported to other modules; its users are not in this
    * part of the file.
    */
   37DEFINE_MUTEX(xfrm_cfg_mutex);
   38EXPORT_SYMBOL(xfrm_cfg_mutex);
   39
     /* NOTE(review): presumably xfrm_policy_sk_bundle_lock protects the
      * xfrm_policy_sk_bundles list - confirm against their users. */
   40static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
   41static struct dst_entry *xfrm_policy_sk_bundles;
     /* Write-locked (BH disabled) around policy hash-table resizes, policy
      * insert/lookup/flush and walker list updates; read-locked by
      * xfrm_spd_getinfo(). */
   42static DEFINE_RWLOCK(xfrm_policy_lock);
   43
     /* Per-address-family operations, reached through
      * xfrm_policy_get_afinfo()/xfrm_policy_put_afinfo().
      * NOTE(review): presumably indexed by family and guarded by
      * xfrm_policy_afinfo_lock - confirm. */
   44static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
   45static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
   46
     /* Slab cache; not referenced in this part of the file. */
   47static struct kmem_cache *xfrm_dst_cache __read_mostly;
   48
     /* Forward declarations of helpers used before their definitions. */
   49static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
   50static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
   51static void xfrm_init_pmtu(struct dst_entry *dst);
   52static int stale_bundle(struct dst_entry *dst);
   53static int xfrm_bundle_ok(struct xfrm_dst *xdst);
   54
   55
   56static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
   57                                                int dir);
  58
  59static inline int
  60__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
  61{
  62        const struct flowi4 *fl4 = &fl->u.ip4;
  63
  64        return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
  65                addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
  66                !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
  67                !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
  68                (fl4->flowi4_proto == sel->proto || !sel->proto) &&
  69                (fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
  70}
  71
  72static inline int
  73__xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
  74{
  75        const struct flowi6 *fl6 = &fl->u.ip6;
  76
  77        return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
  78                addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
  79                !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
  80                !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
  81                (fl6->flowi6_proto == sel->proto || !sel->proto) &&
  82                (fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
  83}
  84
  85int xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
  86                        unsigned short family)
  87{
  88        switch (family) {
  89        case AF_INET:
  90                return __xfrm4_selector_match(sel, fl);
  91        case AF_INET6:
  92                return __xfrm6_selector_match(sel, fl);
  93        }
  94        return 0;
  95}
  96
  97static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
  98                                                  const xfrm_address_t *saddr,
  99                                                  const xfrm_address_t *daddr,
 100                                                  int family)
 101{
 102        struct xfrm_policy_afinfo *afinfo;
 103        struct dst_entry *dst;
 104
 105        afinfo = xfrm_policy_get_afinfo(family);
 106        if (unlikely(afinfo == NULL))
 107                return ERR_PTR(-EAFNOSUPPORT);
 108
 109        dst = afinfo->dst_lookup(net, tos, saddr, daddr);
 110
 111        xfrm_policy_put_afinfo(afinfo);
 112
 113        return dst;
 114}
 115
 116static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
 117                                                xfrm_address_t *prev_saddr,
 118                                                xfrm_address_t *prev_daddr,
 119                                                int family)
 120{
 121        struct net *net = xs_net(x);
 122        xfrm_address_t *saddr = &x->props.saddr;
 123        xfrm_address_t *daddr = &x->id.daddr;
 124        struct dst_entry *dst;
 125
 126        if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
 127                saddr = x->coaddr;
 128                daddr = prev_daddr;
 129        }
 130        if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
 131                saddr = prev_saddr;
 132                daddr = x->coaddr;
 133        }
 134
 135        dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);
 136
 137        if (!IS_ERR(dst)) {
 138                if (prev_saddr != saddr)
 139                        memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
 140                if (prev_daddr != daddr)
 141                        memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
 142        }
 143
 144        return dst;
 145}
 146
 147static inline unsigned long make_jiffies(long secs)
 148{
 149        if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
 150                return MAX_SCHEDULE_TIMEOUT-1;
 151        else
 152                return secs*HZ;
 153}
 154
 155static void xfrm_policy_timer(unsigned long data)
 156{
 157        struct xfrm_policy *xp = (struct xfrm_policy*)data;
 158        unsigned long now = get_seconds();
 159        long next = LONG_MAX;
 160        int warn = 0;
 161        int dir;
 162
 163        read_lock(&xp->lock);
 164
 165        if (unlikely(xp->walk.dead))
 166                goto out;
 167
 168        dir = xfrm_policy_id2dir(xp->index);
 169
 170        if (xp->lft.hard_add_expires_seconds) {
 171                long tmo = xp->lft.hard_add_expires_seconds +
 172                        xp->curlft.add_time - now;
 173                if (tmo <= 0)
 174                        goto expired;
 175                if (tmo < next)
 176                        next = tmo;
 177        }
 178        if (xp->lft.hard_use_expires_seconds) {
 179                long tmo = xp->lft.hard_use_expires_seconds +
 180                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
 181                if (tmo <= 0)
 182                        goto expired;
 183                if (tmo < next)
 184                        next = tmo;
 185        }
 186        if (xp->lft.soft_add_expires_seconds) {
 187                long tmo = xp->lft.soft_add_expires_seconds +
 188                        xp->curlft.add_time - now;
 189                if (tmo <= 0) {
 190                        warn = 1;
 191                        tmo = XFRM_KM_TIMEOUT;
 192                }
 193                if (tmo < next)
 194                        next = tmo;
 195        }
 196        if (xp->lft.soft_use_expires_seconds) {
 197                long tmo = xp->lft.soft_use_expires_seconds +
 198                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
 199                if (tmo <= 0) {
 200                        warn = 1;
 201                        tmo = XFRM_KM_TIMEOUT;
 202                }
 203                if (tmo < next)
 204                        next = tmo;
 205        }
 206
 207        if (warn)
 208                km_policy_expired(xp, dir, 0, 0);
 209        if (next != LONG_MAX &&
 210            !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
 211                xfrm_pol_hold(xp);
 212
 213out:
 214        read_unlock(&xp->lock);
 215        xfrm_pol_put(xp);
 216        return;
 217
 218expired:
 219        read_unlock(&xp->lock);
 220        if (!xfrm_policy_delete(xp, dir))
 221                km_policy_expired(xp, dir, 1, 0);
 222        xfrm_pol_put(xp);
 223}
 224
 225static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
 226{
 227        struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
 228
 229        if (unlikely(pol->walk.dead))
 230                flo = NULL;
 231        else
 232                xfrm_pol_hold(pol);
 233
 234        return flo;
 235}
 236
 237static int xfrm_policy_flo_check(struct flow_cache_object *flo)
 238{
 239        struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
 240
 241        return !pol->walk.dead;
 242}
 243
 244static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
 245{
 246        xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
 247}
 248
 249static const struct flow_cache_ops xfrm_policy_fc_ops = {
 250        .get = xfrm_policy_flo_get,
 251        .check = xfrm_policy_flo_check,
 252        .delete = xfrm_policy_flo_delete,
 253};
 254
 255/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 256 * SPD calls.
 257 */
 258
 259struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 260{
 261        struct xfrm_policy *policy;
 262
 263        policy = kzalloc(sizeof(struct xfrm_policy), gfp);
 264
 265        if (policy) {
 266                write_pnet(&policy->xp_net, net);
 267                INIT_LIST_HEAD(&policy->walk.all);
 268                INIT_HLIST_NODE(&policy->bydst);
 269                INIT_HLIST_NODE(&policy->byidx);
 270                rwlock_init(&policy->lock);
 271                atomic_set(&policy->refcnt, 1);
 272                setup_timer(&policy->timer, xfrm_policy_timer,
 273                                (unsigned long)policy);
 274                policy->flo.ops = &xfrm_policy_fc_ops;
 275        }
 276        return policy;
 277}
 278EXPORT_SYMBOL(xfrm_policy_alloc);
 279
 280/* Destroy xfrm_policy: descendant resources must be released to this moment. */
 281
 282void xfrm_policy_destroy(struct xfrm_policy *policy)
 283{
 284        BUG_ON(!policy->walk.dead);
 285
 286        if (del_timer(&policy->timer))
 287                BUG();
 288
 289        security_xfrm_policy_free(policy->security);
 290        kfree(policy);
 291}
 292EXPORT_SYMBOL(xfrm_policy_destroy);
 293
 294/* Rule must be locked. Release descentant resources, announce
 295 * entry dead. The rule must be unlinked from lists to the moment.
 296 */
 297
 298static void xfrm_policy_kill(struct xfrm_policy *policy)
 299{
 300        policy->walk.dead = 1;
 301
 302        atomic_inc(&policy->genid);
 303
 304        if (del_timer(&policy->timer))
 305                xfrm_pol_put(policy);
 306
 307        xfrm_pol_put(policy);
 308}
 309
 310static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
 311
 312static inline unsigned int idx_hash(struct net *net, u32 index)
 313{
 314        return __idx_hash(index, net->xfrm.policy_idx_hmask);
 315}
 316
 317static struct hlist_head *policy_hash_bysel(struct net *net,
 318                                            const struct xfrm_selector *sel,
 319                                            unsigned short family, int dir)
 320{
 321        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 322        unsigned int hash = __sel_hash(sel, family, hmask);
 323
 324        return (hash == hmask + 1 ?
 325                &net->xfrm.policy_inexact[dir] :
 326                net->xfrm.policy_bydst[dir].table + hash);
 327}
 328
 329static struct hlist_head *policy_hash_direct(struct net *net,
 330                                             const xfrm_address_t *daddr,
 331                                             const xfrm_address_t *saddr,
 332                                             unsigned short family, int dir)
 333{
 334        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 335        unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
 336
 337        return net->xfrm.policy_bydst[dir].table + hash;
 338}
 339
  /*
   * Move every policy on @list into the new by-dst table @ndsttable, hashed
   * with @nhashmask, keeping the relative order of policies that land in the
   * same new bucket.
   *
   * Each pass over @list moves the first remaining entry to the head of its
   * new bucket and then appends, in list order, every later entry that
   * hashes to that same bucket. Entries for other buckets are left for the
   * next pass; passes repeat until @list is empty.
   */
  340static void xfrm_dst_hash_transfer(struct hlist_head *list,
  341                                   struct hlist_head *ndsttable,
  342                                   unsigned int nhashmask)
  343{
  344        struct hlist_node *entry, *tmp, *entry0 = NULL;
  345        struct xfrm_policy *pol;
  346        unsigned int h0 = 0;
  347
  348redo:
  349        hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
  350                unsigned int h;
  351
  352                h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
  353                                pol->family, nhashmask);
                     /* First entry of this pass: it picks the target bucket. */
  354                if (!entry0) {
  355                        hlist_del(entry);
  356                        hlist_add_head(&pol->bydst, ndsttable+h);
  357                        h0 = h;
  358                } else {
                             /* Different bucket: leave it for a later pass. */
  359                        if (h != h0)
  360                                continue;
  361                        hlist_del(entry);
                             /* Link after the previously moved entry to keep order. */
  362                        hlist_add_after(entry0, &pol->bydst);
  363                }
  364                entry0 = entry;
  365        }
             /* Entries for other buckets remain: start a fresh pass. */
  366        if (!hlist_empty(list)) {
  367                entry0 = NULL;
  368                goto redo;
  369        }
  370}
 371
 372static void xfrm_idx_hash_transfer(struct hlist_head *list,
 373                                   struct hlist_head *nidxtable,
 374                                   unsigned int nhashmask)
 375{
 376        struct hlist_node *entry, *tmp;
 377        struct xfrm_policy *pol;
 378
 379        hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
 380                unsigned int h;
 381
 382                h = __idx_hash(pol->index, nhashmask);
 383                hlist_add_head(&pol->byidx, nidxtable+h);
 384        }
 385}
 386
 387static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
 388{
 389        return ((old_hmask + 1) << 1) - 1;
 390}
 391
 392static void xfrm_bydst_resize(struct net *net, int dir)
 393{
 394        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 395        unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 396        unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
 397        struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
 398        struct hlist_head *ndst = xfrm_hash_alloc(nsize);
 399        int i;
 400
 401        if (!ndst)
 402                return;
 403
 404        write_lock_bh(&xfrm_policy_lock);
 405
 406        for (i = hmask; i >= 0; i--)
 407                xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
 408
 409        net->xfrm.policy_bydst[dir].table = ndst;
 410        net->xfrm.policy_bydst[dir].hmask = nhashmask;
 411
 412        write_unlock_bh(&xfrm_policy_lock);
 413
 414        xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
 415}
 416
 417static void xfrm_byidx_resize(struct net *net, int total)
 418{
 419        unsigned int hmask = net->xfrm.policy_idx_hmask;
 420        unsigned int nhashmask = xfrm_new_hash_mask(hmask);
 421        unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
 422        struct hlist_head *oidx = net->xfrm.policy_byidx;
 423        struct hlist_head *nidx = xfrm_hash_alloc(nsize);
 424        int i;
 425
 426        if (!nidx)
 427                return;
 428
 429        write_lock_bh(&xfrm_policy_lock);
 430
 431        for (i = hmask; i >= 0; i--)
 432                xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
 433
 434        net->xfrm.policy_byidx = nidx;
 435        net->xfrm.policy_idx_hmask = nhashmask;
 436
 437        write_unlock_bh(&xfrm_policy_lock);
 438
 439        xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
 440}
 441
 442static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
 443{
 444        unsigned int cnt = net->xfrm.policy_count[dir];
 445        unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
 446
 447        if (total)
 448                *total += cnt;
 449
 450        if ((hmask + 1) < xfrm_policy_hashmax &&
 451            cnt > hmask)
 452                return 1;
 453
 454        return 0;
 455}
 456
 457static inline int xfrm_byidx_should_resize(struct net *net, int total)
 458{
 459        unsigned int hmask = net->xfrm.policy_idx_hmask;
 460
 461        if ((hmask + 1) < xfrm_policy_hashmax &&
 462            total > hmask)
 463                return 1;
 464
 465        return 0;
 466}
 467
 468void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
 469{
 470        read_lock_bh(&xfrm_policy_lock);
 471        si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
 472        si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
 473        si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
 474        si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
 475        si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
 476        si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
 477        si->spdhcnt = net->xfrm.policy_idx_hmask;
 478        si->spdhmcnt = xfrm_policy_hashmax;
 479        read_unlock_bh(&xfrm_policy_lock);
 480}
 481EXPORT_SYMBOL(xfrm_spd_getinfo);
 482
 483static DEFINE_MUTEX(hash_resize_mutex);
 484static void xfrm_hash_resize(struct work_struct *work)
 485{
 486        struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
 487        int dir, total;
 488
 489        mutex_lock(&hash_resize_mutex);
 490
 491        total = 0;
 492        for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
 493                if (xfrm_bydst_should_resize(net, dir, &total))
 494                        xfrm_bydst_resize(net, dir);
 495        }
 496        if (xfrm_byidx_should_resize(net, total))
 497                xfrm_byidx_resize(net, total);
 498
 499        mutex_unlock(&hash_resize_mutex);
 500}
 501
 502/* Generate new index... KAME seems to generate them ordered by cost
 503 * of an absolute inpredictability of ordering of rules. This will not pass. */
 504static u32 xfrm_gen_index(struct net *net, int dir)
 505{
 506        static u32 idx_generator;
 507
 508        for (;;) {
 509                struct hlist_node *entry;
 510                struct hlist_head *list;
 511                struct xfrm_policy *p;
 512                u32 idx;
 513                int found;
 514
 515                idx = (idx_generator | dir);
 516                idx_generator += 8;
 517                if (idx == 0)
 518                        idx = 8;
 519                list = net->xfrm.policy_byidx + idx_hash(net, idx);
 520                found = 0;
 521                hlist_for_each_entry(p, entry, list, byidx) {
 522                        if (p->index == idx) {
 523                                found = 1;
 524                                break;
 525                        }
 526                }
 527                if (!found)
 528                        return idx;
 529        }
 530}
 531
 532static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
 533{
 534        u32 *p1 = (u32 *) s1;
 535        u32 *p2 = (u32 *) s2;
 536        int len = sizeof(struct xfrm_selector) / sizeof(u32);
 537        int i;
 538
 539        for (i = 0; i < len; i++) {
 540                if (p1[i] != p2[i])
 541                        return 1;
 542        }
 543
 544        return 0;
 545}
 546
  /*
   * Insert @policy into its namespace's policy database for direction @dir.
   *
   * The policy is linked into the chain chosen by policy_hash_bysel(), after
   * the last scanned entry whose priority is not greater than its own. An
   * existing policy with the same type, selector, matching mark and matching
   * security context is replaced: it is unlinked, its index is reused and it
   * is killed once the lock has been dropped. With @excl set, such a
   * duplicate makes the call fail instead.
   *
   * Returns 0 on success or -EEXIST when @excl is set and a duplicate exists.
   */
  547int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
  548{
  549        struct net *net = xp_net(policy);
  550        struct xfrm_policy *pol;
  551        struct xfrm_policy *delpol;
  552        struct hlist_head *chain;
  553        struct hlist_node *entry, *newpos;
  554        u32 mark = policy->mark.v & policy->mark.m;
  555
  556        write_lock_bh(&xfrm_policy_lock);
  557        chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
  558        delpol = NULL;
  559        newpos = NULL;
  560        hlist_for_each_entry(pol, entry, chain, bydst) {
                     /* Duplicate test; a second duplicate trips WARN_ON and is
                      * then handled like any non-matching entry. */
  561                if (pol->type == policy->type &&
  562                    !selector_cmp(&pol->selector, &policy->selector) &&
  563                    (mark & pol->mark.m) == pol->mark.v &&
  564                    xfrm_sec_ctx_match(pol->security, policy->security) &&
  565                    !WARN_ON(delpol)) {
  566                        if (excl) {
  567                                write_unlock_bh(&xfrm_policy_lock);
  568                                return -EEXIST;
  569                        }
  570                        delpol = pol;
                             /* Keep scanning for the insertion point. */
  571                        if (policy->priority > pol->priority)
  572                                continue;
  573                } else if (policy->priority >= pol->priority) {
                             /* Remember the latest entry the new policy goes after. */
  574                        newpos = &pol->bydst;
  575                        continue;
  576                }
                     /* Insertion point passed; stop once the duplicate is known. */
  577                if (delpol)
  578                        break;
  579        }
  580        if (newpos)
  581                hlist_add_after(newpos, &policy->bydst);
  582        else
  583                hlist_add_head(&policy->bydst, chain);
             /* Reference held on behalf of the policy tables. */
  584        xfrm_pol_hold(policy);
  585        net->xfrm.policy_count[dir]++;
  586        atomic_inc(&flow_cache_genid);
  587        if (delpol)
  588                __xfrm_policy_unlink(delpol, dir);
             /* A replacement inherits the index of the policy it replaces. */
  589        policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir);
  590        hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
  591        policy->curlft.add_time = get_seconds();
  592        policy->curlft.use_time = 0;
             /* Arm the lifetime timer; take a reference if it was not pending. */
  593        if (!mod_timer(&policy->timer, jiffies + HZ))
  594                xfrm_pol_hold(policy);
  595        list_add(&policy->walk.all, &net->xfrm.policy_all);
  596        write_unlock_bh(&xfrm_policy_lock);
  597
             /* Outside the lock: retire the replaced policy, or ask for a
              * bigger hash table if this chain's table has become too full. */
  598        if (delpol)
  599                xfrm_policy_kill(delpol);
  600        else if (xfrm_bydst_should_resize(net, dir, NULL))
  601                schedule_work(&net->xfrm.policy_hash_work);
  602
  603        return 0;
  604}
  605EXPORT_SYMBOL(xfrm_policy_insert);
 606
 607struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 608                                          int dir, struct xfrm_selector *sel,
 609                                          struct xfrm_sec_ctx *ctx, int delete,
 610                                          int *err)
 611{
 612        struct xfrm_policy *pol, *ret;
 613        struct hlist_head *chain;
 614        struct hlist_node *entry;
 615
 616        *err = 0;
 617        write_lock_bh(&xfrm_policy_lock);
 618        chain = policy_hash_bysel(net, sel, sel->family, dir);
 619        ret = NULL;
 620        hlist_for_each_entry(pol, entry, chain, bydst) {
 621                if (pol->type == type &&
 622                    (mark & pol->mark.m) == pol->mark.v &&
 623                    !selector_cmp(sel, &pol->selector) &&
 624                    xfrm_sec_ctx_match(ctx, pol->security)) {
 625                        xfrm_pol_hold(pol);
 626                        if (delete) {
 627                                *err = security_xfrm_policy_delete(
 628                                                                pol->security);
 629                                if (*err) {
 630                                        write_unlock_bh(&xfrm_policy_lock);
 631                                        return pol;
 632                                }
 633                                __xfrm_policy_unlink(pol, dir);
 634                        }
 635                        ret = pol;
 636                        break;
 637                }
 638        }
 639        write_unlock_bh(&xfrm_policy_lock);
 640
 641        if (ret && delete)
 642                xfrm_policy_kill(ret);
 643        return ret;
 644}
 645EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 646
 647struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 648                                     int dir, u32 id, int delete, int *err)
 649{
 650        struct xfrm_policy *pol, *ret;
 651        struct hlist_head *chain;
 652        struct hlist_node *entry;
 653
 654        *err = -ENOENT;
 655        if (xfrm_policy_id2dir(id) != dir)
 656                return NULL;
 657
 658        *err = 0;
 659        write_lock_bh(&xfrm_policy_lock);
 660        chain = net->xfrm.policy_byidx + idx_hash(net, id);
 661        ret = NULL;
 662        hlist_for_each_entry(pol, entry, chain, byidx) {
 663                if (pol->type == type && pol->index == id &&
 664                    (mark & pol->mark.m) == pol->mark.v) {
 665                        xfrm_pol_hold(pol);
 666                        if (delete) {
 667                                *err = security_xfrm_policy_delete(
 668                                                                pol->security);
 669                                if (*err) {
 670                                        write_unlock_bh(&xfrm_policy_lock);
 671                                        return pol;
 672                                }
 673                                __xfrm_policy_unlink(pol, dir);
 674                        }
 675                        ret = pol;
 676                        break;
 677                }
 678        }
 679        write_unlock_bh(&xfrm_policy_lock);
 680
 681        if (ret && delete)
 682                xfrm_policy_kill(ret);
 683        return ret;
 684}
 685EXPORT_SYMBOL(xfrm_policy_byid);
 686
 687#ifdef CONFIG_SECURITY_NETWORK_XFRM
 688static inline int
 689xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
 690{
 691        int dir, err = 0;
 692
 693        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 694                struct xfrm_policy *pol;
 695                struct hlist_node *entry;
 696                int i;
 697
 698                hlist_for_each_entry(pol, entry,
 699                                     &net->xfrm.policy_inexact[dir], bydst) {
 700                        if (pol->type != type)
 701                                continue;
 702                        err = security_xfrm_policy_delete(pol->security);
 703                        if (err) {
 704                                xfrm_audit_policy_delete(pol, 0,
 705                                                         audit_info->loginuid,
 706                                                         audit_info->sessionid,
 707                                                         audit_info->secid);
 708                                return err;
 709                        }
 710                }
 711                for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
 712                        hlist_for_each_entry(pol, entry,
 713                                             net->xfrm.policy_bydst[dir].table + i,
 714                                             bydst) {
 715                                if (pol->type != type)
 716                                        continue;
 717                                err = security_xfrm_policy_delete(
 718                                                                pol->security);
 719                                if (err) {
 720                                        xfrm_audit_policy_delete(pol, 0,
 721                                                        audit_info->loginuid,
 722                                                        audit_info->sessionid,
 723                                                        audit_info->secid);
 724                                        return err;
 725                                }
 726                        }
 727                }
 728        }
 729        return err;
 730}
 731#else
 732static inline int
 733xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
 734{
 735        return 0;
 736}
 737#endif
 738
  /*
   * Remove every policy of @type from @net, for each direction below
   * XFRM_POLICY_MAX.
   *
   * The security pre-check runs first, under the write lock; if it fails its
   * error is returned and nothing is removed. Each matching policy is then
   * unlinked, and the lock is dropped while the deletion is audited and the
   * policy killed. Because the lock was dropped, the scan of the current
   * chain restarts from its head afterwards.
   *
   * Returns 0 if at least one policy was removed, -ESRCH if none was, or the
   * error from the security pre-check.
   */
  739int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
  740{
  741        int dir, err = 0, cnt = 0;
  742
  743        write_lock_bh(&xfrm_policy_lock);
  744
  745        err = xfrm_policy_flush_secctx_check(net, type, audit_info);
  746        if (err)
  747                goto out;
  748
  749        for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
  750                struct xfrm_policy *pol;
  751                struct hlist_node *entry;
  752                int i;
  753
                     /* Pass 1: the inexact list for this direction. */
  754        again1:
  755                hlist_for_each_entry(pol, entry,
  756                                     &net->xfrm.policy_inexact[dir], bydst) {
  757                        if (pol->type != type)
  758                                continue;
  759                        __xfrm_policy_unlink(pol, dir);
  760                        write_unlock_bh(&xfrm_policy_lock);
  761                        cnt++;
  762
  763                        xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
  764                                                 audit_info->sessionid,
  765                                                 audit_info->secid);
  766
  767                        xfrm_policy_kill(pol);
  768
                             /* The list may have changed while unlocked: rescan. */
  769                        write_lock_bh(&xfrm_policy_lock);
  770                        goto again1;
  771                }
  772
                     /* Pass 2: every bucket of the by-dst hash table. */
  773                for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
  774        again2:
  775                        hlist_for_each_entry(pol, entry,
  776                                             net->xfrm.policy_bydst[dir].table + i,
  777                                             bydst) {
  778                                if (pol->type != type)
  779                                        continue;
  780                                __xfrm_policy_unlink(pol, dir);
  781                                write_unlock_bh(&xfrm_policy_lock);
  782                                cnt++;
  783
  784                                xfrm_audit_policy_delete(pol, 1,
  785                                                         audit_info->loginuid,
  786                                                         audit_info->sessionid,
  787                                                         audit_info->secid);
  788                                xfrm_policy_kill(pol);
  789
                                     /* Rescan this bucket after retaking the lock. */
  790                                write_lock_bh(&xfrm_policy_lock);
  791                                goto again2;
  792                        }
  793                }
  794
  795        }
             /* Nothing of the requested type was found. */
  796        if (!cnt)
  797                err = -ESRCH;
  798out:
  799        write_unlock_bh(&xfrm_policy_lock);
  800        return err;
  801}
  802EXPORT_SYMBOL(xfrm_policy_flush);
 803
 804int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
 805                     int (*func)(struct xfrm_policy *, int, int, void*),
 806                     void *data)
 807{
 808        struct xfrm_policy *pol;
 809        struct xfrm_policy_walk_entry *x;
 810        int error = 0;
 811
 812        if (walk->type >= XFRM_POLICY_TYPE_MAX &&
 813            walk->type != XFRM_POLICY_TYPE_ANY)
 814                return -EINVAL;
 815
 816        if (list_empty(&walk->walk.all) && walk->seq != 0)
 817                return 0;
 818
 819        write_lock_bh(&xfrm_policy_lock);
 820        if (list_empty(&walk->walk.all))
 821                x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
 822        else
 823                x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
 824        list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
 825                if (x->dead)
 826                        continue;
 827                pol = container_of(x, struct xfrm_policy, walk);
 828                if (walk->type != XFRM_POLICY_TYPE_ANY &&
 829                    walk->type != pol->type)
 830                        continue;
 831                error = func(pol, xfrm_policy_id2dir(pol->index),
 832                             walk->seq, data);
 833                if (error) {
 834                        list_move_tail(&walk->walk.all, &x->all);
 835                        goto out;
 836                }
 837                walk->seq++;
 838        }
 839        if (walk->seq == 0) {
 840                error = -ENOENT;
 841                goto out;
 842        }
 843        list_del_init(&walk->walk.all);
 844out:
 845        write_unlock_bh(&xfrm_policy_lock);
 846        return error;
 847}
 848EXPORT_SYMBOL(xfrm_policy_walk);
 849
 850void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
 851{
 852        INIT_LIST_HEAD(&walk->walk.all);
 853        walk->walk.dead = 1;
 854        walk->type = type;
 855        walk->seq = 0;
 856}
 857EXPORT_SYMBOL(xfrm_policy_walk_init);
 858
 859void xfrm_policy_walk_done(struct xfrm_policy_walk *walk)
 860{
 861        if (list_empty(&walk->walk.all))
 862                return;
 863
 864        write_lock_bh(&xfrm_policy_lock);
 865        list_del(&walk->walk.all);
 866        write_unlock_bh(&xfrm_policy_lock);
 867}
 868EXPORT_SYMBOL(xfrm_policy_walk_done);
 869
 870/*
 871 * Find policy to apply to this flow.
 872 *
 873 * Returns 0 if policy found, else an -errno.
 874 */
 875static int xfrm_policy_match(const struct xfrm_policy *pol,
 876                             const struct flowi *fl,
 877                             u8 type, u16 family, int dir)
 878{
 879        const struct xfrm_selector *sel = &pol->selector;
 880        int match, ret = -ESRCH;
 881
 882        if (pol->family != family ||
 883            (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
 884            pol->type != type)
 885                return ret;
 886
 887        match = xfrm_selector_match(sel, fl, family);
 888        if (match)
 889                ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
 890                                                  dir);
 891
 892        return ret;
 893}
 894
 895static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
 896                                                     const struct flowi *fl,
 897                                                     u16 family, u8 dir)
 898{
 899        int err;
 900        struct xfrm_policy *pol, *ret;
 901        const xfrm_address_t *daddr, *saddr;
 902        struct hlist_node *entry;
 903        struct hlist_head *chain;
 904        u32 priority = ~0U;
 905
 906        daddr = xfrm_flowi_daddr(fl, family);
 907        saddr = xfrm_flowi_saddr(fl, family);
 908        if (unlikely(!daddr || !saddr))
 909                return NULL;
 910
 911        read_lock_bh(&xfrm_policy_lock);
 912        chain = policy_hash_direct(net, daddr, saddr, family, dir);
 913        ret = NULL;
 914        hlist_for_each_entry(pol, entry, chain, bydst) {
 915                err = xfrm_policy_match(pol, fl, type, family, dir);
 916                if (err) {
 917                        if (err == -ESRCH)
 918                                continue;
 919                        else {
 920                                ret = ERR_PTR(err);
 921                                goto fail;
 922                        }
 923                } else {
 924                        ret = pol;
 925                        priority = ret->priority;
 926                        break;
 927                }
 928        }
 929        chain = &net->xfrm.policy_inexact[dir];
 930        hlist_for_each_entry(pol, entry, chain, bydst) {
 931                err = xfrm_policy_match(pol, fl, type, family, dir);
 932                if (err) {
 933                        if (err == -ESRCH)
 934                                continue;
 935                        else {
 936                                ret = ERR_PTR(err);
 937                                goto fail;
 938                        }
 939                } else if (pol->priority < priority) {
 940                        ret = pol;
 941                        break;
 942                }
 943        }
 944        if (ret)
 945                xfrm_pol_hold(ret);
 946fail:
 947        read_unlock_bh(&xfrm_policy_lock);
 948
 949        return ret;
 950}
 951
 952static struct xfrm_policy *
 953__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
 954{
 955#ifdef CONFIG_XFRM_SUB_POLICY
 956        struct xfrm_policy *pol;
 957
 958        pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
 959        if (pol != NULL)
 960                return pol;
 961#endif
 962        return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
 963}
 964
 965static struct flow_cache_object *
 966xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
 967                   u8 dir, struct flow_cache_object *old_obj, void *ctx)
 968{
 969        struct xfrm_policy *pol;
 970
 971        if (old_obj)
 972                xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 973
 974        pol = __xfrm_policy_lookup(net, fl, family, dir);
 975        if (IS_ERR_OR_NULL(pol))
 976                return ERR_CAST(pol);
 977
 978        /* Resolver returns two references:
 979         * one for cache and one for caller of flow_cache_lookup() */
 980        xfrm_pol_hold(pol);
 981
 982        return &pol->flo;
 983}
 984
 985static inline int policy_to_flow_dir(int dir)
 986{
 987        if (XFRM_POLICY_IN == FLOW_DIR_IN &&
 988            XFRM_POLICY_OUT == FLOW_DIR_OUT &&
 989            XFRM_POLICY_FWD == FLOW_DIR_FWD)
 990                return dir;
 991        switch (dir) {
 992        default:
 993        case XFRM_POLICY_IN:
 994                return FLOW_DIR_IN;
 995        case XFRM_POLICY_OUT:
 996                return FLOW_DIR_OUT;
 997        case XFRM_POLICY_FWD:
 998                return FLOW_DIR_FWD;
 999        }
1000}
1001
1002static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
1003                                                 const struct flowi *fl)
1004{
1005        struct xfrm_policy *pol;
1006
1007        read_lock_bh(&xfrm_policy_lock);
1008        if ((pol = sk->sk_policy[dir]) != NULL) {
1009                int match = xfrm_selector_match(&pol->selector, fl,
1010                                                sk->sk_family);
1011                int err = 0;
1012
1013                if (match) {
1014                        if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
1015                                pol = NULL;
1016                                goto out;
1017                        }
1018                        err = security_xfrm_policy_lookup(pol->security,
1019                                                      fl->flowi_secid,
1020                                                      policy_to_flow_dir(dir));
1021                        if (!err)
1022                                xfrm_pol_hold(pol);
1023                        else if (err == -ESRCH)
1024                                pol = NULL;
1025                        else
1026                                pol = ERR_PTR(err);
1027                } else
1028                        pol = NULL;
1029        }
1030out:
1031        read_unlock_bh(&xfrm_policy_lock);
1032        return pol;
1033}
1034
1035static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1036{
1037        struct net *net = xp_net(pol);
1038        struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
1039                                                     pol->family, dir);
1040
1041        list_add(&pol->walk.all, &net->xfrm.policy_all);
1042        hlist_add_head(&pol->bydst, chain);
1043        hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
1044        net->xfrm.policy_count[dir]++;
1045        xfrm_pol_hold(pol);
1046
1047        if (xfrm_bydst_should_resize(net, dir, NULL))
1048                schedule_work(&net->xfrm.policy_hash_work);
1049}
1050
1051static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1052                                                int dir)
1053{
1054        struct net *net = xp_net(pol);
1055
1056        if (hlist_unhashed(&pol->bydst))
1057                return NULL;
1058
1059        hlist_del(&pol->bydst);
1060        hlist_del(&pol->byidx);
1061        list_del(&pol->walk.all);
1062        net->xfrm.policy_count[dir]--;
1063
1064        return pol;
1065}
1066
1067int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1068{
1069        write_lock_bh(&xfrm_policy_lock);
1070        pol = __xfrm_policy_unlink(pol, dir);
1071        write_unlock_bh(&xfrm_policy_lock);
1072        if (pol) {
1073                xfrm_policy_kill(pol);
1074                return 0;
1075        }
1076        return -ENOENT;
1077}
1078EXPORT_SYMBOL(xfrm_policy_delete);
1079
1080int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1081{
1082        struct net *net = xp_net(pol);
1083        struct xfrm_policy *old_pol;
1084
1085#ifdef CONFIG_XFRM_SUB_POLICY
1086        if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1087                return -EINVAL;
1088#endif
1089
1090        write_lock_bh(&xfrm_policy_lock);
1091        old_pol = sk->sk_policy[dir];
1092        sk->sk_policy[dir] = pol;
1093        if (pol) {
1094                pol->curlft.add_time = get_seconds();
1095                pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir);
1096                __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1097        }
1098        if (old_pol)
1099                /* Unlinking succeeds always. This is the only function
1100                 * allowed to delete or replace socket policy.
1101                 */
1102                __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1103        write_unlock_bh(&xfrm_policy_lock);
1104
1105        if (old_pol) {
1106                xfrm_policy_kill(old_pol);
1107        }
1108        return 0;
1109}
1110
1111static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
1112{
1113        struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
1114
1115        if (newp) {
1116                newp->selector = old->selector;
1117                if (security_xfrm_policy_clone(old->security,
1118                                               &newp->security)) {
1119                        kfree(newp);
1120                        return NULL;  /* ENOMEM */
1121                }
1122                newp->lft = old->lft;
1123                newp->curlft = old->curlft;
1124                newp->mark = old->mark;
1125                newp->action = old->action;
1126                newp->flags = old->flags;
1127                newp->xfrm_nr = old->xfrm_nr;
1128                newp->index = old->index;
1129                newp->type = old->type;
1130                memcpy(newp->xfrm_vec, old->xfrm_vec,
1131                       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1132                write_lock_bh(&xfrm_policy_lock);
1133                __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1134                write_unlock_bh(&xfrm_policy_lock);
1135                xfrm_pol_put(newp);
1136        }
1137        return newp;
1138}
1139
1140int __xfrm_sk_clone_policy(struct sock *sk)
1141{
1142        struct xfrm_policy *p0 = sk->sk_policy[0],
1143                           *p1 = sk->sk_policy[1];
1144
1145        sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1146        if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1147                return -ENOMEM;
1148        if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1149                return -ENOMEM;
1150        return 0;
1151}
1152
1153static int
1154xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1155               unsigned short family)
1156{
1157        int err;
1158        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1159
1160        if (unlikely(afinfo == NULL))
1161                return -EINVAL;
1162        err = afinfo->get_saddr(net, local, remote);
1163        xfrm_policy_put_afinfo(afinfo);
1164        return err;
1165}
1166
1167/* Resolve list of templates for the flow, given policy. */
1168
1169static int
1170xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
1171                      struct xfrm_state **xfrm, unsigned short family)
1172{
1173        struct net *net = xp_net(policy);
1174        int nx;
1175        int i, error;
1176        xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1177        xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1178        xfrm_address_t tmp;
1179
1180        for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1181                struct xfrm_state *x;
1182                xfrm_address_t *remote = daddr;
1183                xfrm_address_t *local  = saddr;
1184                struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1185
1186                if (tmpl->mode == XFRM_MODE_TUNNEL ||
1187                    tmpl->mode == XFRM_MODE_BEET) {
1188                        remote = &tmpl->id.daddr;
1189                        local = &tmpl->saddr;
1190                        if (xfrm_addr_any(local, tmpl->encap_family)) {
1191                                error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
1192                                if (error)
1193                                        goto fail;
1194                                local = &tmp;
1195                        }
1196                }
1197
1198                x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1199
1200                if (x && x->km.state == XFRM_STATE_VALID) {
1201                        xfrm[nx++] = x;
1202                        daddr = remote;
1203                        saddr = local;
1204                        continue;
1205                }
1206                if (x) {
1207                        error = (x->km.state == XFRM_STATE_ERROR ?
1208                                 -EINVAL : -EAGAIN);
1209                        xfrm_state_put(x);
1210                }
1211                else if (error == -ESRCH)
1212                        error = -EAGAIN;
1213
1214                if (!tmpl->optional)
1215                        goto fail;
1216        }
1217        return nx;
1218
1219fail:
1220        for (nx--; nx>=0; nx--)
1221                xfrm_state_put(xfrm[nx]);
1222        return error;
1223}
1224
1225static int
1226xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
1227                  struct xfrm_state **xfrm, unsigned short family)
1228{
1229        struct xfrm_state *tp[XFRM_MAX_DEPTH];
1230        struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1231        int cnx = 0;
1232        int error;
1233        int ret;
1234        int i;
1235
1236        for (i = 0; i < npols; i++) {
1237                if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1238                        error = -ENOBUFS;
1239                        goto fail;
1240                }
1241
1242                ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1243                if (ret < 0) {
1244                        error = ret;
1245                        goto fail;
1246                } else
1247                        cnx += ret;
1248        }
1249
1250        /* found states are sorted for outbound processing */
1251        if (npols > 1)
1252                xfrm_state_sort(xfrm, tpp, cnx, family);
1253
1254        return cnx;
1255
1256 fail:
1257        for (cnx--; cnx>=0; cnx--)
1258                xfrm_state_put(tpp[cnx]);
1259        return error;
1260
1261}
1262
1263/* Check that the bundle accepts the flow and its components are
1264 * still valid.
1265 */
1266
1267static inline int xfrm_get_tos(const struct flowi *fl, int family)
1268{
1269        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1270        int tos;
1271
1272        if (!afinfo)
1273                return -EINVAL;
1274
1275        tos = afinfo->get_tos(fl);
1276
1277        xfrm_policy_put_afinfo(afinfo);
1278
1279        return tos;
1280}
1281
1282static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
1283{
1284        struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1285        struct dst_entry *dst = &xdst->u.dst;
1286
1287        if (xdst->route == NULL) {
1288                /* Dummy bundle - if it has xfrms we were not
1289                 * able to build bundle as template resolution failed.
1290                 * It means we need to try again resolving. */
1291                if (xdst->num_xfrms > 0)
1292                        return NULL;
1293        } else {
1294                /* Real bundle */
1295                if (stale_bundle(dst))
1296                        return NULL;
1297        }
1298
1299        dst_hold(dst);
1300        return flo;
1301}
1302
1303static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
1304{
1305        struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1306        struct dst_entry *dst = &xdst->u.dst;
1307
1308        if (!xdst->route)
1309                return 0;
1310        if (stale_bundle(dst))
1311                return 0;
1312
1313        return 1;
1314}
1315
1316static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
1317{
1318        struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1319        struct dst_entry *dst = &xdst->u.dst;
1320
1321        dst_free(dst);
1322}
1323
1324static const struct flow_cache_ops xfrm_bundle_fc_ops = {
1325        .get = xfrm_bundle_flo_get,
1326        .check = xfrm_bundle_flo_check,
1327        .delete = xfrm_bundle_flo_delete,
1328};
1329
1330static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1331{
1332        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1333        struct dst_ops *dst_ops;
1334        struct xfrm_dst *xdst;
1335
1336        if (!afinfo)
1337                return ERR_PTR(-EINVAL);
1338
1339        switch (family) {
1340        case AF_INET:
1341                dst_ops = &net->xfrm.xfrm4_dst_ops;
1342                break;
1343#if IS_ENABLED(CONFIG_IPV6)
1344        case AF_INET6:
1345                dst_ops = &net->xfrm.xfrm6_dst_ops;
1346                break;
1347#endif
1348        default:
1349                BUG();
1350        }
1351        xdst = dst_alloc(dst_ops, NULL, 0, 0, 0);
1352
1353        if (likely(xdst)) {
1354                memset(&xdst->u.rt6.rt6i_table, 0,
1355                        sizeof(*xdst) - sizeof(struct dst_entry));
1356                xdst->flo.ops = &xfrm_bundle_fc_ops;
1357        } else
1358                xdst = ERR_PTR(-ENOBUFS);
1359
1360        xfrm_policy_put_afinfo(afinfo);
1361
1362        return xdst;
1363}
1364
1365static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1366                                 int nfheader_len)
1367{
1368        struct xfrm_policy_afinfo *afinfo =
1369                xfrm_policy_get_afinfo(dst->ops->family);
1370        int err;
1371
1372        if (!afinfo)
1373                return -EINVAL;
1374
1375        err = afinfo->init_path(path, dst, nfheader_len);
1376
1377        xfrm_policy_put_afinfo(afinfo);
1378
1379        return err;
1380}
1381
1382static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1383                                const struct flowi *fl)
1384{
1385        struct xfrm_policy_afinfo *afinfo =
1386                xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1387        int err;
1388
1389        if (!afinfo)
1390                return -EINVAL;
1391
1392        err = afinfo->fill_dst(xdst, dev, fl);
1393
1394        xfrm_policy_put_afinfo(afinfo);
1395
1396        return err;
1397}
1398
1399
1400/* Allocate chain of dst_entry's, attach known xfrm's, calculate
1401 * all the metrics... Shortly, bundle a bundle.
1402 */
1403
1404static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1405                                            struct xfrm_state **xfrm, int nx,
1406                                            const struct flowi *fl,
1407                                            struct dst_entry *dst)
1408{
1409        struct net *net = xp_net(policy);
1410        unsigned long now = jiffies;
1411        struct net_device *dev;
1412        struct xfrm_mode *inner_mode;
1413        struct dst_entry *dst_prev = NULL;
1414        struct dst_entry *dst0 = NULL;
1415        int i = 0;
1416        int err;
1417        int header_len = 0;
1418        int nfheader_len = 0;
1419        int trailer_len = 0;
1420        int tos;
1421        int family = policy->selector.family;
1422        xfrm_address_t saddr, daddr;
1423
1424        xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1425
1426        tos = xfrm_get_tos(fl, family);
1427        err = tos;
1428        if (tos < 0)
1429                goto put_states;
1430
1431        dst_hold(dst);
1432
1433        for (; i < nx; i++) {
1434                struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1435                struct dst_entry *dst1 = &xdst->u.dst;
1436
1437                err = PTR_ERR(xdst);
1438                if (IS_ERR(xdst)) {
1439                        dst_release(dst);
1440                        goto put_states;
1441                }
1442
1443                if (xfrm[i]->sel.family == AF_UNSPEC) {
1444                        inner_mode = xfrm_ip2inner_mode(xfrm[i],
1445                                                        xfrm_af2proto(family));
1446                        if (!inner_mode) {
1447                                err = -EAFNOSUPPORT;
1448                                dst_release(dst);
1449                                goto put_states;
1450                        }
1451                } else
1452                        inner_mode = xfrm[i]->inner_mode;
1453
1454                if (!dst_prev)
1455                        dst0 = dst1;
1456                else {
1457                        dst_prev->child = dst_clone(dst1);
1458                        dst1->flags |= DST_NOHASH;
1459                }
1460
1461                xdst->route = dst;
1462                dst_copy_metrics(dst1, dst);
1463
1464                if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1465                        family = xfrm[i]->props.family;
1466                        dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
1467                                              family);
1468                        err = PTR_ERR(dst);
1469                        if (IS_ERR(dst))
1470                                goto put_states;
1471                } else
1472                        dst_hold(dst);
1473
1474                dst1->xfrm = xfrm[i];
1475                xdst->xfrm_genid = xfrm[i]->genid;
1476
1477                dst1->obsolete = -1;
1478                dst1->flags |= DST_HOST;
1479                dst1->lastuse = now;
1480
1481                dst1->input = dst_discard;
1482                dst1->output = inner_mode->afinfo->output;
1483
1484                dst1->next = dst_prev;
1485                dst_prev = dst1;
1486
1487                header_len += xfrm[i]->props.header_len;
1488                if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1489                        nfheader_len += xfrm[i]->props.header_len;
1490                trailer_len += xfrm[i]->props.trailer_len;
1491        }
1492
1493        dst_prev->child = dst;
1494        dst0->path = dst;
1495
1496        err = -ENODEV;
1497        dev = dst->dev;
1498        if (!dev)
1499                goto free_dst;
1500
1501        /* Copy neighbour for reachability confirmation */
1502        dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour_noref(dst)));
1503
1504        xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1505        xfrm_init_pmtu(dst_prev);
1506
1507        for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1508                struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1509
1510                err = xfrm_fill_dst(xdst, dev, fl);
1511                if (err)
1512                        goto free_dst;
1513
1514                dst_prev->header_len = header_len;
1515                dst_prev->trailer_len = trailer_len;
1516                header_len -= xdst->u.dst.xfrm->props.header_len;
1517                trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1518        }
1519
1520out:
1521        return dst0;
1522
1523put_states:
1524        for (; i < nx; i++)
1525                xfrm_state_put(xfrm[i]);
1526free_dst:
1527        if (dst0)
1528                dst_free(dst0);
1529        dst0 = ERR_PTR(err);
1530        goto out;
1531}
1532
1533static int inline
1534xfrm_dst_alloc_copy(void **target, const void *src, int size)
1535{
1536        if (!*target) {
1537                *target = kmalloc(size, GFP_ATOMIC);
1538                if (!*target)
1539                        return -ENOMEM;
1540        }
1541        memcpy(*target, src, size);
1542        return 0;
1543}
1544
/*
 * With CONFIG_XFRM_SUB_POLICY: store a copy of selector @sel in the
 * bundle's ->partner slot.  Without it: no-op returning 0.
 */
static inline int
xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->partner);

	return xfrm_dst_alloc_copy(slot, sel, sizeof(*sel));
#else
	return 0;
#endif
}
1556
/*
 * With CONFIG_XFRM_SUB_POLICY: store a copy of flow @fl in the bundle's
 * ->origin slot.  Without it: no-op returning 0.
 */
static inline int
xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->origin);

	return xfrm_dst_alloc_copy(slot, fl, sizeof(*fl));
#else
	return 0;
#endif
}
1567
1568static int xfrm_expand_policies(const struct flowi *fl, u16 family,
1569                                struct xfrm_policy **pols,
1570                                int *num_pols, int *num_xfrms)
1571{
1572        int i;
1573
1574        if (*num_pols == 0 || !pols[0]) {
1575                *num_pols = 0;
1576                *num_xfrms = 0;
1577                return 0;
1578        }
1579        if (IS_ERR(pols[0]))
1580                return PTR_ERR(pols[0]);
1581
1582        *num_xfrms = pols[0]->xfrm_nr;
1583
1584#ifdef CONFIG_XFRM_SUB_POLICY
1585        if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
1586            pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1587                pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
1588                                                    XFRM_POLICY_TYPE_MAIN,
1589                                                    fl, family,
1590                                                    XFRM_POLICY_OUT);
1591                if (pols[1]) {
1592                        if (IS_ERR(pols[1])) {
1593                                xfrm_pols_put(pols, *num_pols);
1594                                return PTR_ERR(pols[1]);
1595                        }
1596                        (*num_pols) ++;
1597                        (*num_xfrms) += pols[1]->xfrm_nr;
1598                }
1599        }
1600#endif
1601        for (i = 0; i < *num_pols; i++) {
1602                if (pols[i]->action != XFRM_POLICY_ALLOW) {
1603                        *num_xfrms = -1;
1604                        break;
1605                }
1606        }
1607
1608        return 0;
1609
1610}
1611
1612static struct xfrm_dst *
1613xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1614                               const struct flowi *fl, u16 family,
1615                               struct dst_entry *dst_orig)
1616{
1617        struct net *net = xp_net(pols[0]);
1618        struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1619        struct dst_entry *dst;
1620        struct xfrm_dst *xdst;
1621        int err;
1622
1623        /* Try to instantiate a bundle */
1624        err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
1625        if (err <= 0) {
1626                if (err != 0 && err != -EAGAIN)
1627                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1628                return ERR_PTR(err);
1629        }
1630
1631        dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
1632        if (IS_ERR(dst)) {
1633                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1634                return ERR_CAST(dst);
1635        }
1636
1637        xdst = (struct xfrm_dst *)dst;
1638        xdst->num_xfrms = err;
1639        if (num_pols > 1)
1640                err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1641        else
1642                err = xfrm_dst_update_origin(dst, fl);
1643        if (unlikely(err)) {
1644                dst_free(dst);
1645                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1646                return ERR_PTR(err);
1647        }
1648
1649        xdst->num_pols = num_pols;
1650        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
1651        xdst->policy_genid = atomic_read(&pols[0]->genid);
1652
1653        return xdst;
1654}
1655
1656static struct flow_cache_object *
1657xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
1658                   struct flow_cache_object *oldflo, void *ctx)
1659{
1660        struct dst_entry *dst_orig = (struct dst_entry *)ctx;
1661        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1662        struct xfrm_dst *xdst, *new_xdst;
1663        int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
1664
1665        /* Check if the policies from old bundle are usable */
1666        xdst = NULL;
1667        if (oldflo) {
1668                xdst = container_of(oldflo, struct xfrm_dst, flo);
1669                num_pols = xdst->num_pols;
1670                num_xfrms = xdst->num_xfrms;
1671                pol_dead = 0;
1672                for (i = 0; i < num_pols; i++) {
1673                        pols[i] = xdst->pols[i];
1674                        pol_dead |= pols[i]->walk.dead;
1675                }
1676                if (pol_dead) {
1677                        dst_free(&xdst->u.dst);
1678                        xdst = NULL;
1679                        num_pols = 0;
1680                        num_xfrms = 0;
1681                        oldflo = NULL;
1682                }
1683        }
1684
1685        /* Resolve policies to use if we couldn't get them from
1686         * previous cache entry */
1687        if (xdst == NULL) {
1688                num_pols = 1;
1689                pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
1690                err = xfrm_expand_policies(fl, family, pols,
1691                                           &num_pols, &num_xfrms);
1692                if (err < 0)
1693                        goto inc_error;
1694                if (num_pols == 0)
1695                        return NULL;
1696                if (num_xfrms <= 0)
1697                        goto make_dummy_bundle;
1698        }
1699
1700        new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
1701        if (IS_ERR(new_xdst)) {
1702                err = PTR_ERR(new_xdst);
1703                if (err != -EAGAIN)
1704                        goto error;
1705                if (oldflo == NULL)
1706                        goto make_dummy_bundle;
1707                dst_hold(&xdst->u.dst);
1708                return oldflo;
1709        } else if (new_xdst == NULL) {
1710                num_xfrms = 0;
1711                if (oldflo == NULL)
1712                        goto make_dummy_bundle;
1713                xdst->num_xfrms = 0;
1714                dst_hold(&xdst->u.dst);
1715                return oldflo;
1716        }
1717
1718        /* Kill the previous bundle */
1719        if (xdst) {
1720                /* The policies were stolen for newly generated bundle */
1721                xdst->num_pols = 0;
1722                dst_free(&xdst->u.dst);
1723        }
1724
1725        /* Flow cache does not have reference, it dst_free()'s,
1726         * but we do need to return one reference for original caller */
1727        dst_hold(&new_xdst->u.dst);
1728        return &new_xdst->flo;
1729
1730make_dummy_bundle:
1731        /* We found policies, but there's no bundles to instantiate:
1732         * either because the policy blocks, has no transformations or
1733         * we could not build template (no xfrm_states).*/
1734        xdst = xfrm_alloc_dst(net, family);
1735        if (IS_ERR(xdst)) {
1736                xfrm_pols_put(pols, num_pols);
1737                return ERR_CAST(xdst);
1738        }
1739        xdst->num_pols = num_pols;
1740        xdst->num_xfrms = num_xfrms;
1741        memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
1742
1743        dst_hold(&xdst->u.dst);
1744        return &xdst->flo;
1745
1746inc_error:
1747        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1748error:
1749        if (xdst != NULL)
1750                dst_free(&xdst->u.dst);
1751        else
1752                xfrm_pols_put(pols, num_pols);
1753        return ERR_PTR(err);
1754}
1755
1756static struct dst_entry *make_blackhole(struct net *net, u16 family,
1757                                        struct dst_entry *dst_orig)
1758{
1759        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1760        struct dst_entry *ret;
1761
1762        if (!afinfo) {
1763                dst_release(dst_orig);
1764                ret = ERR_PTR(-EINVAL);
1765        } else {
1766                ret = afinfo->blackhole_route(net, dst_orig);
1767        }
1768        xfrm_policy_put_afinfo(afinfo);
1769
1770        return ret;
1771}
1772
1773/* Main function: finds/creates a bundle for given flow.
1774 *
1775 * At the moment we eat a raw IP route. Mostly to speed up lookups
1776 * on interfaces with disabled IPsec.
1777 */
/*
 * xfrm_lookup - resolve the outbound dst for flow @fl, applying IPsec policy.
 *
 * @net:      namespace whose policy count, flow cache, sysctl and statistics
 *            are used
 * @dst_orig: plain route for the flow; its ops->family selects the family
 * @fl:       flow being routed
 * @sk:       originating socket, may be NULL; a per-socket OUT policy is
 *            tried before the flow cache
 * @flags:    with XFRM_LOOKUP_ICMP the lookup fails with -ENOENT unless a
 *            policy carrying XFRM_POLICY_ICMP applies
 *
 * Returns the dst to use: @dst_orig itself when no policy applies or the
 * flow passes untransformed, the bundle dst when transformations apply (the
 * reference on @dst_orig is then dropped), the result of make_blackhole()
 * when states are missing and sysctl_larval_drop is set, or an ERR_PTR().
 * Every ERR_PTR(err) exit below goes through dropdst and releases @dst_orig.
 *
 * drop_pols is the number of entries in pols[] whose references this
 * function still has to put; it is only set on the per-socket paths that
 * jump to no_transform.
 */
1778struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
1779                              const struct flowi *fl,
1780                              struct sock *sk, int flags)
1781{
1782        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1783        struct flow_cache_object *flo;
1784        struct xfrm_dst *xdst;
1785        struct dst_entry *dst, *route;
1786        u16 family = dst_orig->ops->family;
1787        u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1788        int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
1789
1790restart:
1791        dst = NULL;
1792        xdst = NULL;
1793        route = NULL;
1794
        /* A per-socket OUT policy is consulted before the flow cache. */
1795        if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1796                num_pols = 1;
1797                pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1798                err = xfrm_expand_policies(fl, family, pols,
1799                                           &num_pols, &num_xfrms);
1800                if (err < 0)
1801                        goto dropdst;
1802
1803                if (num_pols) {
1804                        if (num_xfrms <= 0) {
1805                                drop_pols = num_pols;
1806                                goto no_transform;
1807                        }
1808
1809                        xdst = xfrm_resolve_and_create_bundle(
1810                                        pols, num_pols, fl,
1811                                        family, dst_orig);
1812                        if (IS_ERR(xdst)) {
1813                                xfrm_pols_put(pols, num_pols);
1814                                err = PTR_ERR(xdst);
1815                                goto dropdst;
1816                        } else if (xdst == NULL) {
1817                                num_xfrms = 0;
1818                                drop_pols = num_pols;
1819                                goto no_transform;
1820                        }
1821
1822                        dst_hold(&xdst->u.dst);
1823
                        /* Chain the socket bundle on xfrm_policy_sk_bundles;
                         * __xfrm_garbage_collect() later dst_free()s that list. */
1824                        spin_lock_bh(&xfrm_policy_sk_bundle_lock);
1825                        xdst->u.dst.next = xfrm_policy_sk_bundles;
1826                        xfrm_policy_sk_bundles = &xdst->u.dst;
1827                        spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
1828
1829                        route = xdst->route;
1830                }
1831        }
1832
        /* No socket bundle: fall back to the per-flow cache, which resolves
         * misses through xfrm_bundle_lookup(). */
1833        if (xdst == NULL) {
1834                /* To accelerate a bit...  */
1835                if ((dst_orig->flags & DST_NOXFRM) ||
1836                    !net->xfrm.policy_count[XFRM_POLICY_OUT])
1837                        goto nopol;
1838
1839                flo = flow_cache_lookup(net, fl, family, dir,
1840                                        xfrm_bundle_lookup, dst_orig);
1841                if (flo == NULL)
1842                        goto nopol;
1843                if (IS_ERR(flo)) {
1844                        err = PTR_ERR(flo);
1845                        goto dropdst;
1846                }
1847                xdst = container_of(flo, struct xfrm_dst, flo);
1848
1849                num_pols = xdst->num_pols;
1850                num_xfrms = xdst->num_xfrms;
1851                memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols);
1852                route = xdst->route;
1853        }
1854
1855        dst = &xdst->u.dst;
1856        if (route == NULL && num_xfrms > 0) {
1857                /* The only case when xfrm_bundle_lookup() returns a
1858                 * bundle with null route, is when the template could
1859                 * not be resolved. It means policies are there, but
1860                 * bundle could not be created, since we don't yet
1861                 * have the xfrm_state's. We need to wait for KM to
1862                 * negotiate new SA's or bail out with error.*/
1863                if (net->xfrm.sysctl_larval_drop) {
1864                        /* EREMOTE tells the caller to generate
1865                         * a one-shot blackhole route. */
1866                        dst_release(dst);
1867                        xfrm_pols_put(pols, drop_pols);
1868                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1869
1870                        return make_blackhole(net, family, dst_orig);
1871                }
                /* Callers that may sleep wait on km_waitq and then retry the
                 * whole lookup unless a signal is pending. */
1872                if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) {
1873                        DECLARE_WAITQUEUE(wait, current);
1874
1875                        add_wait_queue(&net->xfrm.km_waitq, &wait);
1876                        set_current_state(TASK_INTERRUPTIBLE);
1877                        schedule();
1878                        set_current_state(TASK_RUNNING);
1879                        remove_wait_queue(&net->xfrm.km_waitq, &wait);
1880
1881                        if (!signal_pending(current)) {
1882                                dst_release(dst);
1883                                goto restart;
1884                        }
1885
1886                        err = -ERESTART;
1887                } else
1888                        err = -EAGAIN;
1889
1890                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
1891                goto error;
1892        }
1893
1894no_transform:
1895        if (num_pols == 0)
1896                goto nopol;
1897
1898        if ((flags & XFRM_LOOKUP_ICMP) &&
1899            !(pols[0]->flags & XFRM_POLICY_ICMP)) {
1900                err = -ENOENT;
1901                goto error;
1902        }
1903
1904        for (i = 0; i < num_pols; i++)
1905                pols[i]->curlft.use_time = get_seconds();
1906
        /* The sign of num_xfrms selects the outcome: negative blocks the
         * flow, positive returns the bundle, zero returns dst_orig. */
1907        if (num_xfrms < 0) {
1908                /* Prohibit the flow */
1909                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
1910                err = -EPERM;
1911                goto error;
1912        } else if (num_xfrms > 0) {
1913                /* Flow transformed */
1914                dst_release(dst_orig);
1915        } else {
1916                /* Flow passes untransformed */
1917                dst_release(dst);
1918                dst = dst_orig;
1919        }
1920ok:
1921        xfrm_pols_put(pols, drop_pols);
1922        return dst;
1923
1924nopol:
1925        if (!(flags & XFRM_LOOKUP_ICMP)) {
1926                dst = dst_orig;
1927                goto ok;
1928        }
1929        err = -ENOENT;
1930error:
1931        dst_release(dst);
1932dropdst:
1933        dst_release(dst_orig);
1934        xfrm_pols_put(pols, drop_pols);
1935        return ERR_PTR(err);
1936}
1937EXPORT_SYMBOL(xfrm_lookup);
1938
1939static inline int
1940xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
1941{
1942        struct xfrm_state *x;
1943
1944        if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1945                return 0;
1946        x = skb->sp->xvec[idx];
1947        if (!x->type->reject)
1948                return 0;
1949        return x->type->reject(x, skb, fl);
1950}
1951
1952/* When skb is transformed back to its "native" form, we have to
1953 * check policy restrictions. At the moment we make this in maximally
1954 * stupid way. Shame on me. :-) Of course, connected sockets must
1955 * have policy cached at them.
1956 */
1957
1958static inline int
1959xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
1960              unsigned short family)
1961{
1962        if (xfrm_state_kern(x))
1963                return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1964        return  x->id.proto == tmpl->id.proto &&
1965                (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1966                (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1967                x->props.mode == tmpl->mode &&
1968                (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
1969                 !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1970                !(x->props.mode != XFRM_MODE_TRANSPORT &&
1971                  xfrm_state_addr_cmp(tmpl, x, family));
1972}
1973
1974/*
1975 * 0 or more than 0 is returned when validation is succeeded (either bypass
1976 * because of optional transport mode, or next index of the mathced secpath
1977 * state with the template.
1978 * -1 is returned when no matching template is found.
1979 * Otherwise "-2 - errored_index" is returned.
1980 */
1981static inline int
1982xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
1983               unsigned short family)
1984{
1985        int idx = start;
1986
1987        if (tmpl->optional) {
1988                if (tmpl->mode == XFRM_MODE_TRANSPORT)
1989                        return start;
1990        } else
1991                start = -1;
1992        for (; idx < sp->len; idx++) {
1993                if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1994                        return ++idx;
1995                if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1996                        if (start == -1)
1997                                start = -2-idx;
1998                        break;
1999                }
2000        }
2001        return start;
2002}
2003
2004int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
2005                          unsigned int family, int reverse)
2006{
2007        struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2008        int err;
2009
2010        if (unlikely(afinfo == NULL))
2011                return -EAFNOSUPPORT;
2012
2013        afinfo->decode_session(skb, fl, reverse);
2014        err = security_xfrm_decode_session(skb, &fl->flowi_secid);
2015        xfrm_policy_put_afinfo(afinfo);
2016        return err;
2017}
2018EXPORT_SYMBOL(__xfrm_decode_session);
2019
2020static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
2021{
2022        for (; k < sp->len; k++) {
2023                if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2024                        *idxp = k;
2025                        return 1;
2026                }
2027        }
2028
2029        return 0;
2030}
2031
2032int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
2033                        unsigned short family)
2034{
2035        struct net *net = dev_net(skb->dev);
2036        struct xfrm_policy *pol;
2037        struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2038        int npols = 0;
2039        int xfrm_nr;
2040        int pi;
2041        int reverse;
2042        struct flowi fl;
2043        u8 fl_dir;
2044        int xerr_idx = -1;
2045
2046        reverse = dir & ~XFRM_POLICY_MASK;
2047        dir &= XFRM_POLICY_MASK;
2048        fl_dir = policy_to_flow_dir(dir);
2049
2050        if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
2051                XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
2052                return 0;
2053        }
2054
2055        nf_nat_decode_session(skb, &fl, family);
2056
2057        /* First, check used SA against their selectors. */
2058        if (skb->sp) {
2059                int i;
2060
2061                for (i=skb->sp->len-1; i>=0; i--) {
2062                        struct xfrm_state *x = skb->sp->xvec[i];
2063                        if (!xfrm_selector_match(&x->sel, &fl, family)) {
2064                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
2065                                return 0;
2066                        }
2067                }
2068        }
2069
2070        pol = NULL;
2071        if (sk && sk->sk_policy[dir]) {
2072                pol = xfrm_sk_policy_lookup(sk, dir, &fl);
2073                if (IS_ERR(pol)) {
2074                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2075                        return 0;
2076                }
2077        }
2078
2079        if (!pol) {
2080                struct flow_cache_object *flo;
2081
2082                flo = flow_cache_lookup(net, &fl, family, fl_dir,
2083                                        xfrm_policy_lookup, NULL);
2084                if (IS_ERR_OR_NULL(flo))
2085                        pol = ERR_CAST(flo);
2086                else
2087                        pol = container_of(flo, struct xfrm_policy, flo);
2088        }
2089
2090        if (IS_ERR(pol)) {
2091                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2092                return 0;
2093        }
2094
2095        if (!pol) {
2096                if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
2097                        xfrm_secpath_reject(xerr_idx, skb, &fl);
2098                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
2099                        return 0;
2100                }
2101                return 1;
2102        }
2103
2104        pol->curlft.use_time = get_seconds();
2105
2106        pols[0] = pol;
2107        npols ++;
2108#ifdef CONFIG_XFRM_SUB_POLICY
2109        if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2110                pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
2111                                                    &fl, family,
2112                                                    XFRM_POLICY_IN);
2113                if (pols[1]) {
2114                        if (IS_ERR(pols[1])) {
2115                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2116                                return 0;
2117                        }
2118                        pols[1]->curlft.use_time = get_seconds();
2119                        npols ++;
2120                }
2121        }
2122#endif
2123
2124        if (pol->action == XFRM_POLICY_ALLOW) {
2125                struct sec_path *sp;
2126                static struct sec_path dummy;
2127                struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
2128                struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
2129                struct xfrm_tmpl **tpp = tp;
2130                int ti = 0;
2131                int i, k;
2132
2133                if ((sp = skb->sp) == NULL)
2134                        sp = &dummy;
2135
2136                for (pi = 0; pi < npols; pi++) {
2137                        if (pols[pi] != pol &&
2138                            pols[pi]->action != XFRM_POLICY_ALLOW) {
2139                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2140                                goto reject;
2141                        }
2142                        if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
2143                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2144                                goto reject_error;
2145                        }
2146                        for (i = 0; i < pols[pi]->xfrm_nr; i++)
2147                                tpp[ti++] = &pols[pi]->xfrm_vec[i];
2148                }
2149                xfrm_nr = ti;
2150                if (npols > 1) {
2151                        xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
2152                        tpp = stp;
2153                }
2154
2155                /* For each tunnel xfrm, find the first matching tmpl.
2156                 * For each tmpl before that, find corresponding xfrm.
2157                 * Order is _important_. Later we will implement
2158                 * some barriers, but at the moment barriers
2159                 * are implied between each two transformations.
2160                 */
2161                for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
2162                        k = xfrm_policy_ok(tpp[i], sp, k, family);
2163                        if (k < 0) {
2164                                if (k < -1)
2165                                        /* "-2 - errored_index" returned */
2166                                        xerr_idx = -(2+k);
2167                                XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2168                                goto reject;
2169                        }
2170                }
2171
2172                if (secpath_has_nontransport(sp, k, &xerr_idx)) {
2173                        XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2174                        goto reject;
2175                }
2176
2177                xfrm_pols_put(pols, npols);
2178                return 1;
2179        }
2180        XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2181
2182reject:
2183        xfrm_secpath_reject(xerr_idx, skb, &fl);
2184reject_error:
2185        xfrm_pols_put(pols, npols);
2186        return 0;
2187}
2188EXPORT_SYMBOL(__xfrm_policy_check);
2189
2190int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2191{
2192        struct net *net = dev_net(skb->dev);
2193        struct flowi fl;
2194        struct dst_entry *dst;
2195        int res = 1;
2196
2197        if (xfrm_decode_session(skb, &fl, family) < 0) {
2198                XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
2199                return 0;
2200        }
2201
2202        skb_dst_force(skb);
2203
2204        dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0);
2205        if (IS_ERR(dst)) {
2206                res = 0;
2207                dst = NULL;
2208        }
2209        skb_dst_set(skb, dst);
2210        return res;
2211}
2212EXPORT_SYMBOL(__xfrm_route_forward);
2213
2214/* Optimize later using cookies and generation ids. */
2215
2216static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2217{
2218        /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2219         * to "-1" to force all XFRM destinations to get validated by
2220         * dst_ops->check on every use.  We do this because when a
2221         * normal route referenced by an XFRM dst is obsoleted we do
2222         * not go looking around for all parent referencing XFRM dsts
2223         * so that we can invalidate them.  It is just too much work.
2224         * Instead we make the checks here on every use.  For example:
2225         *
2226         *      XFRM dst A --> IPv4 dst X
2227         *
2228         * X is the "xdst->route" of A (X is also the "dst->path" of A
2229         * in this example).  If X is marked obsolete, "A" will not
2230         * notice.  That's what we are validating here via the
2231         * stale_bundle() check.
2232         *
2233         * When a policy's bundle is pruned, we dst_free() the XFRM
2234         * dst which causes it's ->obsolete field to be set to a
2235         * positive non-zero integer.  If an XFRM dst has been pruned
2236         * like this, we want to force a new route lookup.
2237         */
2238        if (dst->obsolete < 0 && !stale_bundle(dst))
2239                return dst;
2240
2241        return NULL;
2242}
2243
/* Return non-zero when the bundle headed by @dst fails xfrm_bundle_ok(). */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *bundle = (struct xfrm_dst *)dst;

	return !xfrm_bundle_ok(bundle);
}
2248
2249void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2250{
2251        while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2252                dst->dev = dev_net(dev)->loopback_dev;
2253                dev_hold(dst->dev);
2254                dev_put(dev);
2255        }
2256}
2257EXPORT_SYMBOL(xfrm_dst_ifdown);
2258
/*
 * dst_ops->link_failure hook for XFRM dsts.  Intentionally empty: such a
 * dst must have been popped before the packet reaches the point of failure.
 */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Nothing to do. */
}
2263
2264static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2265{
2266        if (dst) {
2267                if (dst->obsolete) {
2268                        dst_release(dst);
2269                        dst = NULL;
2270                }
2271        }
2272        return dst;
2273}
2274
2275static void __xfrm_garbage_collect(struct net *net)
2276{
2277        struct dst_entry *head, *next;
2278
2279        spin_lock_bh(&xfrm_policy_sk_bundle_lock);
2280        head = xfrm_policy_sk_bundles;
2281        xfrm_policy_sk_bundles = NULL;
2282        spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
2283
2284        while (head) {
2285                next = head->next;
2286                dst_free(head);
2287                head = next;
2288        }
2289}
2290
/* Flush the flow cache, then free the queued per-socket bundles. */
static void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush();
	__xfrm_garbage_collect(net);
}
2296
/*
 * Same as xfrm_garbage_collect() but uses flow_cache_flush_deferred() for
 * the flow cache; installed as the default afinfo->garbage_collect hook.
 */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred();
	__xfrm_garbage_collect(net);
}
2302
2303static void xfrm_init_pmtu(struct dst_entry *dst)
2304{
2305        do {
2306                struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2307                u32 pmtu, route_mtu_cached;
2308
2309                pmtu = dst_mtu(dst->child);
2310                xdst->child_mtu_cached = pmtu;
2311
2312                pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2313
2314                route_mtu_cached = dst_mtu(xdst->route);
2315                xdst->route_mtu_cached = route_mtu_cached;
2316
2317                if (pmtu > route_mtu_cached)
2318                        pmtu = route_mtu_cached;
2319
2320                dst_metric_set(dst, RTAX_MTU, pmtu);
2321        } while ((dst = dst->next));
2322}
2323
2324/* Check that the bundle accepts the flow and its components are
2325 * still valid.
2326 */
2327
2328static int xfrm_bundle_ok(struct xfrm_dst *first)
2329{
2330        struct dst_entry *dst = &first->u.dst;
2331        struct xfrm_dst *last;
2332        u32 mtu;
2333
2334        if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2335            (dst->dev && !netif_running(dst->dev)))
2336                return 0;
2337
2338        last = NULL;
2339
2340        do {
2341                struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2342
2343                if (dst->xfrm->km.state != XFRM_STATE_VALID)
2344                        return 0;
2345                if (xdst->xfrm_genid != dst->xfrm->genid)
2346                        return 0;
2347                if (xdst->num_pols > 0 &&
2348                    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2349                        return 0;
2350
2351                mtu = dst_mtu(dst->child);
2352                if (xdst->child_mtu_cached != mtu) {
2353                        last = xdst;
2354                        xdst->child_mtu_cached = mtu;
2355                }
2356
2357                if (!dst_check(xdst->route, xdst->route_cookie))
2358                        return 0;
2359                mtu = dst_mtu(xdst->route);
2360                if (xdst->route_mtu_cached != mtu) {
2361                        last = xdst;
2362                        xdst->route_mtu_cached = mtu;
2363                }
2364
2365                dst = dst->child;
2366        } while (dst->xfrm);
2367
2368        if (likely(!last))
2369                return 1;
2370
2371        mtu = last->child_mtu_cached;
2372        for (;;) {
2373                dst = &last->u.dst;
2374
2375                mtu = xfrm_state_mtu(dst->xfrm, mtu);
2376                if (mtu > last->route_mtu_cached)
2377                        mtu = last->route_mtu_cached;
2378                dst_metric_set(dst, RTAX_MTU, mtu);
2379
2380                if (last == first)
2381                        break;
2382
2383                last = (struct xfrm_dst *)last->u.dst.next;
2384                last->child_mtu_cached = mtu;
2385        }
2386
2387        return 1;
2388}
2389
2390static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
2391{
2392        return dst_metric_advmss(dst->path);
2393}
2394
2395static unsigned int xfrm_mtu(const struct dst_entry *dst)
2396{
2397        unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2398
2399        return mtu ? : dst_mtu(dst->path);
2400}
2401
2402static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr)
2403{
2404        return dst_neigh_lookup(dst->path, daddr);
2405}
2406
2407int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2408{
2409        struct net *net;
2410        int err = 0;
2411        if (unlikely(afinfo == NULL))
2412                return -EINVAL;
2413        if (unlikely(afinfo->family >= NPROTO))
2414                return -EAFNOSUPPORT;
2415        write_lock_bh(&xfrm_policy_afinfo_lock);
2416        if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2417                err = -ENOBUFS;
2418        else {
2419                struct dst_ops *dst_ops = afinfo->dst_ops;
2420                if (likely(dst_ops->kmem_cachep == NULL))
2421                        dst_ops->kmem_cachep = xfrm_dst_cache;
2422                if (likely(dst_ops->check == NULL))
2423                        dst_ops->check = xfrm_dst_check;
2424                if (likely(dst_ops->default_advmss == NULL))
2425                        dst_ops->default_advmss = xfrm_default_advmss;
2426                if (likely(dst_ops->mtu == NULL))
2427                        dst_ops->mtu = xfrm_mtu;
2428                if (likely(dst_ops->negative_advice == NULL))
2429                        dst_ops->negative_advice = xfrm_negative_advice;
2430                if (likely(dst_ops->link_failure == NULL))
2431                        dst_ops->link_failure = xfrm_link_failure;
2432                if (likely(dst_ops->neigh_lookup == NULL))
2433                        dst_ops->neigh_lookup = xfrm_neigh_lookup;
2434                if (likely(afinfo->garbage_collect == NULL))
2435                        afinfo->garbage_collect = xfrm_garbage_collect_deferred;
2436                xfrm_policy_afinfo[afinfo->family] = afinfo;
2437        }
2438        write_unlock_bh(&xfrm_policy_afinfo_lock);
2439
2440        rtnl_lock();
2441        for_each_net(net) {
2442                struct dst_ops *xfrm_dst_ops;
2443
2444                switch (afinfo->family) {
2445                case AF_INET:
2446                        xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
2447                        break;
2448#if IS_ENABLED(CONFIG_IPV6)
2449                case AF_INET6:
2450                        xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
2451                        break;
2452#endif
2453                default:
2454                        BUG();
2455                }
2456                *xfrm_dst_ops = *afinfo->dst_ops;
2457        }
2458        rtnl_unlock();
2459
2460        return err;
2461}
2462EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2463
2464int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2465{
2466        int err = 0;
2467        if (unlikely(afinfo == NULL))
2468                return -EINVAL;
2469        if (unlikely(afinfo->family >= NPROTO))
2470                return -EAFNOSUPPORT;
2471        write_lock_bh(&xfrm_policy_afinfo_lock);
2472        if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2473                if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2474                        err = -EINVAL;
2475                else {
2476                        struct dst_ops *dst_ops = afinfo->dst_ops;
2477                        xfrm_policy_afinfo[afinfo->family] = NULL;
2478                        dst_ops->kmem_cachep = NULL;
2479                        dst_ops->check = NULL;
2480                        dst_ops->negative_advice = NULL;
2481                        dst_ops->link_failure = NULL;
2482                        afinfo->garbage_collect = NULL;
2483                }
2484        }
2485        write_unlock_bh(&xfrm_policy_afinfo_lock);
2486        return err;
2487}
2488EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2489
2490static void __net_init xfrm_dst_ops_init(struct net *net)
2491{
2492        struct xfrm_policy_afinfo *afinfo;
2493
2494        read_lock_bh(&xfrm_policy_afinfo_lock);
2495        afinfo = xfrm_policy_afinfo[AF_INET];
2496        if (afinfo)
2497                net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
2498#if IS_ENABLED(CONFIG_IPV6)
2499        afinfo = xfrm_policy_afinfo[AF_INET6];
2500        if (afinfo)
2501                net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
2502#endif
2503        read_unlock_bh(&xfrm_policy_afinfo_lock);
2504}
2505
2506static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2507{
2508        struct xfrm_policy_afinfo *afinfo;
2509        if (unlikely(family >= NPROTO))
2510                return NULL;
2511        read_lock(&xfrm_policy_afinfo_lock);
2512        afinfo = xfrm_policy_afinfo[family];
2513        if (unlikely(!afinfo))
2514                read_unlock(&xfrm_policy_afinfo_lock);
2515        return afinfo;
2516}
2517
2518static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2519{
2520        read_unlock(&xfrm_policy_afinfo_lock);
2521}
2522
2523static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2524{
2525        struct net_device *dev = ptr;
2526
2527        switch (event) {
2528        case NETDEV_DOWN:
2529                xfrm_garbage_collect(dev_net(dev));
2530        }
2531        return NOTIFY_DONE;
2532}
2533
2534static struct notifier_block xfrm_dev_notifier = {
2535        .notifier_call  = xfrm_dev_event,
2536};
2537
2538#ifdef CONFIG_XFRM_STATISTICS
2539static int __net_init xfrm_statistics_init(struct net *net)
2540{
2541        int rv;
2542
2543        if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
2544                          sizeof(struct linux_xfrm_mib),
2545                          __alignof__(struct linux_xfrm_mib)) < 0)
2546                return -ENOMEM;
2547        rv = xfrm_proc_init(net);
2548        if (rv < 0)
2549                snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
2550        return rv;
2551}
2552
2553static void xfrm_statistics_fini(struct net *net)
2554{
2555        xfrm_proc_fini(net);
2556        snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
2557}
2558#else
2559static int __net_init xfrm_statistics_init(struct net *net)
2560{
2561        return 0;
2562}
2563
2564static void xfrm_statistics_fini(struct net *net)
2565{
2566}
2567#endif
2568
2569static int __net_init xfrm_policy_init(struct net *net)
2570{
2571        unsigned int hmask, sz;
2572        int dir;
2573
2574        if (net_eq(net, &init_net))
2575                xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2576                                           sizeof(struct xfrm_dst),
2577                                           0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2578                                           NULL);
2579
2580        hmask = 8 - 1;
2581        sz = (hmask+1) * sizeof(struct hlist_head);
2582
2583        net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
2584        if (!net->xfrm.policy_byidx)
2585                goto out_byidx;
2586        net->xfrm.policy_idx_hmask = hmask;
2587
2588        for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2589                struct xfrm_policy_hash *htab;
2590
2591                net->xfrm.policy_count[dir] = 0;
2592                INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
2593
2594                htab = &net->xfrm.policy_bydst[dir];
2595                htab->table = xfrm_hash_alloc(sz);
2596                if (!htab->table)
2597                        goto out_bydst;
2598                htab->hmask = hmask;
2599        }
2600
2601        INIT_LIST_HEAD(&net->xfrm.policy_all);
2602        INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
2603        if (net_eq(net, &init_net))
2604                register_netdevice_notifier(&xfrm_dev_notifier);
2605        return 0;
2606
2607out_bydst:
2608        for (dir--; dir >= 0; dir--) {
2609                struct xfrm_policy_hash *htab;
2610
2611                htab = &net->xfrm.policy_bydst[dir];
2612                xfrm_hash_free(htab->table, sz);
2613        }
2614        xfrm_hash_free(net->xfrm.policy_byidx, sz);
2615out_byidx:
2616        return -ENOMEM;
2617}
2618
2619static void xfrm_policy_fini(struct net *net)
2620{
2621        struct xfrm_audit audit_info;
2622        unsigned int sz;
2623        int dir;
2624
2625        flush_work(&net->xfrm.policy_hash_work);
2626#ifdef CONFIG_XFRM_SUB_POLICY
2627        audit_info.loginuid = -1;
2628        audit_info.sessionid = -1;
2629        audit_info.secid = 0;
2630        xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
2631#endif
2632        audit_info.loginuid = -1;
2633        audit_info.sessionid = -1;
2634        audit_info.secid = 0;
2635        xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
2636
2637        WARN_ON(!list_empty(&net->xfrm.policy_all));
2638
2639        for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2640                struct xfrm_policy_hash *htab;
2641
2642                WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
2643
2644                htab = &net->xfrm.policy_bydst[dir];
2645                sz = (htab->hmask + 1);
2646                WARN_ON(!hlist_empty(htab->table));
2647                xfrm_hash_free(htab->table, sz);
2648        }
2649
2650        sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
2651        WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
2652        xfrm_hash_free(net->xfrm.policy_byidx, sz);
2653}
2654
2655static int __net_init xfrm_net_init(struct net *net)
2656{
2657        int rv;
2658
2659        rv = xfrm_statistics_init(net);
2660        if (rv < 0)
2661                goto out_statistics;
2662        rv = xfrm_state_init(net);
2663        if (rv < 0)
2664                goto out_state;
2665        rv = xfrm_policy_init(net);
2666        if (rv < 0)
2667                goto out_policy;
2668        xfrm_dst_ops_init(net);
2669        rv = xfrm_sysctl_init(net);
2670        if (rv < 0)
2671                goto out_sysctl;
2672        return 0;
2673
2674out_sysctl:
2675        xfrm_policy_fini(net);
2676out_policy:
2677        xfrm_state_fini(net);
2678out_state:
2679        xfrm_statistics_fini(net);
2680out_statistics:
2681        return rv;
2682}
2683
2684static void __net_exit xfrm_net_exit(struct net *net)
2685{
2686        xfrm_sysctl_fini(net);
2687        xfrm_policy_fini(net);
2688        xfrm_state_fini(net);
2689        xfrm_statistics_fini(net);
2690}
2691
2692static struct pernet_operations __net_initdata xfrm_net_ops = {
2693        .init = xfrm_net_init,
2694        .exit = xfrm_net_exit,
2695};
2696
2697void __init xfrm_init(void)
2698{
2699        register_pernet_subsys(&xfrm_net_ops);
2700        xfrm_input_init();
2701}
2702
2703#ifdef CONFIG_AUDITSYSCALL
2704static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2705                                         struct audit_buffer *audit_buf)
2706{
2707        struct xfrm_sec_ctx *ctx = xp->security;
2708        struct xfrm_selector *sel = &xp->selector;
2709
2710        if (ctx)
2711                audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2712                                 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2713
2714        switch(sel->family) {
2715        case AF_INET:
2716                audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
2717                if (sel->prefixlen_s != 32)
2718                        audit_log_format(audit_buf, " src_prefixlen=%d",
2719                                         sel->prefixlen_s);
2720                audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
2721                if (sel->prefixlen_d != 32)
2722                        audit_log_format(audit_buf, " dst_prefixlen=%d",
2723                                         sel->prefixlen_d);
2724                break;
2725        case AF_INET6:
2726                audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
2727                if (sel->prefixlen_s != 128)
2728                        audit_log_format(audit_buf, " src_prefixlen=%d",
2729                                         sel->prefixlen_s);
2730                audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
2731                if (sel->prefixlen_d != 128)
2732                        audit_log_format(audit_buf, " dst_prefixlen=%d",
2733                                         sel->prefixlen_d);
2734                break;
2735        }
2736}
2737
2738void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2739                           uid_t auid, u32 sessionid, u32 secid)
2740{
2741        struct audit_buffer *audit_buf;
2742
2743        audit_buf = xfrm_audit_start("SPD-add");
2744        if (audit_buf == NULL)
2745                return;
2746        xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2747        audit_log_format(audit_buf, " res=%u", result);
2748        xfrm_audit_common_policyinfo(xp, audit_buf);
2749        audit_log_end(audit_buf);
2750}
2751EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2752
2753void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2754                              uid_t auid, u32 sessionid, u32 secid)
2755{
2756        struct audit_buffer *audit_buf;
2757
2758        audit_buf = xfrm_audit_start("SPD-delete");
2759        if (audit_buf == NULL)
2760                return;
2761        xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
2762        audit_log_format(audit_buf, " res=%u", result);
2763        xfrm_audit_common_policyinfo(xp, audit_buf);
2764        audit_log_end(audit_buf);
2765}
2766EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2767#endif
2768
2769#ifdef CONFIG_XFRM_MIGRATE
2770static int xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
2771                                       const struct xfrm_selector *sel_tgt)
2772{
2773        if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2774                if (sel_tgt->family == sel_cmp->family &&
2775                    xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2776                                  sel_cmp->family) == 0 &&
2777                    xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2778                                  sel_cmp->family) == 0 &&
2779                    sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2780                    sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2781                        return 1;
2782                }
2783        } else {
2784                if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2785                        return 1;
2786                }
2787        }
2788        return 0;
2789}
2790
2791static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel,
2792                                                     u8 dir, u8 type)
2793{
2794        struct xfrm_policy *pol, *ret = NULL;
2795        struct hlist_node *entry;
2796        struct hlist_head *chain;
2797        u32 priority = ~0U;
2798
2799        read_lock_bh(&xfrm_policy_lock);
2800        chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir);
2801        hlist_for_each_entry(pol, entry, chain, bydst) {
2802                if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2803                    pol->type == type) {
2804                        ret = pol;
2805                        priority = ret->priority;
2806                        break;
2807                }
2808        }
2809        chain = &init_net.xfrm.policy_inexact[dir];
2810        hlist_for_each_entry(pol, entry, chain, bydst) {
2811                if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2812                    pol->type == type &&
2813                    pol->priority < priority) {
2814                        ret = pol;
2815                        break;
2816                }
2817        }
2818
2819        if (ret)
2820                xfrm_pol_hold(ret);
2821
2822        read_unlock_bh(&xfrm_policy_lock);
2823
2824        return ret;
2825}
2826
2827static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
2828{
2829        int match = 0;
2830
2831        if (t->mode == m->mode && t->id.proto == m->proto &&
2832            (m->reqid == 0 || t->reqid == m->reqid)) {
2833                switch (t->mode) {
2834                case XFRM_MODE_TUNNEL:
2835                case XFRM_MODE_BEET:
2836                        if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2837                                          m->old_family) == 0 &&
2838                            xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2839                                          m->old_family) == 0) {
2840                                match = 1;
2841                        }
2842                        break;
2843                case XFRM_MODE_TRANSPORT:
2844                        /* in case of transport mode, template does not store
2845                           any IP addresses, hence we just compare mode and
2846                           protocol */
2847                        match = 1;
2848                        break;
2849                default:
2850                        break;
2851                }
2852        }
2853        return match;
2854}
2855
2856/* update endpoint address(es) of template(s) */
2857static int xfrm_policy_migrate(struct xfrm_policy *pol,
2858                               struct xfrm_migrate *m, int num_migrate)
2859{
2860        struct xfrm_migrate *mp;
2861        int i, j, n = 0;
2862
2863        write_lock_bh(&pol->lock);
2864        if (unlikely(pol->walk.dead)) {
2865                /* target policy has been deleted */
2866                write_unlock_bh(&pol->lock);
2867                return -ENOENT;
2868        }
2869
2870        for (i = 0; i < pol->xfrm_nr; i++) {
2871                for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2872                        if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2873                                continue;
2874                        n++;
2875                        if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2876                            pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2877                                continue;
2878                        /* update endpoints */
2879                        memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2880                               sizeof(pol->xfrm_vec[i].id.daddr));
2881                        memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2882                               sizeof(pol->xfrm_vec[i].saddr));
2883                        pol->xfrm_vec[i].encap_family = mp->new_family;
2884                        /* flush bundles */
2885                        atomic_inc(&pol->genid);
2886                }
2887        }
2888
2889        write_unlock_bh(&pol->lock);
2890
2891        if (!n)
2892                return -ENODATA;
2893
2894        return 0;
2895}
2896
2897static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
2898{
2899        int i, j;
2900
2901        if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2902                return -EINVAL;
2903
2904        for (i = 0; i < num_migrate; i++) {
2905                if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2906                                   m[i].old_family) == 0) &&
2907                    (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2908                                   m[i].old_family) == 0))
2909                        return -EINVAL;
2910                if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2911                    xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2912                        return -EINVAL;
2913
2914                /* check if there is any duplicated entry */
2915                for (j = i + 1; j < num_migrate; j++) {
2916                        if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2917                                    sizeof(m[i].old_daddr)) &&
2918                            !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2919                                    sizeof(m[i].old_saddr)) &&
2920                            m[i].proto == m[j].proto &&
2921                            m[i].mode == m[j].mode &&
2922                            m[i].reqid == m[j].reqid &&
2923                            m[i].old_family == m[j].old_family)
2924                                return -EINVAL;
2925                }
2926        }
2927
2928        return 0;
2929}
2930
2931int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
2932                 struct xfrm_migrate *m, int num_migrate,
2933                 struct xfrm_kmaddress *k)
2934{
2935        int i, err, nx_cur = 0, nx_new = 0;
2936        struct xfrm_policy *pol = NULL;
2937        struct xfrm_state *x, *xc;
2938        struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2939        struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2940        struct xfrm_migrate *mp;
2941
2942        if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2943                goto out;
2944
2945        /* Stage 1 - find policy */
2946        if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2947                err = -ENOENT;
2948                goto out;
2949        }
2950
2951        /* Stage 2 - find and update state(s) */
2952        for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2953                if ((x = xfrm_migrate_state_find(mp))) {
2954                        x_cur[nx_cur] = x;
2955                        nx_cur++;
2956                        if ((xc = xfrm_state_migrate(x, mp))) {
2957                                x_new[nx_new] = xc;
2958                                nx_new++;
2959                        } else {
2960                                err = -ENODATA;
2961                                goto restore_state;
2962                        }
2963                }
2964        }
2965
2966        /* Stage 3 - update policy */
2967        if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2968                goto restore_state;
2969
2970        /* Stage 4 - delete old state(s) */
2971        if (nx_cur) {
2972                xfrm_states_put(x_cur, nx_cur);
2973                xfrm_states_delete(x_cur, nx_cur);
2974        }
2975
2976        /* Stage 5 - announce */
2977        km_migrate(sel, dir, type, m, num_migrate, k);
2978
2979        xfrm_pol_put(pol);
2980
2981        return 0;
2982out:
2983        return err;
2984
2985restore_state:
2986        if (pol)
2987                xfrm_pol_put(pol);
2988        if (nx_cur)
2989                xfrm_states_put(x_cur, nx_cur);
2990        if (nx_new)
2991                xfrm_states_delete(x_new, nx_new);
2992
2993        return err;
2994}
2995EXPORT_SYMBOL(xfrm_migrate);
2996#endif
2997