linux/net/ipv4/ipvs/ip_vs_proto_udp.c
<<
>>
Prefs
   1/*
   2 * ip_vs_proto_udp.c:   UDP load balancing support for IPVS
   3 *
   4 * Version:     $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
   5 *
   6 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
   7 *              Julian Anastasov <ja@ssi.bg>
   8 *
   9 *              This program is free software; you can redistribute it and/or
  10 *              modify it under the terms of the GNU General Public License
  11 *              as published by the Free Software Foundation; either version
  12 *              2 of the License, or (at your option) any later version.
  13 *
  14 * Changes:
  15 *
  16 */
  17
  18#include <linux/in.h>
  19#include <linux/ip.h>
  20#include <linux/kernel.h>
  21#include <linux/netfilter.h>
  22#include <linux/netfilter_ipv4.h>
  23#include <linux/udp.h>
  24
  25#include <net/ip_vs.h>
  26#include <net/ip.h>
  27
  28static struct ip_vs_conn *
  29udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  30                const struct iphdr *iph, unsigned int proto_off, int inverse)
  31{
  32        struct ip_vs_conn *cp;
  33        __be16 _ports[2], *pptr;
  34
  35        pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
  36        if (pptr == NULL)
  37                return NULL;
  38
  39        if (likely(!inverse)) {
  40                cp = ip_vs_conn_in_get(iph->protocol,
  41                                       iph->saddr, pptr[0],
  42                                       iph->daddr, pptr[1]);
  43        } else {
  44                cp = ip_vs_conn_in_get(iph->protocol,
  45                                       iph->daddr, pptr[1],
  46                                       iph->saddr, pptr[0]);
  47        }
  48
  49        return cp;
  50}
  51
  52
  53static struct ip_vs_conn *
  54udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  55                 const struct iphdr *iph, unsigned int proto_off, int inverse)
  56{
  57        struct ip_vs_conn *cp;
  58        __be16 _ports[2], *pptr;
  59
  60        pptr = skb_header_pointer(skb, ip_hdrlen(skb),
  61                                  sizeof(_ports), _ports);
  62        if (pptr == NULL)
  63                return NULL;
  64
  65        if (likely(!inverse)) {
  66                cp = ip_vs_conn_out_get(iph->protocol,
  67                                        iph->saddr, pptr[0],
  68                                        iph->daddr, pptr[1]);
  69        } else {
  70                cp = ip_vs_conn_out_get(iph->protocol,
  71                                        iph->daddr, pptr[1],
  72                                        iph->saddr, pptr[0]);
  73        }
  74
  75        return cp;
  76}
  77
  78
  79static int
  80udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp,
  81                  int *verdict, struct ip_vs_conn **cpp)
  82{
  83        struct ip_vs_service *svc;
  84        struct udphdr _udph, *uh;
  85
  86        uh = skb_header_pointer(skb, ip_hdrlen(skb),
  87                                sizeof(_udph), &_udph);
  88        if (uh == NULL) {
  89                *verdict = NF_DROP;
  90                return 0;
  91        }
  92
  93        if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
  94                                     ip_hdr(skb)->daddr, uh->dest))) {
  95                if (ip_vs_todrop()) {
  96                        /*
  97                         * It seems that we are very loaded.
  98                         * We have to drop this packet :(
  99                         */
 100                        ip_vs_service_put(svc);
 101                        *verdict = NF_DROP;
 102                        return 0;
 103                }
 104
 105                /*
 106                 * Let the virtual server select a real server for the
 107                 * incoming connection, and create a connection entry.
 108                 */
 109                *cpp = ip_vs_schedule(svc, skb);
 110                if (!*cpp) {
 111                        *verdict = ip_vs_leave(svc, skb, pp);
 112                        return 0;
 113                }
 114                ip_vs_service_put(svc);
 115        }
 116        return 1;
 117}
 118
 119
 120static inline void
 121udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip,
 122                     __be16 oldport, __be16 newport)
 123{
 124        uhdr->check =
 125                csum_fold(ip_vs_check_diff4(oldip, newip,
 126                                 ip_vs_check_diff2(oldport, newport,
 127                                        ~csum_unfold(uhdr->check))));
 128        if (!uhdr->check)
 129                uhdr->check = CSUM_MANGLED_0;
 130}
 131
 132static int
 133udp_snat_handler(struct sk_buff *skb,
 134                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 135{
 136        struct udphdr *udph;
 137        const unsigned int udphoff = ip_hdrlen(skb);
 138
 139        /* csum_check requires unshared skb */
 140        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 141                return 0;
 142
 143        if (unlikely(cp->app != NULL)) {
 144                /* Some checks before mangling */
 145                if (pp->csum_check && !pp->csum_check(skb, pp))
 146                        return 0;
 147
 148                /*
 149                 *      Call application helper if needed
 150                 */
 151                if (!ip_vs_app_pkt_out(cp, skb))
 152                        return 0;
 153        }
 154
 155        udph = (void *)ip_hdr(skb) + udphoff;
 156        udph->source = cp->vport;
 157
 158        /*
 159         *      Adjust UDP checksums
 160         */
 161        if (!cp->app && (udph->check != 0)) {
 162                /* Only port and addr are changed, do fast csum update */
 163                udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
 164                                     cp->dport, cp->vport);
 165                if (skb->ip_summed == CHECKSUM_COMPLETE)
 166                        skb->ip_summed = CHECKSUM_NONE;
 167        } else {
 168                /* full checksum calculation */
 169                udph->check = 0;
 170                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 171                udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
 172                                                skb->len - udphoff,
 173                                                cp->protocol, skb->csum);
 174                if (udph->check == 0)
 175                        udph->check = CSUM_MANGLED_0;
 176                IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 177                          pp->name, udph->check,
 178                          (char*)&(udph->check) - (char*)udph);
 179        }
 180        return 1;
 181}
 182
 183
 184static int
 185udp_dnat_handler(struct sk_buff *skb,
 186                 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
 187{
 188        struct udphdr *udph;
 189        unsigned int udphoff = ip_hdrlen(skb);
 190
 191        /* csum_check requires unshared skb */
 192        if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
 193                return 0;
 194
 195        if (unlikely(cp->app != NULL)) {
 196                /* Some checks before mangling */
 197                if (pp->csum_check && !pp->csum_check(skb, pp))
 198                        return 0;
 199
 200                /*
 201                 *      Attempt ip_vs_app call.
 202                 *      It will fix ip_vs_conn
 203                 */
 204                if (!ip_vs_app_pkt_in(cp, skb))
 205                        return 0;
 206        }
 207
 208        udph = (void *)ip_hdr(skb) + udphoff;
 209        udph->dest = cp->dport;
 210
 211        /*
 212         *      Adjust UDP checksums
 213         */
 214        if (!cp->app && (udph->check != 0)) {
 215                /* Only port and addr are changed, do fast csum update */
 216                udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
 217                                     cp->vport, cp->dport);
 218                if (skb->ip_summed == CHECKSUM_COMPLETE)
 219                        skb->ip_summed = CHECKSUM_NONE;
 220        } else {
 221                /* full checksum calculation */
 222                udph->check = 0;
 223                skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
 224                udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
 225                                                skb->len - udphoff,
 226                                                cp->protocol, skb->csum);
 227                if (udph->check == 0)
 228                        udph->check = CSUM_MANGLED_0;
 229                skb->ip_summed = CHECKSUM_UNNECESSARY;
 230        }
 231        return 1;
 232}
 233
 234
 235static int
 236udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
 237{
 238        struct udphdr _udph, *uh;
 239        const unsigned int udphoff = ip_hdrlen(skb);
 240
 241        uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
 242        if (uh == NULL)
 243                return 0;
 244
 245        if (uh->check != 0) {
 246                switch (skb->ip_summed) {
 247                case CHECKSUM_NONE:
 248                        skb->csum = skb_checksum(skb, udphoff,
 249                                                 skb->len - udphoff, 0);
 250                case CHECKSUM_COMPLETE:
 251                        if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
 252                                              ip_hdr(skb)->daddr,
 253                                              skb->len - udphoff,
 254                                              ip_hdr(skb)->protocol,
 255                                              skb->csum)) {
 256                                IP_VS_DBG_RL_PKT(0, pp, skb, 0,
 257                                                 "Failed checksum for");
 258                                return 0;
 259                        }
 260                        break;
 261                default:
 262                        /* No need to checksum. */
 263                        break;
 264                }
 265        }
 266        return 1;
 267}
 268
 269
 270/*
 271 *      Note: the caller guarantees that only one of register_app,
 272 *      unregister_app or app_conn_bind is called each time.
 273 */
 274
 275#define UDP_APP_TAB_BITS        4
 276#define UDP_APP_TAB_SIZE        (1 << UDP_APP_TAB_BITS)
 277#define UDP_APP_TAB_MASK        (UDP_APP_TAB_SIZE - 1)
 278
 279static struct list_head udp_apps[UDP_APP_TAB_SIZE];
 280static DEFINE_SPINLOCK(udp_app_lock);
 281
 282static inline __u16 udp_app_hashkey(__be16 port)
 283{
 284        return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
 285                & UDP_APP_TAB_MASK;
 286}
 287
 288
 289static int udp_register_app(struct ip_vs_app *inc)
 290{
 291        struct ip_vs_app *i;
 292        __u16 hash;
 293        __be16 port = inc->port;
 294        int ret = 0;
 295
 296        hash = udp_app_hashkey(port);
 297
 298
 299        spin_lock_bh(&udp_app_lock);
 300        list_for_each_entry(i, &udp_apps[hash], p_list) {
 301                if (i->port == port) {
 302                        ret = -EEXIST;
 303                        goto out;
 304                }
 305        }
 306        list_add(&inc->p_list, &udp_apps[hash]);
 307        atomic_inc(&ip_vs_protocol_udp.appcnt);
 308
 309  out:
 310        spin_unlock_bh(&udp_app_lock);
 311        return ret;
 312}
 313
 314
 315static void
 316udp_unregister_app(struct ip_vs_app *inc)
 317{
 318        spin_lock_bh(&udp_app_lock);
 319        atomic_dec(&ip_vs_protocol_udp.appcnt);
 320        list_del(&inc->p_list);
 321        spin_unlock_bh(&udp_app_lock);
 322}
 323
 324
 325static int udp_app_conn_bind(struct ip_vs_conn *cp)
 326{
 327        int hash;
 328        struct ip_vs_app *inc;
 329        int result = 0;
 330
 331        /* Default binding: bind app only for NAT */
 332        if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
 333                return 0;
 334
 335        /* Lookup application incarnations and bind the right one */
 336        hash = udp_app_hashkey(cp->vport);
 337
 338        spin_lock(&udp_app_lock);
 339        list_for_each_entry(inc, &udp_apps[hash], p_list) {
 340                if (inc->port == cp->vport) {
 341                        if (unlikely(!ip_vs_app_inc_get(inc)))
 342                                break;
 343                        spin_unlock(&udp_app_lock);
 344
 345                        IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
 346                                  "%u.%u.%u.%u:%u to app %s on port %u\n",
 347                                  __func__,
 348                                  NIPQUAD(cp->caddr), ntohs(cp->cport),
 349                                  NIPQUAD(cp->vaddr), ntohs(cp->vport),
 350                                  inc->name, ntohs(inc->port));
 351                        cp->app = inc;
 352                        if (inc->init_conn)
 353                                result = inc->init_conn(inc, cp);
 354                        goto out;
 355                }
 356        }
 357        spin_unlock(&udp_app_lock);
 358
 359  out:
 360        return result;
 361}
 362
 363
 364static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
 365        [IP_VS_UDP_S_NORMAL]            =       5*60*HZ,
 366        [IP_VS_UDP_S_LAST]              =       2*HZ,
 367};
 368
 369static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
 370        [IP_VS_UDP_S_NORMAL]            =       "UDP",
 371        [IP_VS_UDP_S_LAST]              =       "BUG!",
 372};
 373
 374
 375static int
 376udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
 377{
 378        return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
 379                                       udp_state_name_table, sname, to);
 380}
 381
 382static const char * udp_state_name(int state)
 383{
 384        if (state >= IP_VS_UDP_S_LAST)
 385                return "ERR!";
 386        return udp_state_name_table[state] ? udp_state_name_table[state] : "?";
 387}
 388
 389static int
 390udp_state_transition(struct ip_vs_conn *cp, int direction,
 391                     const struct sk_buff *skb,
 392                     struct ip_vs_protocol *pp)
 393{
 394        cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
 395        return 1;
 396}
 397
 398static void udp_init(struct ip_vs_protocol *pp)
 399{
 400        IP_VS_INIT_HASH_TABLE(udp_apps);
 401        pp->timeout_table = udp_timeouts;
 402}
 403
 404static void udp_exit(struct ip_vs_protocol *pp)
 405{
 406}
 407
 408
 409struct ip_vs_protocol ip_vs_protocol_udp = {
 410        .name =                 "UDP",
 411        .protocol =             IPPROTO_UDP,
 412        .num_states =           IP_VS_UDP_S_LAST,
 413        .dont_defrag =          0,
 414        .init =                 udp_init,
 415        .exit =                 udp_exit,
 416        .conn_schedule =        udp_conn_schedule,
 417        .conn_in_get =          udp_conn_in_get,
 418        .conn_out_get =         udp_conn_out_get,
 419        .snat_handler =         udp_snat_handler,
 420        .dnat_handler =         udp_dnat_handler,
 421        .csum_check =           udp_csum_check,
 422        .state_transition =     udp_state_transition,
 423        .state_name =           udp_state_name,
 424        .register_app =         udp_register_app,
 425        .unregister_app =       udp_unregister_app,
 426        .app_conn_bind =        udp_app_conn_bind,
 427        .debug_packet =         ip_vs_tcpudp_debug_packet,
 428        .timeout_change =       NULL,
 429        .set_state_timeout =    udp_set_state_timeout,
 430};
 431
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.