linux-old/net/ipv4/udp.c
<<
>>
Prefs
   1/*
   2 * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3 *              operating system.  INET is implemented using the  BSD Socket
   4 *              interface as the means of communication with the user level.
   5 *
   6 *              The User Datagram Protocol (UDP).
   7 *
   8 * Version:     $Id: udp.c,v 1.65 1999/03/21 05:22:49 davem Exp $
   9 *
  10 * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  13 *              Alan Cox, <Alan.Cox@linux.org>
  14 *
  15 * Fixes:
  16 *              Alan Cox        :       verify_area() calls
  17 *              Alan Cox        :       stopped close while in use off icmp
  18 *                                      messages. Not a fix but a botch that
  19 *                                      for udp at least is 'valid'.
  20 *              Alan Cox        :       Fixed icmp handling properly
  21 *              Alan Cox        :       Correct error for oversized datagrams
  22 *              Alan Cox        :       Tidied select() semantics. 
  23 *              Alan Cox        :       udp_err() fixed properly, also now 
  24 *                                      select and read wake correctly on errors
  25 *              Alan Cox        :       udp_send verify_area moved to avoid mem leak
  26 *              Alan Cox        :       UDP can count its memory
  27 *              Alan Cox        :       send to an unknown connection causes
  28 *                                      an ECONNREFUSED off the icmp, but
  29 *                                      does NOT close.
  30 *              Alan Cox        :       Switched to new sk_buff handlers. No more backlog!
  31 *              Alan Cox        :       Using generic datagram code. Even smaller and the PEEK
  32 *                                      bug no longer crashes it.
  33 *              Fred Van Kempen :       Net2e support for sk->broadcast.
  34 *              Alan Cox        :       Uses skb_free_datagram
  35 *              Alan Cox        :       Added get/set sockopt support.
  36 *              Alan Cox        :       Broadcasting without option set returns EACCES.
  37 *              Alan Cox        :       No wakeup calls. Instead we now use the callbacks.
  38 *              Alan Cox        :       Use ip_tos and ip_ttl
  39 *              Alan Cox        :       SNMP Mibs
  40 *              Alan Cox        :       MSG_DONTROUTE, and 0.0.0.0 support.
  41 *              Matt Dillon     :       UDP length checks.
  42 *              Alan Cox        :       Smarter af_inet used properly.
  43 *              Alan Cox        :       Use new kernel side addressing.
  44 *              Alan Cox        :       Incorrect return on truncated datagram receive.
  45 *      Arnt Gulbrandsen        :       New udp_send and stuff
  46 *              Alan Cox        :       Cache last socket
  47 *              Alan Cox        :       Route cache
  48 *              Jon Peatfield   :       Minor efficiency fix to sendto().
  49 *              Mike Shaver     :       RFC1122 checks.
  50 *              Alan Cox        :       Nonblocking error fix.
  51 *      Willy Konynenberg       :       Transparent proxying support.
  52 *              Mike McLagan    :       Routing by source
  53 *              David S. Miller :       New socket lookup architecture.
  54 *                                      Last socket cache retained as it
  55 *                                      does have a high hit rate.
  56 *              Olaf Kirch      :       Don't linearise iovec on sendmsg.
  57 *              Andi Kleen      :       Some cleanups, cache destination entry
  58 *                                      for connect. 
  59 *      Vitaly E. Lavrov        :       Transparent proxy revived after year coma.
  60 *              Melvin Smith    :       Check msg_name not msg_namelen in sendto(),
  61 *                                      return ENOTCONN for unconnected sockets (POSIX)
  62 *              Janos Farkas    :       don't deliver multi/broadcasts to a different
  63 *                                      bound-to-device socket
  64 *
  65 *
  66 *              This program is free software; you can redistribute it and/or
  67 *              modify it under the terms of the GNU General Public License
  68 *              as published by the Free Software Foundation; either version
  69 *              2 of the License, or (at your option) any later version.
  70 */
  71 
  72/* RFC1122 Status:
  73   4.1.3.1 (Ports):
  74     SHOULD send ICMP_PORT_UNREACHABLE in response to datagrams to 
  75       an un-listened port. (OK)
  76   4.1.3.2 (IP Options)
  77     MUST pass IP options from IP -> application (OK)
  78     MUST allow application to specify IP options (OK)
  79   4.1.3.3 (ICMP Messages)
  80     MUST pass ICMP error messages to application (OK -- except when SO_BSDCOMPAT is set)
  81   4.1.3.4 (UDP Checksums)
  82     MUST provide facility for checksumming (OK)
  83     MAY allow application to control checksumming (OK)
  84     MUST default to checksumming on (OK)
  85     MUST discard silently datagrams with bad csums (OK, except during debugging)
  86   4.1.3.5 (UDP Multihoming)
  87     MUST allow application to specify source address (OK)
  88     SHOULD be able to communicate the chosen src addr up to application
  89       when application doesn't choose (DOES - use recvmsg cmsgs)
  90   4.1.3.6 (Invalid Addresses)
  91     MUST discard invalid source addresses (OK -- done in the new routing code)
  92     MUST only send datagrams with one of our addresses (OK)
  93*/
  94
  95#include <asm/system.h>
  96#include <asm/uaccess.h>
  97#include <linux/types.h>
  98#include <linux/fcntl.h>
  99#include <linux/socket.h>
 100#include <linux/sockios.h>
 101#include <linux/in.h>
 102#include <linux/errno.h>
 103#include <linux/timer.h>
 104#include <linux/mm.h>
 105#include <linux/config.h>
 106#include <linux/inet.h>
 107#include <linux/netdevice.h>
 108#include <net/snmp.h>
 109#include <net/ip.h>
 110#include <net/protocol.h>
 111#include <linux/skbuff.h>
 112#include <net/sock.h>
 113#include <net/udp.h>
 114#include <net/icmp.h>
 115#include <net/route.h>
 116#include <net/checksum.h>
 117
 118/*
 119 *      Snmp MIB for the UDP layer
 120 */
 121
  122struct udp_mib          udp_statistics;
  123
  /*
   * UDP socket hash table.  Each slot heads a chain linked through sk->next;
   * the code in this file picks the slot with (port & (UDP_HTABLE_SIZE - 1)).
   */
  124struct sock *udp_hash[UDP_HTABLE_SIZE];
 125
 126static int udp_v4_verify_bind(struct sock *sk, unsigned short snum)
 127{
 128        struct sock *sk2;
 129        int retval = 0, sk_reuse = sk->reuse;
 130
 131        SOCKHASH_LOCK();
 132        for(sk2 = udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; sk2 != NULL; sk2 = sk2->next) {
 133                if((sk2->num == snum) && (sk2 != sk)) {
 134                        unsigned char state = sk2->state;
 135                        int sk2_reuse = sk2->reuse;
 136
 137                        /* Two sockets can be bound to the same port if they're
 138                         * bound to different interfaces.
 139                         */
 140
 141                        if(sk2->bound_dev_if != sk->bound_dev_if)
 142                                continue;
 143
 144                        if(!sk2->rcv_saddr || !sk->rcv_saddr) {
 145                                if((!sk2_reuse)                 ||
 146                                   (!sk_reuse)                  ||
 147                                   (state == TCP_LISTEN)) {
 148                                        retval = 1;
 149                                        break;
 150                                }
 151                        } else if(sk2->rcv_saddr == sk->rcv_saddr) {
 152                                if((!sk_reuse)                  ||
 153                                   (!sk2_reuse)                 ||
 154                                   (state == TCP_LISTEN)) {
 155                                        retval = 1;
 156                                        break;
 157                                }
 158                        }
 159                }
 160        }
 161        SOCKHASH_UNLOCK();
 162        return retval;
 163}
 164
 165static inline int udp_lport_inuse(u16 num)
 166{
 167        struct sock *sk = udp_hash[num & (UDP_HTABLE_SIZE - 1)];
 168
 169        for(; sk != NULL; sk = sk->next) {
 170                if(sk->num == num)
 171                        return 1;
 172        }
 173        return 0;
 174}
 175
 176/* Shared by v4/v6 tcp. */
 177unsigned short udp_good_socknum(void)
 178{
 179        int result;
 180        static int start = 0;
 181        int i, best, best_size_so_far;
 182
 183        SOCKHASH_LOCK();
 184        if (start > sysctl_local_port_range[1] || start < sysctl_local_port_range[0])
 185                start = sysctl_local_port_range[0];
 186
 187        best_size_so_far = 32767;       /* "big" num */
 188        best = result = start;
 189
 190        for(i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
 191                struct sock *sk;
 192                int size;
 193
 194                sk = udp_hash[result & (UDP_HTABLE_SIZE - 1)];
 195
 196                if(!sk) {
 197                        if (result > sysctl_local_port_range[1])
 198                                result = sysctl_local_port_range[0]
 199                                        + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
 200                        goto out;
 201                }
 202
 203                /* Is this one better than our best so far? */
 204                size = 0;
 205                do {
 206                        if(++size >= best_size_so_far)
 207                                goto next;
 208                } while((sk = sk->next) != NULL);
 209                best_size_so_far = size;
 210                best = result;
 211        next:
 212        }
 213
 214        result = best;
 215
 216        for(;; result += UDP_HTABLE_SIZE) {
 217                /* Get into range (but preserve hash bin)... */
 218                if (result > sysctl_local_port_range[1])
 219                        result = sysctl_local_port_range[0]
 220                                + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1));
 221                if (!udp_lport_inuse(result))
 222                        break;
 223        }
 224out:
 225        start = result;
 226        SOCKHASH_UNLOCK();
 227        return result;
 228}
 229
 230/* Last hit UDP socket cache, this is ipv4 specific so make it static. */
 231static u32 uh_cache_saddr, uh_cache_daddr;
 232static u16 uh_cache_dport, uh_cache_sport;
 233static struct sock *uh_cache_sk = NULL;
 234
 235static void udp_v4_hash(struct sock *sk)
 236{
 237        struct sock **skp;
 238        int num = sk->num;
 239
 240        num &= (UDP_HTABLE_SIZE - 1);
 241        skp = &udp_hash[num];
 242
 243        SOCKHASH_LOCK();
 244        sk->next = *skp;
 245        *skp = sk;
 246        sk->hashent = num;
 247        SOCKHASH_UNLOCK();
 248}
 249
 250static void udp_v4_unhash(struct sock *sk)
 251{
 252        struct sock **skp;
 253        int num = sk->num;
 254
 255        num &= (UDP_HTABLE_SIZE - 1);
 256        skp = &udp_hash[num];
 257
 258        SOCKHASH_LOCK();
 259        while(*skp != NULL) {
 260                if(*skp == sk) {
 261                        *skp = sk->next;
 262                        break;
 263                }
 264                skp = &((*skp)->next);
 265        }
 266        if(uh_cache_sk == sk)
 267                uh_cache_sk = NULL;
 268        SOCKHASH_UNLOCK();
 269}
 270
 271static void udp_v4_rehash(struct sock *sk)
 272{
 273        struct sock **skp;
 274        int num = sk->num;
 275        int oldnum = sk->hashent;
 276
 277        num &= (UDP_HTABLE_SIZE - 1);
 278        skp = &udp_hash[oldnum];
 279
 280        SOCKHASH_LOCK();
 281        while(*skp != NULL) {
 282                if(*skp == sk) {
 283                        *skp = sk->next;
 284                        break;
 285                }
 286                skp = &((*skp)->next);
 287        }
 288        sk->next = udp_hash[num];
 289        udp_hash[num] = sk;
 290        sk->hashent = num;
 291        if(uh_cache_sk == sk)
 292                uh_cache_sk = NULL;
 293        SOCKHASH_UNLOCK();
 294}
 295
 296/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
 297 * harder than this here plus the last hit cache. -DaveM
 298 */
 299struct sock *udp_v4_lookup_longway(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
 300{
 301        struct sock *sk, *result = NULL;
 302        unsigned short hnum = ntohs(dport);
 303        int badness = -1;
 304
 305        for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) {
 306                if((sk->num == hnum) && !(sk->dead && (sk->state == TCP_CLOSE))) {
 307                        int score = 0;
 308                        if(sk->rcv_saddr) {
 309                                if(sk->rcv_saddr != daddr)
 310                                        continue;
 311                                score++;
 312                        }
 313                        if(sk->daddr) {
 314                                if(sk->daddr != saddr)
 315                                        continue;
 316                                score++;
 317                        }
 318                        if(sk->dport) {
 319                                if(sk->dport != sport)
 320                                        continue;
 321                                score++;
 322                        }
 323                        if(sk->bound_dev_if) {
 324                                if(sk->bound_dev_if != dif)
 325                                        continue;
 326                                score++;
 327                        }
 328                        if(score == 4) {
 329                                result = sk;
 330                                break;
 331                        } else if(score > badness) {
 332                                result = sk;
 333                                badness = score;
 334                        }
 335                }
 336        }
 337        return result;
 338}
 339
 340__inline__ struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif)
 341{
 342        struct sock *sk;
 343
 344        if(!dif && uh_cache_sk          &&
 345           uh_cache_saddr == saddr      &&
 346           uh_cache_sport == sport      &&
 347           uh_cache_dport == dport      &&
 348           uh_cache_daddr == daddr)
 349                return uh_cache_sk;
 350
 351        sk = udp_v4_lookup_longway(saddr, sport, daddr, dport, dif);
 352        if(!dif) {
 353                uh_cache_sk     = sk;
 354                uh_cache_saddr  = saddr;
 355                uh_cache_daddr  = daddr;
 356                uh_cache_sport  = sport;
 357                uh_cache_dport  = dport;
 358        }
 359        return sk;
 360}
 361
 362#ifdef CONFIG_IP_TRANSPARENT_PROXY
 363#define secondlist(hpnum, sk, fpass) \
 364({ struct sock *s1; if(!(sk) && (fpass)--) \
 365        s1 = udp_hash[(hpnum) & (UDP_HTABLE_SIZE - 1)]; \
 366   else \
 367        s1 = (sk); \
 368   s1; \
 369})
 370
 371#define udp_v4_proxy_loop_init(hnum, hpnum, sk, fpass) \
 372        secondlist((hpnum), udp_hash[(hnum)&(UDP_HTABLE_SIZE-1)],(fpass))
 373
 374#define udp_v4_proxy_loop_next(hnum, hpnum, sk, fpass) \
 375        secondlist((hpnum),(sk)->next,(fpass))
 376
 377static struct sock *udp_v4_proxy_lookup(unsigned short num, unsigned long raddr,
 378                                        unsigned short rnum, unsigned long laddr,
 379                                        struct device *dev, unsigned short pnum,
 380                                        int dif)
 381{
 382        struct sock *s, *result = NULL;
 383        int badness = -1;
 384        u32 paddr = 0;
 385        unsigned short hnum = ntohs(num);
 386        unsigned short hpnum = ntohs(pnum);
 387        int firstpass = 1;
 388
 389        if(dev && dev->ip_ptr) {
 390                struct in_device *idev = dev->ip_ptr;
 391
 392                if(idev->ifa_list)
 393                        paddr = idev->ifa_list->ifa_local;
 394        }
 395
 396        SOCKHASH_LOCK();
 397        for(s = udp_v4_proxy_loop_init(hnum, hpnum, s, firstpass);
 398            s != NULL;
 399            s = udp_v4_proxy_loop_next(hnum, hpnum, s, firstpass)) {
 400                if(s->num == hnum || s->num == hpnum) {
 401                        int score = 0;
 402                        if(s->dead && (s->state == TCP_CLOSE))
 403                                continue;
 404                        if(s->rcv_saddr) {
 405                                if((s->num != hpnum || s->rcv_saddr != paddr) &&
 406                                   (s->num != hnum || s->rcv_saddr != laddr))
 407                                        continue;
 408                                score++;
 409                        }
 410                        if(s->daddr) {
 411                                if(s->daddr != raddr)
 412                                        continue;
 413                                score++;
 414                        }
 415                        if(s->dport) {
 416                                if(s->dport != rnum)
 417                                        continue;
 418                                score++;
 419                        }
 420                        if(s->bound_dev_if) {
 421                                if(s->bound_dev_if != dif)
 422                                        continue;
 423                                score++;
 424                        }
 425                        if(score == 4 && s->num == hnum) {
 426                                result = s;
 427                                break;
 428                        } else if(score > badness && (s->num == hpnum || s->rcv_saddr)) {
 429                                        result = s;
 430                                        badness = score;
 431                        }
 432                }
 433        }
 434        SOCKHASH_UNLOCK();
 435        return result;
 436}
 437
 438#undef secondlist
 439#undef udp_v4_proxy_loop_init
 440#undef udp_v4_proxy_loop_next
 441
 442#endif
 443
 444static inline struct sock *udp_v4_mcast_next(struct sock *sk,
 445                                             unsigned short num,
 446                                             unsigned long raddr,
 447                                             unsigned short rnum,
 448                                             unsigned long laddr,
 449                                             int dif)
 450{
 451        struct sock *s = sk;
 452        unsigned short hnum = ntohs(num);
 453        for(; s; s = s->next) {
 454                if ((s->num != hnum)                                    ||
 455                    (s->dead && (s->state == TCP_CLOSE))                ||
 456                    (s->daddr && s->daddr!=raddr)                       ||
 457                    (s->dport != rnum && s->dport != 0)                 ||
 458                    (s->rcv_saddr  && s->rcv_saddr != laddr)            ||
 459                    (s->bound_dev_if && s->bound_dev_if != dif))
 460                        continue;
 461                break;
 462        }
 463        return s;
 464}
 465
 466/*
 467 * This routine is called by the ICMP module when it gets some
 468 * sort of error condition.  If err < 0 then the socket should
 469 * be closed and the error returned to the user.  If err > 0
 470 * it's just the icmp type << 8 | icmp code.  
 471 * Header points to the ip header of the error packet. We move
 472 * on past this. Then (as it used to claim before adjustment)
 473 * header points to the first 8 bytes of the udp header.  We need
 474 * to find the appropriate port.
 475 */
 476
 477void udp_err(struct sk_buff *skb, unsigned char *dp, int len)
 478{
 479        struct iphdr *iph = (struct iphdr*)dp;
 480        struct udphdr *uh = (struct udphdr*)(dp+(iph->ihl<<2));
 481        int type = skb->h.icmph->type;
 482        int code = skb->h.icmph->code;
 483        struct sock *sk;
 484        int harderr;
 485        u32 info;
 486        int err;
 487
 488        if (len < (iph->ihl<<2)+sizeof(struct udphdr)) {
 489                icmp_statistics.IcmpInErrors++;
 490                return;
 491        }
 492
 493        sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex);
 494        if (sk == NULL) {
 495                icmp_statistics.IcmpInErrors++;
 496                return; /* No socket for error */
 497        }
 498
 499        err = 0;
 500        info = 0;
 501        harderr = 0;
 502
 503        switch (type) {
 504        default:
 505        case ICMP_TIME_EXCEEDED:
 506                err = EHOSTUNREACH;
 507                break;
 508        case ICMP_SOURCE_QUENCH:
 509                return;
 510        case ICMP_PARAMETERPROB:
 511                err = EPROTO;
 512                info = ntohl(skb->h.icmph->un.gateway)>>24;
 513                harderr = 1;
 514                break;
 515        case ICMP_DEST_UNREACH:
 516                if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
 517                        if (sk->ip_pmtudisc != IP_PMTUDISC_DONT) {
 518                                err = EMSGSIZE;
 519                                info = ntohs(skb->h.icmph->un.frag.mtu);
 520                                harderr = 1;
 521                                break;
 522                        }
 523                        return;
 524                }
 525                err = EHOSTUNREACH;
 526                if (code <= NR_ICMP_UNREACH) {
 527                        harderr = icmp_err_convert[code].fatal;
 528                        err = icmp_err_convert[code].errno;
 529                }
 530                break;
 531        }
 532
 533        /*
 534         *      Various people wanted BSD UDP semantics. Well they've come 
 535         *      back out because they slow down response to stuff like dead
 536         *      or unreachable name servers and they screw term users something
 537         *      chronic. Oh and it violates RFC1122. So basically fix your 
 538         *      client code people.
 539         */
 540         
 541        /*
 542         *      RFC1122: OK.  Passes ICMP errors back to application, as per 
 543         *      4.1.3.3. After the comment above, that should be no surprise. 
 544         */
 545
 546        if (!harderr && !sk->ip_recverr)
 547                return;
 548
 549        /*
 550         *      4.x BSD compatibility item. Break RFC1122 to
 551         *      get BSD socket semantics.
 552         */
 553        if(sk->bsdism && sk->state!=TCP_ESTABLISHED)
 554                return;
 555
 556        if (sk->ip_recverr)
 557                ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
 558        sk->err = err;
 559        sk->error_report(sk);
 560}
 561
 562
 563static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
 564{
 565        return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
 566}
 567
 568struct udpfakehdr 
 569{
 570        struct udphdr uh;
 571        u32 saddr;
 572        u32 daddr;
 573        struct iovec *iov;
 574        u32 wcheck;
 575};
 576
 577/*
 578 *      Copy and checksum a UDP packet from user space into a buffer. We still have
 579 *      to do the planning to get ip_build_xmit to spot direct transfer to network
 580 *      card and provide an additional callback mode for direct user->board I/O
 581 *      transfers. That one will be fun.
 582 */
 583 
 584static int udp_getfrag(const void *p, char * to, unsigned int offset, unsigned int fraglen) 
 585{
 586        struct udpfakehdr *ufh = (struct udpfakehdr *)p;
 587        if (offset==0) {
 588                if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
 589                                                   fraglen-sizeof(struct udphdr), &ufh->wcheck))
 590                        return -EFAULT;
 591                ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
 592                                           ufh->wcheck);
 593                ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr, 
 594                                          ntohs(ufh->uh.len),
 595                                          IPPROTO_UDP, ufh->wcheck);
 596                if (ufh->uh.check == 0)
 597                        ufh->uh.check = -1;
 598                memcpy(to, ufh, sizeof(struct udphdr));
 599                return 0;
 600        }
 601        if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
 602                                           fraglen, &ufh->wcheck))
 603                return -EFAULT;
 604        return 0;
 605}
 606
 607/*
 608 *      Unchecksummed UDP is sufficiently critical to stuff like ATM video conferencing
 609 *      that we use two routines for this for speed. Probably we ought to have a
 610 *      CONFIG_FAST_NET set for >10Mb/second boards to activate this sort of coding.
 611 *      Timing needed to verify if this is a valid decision.
 612 */
 613 
 614static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset, unsigned int fraglen) 
 615{
 616        struct udpfakehdr *ufh = (struct udpfakehdr *)p;
 617
 618        if (offset==0) {
 619                memcpy(to, ufh, sizeof(struct udphdr));
 620                return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
 621                                           fraglen-sizeof(struct udphdr));
 622        }
 623        return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
 624                                   fraglen);
 625}
 626
 627int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
 628{
 629        int ulen = len + sizeof(struct udphdr);
 630        struct ipcm_cookie ipc;
 631        struct udpfakehdr ufh;
 632        struct rtable *rt = NULL;
 633        int free = 0;
 634        int connected = 0;
 635        u32 daddr;
 636        u8  tos;
 637        int err;
 638
 639        /* This check is ONLY to check for arithmetic overflow
 640           on integer(!) len. Not more! Real check will be made
 641           in ip_build_xmit --ANK
 642
 643           BTW socket.c -> af_*.c -> ... make multiple
 644           invalid conversions size_t -> int. We MUST repair it f.e.
 645           by replacing all of them with size_t and revise all
 646           the places sort of len += sizeof(struct iphdr)
 647           If len was ULONG_MAX-10 it would be cathastrophe  --ANK
 648         */
 649
 650        if (len < 0 || len > 0xFFFF)
 651                return -EMSGSIZE;
 652
 653        /* 
 654         *      Check the flags.
 655         */
 656
 657        if (msg->msg_flags&MSG_OOB)     /* Mirror BSD error message compatibility */
 658                return -EOPNOTSUPP;
 659
 660#ifdef CONFIG_IP_TRANSPARENT_PROXY
 661        if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT|MSG_PROXY|MSG_NOSIGNAL))
 662                return -EINVAL;
 663        if ((msg->msg_flags&MSG_PROXY) && !capable(CAP_NET_ADMIN))
 664                return -EPERM;
 665#else
 666        if (msg->msg_flags&~(MSG_DONTROUTE|MSG_DONTWAIT|MSG_NOSIGNAL))
 667                return -EINVAL;
 668#endif
 669
 670        /*
 671         *      Get and verify the address. 
 672         */
 673         
 674        if (msg->msg_name) {
 675                struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
 676                if (msg->msg_namelen < sizeof(*usin))
 677                        return(-EINVAL);
 678                if (usin->sin_family != AF_INET) {
 679                        static int complained;
 680                        if (!complained++)
 681                                printk(KERN_WARNING "%s forgot to set AF_INET in udp sendmsg. Fix it!\n", current->comm);
 682                        if (usin->sin_family)
 683                                return -EINVAL;
 684                }
 685                ufh.daddr = usin->sin_addr.s_addr;
 686                ufh.uh.dest = usin->sin_port;
 687                if (ufh.uh.dest == 0)
 688                        return -EINVAL;
 689        } else {
 690                if (sk->state != TCP_ESTABLISHED)
 691                        return -ENOTCONN;
 692                ufh.daddr = sk->daddr;
 693                ufh.uh.dest = sk->dport;
 694                /* Open fast path for connected socket.
 695                   Route will not be used, if at least one option is set.
 696                 */
 697                connected = 1;
 698        }
 699#ifdef CONFIG_IP_TRANSPARENT_PROXY
 700        if (msg->msg_flags&MSG_PROXY) {
 701                /*
 702                 * We map the first 8 bytes of a second sockaddr_in
 703                 * into the last 8 (unused) bytes of a sockaddr_in.
 704                 */
 705                struct sockaddr_in *from = (struct sockaddr_in *)msg->msg_name;
 706                from = (struct sockaddr_in *)&from->sin_zero;
 707                if (from->sin_family != AF_INET)
 708                        return -EINVAL;
 709                ipc.addr = from->sin_addr.s_addr;
 710                ufh.uh.source = from->sin_port;
 711                if (ipc.addr == 0)
 712                        ipc.addr = sk->saddr;
 713                connected = 0;
 714        } else
 715#endif
 716        {
 717                ipc.addr = sk->saddr;
 718                ufh.uh.source = sk->sport;
 719        }
 720
 721        ipc.opt = NULL;
 722        ipc.oif = sk->bound_dev_if;
 723        if (msg->msg_controllen) {
 724                err = ip_cmsg_send(msg, &ipc);
 725                if (err)
 726                        return err;
 727                if (ipc.opt)
 728                        free = 1;
 729                connected = 0;
 730        }
 731        if (!ipc.opt)
 732                ipc.opt = sk->opt;
 733
 734        ufh.saddr = ipc.addr;
 735        ipc.addr = daddr = ufh.daddr;
 736
 737        if (ipc.opt && ipc.opt->srr) {
 738                if (!daddr)
 739                        return -EINVAL;
 740                daddr = ipc.opt->faddr;
 741                connected = 0;
 742        }
 743        tos = RT_TOS(sk->ip_tos);
 744        if (sk->localroute || (msg->msg_flags&MSG_DONTROUTE) || 
 745            (ipc.opt && ipc.opt->is_strictroute)) {
 746                tos |= RTO_ONLINK;
 747                connected = 0;
 748        }
 749
 750        if (MULTICAST(daddr)) {
 751                if (!ipc.oif)
 752                        ipc.oif = sk->ip_mc_index;
 753                if (!ufh.saddr)
 754                        ufh.saddr = sk->ip_mc_addr;
 755                connected = 0;
 756        }
 757
 758        if (connected)
 759                rt = (struct rtable*)dst_clone(sk->dst_cache);
 760
 761        if (rt == NULL) {
 762                err = ip_route_output(&rt, daddr, ufh.saddr,
 763#ifdef CONFIG_IP_TRANSPARENT_PROXY
 764                        (msg->msg_flags&MSG_PROXY ? RTO_TPROXY : 0) |
 765#endif
 766                         tos, ipc.oif);
 767                if (err) 
 768                        goto out;
 769
 770                err = -EACCES;
 771                if (rt->rt_flags&RTCF_BROADCAST && !sk->broadcast) 
 772                        goto out;
 773        }
 774
 775        ufh.saddr = rt->rt_src;
 776        if (!ipc.addr)
 777                ufh.daddr = ipc.addr = rt->rt_dst;
 778        ufh.uh.len = htons(ulen);
 779        ufh.uh.check = 0;
 780        ufh.iov = msg->msg_iov;
 781        ufh.wcheck = 0;
 782
 783        /* RFC1122: OK.  Provides the checksumming facility (MUST) as per */
 784        /* 4.1.3.4. It's configurable by the application via setsockopt() */
 785        /* (MAY) and it defaults to on (MUST). */
 786
 787        err = ip_build_xmit(sk,sk->no_check ? udp_getfrag_nosum : udp_getfrag,
 788                            &ufh, ulen, &ipc, rt, msg->msg_flags);
 789
 790out:
 791        ip_rt_put(rt);
 792        if (free)
 793                kfree(ipc.opt);
 794        if (!err) {
 795                udp_statistics.UdpOutDatagrams++;
 796                return len;
 797        }
 798        return err;
 799}
 800
 801/*
 802 *      IOCTL requests applicable to the UDP protocol
 803 */
 804 
 805int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 806{
 807        switch(cmd) 
 808        {
 809                case TIOCOUTQ:
 810                {
 811                        unsigned long amount;
 812
 813                        if (sk->state == TCP_LISTEN) return(-EINVAL);
 814                        amount = sock_wspace(sk);
 815                        return put_user(amount, (int *)arg);
 816                }
 817
 818                case TIOCINQ:
 819                {
 820                        struct sk_buff *skb;
 821                        unsigned long amount;
 822
 823                        if (sk->state == TCP_LISTEN)
 824                                return(-EINVAL);
 825                        amount = 0;
 826                        /* N.B. Is this interrupt safe??
 827                           -> Yes. Interrupts do not remove skbs. --ANK (980725)
 828                         */
 829                        skb = skb_peek(&sk->receive_queue);
 830                        if (skb != NULL) {
 831                                /*
 832                                 * We will only return the amount
 833                                 * of this packet since that is all
 834                                 * that will be read.
 835                                 */
 836                                amount = skb->len - sizeof(struct udphdr);
 837                        }
 838                        return put_user(amount, (int *)arg);
 839                }
 840
 841                default:
 842                        return(-ENOIOCTLCMD);
 843        }
 844        return(0);
 845}
 846
 847#ifndef HAVE_CSUM_COPY_USER
 848#undef CONFIG_UDP_DELAY_CSUM
 849#endif
 850
 851/*
 852 *      This should be easy, if there is something there we
 853 *      return it, otherwise we block.
 854 */
 855
 856int udp_recvmsg(struct sock *sk, struct msghdr *msg, int len,
 857                int noblock, int flags, int *addr_len)
 858{
 859        struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
 860        struct sk_buff *skb;
 861        int copied, err;
 862
 863        /*
 864         *      Check any passed addresses
 865         */
 866        if (addr_len) 
 867                *addr_len=sizeof(*sin);
 868
 869        if (flags & MSG_ERRQUEUE)
 870                return ip_recv_error(sk, msg, len);
 871
 872        /*
 873         *      From here the generic datagram does a lot of the work. Come
 874         *      the finished NET3, it will do _ALL_ the work!
 875         */
 876
 877        skb = skb_recv_datagram(sk, flags, noblock, &err);
 878        if (!skb)
 879                goto out;
 880  
 881        copied = skb->len - sizeof(struct udphdr);
 882        if (copied > len) {
 883                copied = len;
 884                msg->msg_flags |= MSG_TRUNC;
 885        }
 886
 887#ifndef CONFIG_UDP_DELAY_CSUM
 888        err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
 889                                        copied);
 890#else
 891        if (skb->ip_summed==CHECKSUM_UNNECESSARY) {
 892                err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
 893                                              copied);
 894        } else if (copied > msg->msg_iov[0].iov_len || (msg->msg_flags&MSG_TRUNC)) {
 895                if ((unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum))) 
 896                        goto csum_copy_err;
 897                err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov,
 898                                              copied);
 899        } else {
 900                unsigned int csum;
 901
 902                err = 0;
 903                csum = csum_partial(skb->h.raw, sizeof(struct udphdr), skb->csum);
 904                csum = csum_and_copy_to_user((char*)&skb->h.uh[1], msg->msg_iov[0].iov_base, 
 905                                             copied, csum, &err);
 906                if (err)
 907                        goto out_free;
 908                if ((unsigned short)csum_fold(csum)) 
 909                        goto csum_copy_err;
 910        }
 911#endif
 912        if (err)
 913                goto out_free;
 914        sk->stamp=skb->stamp;
 915
 916        /* Copy the address. */
 917        if (sin)
 918        {
 919                sin->sin_family = AF_INET;
 920                sin->sin_port = skb->h.uh->source;
 921                sin->sin_addr.s_addr = skb->nh.iph->saddr;
 922#ifdef CONFIG_IP_TRANSPARENT_PROXY
 923                if (flags&MSG_PROXY)
 924                {
 925                        /*
 926                         * We map the first 8 bytes of a second sockaddr_in
 927                         * into the last 8 (unused) bytes of a sockaddr_in.
 928                         * This _is_ ugly, but it's the only way to do it
 929                         * easily,  without adding system calls.
 930                         */
 931                        struct sockaddr_in *sinto =
 932                                (struct sockaddr_in *) sin->sin_zero;
 933
 934                        sinto->sin_family = AF_INET;
 935                        sinto->sin_port = skb->h.uh->dest;
 936                        sinto->sin_addr.s_addr = skb->nh.iph->daddr;
 937                }
 938#endif
 939        }
 940        if (sk->ip_cmsg_flags)
 941                ip_cmsg_recv(msg, skb);
 942        err = copied;
 943  
 944out_free:
 945        skb_free_datagram(sk, skb);
 946out:
 947        return err;
 948
 949#ifdef CONFIG_UDP_DELAY_CSUM
 950csum_copy_err:
 951        udp_statistics.UdpInErrors++;
 952        skb_free_datagram(sk, skb);
 953
 954        /* 
 955         * Error for blocking case is chosen to masquerade
 956         * as some normal condition.
 957         */
 958        return (msg->msg_flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH; 
 959#endif
 960}
 961
 962int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 963{
 964        struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
 965        struct rtable *rt;
 966        int err;
 967
 968        
 969        if (addr_len < sizeof(*usin)) 
 970                return(-EINVAL);
 971
 972        /*
 973         *      1003.1g - break association.
 974         */
 975         
 976        if (usin->sin_family==AF_UNSPEC)
 977        {
 978                sk->saddr=INADDR_ANY;
 979                sk->rcv_saddr=INADDR_ANY;
 980                sk->daddr=INADDR_ANY;
 981                sk->state = TCP_CLOSE;
 982                if(uh_cache_sk == sk)
 983                        uh_cache_sk = NULL;
 984                return 0;
 985        }
 986
 987        if (usin->sin_family && usin->sin_family != AF_INET) 
 988                return(-EAFNOSUPPORT);
 989
 990        dst_release(xchg(&sk->dst_cache, NULL));
 991
 992        err = ip_route_connect(&rt, usin->sin_addr.s_addr, sk->saddr,
 993                               sk->ip_tos|sk->localroute, sk->bound_dev_if);
 994        if (err)
 995                return err;
 996        if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
 997                ip_rt_put(rt);
 998                return -EACCES;
 999        }
1000        if(!sk->saddr)
1001                sk->saddr = rt->rt_src;         /* Update source address */
1002        if(!sk->rcv_saddr)
1003                sk->rcv_saddr = rt->rt_src;
1004        sk->daddr = rt->rt_dst;
1005        sk->dport = usin->sin_port;
1006        sk->state = TCP_ESTABLISHED;
1007
1008        if(uh_cache_sk == sk)
1009                uh_cache_sk = NULL;
1010
1011        sk->dst_cache = &rt->u.dst;
1012        return(0);
1013}
1014
1015
1016static void udp_close(struct sock *sk, long timeout)
1017{
1018        /* See for explanation: raw_close in ipv4/raw.c */
1019        sk->state = TCP_CLOSE;
1020        udp_v4_unhash(sk);
1021        sk->dead = 1;
1022        destroy_sock(sk);
1023}
1024
1025static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
1026{
1027        /*
1028         *      Charge it to the socket, dropping if the queue is full.
1029         */
1030
1031#if defined(CONFIG_FILTER) && defined(CONFIG_UDP_DELAY_CSUM)
1032        if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
1033                if ((unsigned short)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum))) {
1034                        udp_statistics.UdpInErrors++;
1035                        ip_statistics.IpInDiscards++;
1036                        ip_statistics.IpInDelivers--;
1037                        kfree_skb(skb);
1038                        return -1;
1039                }
1040                skb->ip_summed = CHECKSUM_UNNECESSARY;
1041        }
1042#endif
1043
1044        if (sock_queue_rcv_skb(sk,skb)<0) {
1045                udp_statistics.UdpInErrors++;
1046                ip_statistics.IpInDiscards++;
1047                ip_statistics.IpInDelivers--;
1048                kfree_skb(skb);
1049                return -1;
1050        }
1051        udp_statistics.UdpInDatagrams++;
1052        return 0;
1053}
1054
1055
/*
 *	Hand a datagram to a socket.  Thin wrapper around
 *	udp_queue_rcv_skb(); its result is intentionally not propagated.
 */
static inline void udp_deliver(struct sock *sk, struct sk_buff *skb)
{
	(void) udp_queue_rcv_skb(sk, skb);
}
1060
1061/*
1062 *      Multicasts and broadcasts go to each listener.
1063 *
1064 *      Note: called only from the BH handler context,
1065 *      so we don't need to lock the hashes.
1066 */
1067static int udp_v4_mcast_deliver(struct sk_buff *skb, struct udphdr *uh,
1068                                 u32 saddr, u32 daddr)
1069{
1070        struct sock *sk;
1071        int dif;
1072
1073        sk = udp_hash[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)];
1074        dif = skb->dev->ifindex;
1075        sk = udp_v4_mcast_next(sk, uh->dest, saddr, uh->source, daddr, dif);
1076        if (sk) {
1077                struct sock *sknext = NULL;
1078
1079                do {
1080                        struct sk_buff *skb1 = skb;
1081
1082                        sknext = udp_v4_mcast_next(sk->next, uh->dest, saddr,
1083                                                   uh->source, daddr, dif);
1084                        if(sknext)
1085                                skb1 = skb_clone(skb, GFP_ATOMIC);
1086
1087                        if(skb1)
1088                                udp_deliver(sk, skb1);
1089                        sk = sknext;
1090                } while(sknext);
1091        } else
1092                kfree_skb(skb);
1093        return 0;
1094}
1095
#ifdef CONFIG_IP_TRANSPARENT_PROXY
/*
 *	Check whether a received UDP packet might be for one of our
 *	sockets.
 *
 *	Returns 1 when a socket matches the packet and that socket is
 *	bound to a specific receive address, 0 otherwise.
 */

int udp_chkaddr(struct sk_buff *skb)
{
	struct iphdr *iph = skb->nh.iph;
	/* The UDP header sits right behind the (variable length) IP header. */
	struct udphdr *uh = (struct udphdr *)(skb->nh.raw + iph->ihl*4);
	struct sock *match;

	match = udp_v4_lookup(iph->saddr, uh->source, iph->daddr, uh->dest,
			      skb->dev->ifindex);

	/* 0 means accept all LOCAL addresses here, not all the world... */
	return match != NULL && match->rcv_saddr != 0;
}
#endif
1119
1120/*
1121 *      All we need to do is get the socket, and then do a checksum. 
1122 */
1123 
1124int udp_rcv(struct sk_buff *skb, unsigned short len)
1125{
1126        struct sock *sk;
1127        struct udphdr *uh;
1128        unsigned short ulen;
1129        struct rtable *rt = (struct rtable*)skb->dst;
1130        u32 saddr = skb->nh.iph->saddr;
1131        u32 daddr = skb->nh.iph->daddr;
1132
1133        /*
1134         * First time through the loop.. Do all the setup stuff
1135         * (including finding out the socket we go to etc)
1136         */
1137
1138        /*
1139         *      Get the header.
1140         */
1141         
1142        uh = skb->h.uh;
1143        __skb_pull(skb, skb->h.raw - skb->data);
1144
1145        ip_statistics.IpInDelivers++;
1146
1147        /*
1148         *      Validate the packet and the UDP length.
1149         */
1150         
1151        ulen = ntohs(uh->len);
1152
1153        if (ulen > len || ulen < sizeof(*uh)) {
1154                NETDEBUG(printk(KERN_DEBUG "UDP: short packet: %d/%d\n", ulen, len));
1155                udp_statistics.UdpInErrors++;
1156                kfree_skb(skb);
1157                return(0);
1158        }
1159        skb_trim(skb, ulen);
1160
1161#ifndef CONFIG_UDP_DELAY_CSUM
1162        if (uh->check &&
1163            (((skb->ip_summed==CHECKSUM_HW)&&udp_check(uh,ulen,saddr,daddr,skb->csum)) ||
1164             ((skb->ip_summed==CHECKSUM_NONE) &&
1165              (udp_check(uh,ulen,saddr,daddr, csum_partial((char*)uh, ulen, 0)))))) 
1166                goto csum_error;
1167#else
1168        if (uh->check==0)
1169                skb->ip_summed = CHECKSUM_UNNECESSARY;
1170        else if (skb->ip_summed==CHECKSUM_HW) {
1171                if (udp_check(uh,ulen,saddr,daddr,skb->csum)) 
1172                        goto csum_error;
1173                skb->ip_summed = CHECKSUM_UNNECESSARY;
1174        } else if (skb->ip_summed != CHECKSUM_UNNECESSARY)
1175                skb->csum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);
1176#endif
1177
1178        if(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
1179                return udp_v4_mcast_deliver(skb, uh, saddr, daddr);
1180
1181#ifdef CONFIG_IP_TRANSPARENT_PROXY
1182        if (IPCB(skb)->redirport)
1183                sk = udp_v4_proxy_lookup(uh->dest, saddr, uh->source,
1184                                         daddr, skb->dev, IPCB(skb)->redirport,
1185                                         skb->dev->ifindex);
1186        else
1187#endif
1188        sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
1189        
1190        if (sk == NULL) {
1191#ifdef CONFIG_UDP_DELAY_CSUM
1192                if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
1193                    (unsigned short)csum_fold(csum_partial((char*)uh, ulen, skb->csum))) 
1194                        goto csum_error;
1195#endif
1196                udp_statistics.UdpNoPorts++;
1197                icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
1198
1199                /*
1200                 * Hmm.  We got an UDP broadcast to a port to which we
1201                 * don't wanna listen.  Ignore it.
1202                 */
1203                kfree_skb(skb);
1204                return(0);
1205        }
1206        udp_deliver(sk, skb);
1207        return 0;
1208
1209csum_error:
1210        /* 
1211         * RFC1122: OK.  Discards the bad packet silently (as far as 
1212         * the network is concerned, anyway) as per 4.1.3.4 (MUST). 
1213         */
1214        NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
1215                        NIPQUAD(saddr),
1216                        ntohs(uh->source),
1217                        NIPQUAD(daddr),
1218                        ntohs(uh->dest),
1219                        ulen));
1220        udp_statistics.UdpInErrors++;
1221        kfree_skb(skb);
1222        return(0);
1223}
1224
1225struct proto udp_prot = {
1226        (struct sock *)&udp_prot,       /* sklist_next */
1227        (struct sock *)&udp_prot,       /* sklist_prev */
1228        udp_close,                      /* close */
1229        udp_connect,                    /* connect */
1230        NULL,                           /* accept */
1231        NULL,                           /* retransmit */
1232        NULL,                           /* write_wakeup */
1233        NULL,                           /* read_wakeup */
1234        datagram_poll,                  /* poll */
1235        udp_ioctl,                      /* ioctl */
1236        NULL,                           /* init */
1237        NULL,                           /* destroy */
1238        NULL,                           /* shutdown */
1239        ip_setsockopt,                  /* setsockopt */
1240        ip_getsockopt,                  /* getsockopt */
1241        udp_sendmsg,                    /* sendmsg */
1242        udp_recvmsg,                    /* recvmsg */
1243        NULL,                           /* bind */
1244        udp_queue_rcv_skb,              /* backlog_rcv */
1245        udp_v4_hash,                    /* hash */
1246        udp_v4_unhash,                  /* unhash */
1247        udp_v4_rehash,                  /* rehash */
1248        udp_good_socknum,               /* good_socknum */
1249        udp_v4_verify_bind,             /* verify_bind */
1250        128,                            /* max_header */
1251        0,                              /* retransmits */
1252        "UDP",                          /* name */
1253        0,                              /* inuse */
1254        0                               /* highestinuse */
1255};
1256
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.