1/* 2 * NET3: Implementation of the ICMP protocol layer. 3 * 4 * Alan Cox, <alan@cymru.net> 5 * 6 * Version: $Id: icmp.c,v 1.52 1999/03/21 12:04:11 davem Exp $ 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 * 13 * Some of the function names and the icmp unreach table for this 14 * module were derived from [icmp.c 1.0.11 06/02/93] by 15 * Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting. 16 * Other than that this module is a complete rewrite. 17 * 18 * Fixes: 19 * Mike Shaver : RFC1122 checks. 20 * Alan Cox : Multicast ping reply as self. 21 * Alan Cox : Fix atomicity lockup in ip_build_xmit 22 * call. 23 * Alan Cox : Added 216,128 byte paths to the MTU 24 * code. 25 * Martin Mares : RFC1812 checks. 26 * Martin Mares : Can be configured to follow redirects 27 * if acting as a router _without_ a 28 * routing protocol (RFC 1812). 29 * Martin Mares : Echo requests may be configured to 30 * be ignored (RFC 1812). 31 * Martin Mares : Limitation of ICMP error message 32 * transmit rate (RFC 1812). 33 * Martin Mares : TOS and Precedence set correctly 34 * (RFC 1812). 35 * Martin Mares : Now copying as much data from the 36 * original packet as we can without 37 * exceeding 576 bytes (RFC 1812). 38 * Willy Konynenberg : Transparent proxying support. 39 * Keith Owens : RFC1191 correction for 4.2BSD based 40 * path MTU bug. 41 * Thomas Quinot : ICMP Dest Unreach codes up to 15 are 42 * valid (RFC 1812). 43 * Andi Kleen : Check all packet lengths properly 44 * and moved all kfree_skb() up to 45 * icmp_rcv. 46 * Andi Kleen : Move the rate limit bookkeeping 47 * into the dest entry and use a token 48 * bucket filter (thanks to ANK). Make 49 * the rates sysctl configurable. 
50 * Yu Tianli : Fixed two ugly bugs in icmp_send 51 * - IP option length was accounted wrongly 52 * - ICMP header length was not accounted at all. 53 * Tristan Greaves : Added sysctl option to ignore bogus broadcast 54 * responses from broken routers. 55 * 56 * To Fix: 57 * 58 * - Should use skb_pull() instead of all the manual checking. 59 * This would also greatly simply some upper layer error handlers. --AK 60 * 61 * RFC1122 (Host Requirements -- Comm. Layer) Status: 62 * (boy, are there a lot of rules for ICMP) 63 * 3.2.2 (Generic ICMP stuff) 64 * MUST discard messages of unknown type. (OK) 65 * MUST copy at least the first 8 bytes from the offending packet 66 * when sending ICMP errors. (OBSOLETE -- see RFC1812) 67 * MUST pass received ICMP errors up to protocol level. (OK) 68 * SHOULD send ICMP errors with TOS == 0. (OBSOLETE -- see RFC1812) 69 * MUST NOT send ICMP errors in reply to: 70 * ICMP errors (OK) 71 * Broadcast/multicast datagrams (OK) 72 * MAC broadcasts (OK) 73 * Non-initial fragments (OK) 74 * Datagram with a source address that isn't a single host. (OK) 75 * 3.2.2.1 (Destination Unreachable) 76 * All the rules govern the IP layer, and are dealt with in ip.c, not here. 77 * 3.2.2.2 (Redirect) 78 * Host SHOULD NOT send ICMP_REDIRECTs. (OK) 79 * MUST update routing table in response to host or network redirects. 80 * (host OK, network OBSOLETE) 81 * SHOULD drop redirects if they're not from directly connected gateway 82 * (OK -- we drop it if it's not from our old gateway, which is close 83 * enough) 84 * 3.2.2.3 (Source Quench) 85 * MUST pass incoming SOURCE_QUENCHs to transport layer (OK) 86 * Other requirements are dealt with at the transport layer. 87 * 3.2.2.4 (Time Exceeded) 88 * MUST pass TIME_EXCEEDED to transport layer (OK) 89 * Other requirements dealt with at IP (generating TIME_EXCEEDED). 
90 * 3.2.2.5 (Parameter Problem) 91 * SHOULD generate these (OK) 92 * MUST pass received PARAMPROBLEM to transport layer (NOT YET) 93 * [Solaris 2.X seems to assert EPROTO when this occurs] -- AC 94 * 3.2.2.6 (Echo Request/Reply) 95 * MUST reply to ECHO_REQUEST, and give app to do ECHO stuff (OK, OK) 96 * MAY discard broadcast ECHO_REQUESTs. (Configurable with a sysctl.) 97 * MUST reply using same source address as the request was sent to. 98 * We're OK for unicast ECHOs, and it doesn't say anything about 99 * how to handle broadcast ones, since it's optional. 100 * MUST copy data from REQUEST to REPLY (OK) 101 * unless it would require illegal fragmentation (OK) 102 * MUST pass REPLYs to transport/user layer (OK) 103 * MUST use any provided source route (reversed) for REPLY. (NOT YET) 104 * 3.2.2.7 (Information Request/Reply) 105 * MUST NOT implement this. (I guess that means silently discard...?) (OK) 106 * 3.2.2.8 (Timestamp Request/Reply) 107 * MAY implement (OK) 108 * SHOULD be in-kernel for "minimum variability" (OK) 109 * MAY discard broadcast REQUESTs. (OK, but see source for inconsistency) 110 * MUST reply using same source address as the request was sent to. (OK) 111 * MUST reverse source route, as per ECHO (NOT YET) 112 * MUST pass REPLYs to transport/user layer (requires RAW, just like 113 * ECHO) (OK) 114 * MUST update clock for timestamp at least 15 times/sec (OK) 115 * MUST be "correct within a few minutes" (OK) 116 * 3.2.2.9 (Address Mask Request/Reply) 117 * MAY implement (OK) 118 * MUST send a broadcast REQUEST if using this system to set netmask 119 * (OK... we don't use it) 120 * MUST discard received REPLYs if not using this system (OK) 121 * MUST NOT send replies unless specifically made agent for this sort 122 * of thing. 
(OK) 123 * 124 * 125 * RFC 1812 (IPv4 Router Requirements) Status (even longer): 126 * 4.3.2.1 (Unknown Message Types) 127 * MUST pass messages of unknown type to ICMP user iface or silently discard 128 * them (OK) 129 * 4.3.2.2 (ICMP Message TTL) 130 * MUST initialize TTL when originating an ICMP message (OK) 131 * 4.3.2.3 (Original Message Header) 132 * SHOULD copy as much data from the offending packet as possible without 133 * the length of the ICMP datagram exceeding 576 bytes (OK) 134 * MUST leave original IP header of the offending packet, but we're not 135 * required to undo modifications made (OK) 136 * 4.3.2.4 (Original Message Source Address) 137 * MUST use one of addresses for the interface the orig. packet arrived as 138 * source address (OK) 139 * 4.3.2.5 (TOS and Precedence) 140 * SHOULD leave TOS set to the same value unless the packet would be 141 * discarded for that reason (OK) 142 * MUST use TOS=0 if not possible to leave original value (OK) 143 * MUST leave IP Precedence for Source Quench messages (OK -- not sent 144 * at all) 145 * SHOULD use IP Precedence = 6 (Internetwork Control) or 7 (Network Control) 146 * for all other error messages (OK, we use 6) 147 * MAY allow configuration of IP Precedence (OK -- not done) 148 * MUST leave IP Precedence and TOS for reply messages (OK) 149 * 4.3.2.6 (Source Route) 150 * SHOULD use reverse source route UNLESS sending Parameter Problem on source 151 * routing and UNLESS the packet would be immediately discarded (NOT YET) 152 * 4.3.2.7 (When Not to Send ICMP Errors) 153 * MUST NOT send ICMP errors in reply to: 154 * ICMP errors (OK) 155 * Packets failing IP header validation tests unless otherwise noted (OK) 156 * Broadcast/multicast datagrams (OK) 157 * MAC broadcasts (OK) 158 * Non-initial fragments (OK) 159 * Datagram with a source address that isn't a single host. 
(OK) 160 * 4.3.2.8 (Rate Limiting) 161 * SHOULD be able to limit error message rate (OK) 162 * SHOULD allow setting of rate limits (OK, in the source) 163 * 4.3.3.1 (Destination Unreachable) 164 * All the rules govern the IP layer, and are dealt with in ip.c, not here. 165 * 4.3.3.2 (Redirect) 166 * MAY ignore ICMP Redirects if running a routing protocol or if forwarding 167 * is enabled on the interface (OK -- ignores) 168 * 4.3.3.3 (Source Quench) 169 * SHOULD NOT originate SQ messages (OK) 170 * MUST be able to limit SQ rate if originates them (OK as we don't 171 * send them) 172 * MAY ignore SQ messages it receives (OK -- we don't) 173 * 4.3.3.4 (Time Exceeded) 174 * Requirements dealt with at IP (generating TIME_EXCEEDED). 175 * 4.3.3.5 (Parameter Problem) 176 * MUST generate these for all errors not covered by other messages (OK) 177 * MUST include original value of the value pointed by (OK) 178 * 4.3.3.6 (Echo Request) 179 * MUST implement echo server function (OK) 180 * MUST process at ER of at least max(576, MTU) (OK) 181 * MAY reject broadcast/multicast ER's (We don't, but that's OK) 182 * SHOULD have a config option for silently ignoring ER's (OK) 183 * MUST have a default value for the above switch = NO (OK) 184 * MUST have application layer interface for Echo Request/Reply (OK) 185 * MUST reply using same source address as the request was sent to. 186 * We're OK for unicast ECHOs, and it doesn't say anything about 187 * how to handle broadcast ones, since it's optional. 188 * MUST copy data from Request to Reply (OK) 189 * SHOULD update Record Route / Timestamp options (??) 190 * MUST use reversed Source Route for Reply if possible (NOT YET) 191 * 4.3.3.7 (Information Request/Reply) 192 * SHOULD NOT originate or respond to these (OK) 193 * 4.3.3.8 (Timestamp / Timestamp Reply) 194 * MAY implement (OK) 195 * MUST reply to every Timestamp message received (OK) 196 * MAY discard broadcast REQUESTs. 
(OK, but see source for inconsistency) 197 * MUST reply using same source address as the request was sent to. (OK) 198 * MUST use reversed Source Route if possible (NOT YET) 199 * SHOULD update Record Route / Timestamp options (??) 200 * MUST pass REPLYs to transport/user layer (requires RAW, just like 201 * ECHO) (OK) 202 * MUST update clock for timestamp at least 16 times/sec (OK) 203 * MUST be "correct within a few minutes" (OK) 204 * 4.3.3.9 (Address Mask Request/Reply) 205 * MUST have support for receiving AMRq and responding with AMRe (OK, 206 * but only as a compile-time option) 207 * SHOULD have option for each interface for AMRe's, MUST default to 208 * NO (NOT YET) 209 * MUST NOT reply to AMRq before knows the correct AM (OK) 210 * MUST NOT respond to AMRq with source address 0.0.0.0 on physical 211 * interfaces having multiple logical i-faces with different masks 212 * (NOT YET) 213 * SHOULD examine all AMRe's it receives and check them (NOT YET) 214 * SHOULD log invalid AMRe's (AM+sender) (NOT YET) 215 * MUST NOT use contents of AMRe to determine correct AM (OK) 216 * MAY broadcast AMRe's after having configured address masks (OK -- doesn't) 217 * MUST NOT do broadcast AMRe's if not set by extra option (OK, no option) 218 * MUST use the { <NetPrefix>, -1 } form of broadcast addresses (OK) 219 * 4.3.3.10 (Router Advertisement and Solicitations) 220 * MUST support router part of Router Discovery Protocol on all networks we 221 * support broadcast or multicast addressing. 
(OK -- done by gated) 222 * MUST have all config parameters with the respective defaults (OK) 223 * 5.2.7.1 (Destination Unreachable) 224 * MUST generate DU's (OK) 225 * SHOULD choose a best-match response code (OK) 226 * SHOULD NOT generate Host Isolated codes (OK) 227 * SHOULD use Communication Administratively Prohibited when administratively 228 * filtering packets (NOT YET -- bug-to-bug compatibility) 229 * MAY include config option for not generating the above and silently 230 * discard the packets instead (OK) 231 * MAY include config option for not generating Precedence Violation and 232 * Precedence Cutoff messages (OK as we don't generate them at all) 233 * MUST use Host Unreachable or Dest. Host Unknown codes whenever other hosts 234 * on the same network might be reachable (OK -- no net unreach's at all) 235 * MUST use new form of Fragmentation Needed and DF Set messages (OK) 236 * 5.2.7.2 (Redirect) 237 * MUST NOT generate network redirects (OK) 238 * MUST be able to generate host redirects (OK) 239 * SHOULD be able to generate Host+TOS redirects (NO as we don't use TOS) 240 * MUST have an option to use Host redirects instead of Host+TOS ones (OK as 241 * no Host+TOS Redirects are used) 242 * MUST NOT generate redirects unless forwarding to the same i-face and the 243 * dest. address is on the same subnet as the src. address and no source 244 * routing is in use. 
(OK)
 *	MUST NOT follow redirects when using a routing protocol (OK)
 *	MAY use redirects if not using a routing protocol (OK, compile-time option)
 *	MUST comply to Host Requirements when not acting as a router (OK)
 *  5.2.7.3 (Time Exceeded)
 *	MUST generate Time Exceeded Code 0 when discarding packet due to TTL=0 (OK)
 *	MAY have a per-interface option to disable origination of TE messages, but
 *	  it MUST default to "originate" (OK -- we don't support it)
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/protocol.h>
#include <net/icmp.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <net/snmp.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/init.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <net/checksum.h>

#ifdef CONFIG_IP_MASQUERADE
#include <net/ip_masq.h>
#endif

/* NOTE: evaluates its arguments more than once; avoid side effects. */
#define min(a,b)	((a)<(b)?(a):(b))

/*
 *	Statistics
 */

struct icmp_mib icmp_statistics;

/*
 * Errno values for destination-unreachable messages, indexed by ICMP
 * code (the per-entry comments name the code each row belongs to).
 * The second member is 0 or 1; presumably it marks the error as
 * fatal -- confirm against the struct icmp_err declaration.
 *
 * RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED
 * MUST be considered 'transient errs'.
 */

struct icmp_err icmp_err_convert[] = {
  { ENETUNREACH,	0 },	/*	ICMP_NET_UNREACH	*/
  { EHOSTUNREACH,	0 },	/*	ICMP_HOST_UNREACH	*/
  { ENOPROTOOPT,	1 },	/*	ICMP_PROT_UNREACH	*/
  { ECONNREFUSED,	1 },	/*	ICMP_PORT_UNREACH	*/
  { EMSGSIZE,		0 },	/*	ICMP_FRAG_NEEDED	*/
  { EOPNOTSUPP,		0 },	/*	ICMP_SR_FAILED		*/
  { ENETUNREACH,	1 },	/* 	ICMP_NET_UNKNOWN	*/
  { EHOSTDOWN,		1 },	/*	ICMP_HOST_UNKNOWN	*/
  { ENONET,		1 },	/*	ICMP_HOST_ISOLATED	*/
  { ENETUNREACH,	1 },	/*	ICMP_NET_ANO		*/
  { EHOSTUNREACH,	1 },	/*	ICMP_HOST_ANO		*/
  { ENETUNREACH,	0 },	/*	ICMP_NET_UNR_TOS	*/
  { EHOSTUNREACH,	0 },	/*	ICMP_HOST_UNR_TOS	*/
  { EHOSTUNREACH,	1 },	/*	ICMP_PKT_FILTERED	*/
  { EHOSTUNREACH,	1 },	/*	ICMP_PREC_VIOLATION	*/
  { EHOSTUNREACH,	1 }	/*	ICMP_PREC_CUTOFF	*/
};

/* Control parameters for ECHO replies. */
int sysctl_icmp_echo_ignore_all = 0;
int sysctl_icmp_echo_ignore_broadcasts = 0;

/* Control parameter - ignore bogus broadcast responses? */
int sysctl_icmp_ignore_bogus_error_responses =0;

/*
 *	ICMP control array. This specifies what to do with each ICMP.
 *	One entry per ICMP message type; the table below is indexed by
 *	type and has NR_ICMP_TYPES+1 slots.
 */

struct icmp_control
{
	unsigned long *output;		/* Address to increment on output */
	unsigned long *input;		/* Address to increment on input */
	void (*handler)(struct icmphdr *icmph, struct sk_buff *skb, int len);
	short   error;		/* This ICMP is classed as an error message */
	int *timeout; /* Rate limit; a NULL pointer means "not rate limited" */
};

static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];

/*
 *	Build xmit assembly blocks
 *
 *	Parameter block handed to icmp_glue_bits() while an outgoing
 *	message is assembled.
 */

struct icmp_bxm
{
	void *data_ptr;			/* Payload that follows the ICMP header */
	int data_len;			/* Payload length, ICMP header not included */
	struct icmphdr icmph;		/* Header of the message being built */
	unsigned long csum;		/* Running checksum across fragments */
	struct ip_options replyopts;	/* IP options for the outgoing packet */
	unsigned char  optbuf[40];	/* presumably backing store for replyopts -- TODO confirm */
};

/*
 *	The ICMP socket. This is the most convenient way to flow control
 *	our ICMP output as well as maintain a clean interface throughout
 *	all layers. All Socketless IP sends will soon be gone.
356 */ 357 358struct inode icmp_inode; 359struct socket *icmp_socket=&icmp_inode.u.socket_i; 360 361/* 362 * Send an ICMP frame. 363 */ 364 365/* 366 * Check transmit rate limitation for given message. 367 * The rate information is held in the destination cache now. 368 * This function is generic and could be used for other purposes 369 * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. 370 * 371 * Note that the same dst_entry fields are modified by functions in 372 * route.c too, but these work for packet destinations while xrlim_allow 373 * works for icmp destinations. This means the rate limiting information 374 * for one "ip object" is shared. 375 * 376 * Note that the same dst_entry fields are modified by functions in 377 * route.c too, but these work for packet destinations while xrlim_allow 378 * works for icmp destinations. This means the rate limiting information 379 * for one "ip object" is shared - and these ICMPs are twice limited: 380 * by source and by destination. 381 * 382 * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate 383 * SHOULD allow setting of rate limits 384 * 385 * Shared between ICMPv4 and ICMPv6. 386 */ 387#define XRLIM_BURST_FACTOR 6 388int xrlim_allow(struct dst_entry *dst, int timeout) 389{ 390 unsigned long now; 391 392 now = jiffies; 393 dst->rate_tokens += now - dst->rate_last; 394 dst->rate_last = now; 395 if (dst->rate_tokens > XRLIM_BURST_FACTOR*timeout) 396 dst->rate_tokens = XRLIM_BURST_FACTOR*timeout; 397 if (dst->rate_tokens >= timeout) { 398 dst->rate_tokens -= timeout; 399 return 1; 400 } 401 return 0; 402} 403 404static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code) 405{ 406 struct dst_entry *dst = &rt->u.dst; 407 408 if (type > NR_ICMP_TYPES || !icmp_pointers[type].timeout) 409 return 1; 410 411 /* Don't limit PMTU discovery. 
*/ 412 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) 413 return 1; 414 415 /* Redirect has its own rate limit mechanism */ 416 if (type == ICMP_REDIRECT) 417 return 1; 418 419 /* No rate limit on loopback */ 420 if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) 421 return 1; 422 423 return xrlim_allow(dst, *(icmp_pointers[type].timeout)); 424} 425 426/* 427 * Maintain the counters used in the SNMP statistics for outgoing ICMP 428 */ 429 430static void icmp_out_count(int type) 431{ 432 if (type>NR_ICMP_TYPES) 433 return; 434 (*icmp_pointers[type].output)++; 435 icmp_statistics.IcmpOutMsgs++; 436} 437 438/* 439 * Checksum each fragment, and on the first include the headers and final checksum. 440 */ 441 442static int icmp_glue_bits(const void *p, char *to, unsigned int offset, unsigned int fraglen) 443{ 444 struct icmp_bxm *icmp_param = (struct icmp_bxm *)p; 445 struct icmphdr *icmph; 446 unsigned long csum; 447 448 if (offset) { 449 icmp_param->csum=csum_partial_copy(icmp_param->data_ptr+offset-sizeof(struct icmphdr), 450 to, fraglen,icmp_param->csum); 451 return 0; 452 } 453 454 /* 455 * First fragment includes header. Note that we've done 456 * the other fragments first, so that we get the checksum 457 * for the whole packet here. 458 */ 459 csum = csum_partial_copy((void *)&icmp_param->icmph, 460 to, sizeof(struct icmphdr), 461 icmp_param->csum); 462 csum = csum_partial_copy(icmp_param->data_ptr, 463 to+sizeof(struct icmphdr), 464 fraglen-sizeof(struct icmphdr), csum); 465 icmph=(struct icmphdr *)to; 466 icmph->checksum = csum_fold(csum); 467 return 0; 468} 469 470/* 471 * Driving logic for building and sending ICMP messages. 
472 */ 473 474static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) 475{ 476 struct sock *sk=icmp_socket->sk; 477 struct ipcm_cookie ipc; 478 struct rtable *rt = (struct rtable*)skb->dst; 479 u32 daddr; 480 481 if (ip_options_echo(&icmp_param->replyopts, skb)) 482 return; 483 484 icmp_param->icmph.checksum=0; 485 icmp_param->csum=0; 486 icmp_out_count(icmp_param->icmph.type); 487 488 sk->ip_tos = skb->nh.iph->tos; 489 daddr = ipc.addr = rt->rt_src; 490 ipc.opt = &icmp_param->replyopts; 491 if (ipc.opt->srr) 492 daddr = icmp_param->replyopts.faddr; 493 if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) 494 return; 495 ip_build_xmit(sk, icmp_glue_bits, icmp_param, 496 icmp_param->data_len+sizeof(struct icmphdr), 497 &ipc, rt, MSG_DONTWAIT); 498 ip_rt_put(rt); 499} 500 501 502/* 503 * Send an ICMP message in response to a situation 504 * 505 * RFC 1122: 3.2.2 MUST send at least the IP header and 8 bytes of header. MAY send more (we do). 506 * MUST NOT change this header information. 507 * MUST NOT reply to a multicast/broadcast IP address. 508 * MUST NOT reply to a multicast/broadcast MAC address. 509 * MUST reply to only the first fragment. 
510 */ 511 512void icmp_send(struct sk_buff *skb_in, int type, int code, unsigned long info) 513{ 514 struct iphdr *iph; 515 struct icmphdr *icmph; 516 int room; 517 struct icmp_bxm icmp_param; 518 struct rtable *rt = (struct rtable*)skb_in->dst; 519 struct ipcm_cookie ipc; 520 u32 saddr; 521 u8 tos; 522 523 /* 524 * Find the original header 525 */ 526 527 iph = skb_in->nh.iph; 528 529 /* 530 * No replies to physical multicast/broadcast 531 */ 532 533 if (skb_in->pkt_type!=PACKET_HOST) 534 return; 535 536 /* 537 * Now check at the protocol level 538 */ 539 if (!rt) { 540#ifndef CONFIG_IP_ALWAYS_DEFRAG 541 if (net_ratelimit()) 542 printk(KERN_DEBUG "icmp_send: destinationless packet\n"); 543#endif 544 return; 545 } 546 if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) 547 return; 548 549 550 /* 551 * Only reply to fragment 0. We byte re-order the constant 552 * mask for efficiency. 553 */ 554 555 if (iph->frag_off&htons(IP_OFFSET)) 556 return; 557 558 /* 559 * If we send an ICMP error to an ICMP error a mess would result.. 560 */ 561 562 if (icmp_pointers[type].error) { 563 /* 564 * We are an error, check if we are replying to an ICMP error 565 */ 566 567 if (iph->protocol==IPPROTO_ICMP) { 568 icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2)); 569 /* 570 * Assume any unknown ICMP type is an error. This isn't 571 * specified by the RFC, but think about it.. 572 */ 573 if (icmph->type>NR_ICMP_TYPES || icmp_pointers[icmph->type].error) 574 return; 575 } 576 } 577 578 579 /* 580 * Construct source address and options. 581 */ 582 583#ifdef CONFIG_IP_ROUTE_NAT 584 /* 585 * Restore original addresses if packet has been translated. 
586 */ 587 if (rt->rt_flags&RTCF_NAT && IPCB(skb_in)->flags&IPSKB_TRANSLATED) { 588 iph->daddr = rt->key.dst; 589 iph->saddr = rt->key.src; 590 } 591#endif 592#ifdef CONFIG_IP_MASQUERADE 593 if (type==ICMP_DEST_UNREACH && IPCB(skb_in)->flags&IPSKB_MASQUERADED) { 594 ip_fw_unmasq_icmp(skb_in); 595 } 596#endif 597 598 saddr = iph->daddr; 599 if (!(rt->rt_flags & RTCF_LOCAL)) 600 saddr = 0; 601 602 tos = icmp_pointers[type].error ? 603 ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : 604 iph->tos; 605 606 /* XXX: use a more aggressive expire for routes created by 607 * this call (not longer than the rate limit timeout). 608 * It could be also worthwhile to not put them into ipv4 609 * fast routing cache at first. Otherwise an attacker can 610 * grow the routing table. 611 */ 612 if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0)) 613 return; 614 615 if (ip_options_echo(&icmp_param.replyopts, skb_in)) 616 goto ende; 617 618 619 /* 620 * Prepare data for ICMP header. 621 */ 622 623 icmp_param.icmph.type=type; 624 icmp_param.icmph.code=code; 625 icmp_param.icmph.un.gateway = info; 626 icmp_param.icmph.checksum=0; 627 icmp_param.csum=0; 628 icmp_param.data_ptr=iph; 629 icmp_out_count(icmp_param.icmph.type); 630 icmp_socket->sk->ip_tos = tos; 631 ipc.addr = iph->saddr; 632 ipc.opt = &icmp_param.replyopts; 633 if (icmp_param.replyopts.srr) { 634 ip_rt_put(rt); 635 if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0)) 636 return; 637 } 638 639 if (!icmpv4_xrlim_allow(rt, type, code)) 640 goto ende; 641 642 /* RFC says return as much as we can without exceeding 576 bytes. 
*/ 643 644 room = rt->u.dst.pmtu; 645 if (room > 576) 646 room = 576; 647 room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen; 648 room -= sizeof(struct icmphdr); 649 650 icmp_param.data_len=(iph->ihl<<2)+skb_in->len; 651 if (icmp_param.data_len > room) 652 icmp_param.data_len = room; 653 654 ip_build_xmit(icmp_socket->sk, icmp_glue_bits, &icmp_param, 655 icmp_param.data_len+sizeof(struct icmphdr), 656 &ipc, rt, MSG_DONTWAIT); 657 658ende: 659 ip_rt_put(rt); 660} 661 662 663/* 664 * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, and ICMP_QUENCH. 665 */ 666 667static void icmp_unreach(struct icmphdr *icmph, struct sk_buff *skb, int len) 668{ 669 struct iphdr *iph; 670 int hash; 671 struct inet_protocol *ipprot; 672 unsigned char *dp; 673 struct sock *raw_sk; 674 675 /* 676 * Incomplete header ? 677 * Only checks for the IP header, there should be an 678 * additional check for longer headers in upper levels. 679 */ 680 681 if(len<sizeof(struct iphdr)) { 682 icmp_statistics.IcmpInErrors++; 683 return; 684 } 685 686 iph = (struct iphdr *) (icmph + 1); 687 dp = (unsigned char*)iph; 688 689 if(icmph->type==ICMP_DEST_UNREACH) { 690 switch(icmph->code & 15) { 691 case ICMP_NET_UNREACH: 692 break; 693 case ICMP_HOST_UNREACH: 694 break; 695 case ICMP_PROT_UNREACH: 696 break; 697 case ICMP_PORT_UNREACH: 698 break; 699 case ICMP_FRAG_NEEDED: 700 if (ipv4_config.no_pmtu_disc) { 701 if (net_ratelimit()) 702 printk(KERN_INFO "ICMP: %s: fragmentation needed and DF set.\n", 703 in_ntoa(iph->daddr)); 704 } else { 705 unsigned short new_mtu; 706 new_mtu = ip_rt_frag_needed(iph, ntohs(icmph->un.frag.mtu)); 707 if (!new_mtu) 708 return; 709 icmph->un.frag.mtu = htons(new_mtu); 710 } 711 break; 712 case ICMP_SR_FAILED: 713 if (net_ratelimit()) 714 printk(KERN_INFO "ICMP: %s: Source Route Failed.\n", in_ntoa(iph->daddr)); 715 break; 716 default: 717 break; 718 } 719 if (icmph->code>NR_ICMP_UNREACH) 720 return; 721 } 722 723 /* 724 * Throw it at our lower layers 725 * 726 * RFC 1122: 
3.2.2 MUST extract the protocol ID from the passed header. 727 * RFC 1122: 3.2.2.1 MUST pass ICMP unreach messages to the transport layer. 728 * RFC 1122: 3.2.2.2 MUST pass ICMP time expired messages to transport layer. 729 */ 730 731 /* 732 * Check the other end isnt violating RFC 1122. Some routers send 733 * bogus responses to broadcast frames. If you see this message 734 * first check your netmask matches at both ends, if it does then 735 * get the other vendor to fix their kit. 736 */ 737 738 if (!sysctl_icmp_ignore_bogus_error_responses) 739 { 740 741 if (inet_addr_type(iph->daddr) == RTN_BROADCAST) 742 { 743 if (net_ratelimit()) 744 printk(KERN_WARNING "%s sent an invalid ICMP error to a broadcast.\n", 745 in_ntoa(skb->nh.iph->saddr)); 746 return; 747 } 748 } 749 750 /* 751 * Deliver ICMP message to raw sockets. Pretty useless feature? 752 */ 753 754 /* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */ 755 hash = iph->protocol & (MAX_INET_PROTOS - 1); 756 if ((raw_sk = raw_v4_htable[hash]) != NULL) 757 { 758 while ((raw_sk = raw_v4_lookup(raw_sk, iph->protocol, iph->saddr, 759 iph->daddr, skb->dev->ifindex)) != NULL) { 760 raw_err(raw_sk, skb); 761 raw_sk = raw_sk->next; 762 } 763 } 764 765 /* 766 * This can't change while we are doing it. 767 */ 768 769 ipprot = (struct inet_protocol *) inet_protos[hash]; 770 while(ipprot != NULL) { 771 struct inet_protocol *nextip; 772 773 nextip = (struct inet_protocol *) ipprot->next; 774 775 /* 776 * Pass it off to everyone who wants it. 777 */ 778 779 /* RFC1122: OK. Passes appropriate ICMP errors to the */ 780 /* appropriate protocol layer (MUST), as per 3.2.2. */ 781 782 if (iph->protocol == ipprot->protocol && ipprot->err_handler) 783 ipprot->err_handler(skb, dp, len); 784 785 ipprot = nextip; 786 } 787} 788 789 790/* 791 * Handle ICMP_REDIRECT. 
792 */ 793 794static void icmp_redirect(struct icmphdr *icmph, struct sk_buff *skb, int len) 795{ 796 struct iphdr *iph; 797 unsigned long ip; 798 799 if (len < sizeof(struct iphdr)) { 800 icmp_statistics.IcmpInErrors++; 801 return; 802 } 803 804 /* 805 * Get the copied header of the packet that caused the redirect 806 */ 807 808 iph = (struct iphdr *) (icmph + 1); 809 ip = iph->daddr; 810 811 switch(icmph->code & 7) { 812 case ICMP_REDIR_NET: 813 case ICMP_REDIR_NETTOS: 814 /* 815 * As per RFC recommendations now handle it as 816 * a host redirect. 817 */ 818 819 case ICMP_REDIR_HOST: 820 case ICMP_REDIR_HOSTTOS: 821 ip_rt_redirect(skb->nh.iph->saddr, ip, icmph->un.gateway, iph->saddr, iph->tos, skb->dev); 822 break; 823 default: 824 break; 825 } 826} 827 828/* 829 * Handle ICMP_ECHO ("ping") requests. 830 * 831 * RFC 1122: 3.2.2.6 MUST have an echo server that answers ICMP echo requests. 832 * RFC 1122: 3.2.2.6 Data received in the ICMP_ECHO request MUST be included in the reply. 833 * RFC 1812: 4.3.3.6 SHOULD have a config option for silently ignoring echo requests, MUST have default=NOT. 834 * See also WRT handling of options once they are done and working. 835 */ 836 837static void icmp_echo(struct icmphdr *icmph, struct sk_buff *skb, int len) 838{ 839 if (!sysctl_icmp_echo_ignore_all) { 840 struct icmp_bxm icmp_param; 841 842 icmp_param.icmph=*icmph; 843 icmp_param.icmph.type=ICMP_ECHOREPLY; 844 icmp_param.data_ptr=(icmph+1); 845 icmp_param.data_len=len; 846 icmp_reply(&icmp_param, skb); 847 } 848} 849 850/* 851 * Handle ICMP Timestamp requests. 852 * RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests. 853 * SHOULD be in the kernel for minimum random latency. 854 * MUST be accurate to a few minutes. 855 * MUST be updated at least at 15Hz. 856 */ 857 858static void icmp_timestamp(struct icmphdr *icmph, struct sk_buff *skb, int len) 859{ 860 struct timeval tv; 861 __u32 times[3]; /* So the new timestamp works on ALPHA's.. 
*/ 862 struct icmp_bxm icmp_param; 863 864 /* 865 * Too short. 866 */ 867 868 if(len<12) { 869 icmp_statistics.IcmpInErrors++; 870 return; 871 } 872 873 /* 874 * Fill in the current time as ms since midnight UT: 875 */ 876 877 do_gettimeofday(&tv); 878 times[1] = htonl((tv.tv_sec % 86400) * 1000 + tv.tv_usec / 1000); 879 times[2] = times[1]; 880 memcpy((void *)×[0], icmph+1, 4); /* Incoming stamp */ 881 icmp_param.icmph=*icmph; 882 icmp_param.icmph.type=ICMP_TIMESTAMPREPLY; 883 icmp_param.icmph.code=0; 884 icmp_param.data_ptr=× 885 icmp_param.data_len=12; 886 icmp_reply(&icmp_param, skb); 887} 888 889 890/* 891 * Handle ICMP_ADDRESS_MASK requests. (RFC950) 892 * 893 * RFC1122 (3.2.2.9). A host MUST only send replies to 894 * ADDRESS_MASK requests if it's been configured as an address mask 895 * agent. Receiving a request doesn't constitute implicit permission to 896 * act as one. Of course, implementing this correctly requires (SHOULD) 897 * a way to turn the functionality on and off. Another one for sysctl(), 898 * I guess. -- MS 899 * 900 * RFC1812 (4.3.3.9). A router MUST implement it. 901 * A router SHOULD have switch turning it on/off. 902 * This switch MUST be ON by default. 903 * 904 * Gratuitous replies, zero-source replies are not implemented, 905 * that complies with RFC. DO NOT implement them!!! All the idea 906 * of broadcast addrmask replies as specified in RFC950 is broken. 907 * The problem is that it is not uncommon to have several prefixes 908 * on one physical interface. Moreover, addrmask agent can even be 909 * not aware of existing another prefixes. 910 * If source is zero, addrmask agent cannot choose correct prefix. 911 * Gratuitous mask announcements suffer from the same problem. 912 * RFC1812 explains it, but still allows to use ADDRMASK, 913 * that is pretty silly. --ANK 914 * 915 * All these rules are so bizarre, that I removed kernel addrmask 916 * support at all. It is wrong, it is obsolete, nobody uses it in 917 * any case. 
--ANK 918 * 919 * Furthermore you can do it with a usermode address agent program 920 * anyway... 921 */ 922 923static void icmp_address(struct icmphdr *icmph, struct sk_buff *skb, int len) 924{ 925#if 0 926 if (net_ratelimit()) 927 printk(KERN_DEBUG "a guy asks for address mask. Who is it?\n"); 928#endif 929} 930 931/* 932 * RFC1812 (4.3.3.9). A router SHOULD listen all replies, and complain 933 * loudly if an inconsistency is found. 934 */ 935 936static void icmp_address_reply(struct icmphdr *icmph, struct sk_buff *skb, int len) 937{ 938 struct rtable *rt = (struct rtable*)skb->dst; 939 struct device *dev = skb->dev; 940 struct in_device *in_dev = dev->ip_ptr; 941 struct in_ifaddr *ifa; 942 u32 mask; 943 944 if (!in_dev || !in_dev->ifa_list || 945 !IN_DEV_LOG_MARTIANS(in_dev) || 946 !IN_DEV_FORWARD(in_dev) || 947 len < 4 || 948 !(rt->rt_flags&RTCF_DIRECTSRC)) 949 return; 950 951 mask = *(u32*)&icmph[1]; 952 for (ifa=in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { 953 if (mask == ifa->ifa_mask && inet_ifa_match(rt->rt_src, ifa)) 954 return; 955 } 956 if (net_ratelimit()) 957 printk(KERN_INFO "Wrong address mask %08lX from %08lX/%s\n", 958 ntohl(mask), ntohl(rt->rt_src), dev->name); 959} 960 961static void icmp_discard(struct icmphdr *icmph, struct sk_buff *skb, int len) 962{ 963} 964 965#ifdef CONFIG_IP_TRANSPARENT_PROXY 966/* 967 * Check incoming icmp packets not addressed locally, to check whether 968 * they relate to a (proxying) socket on our system. 969 * Needed for transparent proxying. 970 * 971 * This code is presently ugly and needs cleanup. 972 * Probably should add a chkaddr entry to ipprot to call a chk routine 973 * in udp.c or tcp.c... 974 */ 975 976/* This should work with the new hashes now. 
-DaveM */ 977extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); 978extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); 979 980int icmp_chkaddr(struct sk_buff *skb) 981{ 982 struct icmphdr *icmph=(struct icmphdr *)(skb->nh.raw + skb->nh.iph->ihl*4); 983 struct iphdr *iph = (struct iphdr *) (icmph + 1); 984 void (*handler)(struct icmphdr *icmph, struct sk_buff *skb, int len) = icmp_pointers[icmph->type].handler; 985 986 if (handler == icmp_unreach || handler == icmp_redirect) { 987 struct sock *sk; 988 989 switch (iph->protocol) { 990 case IPPROTO_TCP: 991 { 992 struct tcphdr *th = (struct tcphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); 993 994 sk = tcp_v4_lookup(iph->daddr, th->dest, iph->saddr, th->source, skb->dev->ifindex); 995 if (!sk || (sk->state == TCP_LISTEN)) 996 return 0; 997 /* 998 * This packet came from us. 999 */ 1000 return 1;
1001 } 1002 case IPPROTO_UDP: 1003 { 1004 struct udphdr *uh = (struct udphdr *)(((unsigned char *)iph)+(iph->ihl<<2)); 1005 1006 sk = udp_v4_lookup(iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex); 1007 if (!sk) return 0; 1008 if (sk->saddr != iph->saddr && inet_addr_type(iph->saddr) != RTN_LOCAL) 1009 return 0; 1010 /* 1011 * This packet may have come from us. 1012 * Assume it did. 1013 */ 1014 return 1; 1015 } 1016 } 1017 } 1018 return 0; 1019} 1020 1021#endif 1022 1023/* 1024 * Deal with incoming ICMP packets. 1025 */ 1026 1027int icmp_rcv(struct sk_buff *skb, unsigned short len) 1028{ 1029 struct icmphdr *icmph = skb->h.icmph; 1030 struct rtable *rt = (struct rtable*)skb->dst; 1031 1032 icmp_statistics.IcmpInMsgs++; 1033 1034 /* 1035 * 18 is the highest 'known' ICMP type. Anything else is a mystery 1036 * 1037 * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently discarded. 1038 */ 1039 if(len < sizeof(struct icmphdr) || 1040 ip_compute_csum((unsigned char *) icmph, len) || 1041 icmph->type > NR_ICMP_TYPES) 1042 goto error; 1043 1044 /* 1045 * Parse the ICMP message 1046 */ 1047 1048 if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) { 1049 /* 1050 * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be 1051 * silently ignored (we let user decide with a sysctl). 1052 * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently 1053 * discarded if to broadcast/multicast. 
1054 */ 1055 if (icmph->type == ICMP_ECHO && 1056 sysctl_icmp_echo_ignore_broadcasts) { 1057 goto error; 1058 } 1059 if (icmph->type != ICMP_ECHO && 1060 icmph->type != ICMP_TIMESTAMP && 1061 icmph->type != ICMP_ADDRESS && 1062 icmph->type != ICMP_ADDRESSREPLY) { 1063 goto error; 1064 } 1065 } 1066 1067 len -= sizeof(struct icmphdr); 1068 (*icmp_pointers[icmph->type].input)++; 1069 (icmp_pointers[icmph->type].handler)(icmph, skb, len); 1070 1071drop: 1072 kfree_skb(skb); 1073 return 0; 1074error: 1075 icmp_statistics.IcmpInErrors++; 1076 goto drop; 1077} 1078 1079/* 1080 * A spare long used to speed up statistics updating 1081 */ 1082 1083static unsigned long dummy; 1084 1085/* 1086 * Configurable rate limits. 1087 * Someone should check if these default values are correct. 1088 * Note that these values interact with the routing cache GC timeout. 1089 * If you chose them too high they won't take effect, because the 1090 * dst_entry gets expired too early. The same should happen when 1091 * the cache grows too big. 1092 */ 1093int sysctl_icmp_destunreach_time = 1*HZ; 1094int sysctl_icmp_timeexceed_time = 1*HZ; 1095int sysctl_icmp_paramprob_time = 1*HZ; 1096int sysctl_icmp_echoreply_time = 0; /* don't limit it per default. */ 1097 1098/* 1099 * This table is the definition of how we handle ICMP. 
1100 */ 1101 1102static struct icmp_control icmp_pointers[NR_ICMP_TYPES+1] = { 1103/* ECHO REPLY (0) */ 1104 { &icmp_statistics.IcmpOutEchoReps, &icmp_statistics.IcmpInEchoReps, icmp_discard, 0, &sysctl_icmp_echoreply_time}, 1105 { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, 1106 { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, 1107/* DEST UNREACH (3) */ 1108 { &icmp_statistics.IcmpOutDestUnreachs, &icmp_statistics.IcmpInDestUnreachs, icmp_unreach, 1, &sysctl_icmp_destunreach_time }, 1109/* SOURCE QUENCH (4) */ 1110 { &icmp_statistics.IcmpOutSrcQuenchs, &icmp_statistics.IcmpInSrcQuenchs, icmp_unreach, 1, }, 1111/* REDIRECT (5) */ 1112 { &icmp_statistics.IcmpOutRedirects, &icmp_statistics.IcmpInRedirects, icmp_redirect, 1, }, 1113 { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, 1114 { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, 1115/* ECHO (8) */ 1116 { &icmp_statistics.IcmpOutEchos, &icmp_statistics.IcmpInEchos, icmp_echo, 0, }, 1117 { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, 1118 { &dummy, &icmp_statistics.IcmpInErrors, icmp_discard, 1, }, 1119/* TIME EXCEEDED (11) */ 1120 { &icmp_statistics.IcmpOutTimeExcds, &icmp_statistics.IcmpInTimeExcds, icmp_unreach, 1, &sysctl_icmp_timeexceed_time }, 1121/* PARAMETER PROBLEM (12) */ 1122 { &icmp_statistics.IcmpOutParmProbs, &icmp_statistics.IcmpInParmProbs, icmp_unreach, 1, &sysctl_icmp_paramprob_time }, 1123/* TIMESTAMP (13) */ 1124 { &icmp_statistics.IcmpOutTimestamps, &icmp_statistics.IcmpInTimestamps, icmp_timestamp, 0, }, 1125/* TIMESTAMP REPLY (14) */ 1126 { &icmp_statistics.IcmpOutTimestampReps, &icmp_statistics.IcmpInTimestampReps, icmp_discard, 0, }, 1127/* INFO (15) */ 1128 { &dummy, &dummy, icmp_discard, 0, }, 1129/* INFO REPLY (16) */ 1130 { &dummy, &dummy, icmp_discard, 0, }, 1131/* ADDR MASK (17) */ 1132 { &icmp_statistics.IcmpOutAddrMasks, &icmp_statistics.IcmpInAddrMasks, icmp_address, 0, }, 1133/* ADDR MASK REPLY (18) */ 1134 { 
&icmp_statistics.IcmpOutAddrMaskReps, &icmp_statistics.IcmpInAddrMaskReps, icmp_address_reply, 0, } 1135}; 1136 1137__initfunc(void icmp_init(struct net_proto_family *ops)) 1138{ 1139 int err; 1140 1141 icmp_inode.i_mode = S_IFSOCK; 1142 icmp_inode.i_sock = 1; 1143 icmp_inode.i_uid = 0; 1144 icmp_inode.i_gid = 0; 1145 1146 icmp_socket->inode = &icmp_inode; 1147 icmp_socket->state = SS_UNCONNECTED; 1148 icmp_socket->type=SOCK_RAW; 1149 1150 if ((err=ops->create(icmp_socket, IPPROTO_ICMP))<0) 1151 panic("Failed to create the ICMP control socket.\n"); 1152 icmp_socket->sk->allocation=GFP_ATOMIC; 1153 icmp_socket->sk->num = 256; /* Don't receive any data */ 1154 icmp_socket->sk->ip_ttl = MAXTTL; 1155} 1156

