// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

20#include <linux/atomic.h>
21#include <linux/module.h>
22#include <linux/types.h>
23#include <linux/mm.h>
24#include <linux/fcntl.h>
25#include <linux/socket.h>
26#include <linux/sock_diag.h>
27#include <linux/in.h>
28#include <linux/inet.h>
29#include <linux/netdevice.h>
30#include <linux/if_packet.h>
31#include <linux/if_arp.h>
32#include <linux/gfp.h>
33#include <net/inet_common.h>
34#include <net/ip.h>
35#include <net/protocol.h>
36#include <net/netlink.h>
37#include <linux/skbuff.h>
38#include <linux/skmsg.h>
39#include <net/sock.h>
40#include <net/flow_dissector.h>
41#include <linux/errno.h>
42#include <linux/timer.h>
43#include <linux/uaccess.h>
44#include <asm/unaligned.h>
45#include <linux/filter.h>
46#include <linux/ratelimit.h>
47#include <linux/seccomp.h>
48#include <linux/if_vlan.h>
49#include <linux/bpf.h>
50#include <linux/btf.h>
51#include <net/sch_generic.h>
52#include <net/cls_cgroup.h>
53#include <net/dst_metadata.h>
54#include <net/dst.h>
55#include <net/sock_reuseport.h>
56#include <net/busy_poll.h>
57#include <net/tcp.h>
58#include <net/xfrm.h>
59#include <net/udp.h>
60#include <linux/bpf_trace.h>
61#include <net/xdp_sock.h>
62#include <linux/inetdevice.h>
63#include <net/inet_hashtables.h>
64#include <net/inet6_hashtables.h>
65#include <net/ip_fib.h>
66#include <net/nexthop.h>
67#include <net/flow.h>
68#include <net/arp.h>
69#include <net/ipv6.h>
70#include <net/net_namespace.h>
71#include <linux/seg6_local.h>
72#include <net/seg6.h>
73#include <net/seg6_local.h>
74#include <net/lwtunnel.h>
75#include <net/ipv6_stubs.h>
76#include <net/bpf_sk_storage.h>
77#include <net/transp_v6.h>
78#include <linux/btf_ids.h>
79#include <net/tls.h>
80
81static const struct bpf_func_proto *
82bpf_sk_base_func_proto(enum bpf_func_id func_id);
83
84int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
85{
86 if (in_compat_syscall()) {
87 struct compat_sock_fprog f32;
88
89 if (len != sizeof(f32))
90 return -EINVAL;
91 if (copy_from_sockptr(&f32, src, sizeof(f32)))
92 return -EFAULT;
93 memset(dst, 0, sizeof(*dst));
94 dst->len = f32.len;
95 dst->filter = compat_ptr(f32.filter);
96 } else {
97 if (len != sizeof(*dst))
98 return -EINVAL;
99 if (copy_from_sockptr(dst, src, sizeof(*dst)))
100 return -EFAULT;
101 }
102
103 return 0;
104}
105EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);

/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to correct size returned by
 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 */
120int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
121{
122 int err;
123 struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
130 if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
131 NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
132 return -ENOMEM;
133 }
134 err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
135 if (err)
136 return err;
137
138 err = security_sock_rcv_skb(sk, skb);
139 if (err)
140 return err;
141
142 rcu_read_lock();
143 filter = rcu_dereference(sk->sk_filter);
144 if (filter) {
145 struct sock *save_sk = skb->sk;
146 unsigned int pkt_len;
147
148 skb->sk = sk;
149 pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
150 skb->sk = save_sk;
151 err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
152 }
153 rcu_read_unlock();
154
155 return err;
156}
157EXPORT_SYMBOL(sk_filter_trim_cap);
158
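/* Helpers backing the classic BPF ancillary loads (SKF_AD_*). They are not
 * exposed to eBPF programs directly; convert_bpf_extensions() below emits
 * calls to them when migrating a classic filter.
 */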
159BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
160{
161 return skb_get_poff(skb);
162}
163
164BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
165{
166 struct nlattr *nla;
167
168 if (skb_is_nonlinear(skb))
169 return 0;
170
171 if (skb->len < sizeof(struct nlattr))
172 return 0;
173
174 if (a > skb->len - sizeof(struct nlattr))
175 return 0;
176
177 nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
178 if (nla)
179 return (void *) nla - (void *) skb->data;
180
181 return 0;
182}
183
184BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
185{
186 struct nlattr *nla;
187
188 if (skb_is_nonlinear(skb))
189 return 0;
190
191 if (skb->len < sizeof(struct nlattr))
192 return 0;
193
194 if (a > skb->len - sizeof(struct nlattr))
195 return 0;
196
197 nla = (struct nlattr *) &skb->data[a];
198 if (nla->nla_len > skb->len - a)
199 return 0;
200
201 nla = nla_find_nested(nla, x);
202 if (nla)
203 return (void *) nla - (void *) skb->data;
204
205 return 0;
206}
207
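/* Load helpers used for converted LD_ABS/LD_IND instructions: positive
 * offsets read from the linear head when possible and fall back to
 * skb_copy_bits(), while negative offsets are resolved through
 * bpf_internal_load_pointer_neg_helper() for the special SKF_AD_OFF,
 * SKF_NET_OFF and SKF_LL_OFF ranges.
 */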
208BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
209 data, int, headlen, int, offset)
210{
211 u8 tmp, *ptr;
212 const int len = sizeof(tmp);
213
214 if (offset >= 0) {
215 if (headlen - offset >= len)
216 return *(u8 *)(data + offset);
217 if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
218 return tmp;
219 } else {
220 ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
221 if (likely(ptr))
222 return *(u8 *)ptr;
223 }
224
225 return -EFAULT;
226}
227
228BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
229 int, offset)
230{
231 return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
232 offset);
233}
234
235BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
236 data, int, headlen, int, offset)
237{
	__be16 tmp, *ptr;
239 const int len = sizeof(tmp);
240
241 if (offset >= 0) {
242 if (headlen - offset >= len)
243 return get_unaligned_be16(data + offset);
244 if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
245 return be16_to_cpu(tmp);
246 } else {
247 ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
248 if (likely(ptr))
249 return get_unaligned_be16(ptr);
250 }
251
252 return -EFAULT;
253}
254
255BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
256 int, offset)
257{
258 return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
259 offset);
260}
261
262BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
263 data, int, headlen, int, offset)
264{
	__be32 tmp, *ptr;
266 const int len = sizeof(tmp);
267
268 if (likely(offset >= 0)) {
269 if (headlen - offset >= len)
270 return get_unaligned_be32(data + offset);
271 if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
272 return be32_to_cpu(tmp);
273 } else {
274 ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
275 if (likely(ptr))
276 return get_unaligned_be32(ptr);
277 }
278
279 return -EFAULT;
280}
281
282BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
283 int, offset)
284{
285 return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
286 offset);
287}
288
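/* Emit the eBPF instruction sequence that reads a given sk_buff field for
 * the classic ancillary accesses (mark, pkt_type, queue_mapping, vlan_tci,
 * vlan present bit). Returns the number of instructions written to insn_buf.
 */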
289static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
290 struct bpf_insn *insn_buf)
291{
292 struct bpf_insn *insn = insn_buf;
293
294 switch (skb_field) {
295 case SKF_AD_MARK:
296 BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
297
298 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
299 offsetof(struct sk_buff, mark));
300 break;
301
302 case SKF_AD_PKTTYPE:
303 *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
304 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
305#ifdef __BIG_ENDIAN_BITFIELD
306 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
307#endif
308 break;
309
310 case SKF_AD_QUEUE:
311 BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);
312
313 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
314 offsetof(struct sk_buff, queue_mapping));
315 break;
316
317 case SKF_AD_VLAN_TAG:
318 BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
319
320
321 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
322 offsetof(struct sk_buff, vlan_tci));
323 break;
324 case SKF_AD_VLAN_TAG_PRESENT:
325 *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
326 if (PKT_VLAN_PRESENT_BIT)
327 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
328 if (PKT_VLAN_PRESENT_BIT < 7)
329 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
330 break;
331 }
332
333 return insn - insn_buf;
334}
335
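/* Translate a classic BPF_LD | BPF_ABS load from an SKF_AD_OFF + SKF_AD_*
 * "extension" offset into native eBPF instructions or helper calls. Returns
 * false if the offset is not a recognized extension, in which case the
 * caller treats it as a regular packet load.
 */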
336static bool convert_bpf_extensions(struct sock_filter *fp,
337 struct bpf_insn **insnp)
338{
339 struct bpf_insn *insn = *insnp;
340 u32 cnt;
341
342 switch (fp->k) {
343 case SKF_AD_OFF + SKF_AD_PROTOCOL:
344 BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);
345
346
347 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
348 offsetof(struct sk_buff, protocol));
349
350 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
351 break;
352
353 case SKF_AD_OFF + SKF_AD_PKTTYPE:
354 cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
355 insn += cnt - 1;
356 break;
357
358 case SKF_AD_OFF + SKF_AD_IFINDEX:
359 case SKF_AD_OFF + SKF_AD_HATYPE:
360 BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
361 BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);
362
363 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
364 BPF_REG_TMP, BPF_REG_CTX,
365 offsetof(struct sk_buff, dev));
366
367 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
368 *insn++ = BPF_EXIT_INSN();
369 if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
370 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
371 offsetof(struct net_device, ifindex));
372 else
373 *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
374 offsetof(struct net_device, type));
375 break;
376
377 case SKF_AD_OFF + SKF_AD_MARK:
378 cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
379 insn += cnt - 1;
380 break;
381
382 case SKF_AD_OFF + SKF_AD_RXHASH:
383 BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
384
385 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
386 offsetof(struct sk_buff, hash));
387 break;
388
389 case SKF_AD_OFF + SKF_AD_QUEUE:
390 cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
391 insn += cnt - 1;
392 break;
393
394 case SKF_AD_OFF + SKF_AD_VLAN_TAG:
395 cnt = convert_skb_access(SKF_AD_VLAN_TAG,
396 BPF_REG_A, BPF_REG_CTX, insn);
397 insn += cnt - 1;
398 break;
399
400 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
401 cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
402 BPF_REG_A, BPF_REG_CTX, insn);
403 insn += cnt - 1;
404 break;
405
406 case SKF_AD_OFF + SKF_AD_VLAN_TPID:
407 BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);
408
409
410 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
411 offsetof(struct sk_buff, vlan_proto));
412
413 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
414 break;
415
416 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
417 case SKF_AD_OFF + SKF_AD_NLATTR:
418 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
419 case SKF_AD_OFF + SKF_AD_CPU:
420 case SKF_AD_OFF + SKF_AD_RANDOM:
421
422 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
423
424 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
425
426 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
427
428 switch (fp->k) {
429 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
430 *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
431 break;
432 case SKF_AD_OFF + SKF_AD_NLATTR:
433 *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
434 break;
435 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
436 *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
437 break;
438 case SKF_AD_OFF + SKF_AD_CPU:
439 *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
440 break;
441 case SKF_AD_OFF + SKF_AD_RANDOM:
442 *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
443 bpf_user_rnd_init_once();
444 break;
445 }
446 break;
447
448 case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
449
450 *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
451 break;
452
453 default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
458 BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
459 return false;
460 }
461
462 *insnp = insn;
463 return true;
464}
465
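/* Translate classic LD_ABS/LD_IND into an inline fast path (a direct load
 * from the linear data, using the BPF_REG_D/BPF_REG_H bookkeeping registers
 * set up in bpf_convert_filter()) plus a slow-path call to one of the
 * bpf_skb_load_helper_{8,16,32}() functions above. A negative helper return
 * value makes the converted program exit with return value 0, matching the
 * classic BPF semantics of a failed packet load.
 */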
466static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
467{
468 const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
469 int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
470 bool endian = BPF_SIZE(fp->code) == BPF_H ||
471 BPF_SIZE(fp->code) == BPF_W;
472 bool indirect = BPF_MODE(fp->code) == BPF_IND;
473 const int ip_align = NET_IP_ALIGN;
474 struct bpf_insn *insn = *insnp;
475 int offset = fp->k;
476
477 if (!indirect &&
478 ((unaligned_ok && offset >= 0) ||
479 (!unaligned_ok && offset >= 0 &&
480 offset + ip_align >= 0 &&
481 offset + ip_align % size == 0))) {
482 bool ldx_off_ok = offset <= S16_MAX;
483
484 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
485 if (offset)
486 *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
487 *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
488 size, 2 + endian + (!ldx_off_ok * 2));
489 if (ldx_off_ok) {
490 *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
491 BPF_REG_D, offset);
492 } else {
493 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
494 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
495 *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
496 BPF_REG_TMP, 0);
497 }
498 if (endian)
499 *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
500 *insn++ = BPF_JMP_A(8);
501 }
502
503 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
504 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
505 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
506 if (!indirect) {
507 *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
508 } else {
509 *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
510 if (fp->k)
511 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
512 }
513
514 switch (BPF_SIZE(fp->code)) {
515 case BPF_B:
516 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
517 break;
518 case BPF_H:
519 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
520 break;
521 case BPF_W:
522 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
523 break;
524 default:
525 return false;
526 }
527
528 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
529 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
530 *insn = BPF_EXIT_INSN();
531
532 *insnp = insn;
533 return true;
534}
535
/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *	@seen_ld_abs: bool whether we've seen ld_abs/ind
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
 */
555static int bpf_convert_filter(struct sock_filter *prog, int len,
556 struct bpf_prog *new_prog, int *new_len,
557 bool *seen_ld_abs)
558{
559 int new_flen = 0, pass = 0, target, i, stack_off;
560 struct bpf_insn *new_insn, *first_insn = NULL;
561 struct sock_filter *fp;
562 int *addrs = NULL;
563 u8 bpf_src;
564
565 BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
566 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
567
568 if (len <= 0 || len > BPF_MAXINSNS)
569 return -EINVAL;
570
571 if (new_prog) {
572 first_insn = new_prog->insnsi;
573 addrs = kcalloc(len, sizeof(*addrs),
574 GFP_KERNEL | __GFP_NOWARN);
575 if (!addrs)
576 return -ENOMEM;
577 }
578
579do_pass:
580 new_insn = first_insn;
581 fp = prog;
582
583
584 if (new_prog) {
585
586
587
588 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
589 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
590
591
592
593
594
595 *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
596 if (*seen_ld_abs) {
597
598
599
600
601
602 *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
603 BPF_REG_D, BPF_REG_CTX,
604 offsetof(struct sk_buff, data));
605 *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
606 offsetof(struct sk_buff, len));
607 *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
608 offsetof(struct sk_buff, data_len));
609 *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
610 }
611 } else {
612 new_insn += 3;
613 }
614
615 for (i = 0; i < len; fp++, i++) {
616 struct bpf_insn tmp_insns[32] = { };
617 struct bpf_insn *insn = tmp_insns;
618
619 if (addrs)
620 addrs[i] = new_insn - first_insn;
621
622 switch (fp->code) {
623
624 case BPF_ALU | BPF_ADD | BPF_X:
625 case BPF_ALU | BPF_ADD | BPF_K:
626 case BPF_ALU | BPF_SUB | BPF_X:
627 case BPF_ALU | BPF_SUB | BPF_K:
628 case BPF_ALU | BPF_AND | BPF_X:
629 case BPF_ALU | BPF_AND | BPF_K:
630 case BPF_ALU | BPF_OR | BPF_X:
631 case BPF_ALU | BPF_OR | BPF_K:
632 case BPF_ALU | BPF_LSH | BPF_X:
633 case BPF_ALU | BPF_LSH | BPF_K:
634 case BPF_ALU | BPF_RSH | BPF_X:
635 case BPF_ALU | BPF_RSH | BPF_K:
636 case BPF_ALU | BPF_XOR | BPF_X:
637 case BPF_ALU | BPF_XOR | BPF_K:
638 case BPF_ALU | BPF_MUL | BPF_X:
639 case BPF_ALU | BPF_MUL | BPF_K:
640 case BPF_ALU | BPF_DIV | BPF_X:
641 case BPF_ALU | BPF_DIV | BPF_K:
642 case BPF_ALU | BPF_MOD | BPF_X:
643 case BPF_ALU | BPF_MOD | BPF_K:
644 case BPF_ALU | BPF_NEG:
645 case BPF_LD | BPF_ABS | BPF_W:
646 case BPF_LD | BPF_ABS | BPF_H:
647 case BPF_LD | BPF_ABS | BPF_B:
648 case BPF_LD | BPF_IND | BPF_W:
649 case BPF_LD | BPF_IND | BPF_H:
650 case BPF_LD | BPF_IND | BPF_B:
651
652
653
654
655 if (BPF_CLASS(fp->code) == BPF_LD &&
656 BPF_MODE(fp->code) == BPF_ABS &&
657 convert_bpf_extensions(fp, &insn))
658 break;
659 if (BPF_CLASS(fp->code) == BPF_LD &&
660 convert_bpf_ld_abs(fp, &insn)) {
661 *seen_ld_abs = true;
662 break;
663 }
664
665 if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
666 fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
667 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
668
669
670
671 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
672 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
673 *insn++ = BPF_EXIT_INSN();
674 }
675
676 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
677 break;
678
679
680
681
682
683
684
685#define BPF_EMIT_JMP \
686 do { \
687 const s32 off_min = S16_MIN, off_max = S16_MAX; \
688 s32 off; \
689 \
690 if (target >= len || target < 0) \
691 goto err; \
692 off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
693 \
694 off -= insn - tmp_insns; \
695 \
696 if (off < off_min || off > off_max) \
697 goto err; \
698 insn->off = off; \
699 } while (0)
700
701 case BPF_JMP | BPF_JA:
702 target = i + fp->k + 1;
703 insn->code = fp->code;
704 BPF_EMIT_JMP;
705 break;
706
707 case BPF_JMP | BPF_JEQ | BPF_K:
708 case BPF_JMP | BPF_JEQ | BPF_X:
709 case BPF_JMP | BPF_JSET | BPF_K:
710 case BPF_JMP | BPF_JSET | BPF_X:
711 case BPF_JMP | BPF_JGT | BPF_K:
712 case BPF_JMP | BPF_JGT | BPF_X:
713 case BPF_JMP | BPF_JGE | BPF_K:
714 case BPF_JMP | BPF_JGE | BPF_X:
715 if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
716
717
718
719
720 *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
721
722 insn->dst_reg = BPF_REG_A;
723 insn->src_reg = BPF_REG_TMP;
724 bpf_src = BPF_X;
725 } else {
726 insn->dst_reg = BPF_REG_A;
727 insn->imm = fp->k;
728 bpf_src = BPF_SRC(fp->code);
729 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
730 }
731
732
733 if (fp->jf == 0) {
734 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
735 target = i + fp->jt + 1;
736 BPF_EMIT_JMP;
737 break;
738 }
739
740
741 if (fp->jt == 0) {
742 switch (BPF_OP(fp->code)) {
743 case BPF_JEQ:
744 insn->code = BPF_JMP | BPF_JNE | bpf_src;
745 break;
746 case BPF_JGT:
747 insn->code = BPF_JMP | BPF_JLE | bpf_src;
748 break;
749 case BPF_JGE:
750 insn->code = BPF_JMP | BPF_JLT | bpf_src;
751 break;
752 default:
753 goto jmp_rest;
754 }
755
756 target = i + fp->jf + 1;
757 BPF_EMIT_JMP;
758 break;
759 }
760jmp_rest:
761
762 target = i + fp->jt + 1;
763 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
764 BPF_EMIT_JMP;
765 insn++;
766
767 insn->code = BPF_JMP | BPF_JA;
768 target = i + fp->jf + 1;
769 BPF_EMIT_JMP;
770 break;
771
772
773 case BPF_LDX | BPF_MSH | BPF_B: {
774 struct sock_filter tmp = {
775 .code = BPF_LD | BPF_ABS | BPF_B,
776 .k = fp->k,
777 };
778
779 *seen_ld_abs = true;
780
781
782 *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
783
784 convert_bpf_ld_abs(&tmp, &insn);
785 insn++;
786
787 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
788
789 *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
790
791 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
792
793 *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
794
795 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
796 break;
797 }
798
799
800
801 case BPF_RET | BPF_A:
802 case BPF_RET | BPF_K:
803 if (BPF_RVAL(fp->code) == BPF_K)
804 *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
805 0, fp->k);
806 *insn = BPF_EXIT_INSN();
807 break;
808
809
810 case BPF_ST:
811 case BPF_STX:
812 stack_off = fp->k * 4 + 4;
813 *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
814 BPF_ST ? BPF_REG_A : BPF_REG_X,
815 -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
820 if (new_prog && new_prog->aux->stack_depth < stack_off)
821 new_prog->aux->stack_depth = stack_off;
822 break;
823
824
825 case BPF_LD | BPF_MEM:
826 case BPF_LDX | BPF_MEM:
827 stack_off = fp->k * 4 + 4;
828 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
829 BPF_REG_A : BPF_REG_X, BPF_REG_FP,
830 -stack_off);
831 break;
832
833
834 case BPF_LD | BPF_IMM:
835 case BPF_LDX | BPF_IMM:
836 *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
837 BPF_REG_A : BPF_REG_X, fp->k);
838 break;
839
840
841 case BPF_MISC | BPF_TAX:
842 *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
843 break;
844
845
846 case BPF_MISC | BPF_TXA:
847 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
848 break;
849
850
851 case BPF_LD | BPF_W | BPF_LEN:
852 case BPF_LDX | BPF_W | BPF_LEN:
853 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
854 BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
855 offsetof(struct sk_buff, len));
856 break;
857
858
859 case BPF_LDX | BPF_ABS | BPF_W:
860
861 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
862 break;
863
864
865 default:
866 goto err;
867 }
868
869 insn++;
870 if (new_prog)
871 memcpy(new_insn, tmp_insns,
872 sizeof(*insn) * (insn - tmp_insns));
873 new_insn += insn - tmp_insns;
874 }
875
876 if (!new_prog) {
877
878 *new_len = new_insn - first_insn;
879 if (*seen_ld_abs)
880 *new_len += 4;
881 return 0;
882 }
883
884 pass++;
885 if (new_flen != new_insn - first_insn) {
886 new_flen = new_insn - first_insn;
887 if (pass > 2)
888 goto err;
889 goto do_pass;
890 }
891
892 kfree(addrs);
893 BUG_ON(*new_len != new_flen);
894 return 0;
895err:
896 kfree(addrs);
897 return -EINVAL;
898}
899
/* Security:
 *
 * As we dont want to clear mem[] array for each packet going through
 * __bpf_prog_run(), we check that filter loaded by user never try to read
 * a cell if not previously written, and we check all branches to be sure
 * a malicious user doesn't try to abuse us.
 */
907static int check_load_and_stores(const struct sock_filter *filter, int flen)
908{
909 u16 *masks, memvalid = 0;
910 int pc, ret = 0;
911
912 BUILD_BUG_ON(BPF_MEMWORDS > 16);
913
914 masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
915 if (!masks)
916 return -ENOMEM;
917
918 memset(masks, 0xff, flen * sizeof(*masks));
919
920 for (pc = 0; pc < flen; pc++) {
921 memvalid &= masks[pc];
922
923 switch (filter[pc].code) {
924 case BPF_ST:
925 case BPF_STX:
926 memvalid |= (1 << filter[pc].k);
927 break;
928 case BPF_LD | BPF_MEM:
929 case BPF_LDX | BPF_MEM:
930 if (!(memvalid & (1 << filter[pc].k))) {
931 ret = -EINVAL;
932 goto error;
933 }
934 break;
935 case BPF_JMP | BPF_JA:
936
937 masks[pc + 1 + filter[pc].k] &= memvalid;
938 memvalid = ~0;
939 break;
940 case BPF_JMP | BPF_JEQ | BPF_K:
941 case BPF_JMP | BPF_JEQ | BPF_X:
942 case BPF_JMP | BPF_JGE | BPF_K:
943 case BPF_JMP | BPF_JGE | BPF_X:
944 case BPF_JMP | BPF_JGT | BPF_K:
945 case BPF_JMP | BPF_JGT | BPF_X:
946 case BPF_JMP | BPF_JSET | BPF_K:
947 case BPF_JMP | BPF_JSET | BPF_X:
948
949 masks[pc + 1 + filter[pc].jt] &= memvalid;
950 masks[pc + 1 + filter[pc].jf] &= memvalid;
951 memvalid = ~0;
952 break;
953 }
954 }
955error:
956 kfree(masks);
957 return ret;
958}
959
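/* Whitelist of classic BPF opcodes accepted by the checker. Any opcode not
 * set in this table is rejected by bpf_check_classic().
 */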
960static bool chk_code_allowed(u16 code_to_probe)
961{
962 static const bool codes[] = {
963
964 [BPF_ALU | BPF_ADD | BPF_K] = true,
965 [BPF_ALU | BPF_ADD | BPF_X] = true,
966 [BPF_ALU | BPF_SUB | BPF_K] = true,
967 [BPF_ALU | BPF_SUB | BPF_X] = true,
968 [BPF_ALU | BPF_MUL | BPF_K] = true,
969 [BPF_ALU | BPF_MUL | BPF_X] = true,
970 [BPF_ALU | BPF_DIV | BPF_K] = true,
971 [BPF_ALU | BPF_DIV | BPF_X] = true,
972 [BPF_ALU | BPF_MOD | BPF_K] = true,
973 [BPF_ALU | BPF_MOD | BPF_X] = true,
974 [BPF_ALU | BPF_AND | BPF_K] = true,
975 [BPF_ALU | BPF_AND | BPF_X] = true,
976 [BPF_ALU | BPF_OR | BPF_K] = true,
977 [BPF_ALU | BPF_OR | BPF_X] = true,
978 [BPF_ALU | BPF_XOR | BPF_K] = true,
979 [BPF_ALU | BPF_XOR | BPF_X] = true,
980 [BPF_ALU | BPF_LSH | BPF_K] = true,
981 [BPF_ALU | BPF_LSH | BPF_X] = true,
982 [BPF_ALU | BPF_RSH | BPF_K] = true,
983 [BPF_ALU | BPF_RSH | BPF_X] = true,
984 [BPF_ALU | BPF_NEG] = true,
985
986 [BPF_LD | BPF_W | BPF_ABS] = true,
987 [BPF_LD | BPF_H | BPF_ABS] = true,
988 [BPF_LD | BPF_B | BPF_ABS] = true,
989 [BPF_LD | BPF_W | BPF_LEN] = true,
990 [BPF_LD | BPF_W | BPF_IND] = true,
991 [BPF_LD | BPF_H | BPF_IND] = true,
992 [BPF_LD | BPF_B | BPF_IND] = true,
993 [BPF_LD | BPF_IMM] = true,
994 [BPF_LD | BPF_MEM] = true,
995 [BPF_LDX | BPF_W | BPF_LEN] = true,
996 [BPF_LDX | BPF_B | BPF_MSH] = true,
997 [BPF_LDX | BPF_IMM] = true,
998 [BPF_LDX | BPF_MEM] = true,
999
1000 [BPF_ST] = true,
1001 [BPF_STX] = true,
1002
1003 [BPF_MISC | BPF_TAX] = true,
1004 [BPF_MISC | BPF_TXA] = true,
1005
1006 [BPF_RET | BPF_K] = true,
1007 [BPF_RET | BPF_A] = true,
1008
1009 [BPF_JMP | BPF_JA] = true,
1010 [BPF_JMP | BPF_JEQ | BPF_K] = true,
1011 [BPF_JMP | BPF_JEQ | BPF_X] = true,
1012 [BPF_JMP | BPF_JGE | BPF_K] = true,
1013 [BPF_JMP | BPF_JGE | BPF_X] = true,
1014 [BPF_JMP | BPF_JGT | BPF_K] = true,
1015 [BPF_JMP | BPF_JGT | BPF_X] = true,
1016 [BPF_JMP | BPF_JSET | BPF_K] = true,
1017 [BPF_JMP | BPF_JSET | BPF_X] = true,
1018 };
1019
1020 if (code_to_probe >= ARRAY_SIZE(codes))
1021 return false;
1022
1023 return codes[code_to_probe];
1024}
1025
1026static bool bpf_check_basics_ok(const struct sock_filter *filter,
1027 unsigned int flen)
1028{
1029 if (filter == NULL)
1030 return false;
1031 if (flen == 0 || flen > BPF_MAXINSNS)
1032 return false;
1033
1034 return true;
1035}
1036
/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
1051static int bpf_check_classic(const struct sock_filter *filter,
1052 unsigned int flen)
1053{
1054 bool anc_found;
1055 int pc;
1056
1057
1058 for (pc = 0; pc < flen; pc++) {
1059 const struct sock_filter *ftest = &filter[pc];
1060
1061
1062 if (!chk_code_allowed(ftest->code))
1063 return -EINVAL;
1064
1065
1066 switch (ftest->code) {
1067 case BPF_ALU | BPF_DIV | BPF_K:
1068 case BPF_ALU | BPF_MOD | BPF_K:
1069
1070 if (ftest->k == 0)
1071 return -EINVAL;
1072 break;
1073 case BPF_ALU | BPF_LSH | BPF_K:
1074 case BPF_ALU | BPF_RSH | BPF_K:
1075 if (ftest->k >= 32)
1076 return -EINVAL;
1077 break;
1078 case BPF_LD | BPF_MEM:
1079 case BPF_LDX | BPF_MEM:
1080 case BPF_ST:
1081 case BPF_STX:
1082
1083 if (ftest->k >= BPF_MEMWORDS)
1084 return -EINVAL;
1085 break;
1086 case BPF_JMP | BPF_JA:
1087
1088
1089
1090
1091 if (ftest->k >= (unsigned int)(flen - pc - 1))
1092 return -EINVAL;
1093 break;
1094 case BPF_JMP | BPF_JEQ | BPF_K:
1095 case BPF_JMP | BPF_JEQ | BPF_X:
1096 case BPF_JMP | BPF_JGE | BPF_K:
1097 case BPF_JMP | BPF_JGE | BPF_X:
1098 case BPF_JMP | BPF_JGT | BPF_K:
1099 case BPF_JMP | BPF_JGT | BPF_X:
1100 case BPF_JMP | BPF_JSET | BPF_K:
1101 case BPF_JMP | BPF_JSET | BPF_X:
1102
1103 if (pc + ftest->jt + 1 >= flen ||
1104 pc + ftest->jf + 1 >= flen)
1105 return -EINVAL;
1106 break;
1107 case BPF_LD | BPF_W | BPF_ABS:
1108 case BPF_LD | BPF_H | BPF_ABS:
1109 case BPF_LD | BPF_B | BPF_ABS:
1110 anc_found = false;
1111 if (bpf_anc_helper(ftest) & BPF_ANC)
1112 anc_found = true;
1113
1114 if (anc_found == false && ftest->k >= SKF_AD_OFF)
1115 return -EINVAL;
1116 }
1117 }
1118
1119
1120 switch (filter[flen - 1].code) {
1121 case BPF_RET | BPF_K:
1122 case BPF_RET | BPF_A:
1123 return check_load_and_stores(filter, flen);
1124 }
1125
1126 return -EINVAL;
1127}
1128
1129static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
1130 const struct sock_fprog *fprog)
1131{
1132 unsigned int fsize = bpf_classic_proglen(fprog);
1133 struct sock_fprog_kern *fkprog;
1134
1135 fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
1136 if (!fp->orig_prog)
1137 return -ENOMEM;
1138
1139 fkprog = fp->orig_prog;
1140 fkprog->len = fprog->len;
1141
1142 fkprog->filter = kmemdup(fp->insns, fsize,
1143 GFP_KERNEL | __GFP_NOWARN);
1144 if (!fkprog->filter) {
1145 kfree(fp->orig_prog);
1146 return -ENOMEM;
1147 }
1148
1149 return 0;
1150}
1151
1152static void bpf_release_orig_filter(struct bpf_prog *fp)
1153{
1154 struct sock_fprog_kern *fprog = fp->orig_prog;
1155
1156 if (fprog) {
1157 kfree(fprog->filter);
1158 kfree(fprog);
1159 }
1160}
1161
1162static void __bpf_prog_release(struct bpf_prog *prog)
1163{
1164 if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
1165 bpf_prog_put(prog);
1166 } else {
1167 bpf_release_orig_filter(prog);
1168 bpf_prog_free(prog);
1169 }
1170}
1171
1172static void __sk_filter_release(struct sk_filter *fp)
1173{
1174 __bpf_prog_release(fp->prog);
1175 kfree(fp);
1176}
1177
/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
1182static void sk_filter_release_rcu(struct rcu_head *rcu)
1183{
1184 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
1185
1186 __sk_filter_release(fp);
1187}
1188
/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
1195static void sk_filter_release(struct sk_filter *fp)
1196{
1197 if (refcount_dec_and_test(&fp->refcnt))
1198 call_rcu(&fp->rcu, sk_filter_release_rcu);
1199}
1200
1201void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1202{
1203 u32 filter_size = bpf_prog_size(fp->prog->len);
1204
1205 atomic_sub(filter_size, &sk->sk_omem_alloc);
1206 sk_filter_release(fp);
1207}
1208
/* try to charge the socket memory if there is space available
 * return true on success
 */
1212static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1213{
1214 u32 filter_size = bpf_prog_size(fp->prog->len);
1215
1216
1217 if (filter_size <= sysctl_optmem_max &&
1218 atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
1219 atomic_add(filter_size, &sk->sk_omem_alloc);
1220 return true;
1221 }
1222 return false;
1223}
1224
1225bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1226{
1227 if (!refcount_inc_not_zero(&fp->refcnt))
1228 return false;
1229
1230 if (!__sk_filter_charge(sk, fp)) {
1231 sk_filter_release(fp);
1232 return false;
1233 }
1234 return true;
1235}
1236
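/* Convert a classic BPF filter that was not JITed into an eBPF program:
 * run bpf_convert_filter() once to size the new image, reallocate the
 * bpf_prog accordingly, run the conversion again to emit instructions and
 * finally select a runtime via bpf_prog_select_runtime(). On failure the
 * original program is released and an ERR_PTR is returned.
 */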
1237static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
1238{
1239 struct sock_filter *old_prog;
1240 struct bpf_prog *old_fp;
1241 int err, new_len, old_len = fp->len;
1242 bool seen_ld_abs = false;
1243
1244
1245
1246
1247
1248
1249 BUILD_BUG_ON(sizeof(struct sock_filter) !=
1250 sizeof(struct bpf_insn));
1251
1252
1253
1254
1255
1256 old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
1257 GFP_KERNEL | __GFP_NOWARN);
1258 if (!old_prog) {
1259 err = -ENOMEM;
1260 goto out_err;
1261 }
1262
1263
1264 err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
1265 &seen_ld_abs);
1266 if (err)
1267 goto out_err_free;
1268
1269
1270 old_fp = fp;
1271 fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
1272 if (!fp) {
1273
1274
1275
1276 fp = old_fp;
1277 err = -ENOMEM;
1278 goto out_err_free;
1279 }
1280
1281 fp->len = new_len;
1282
1283
1284 err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
1285 &seen_ld_abs);
1286 if (err)
1287
1288
1289
1290
1291
1292 goto out_err_free;
1293
1294 fp = bpf_prog_select_runtime(fp, &err);
1295 if (err)
1296 goto out_err_free;
1297
1298 kfree(old_prog);
1299 return fp;
1300
1301out_err_free:
1302 kfree(old_prog);
1303out_err:
1304 __bpf_prog_release(fp);
1305 return ERR_PTR(err);
1306}
1307
1308static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
1309 bpf_aux_classic_check_t trans)
1310{
1311 int err;
1312
1313 fp->bpf_func = NULL;
1314 fp->jited = 0;
1315
1316 err = bpf_check_classic(fp->insns, fp->len);
1317 if (err) {
1318 __bpf_prog_release(fp);
1319 return ERR_PTR(err);
1320 }
1321
1322
1323
1324
1325 if (trans) {
1326 err = trans(fp->insns, fp->len);
1327 if (err) {
1328 __bpf_prog_release(fp);
1329 return ERR_PTR(err);
1330 }
1331 }
1332
1333
1334
1335
1336 bpf_jit_compile(fp);
1337
1338
1339
1340
1341 if (!fp->jited)
1342 fp = bpf_migrate_filter(fp);
1343
1344 return fp;
1345}
1346
/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
1357int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
1358{
1359 unsigned int fsize = bpf_classic_proglen(fprog);
1360 struct bpf_prog *fp;
1361
1362
1363 if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1364 return -EINVAL;
1365
1366 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1367 if (!fp)
1368 return -ENOMEM;
1369
1370 memcpy(fp->insns, fprog->filter, fsize);
1371
1372 fp->len = fprog->len;
1373
1374
1375
1376
1377 fp->orig_prog = NULL;
1378
1379
1380
1381
1382 fp = bpf_prepare_filter(fp, NULL);
1383 if (IS_ERR(fp))
1384 return PTR_ERR(fp);
1385
1386 *pfp = fp;
1387 return 0;
1388}
1389EXPORT_SYMBOL_GPL(bpf_prog_create);
1390
/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
1402int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
1403 bpf_aux_classic_check_t trans, bool save_orig)
1404{
1405 unsigned int fsize = bpf_classic_proglen(fprog);
1406 struct bpf_prog *fp;
1407 int err;
1408
1409
1410 if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1411 return -EINVAL;
1412
1413 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1414 if (!fp)
1415 return -ENOMEM;
1416
1417 if (copy_from_user(fp->insns, fprog->filter, fsize)) {
1418 __bpf_prog_free(fp);
1419 return -EFAULT;
1420 }
1421
1422 fp->len = fprog->len;
1423 fp->orig_prog = NULL;
1424
1425 if (save_orig) {
1426 err = bpf_prog_store_orig_filter(fp, fprog);
1427 if (err) {
1428 __bpf_prog_free(fp);
1429 return -ENOMEM;
1430 }
1431 }
1432
1433
1434
1435
1436 fp = bpf_prepare_filter(fp, trans);
1437 if (IS_ERR(fp))
1438 return PTR_ERR(fp);
1439
1440 *pfp = fp;
1441 return 0;
1442}
1443EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
1444
1445void bpf_prog_destroy(struct bpf_prog *fp)
1446{
1447 __bpf_prog_release(fp);
1448}
1449EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1450
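/* Charge the new filter against the socket's option memory and publish it
 * via RCU as sk->sk_filter, uncharging and releasing any previously
 * attached filter.
 */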
1451static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
1452{
1453 struct sk_filter *fp, *old_fp;
1454
1455 fp = kmalloc(sizeof(*fp), GFP_KERNEL);
1456 if (!fp)
1457 return -ENOMEM;
1458
1459 fp->prog = prog;
1460
1461 if (!__sk_filter_charge(sk, fp)) {
1462 kfree(fp);
1463 return -ENOMEM;
1464 }
1465 refcount_set(&fp->refcnt, 1);
1466
1467 old_fp = rcu_dereference_protected(sk->sk_filter,
1468 lockdep_sock_is_held(sk));
1469 rcu_assign_pointer(sk->sk_filter, fp);
1470
1471 if (old_fp)
1472 sk_filter_uncharge(sk, old_fp);
1473
1474 return 0;
1475}
1476
1477static
1478struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
1479{
1480 unsigned int fsize = bpf_classic_proglen(fprog);
1481 struct bpf_prog *prog;
1482 int err;
1483
1484 if (sock_flag(sk, SOCK_FILTER_LOCKED))
1485 return ERR_PTR(-EPERM);
1486
1487
1488 if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1489 return ERR_PTR(-EINVAL);
1490
1491 prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1492 if (!prog)
1493 return ERR_PTR(-ENOMEM);
1494
1495 if (copy_from_user(prog->insns, fprog->filter, fsize)) {
1496 __bpf_prog_free(prog);
1497 return ERR_PTR(-EFAULT);
1498 }
1499
1500 prog->len = fprog->len;
1501
1502 err = bpf_prog_store_orig_filter(prog, fprog);
1503 if (err) {
1504 __bpf_prog_free(prog);
1505 return ERR_PTR(-ENOMEM);
1506 }
1507
1508
1509
1510
1511 return bpf_prepare_filter(prog, NULL);
1512}
1513
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
1524int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1525{
1526 struct bpf_prog *prog = __get_filter(fprog, sk);
1527 int err;
1528
1529 if (IS_ERR(prog))
1530 return PTR_ERR(prog);
1531
1532 err = __sk_attach_prog(prog, sk);
1533 if (err < 0) {
1534 __bpf_prog_release(prog);
1535 return err;
1536 }
1537
1538 return 0;
1539}
1540EXPORT_SYMBOL_GPL(sk_attach_filter);
1541
1542int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1543{
1544 struct bpf_prog *prog = __get_filter(fprog, sk);
1545 int err;
1546
1547 if (IS_ERR(prog))
1548 return PTR_ERR(prog);
1549
1550 if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1551 err = -ENOMEM;
1552 else
1553 err = reuseport_attach_prog(sk, prog);
1554
1555 if (err)
1556 __bpf_prog_release(prog);
1557
1558 return err;
1559}
1560
1561static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
1562{
1563 if (sock_flag(sk, SOCK_FILTER_LOCKED))
1564 return ERR_PTR(-EPERM);
1565
1566 return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
1567}
1568
1569int sk_attach_bpf(u32 ufd, struct sock *sk)
1570{
1571 struct bpf_prog *prog = __get_bpf(ufd, sk);
1572 int err;
1573
1574 if (IS_ERR(prog))
1575 return PTR_ERR(prog);
1576
1577 err = __sk_attach_prog(prog, sk);
1578 if (err < 0) {
1579 bpf_prog_put(prog);
1580 return err;
1581 }
1582
1583 return 0;
1584}
1585
1586int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1587{
1588 struct bpf_prog *prog;
1589 int err;
1590
1591 if (sock_flag(sk, SOCK_FILTER_LOCKED))
1592 return -EPERM;
1593
1594 prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
1595 if (PTR_ERR(prog) == -EINVAL)
1596 prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
1597 if (IS_ERR(prog))
1598 return PTR_ERR(prog);
1599
1600 if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
		/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
		 * bpf prog (e.g. sockmap).  It depends on the
		 * limitation imposed by bpf_prog_load().
		 * Hence, sysctl_optmem_max is not checked.
		 */
1606 if ((sk->sk_type != SOCK_STREAM &&
1607 sk->sk_type != SOCK_DGRAM) ||
1608 (sk->sk_protocol != IPPROTO_UDP &&
1609 sk->sk_protocol != IPPROTO_TCP) ||
1610 (sk->sk_family != AF_INET &&
1611 sk->sk_family != AF_INET6)) {
1612 err = -ENOTSUPP;
1613 goto err_prog_put;
1614 }
1615 } else {
1616
1617 if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
1618 err = -ENOMEM;
1619 goto err_prog_put;
1620 }
1621 }
1622
1623 err = reuseport_attach_prog(sk, prog);
1624err_prog_put:
1625 if (err)
1626 bpf_prog_put(prog);
1627
1628 return err;
1629}
1630
1631void sk_reuseport_prog_free(struct bpf_prog *prog)
1632{
1633 if (!prog)
1634 return;
1635
1636 if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
1637 bpf_prog_put(prog);
1638 else
1639 bpf_prog_destroy(prog);
1640}
1641
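/* Per-CPU scratch buffer used by bpf_csum_diff() to lay out the inverted
 * 'from' words followed by the 'to' words before feeding them to
 * csum_partial(). Sized to fit within MAX_BPF_STACK.
 */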
1642struct bpf_scratchpad {
1643 union {
1644 __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1645 u8 buff[MAX_BPF_STACK];
1646 };
1647};
1648
1649static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1650
1651static inline int __bpf_try_make_writable(struct sk_buff *skb,
1652 unsigned int write_len)
1653{
1654 return skb_ensure_writable(skb, write_len);
1655}
1656
1657static inline int bpf_try_make_writable(struct sk_buff *skb,
1658 unsigned int write_len)
1659{
1660 int err = __bpf_try_make_writable(skb, write_len);
1661
1662 bpf_compute_data_pointers(skb);
1663 return err;
1664}
1665
1666static int bpf_try_make_head_writable(struct sk_buff *skb)
1667{
1668 return bpf_try_make_writable(skb, skb_headlen(skb));
1669}
1670
1671static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
1672{
1673 if (skb_at_tc_ingress(skb))
1674 skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1675}
1676
1677static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
1678{
1679 if (skb_at_tc_ingress(skb))
1680 skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1681}
1682
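/* bpf_skb_store_bytes(): copy 'len' bytes from the program-supplied buffer
 * into the packet at 'offset', making that area writable first. Optionally
 * keeps CHECKSUM_COMPLETE in sync (BPF_F_RECOMPUTE_CSUM) or invalidates the
 * skb hash (BPF_F_INVALIDATE_HASH).
 */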
1683BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
1684 const void *, from, u32, len, u64, flags)
1685{
1686 void *ptr;
1687
1688 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
1689 return -EINVAL;
1690 if (unlikely(offset > 0xffff))
1691 return -EFAULT;
1692 if (unlikely(bpf_try_make_writable(skb, offset + len)))
1693 return -EFAULT;
1694
1695 ptr = skb->data + offset;
1696 if (flags & BPF_F_RECOMPUTE_CSUM)
1697 __skb_postpull_rcsum(skb, ptr, len, offset);
1698
1699 memcpy(ptr, from, len);
1700
1701 if (flags & BPF_F_RECOMPUTE_CSUM)
1702 __skb_postpush_rcsum(skb, ptr, len, offset);
1703 if (flags & BPF_F_INVALIDATE_HASH)
1704 skb_clear_hash(skb);
1705
1706 return 0;
1707}
1708
1709static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1710 .func = bpf_skb_store_bytes,
1711 .gpl_only = false,
1712 .ret_type = RET_INTEGER,
1713 .arg1_type = ARG_PTR_TO_CTX,
1714 .arg2_type = ARG_ANYTHING,
1715 .arg3_type = ARG_PTR_TO_MEM,
1716 .arg4_type = ARG_CONST_SIZE,
1717 .arg5_type = ARG_ANYTHING,
1718};
1719
1720BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
1721 void *, to, u32, len)
1722{
1723 void *ptr;
1724
1725 if (unlikely(offset > 0xffff))
1726 goto err_clear;
1727
1728 ptr = skb_header_pointer(skb, offset, len, to);
1729 if (unlikely(!ptr))
1730 goto err_clear;
1731 if (ptr != to)
1732 memcpy(to, ptr, len);
1733
1734 return 0;
1735err_clear:
1736 memset(to, 0, len);
1737 return -EFAULT;
1738}
1739
1740static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1741 .func = bpf_skb_load_bytes,
1742 .gpl_only = false,
1743 .ret_type = RET_INTEGER,
1744 .arg1_type = ARG_PTR_TO_CTX,
1745 .arg2_type = ARG_ANYTHING,
1746 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1747 .arg4_type = ARG_CONST_SIZE,
1748};
1749
1750BPF_CALL_4(bpf_flow_dissector_load_bytes,
1751 const struct bpf_flow_dissector *, ctx, u32, offset,
1752 void *, to, u32, len)
1753{
1754 void *ptr;
1755
1756 if (unlikely(offset > 0xffff))
1757 goto err_clear;
1758
1759 if (unlikely(!ctx->skb))
1760 goto err_clear;
1761
1762 ptr = skb_header_pointer(ctx->skb, offset, len, to);
1763 if (unlikely(!ptr))
1764 goto err_clear;
1765 if (ptr != to)
1766 memcpy(to, ptr, len);
1767
1768 return 0;
1769err_clear:
1770 memset(to, 0, len);
1771 return -EFAULT;
1772}
1773
1774static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
1775 .func = bpf_flow_dissector_load_bytes,
1776 .gpl_only = false,
1777 .ret_type = RET_INTEGER,
1778 .arg1_type = ARG_PTR_TO_CTX,
1779 .arg2_type = ARG_ANYTHING,
1780 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1781 .arg4_type = ARG_CONST_SIZE,
1782};
1783
1784BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1785 u32, offset, void *, to, u32, len, u32, start_header)
1786{
1787 u8 *end = skb_tail_pointer(skb);
1788 u8 *start, *ptr;
1789
1790 if (unlikely(offset > 0xffff))
1791 goto err_clear;
1792
1793 switch (start_header) {
1794 case BPF_HDR_START_MAC:
1795 if (unlikely(!skb_mac_header_was_set(skb)))
1796 goto err_clear;
1797 start = skb_mac_header(skb);
1798 break;
1799 case BPF_HDR_START_NET:
1800 start = skb_network_header(skb);
1801 break;
1802 default:
1803 goto err_clear;
1804 }
1805
1806 ptr = start + offset;
1807
1808 if (likely(ptr + len <= end)) {
1809 memcpy(to, ptr, len);
1810 return 0;
1811 }
1812
1813err_clear:
1814 memset(to, 0, len);
1815 return -EFAULT;
1816}
1817
1818static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
1819 .func = bpf_skb_load_bytes_relative,
1820 .gpl_only = false,
1821 .ret_type = RET_INTEGER,
1822 .arg1_type = ARG_PTR_TO_CTX,
1823 .arg2_type = ARG_ANYTHING,
1824 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1825 .arg4_type = ARG_CONST_SIZE,
1826 .arg5_type = ARG_ANYTHING,
1827};
1828
1829BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1830{
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840 return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
1841}
1842
1843static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1844 .func = bpf_skb_pull_data,
1845 .gpl_only = false,
1846 .ret_type = RET_INTEGER,
1847 .arg1_type = ARG_PTR_TO_CTX,
1848 .arg2_type = ARG_ANYTHING,
1849};
1850
1851BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
1852{
1853 return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
1854}
1855
1856static const struct bpf_func_proto bpf_sk_fullsock_proto = {
1857 .func = bpf_sk_fullsock,
1858 .gpl_only = false,
1859 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
1860 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
1861};
1862
1863static inline int sk_skb_try_make_writable(struct sk_buff *skb,
1864 unsigned int write_len)
1865{
1866 return __bpf_try_make_writable(skb, write_len);
1867}
1868
1869BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
1870{
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880 return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
1881}
1882
1883static const struct bpf_func_proto sk_skb_pull_data_proto = {
1884 .func = sk_skb_pull_data,
1885 .gpl_only = false,
1886 .ret_type = RET_INTEGER,
1887 .arg1_type = ARG_PTR_TO_CTX,
1888 .arg2_type = ARG_ANYTHING,
1889};
1890
1891BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1892 u64, from, u64, to, u64, flags)
1893{
1894 __sum16 *ptr;
1895
1896 if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1897 return -EINVAL;
1898 if (unlikely(offset > 0xffff || offset & 1))
1899 return -EFAULT;
1900 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1901 return -EFAULT;
1902
1903 ptr = (__sum16 *)(skb->data + offset);
1904 switch (flags & BPF_F_HDR_FIELD_MASK) {
1905 case 0:
1906 if (unlikely(from != 0))
1907 return -EINVAL;
1908
1909 csum_replace_by_diff(ptr, to);
1910 break;
1911 case 2:
1912 csum_replace2(ptr, from, to);
1913 break;
1914 case 4:
1915 csum_replace4(ptr, from, to);
1916 break;
1917 default:
1918 return -EINVAL;
1919 }
1920
1921 return 0;
1922}
1923
1924static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1925 .func = bpf_l3_csum_replace,
1926 .gpl_only = false,
1927 .ret_type = RET_INTEGER,
1928 .arg1_type = ARG_PTR_TO_CTX,
1929 .arg2_type = ARG_ANYTHING,
1930 .arg3_type = ARG_ANYTHING,
1931 .arg4_type = ARG_ANYTHING,
1932 .arg5_type = ARG_ANYTHING,
1933};
1934
1935BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1936 u64, from, u64, to, u64, flags)
1937{
1938 bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1939 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1940 bool do_mforce = flags & BPF_F_MARK_ENFORCE;
1941 __sum16 *ptr;
1942
1943 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
1944 BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
1945 return -EINVAL;
1946 if (unlikely(offset > 0xffff || offset & 1))
1947 return -EFAULT;
1948 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1949 return -EFAULT;
1950
1951 ptr = (__sum16 *)(skb->data + offset);
1952 if (is_mmzero && !do_mforce && !*ptr)
1953 return 0;
1954
1955 switch (flags & BPF_F_HDR_FIELD_MASK) {
1956 case 0:
1957 if (unlikely(from != 0))
1958 return -EINVAL;
1959
1960 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1961 break;
1962 case 2:
1963 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1964 break;
1965 case 4:
1966 inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
1967 break;
1968 default:
1969 return -EINVAL;
1970 }
1971
1972 if (is_mmzero && !*ptr)
1973 *ptr = CSUM_MANGLED_0;
1974 return 0;
1975}
1976
1977static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1978 .func = bpf_l4_csum_replace,
1979 .gpl_only = false,
1980 .ret_type = RET_INTEGER,
1981 .arg1_type = ARG_PTR_TO_CTX,
1982 .arg2_type = ARG_ANYTHING,
1983 .arg3_type = ARG_ANYTHING,
1984 .arg4_type = ARG_ANYTHING,
1985 .arg5_type = ARG_ANYTHING,
1986};
1987
1988BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
1989 __be32 *, to, u32, to_size, __wsum, seed)
1990{
1991 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1992 u32 diff_size = from_size + to_size;
1993 int i, j = 0;
1994
	/* This is quite flexible, some examples:
	 *
	 * from_size == 0, to_size > 0,  seed := csum --> pushing data
	 * from_size > 0,  to_size == 0, seed := csum --> pulling data
	 * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
	 *
	 * Even for diffing, from_size and to_size don't need to be equal.
	 */
2003 if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
2004 diff_size > sizeof(sp->diff)))
2005 return -EINVAL;
2006
2007 for (i = 0; i < from_size / sizeof(__be32); i++, j++)
2008 sp->diff[j] = ~from[i];
2009 for (i = 0; i < to_size / sizeof(__be32); i++, j++)
2010 sp->diff[j] = to[i];
2011
2012 return csum_partial(sp->diff, diff_size, seed);
2013}
2014
2015static const struct bpf_func_proto bpf_csum_diff_proto = {
2016 .func = bpf_csum_diff,
2017 .gpl_only = false,
2018 .pkt_access = true,
2019 .ret_type = RET_INTEGER,
2020 .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
2021 .arg2_type = ARG_CONST_SIZE_OR_ZERO,
2022 .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
2023 .arg4_type = ARG_CONST_SIZE_OR_ZERO,
2024 .arg5_type = ARG_ANYTHING,
2025};
2026
2027BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
2028{
	/* The interface is to be used in combination with bpf_csum_diff()
	 * for direct packet writes. csum rotation for alignment as well
	 * as emulating csum_sub() can be done from the eBPF program.
	 */
2033 if (skb->ip_summed == CHECKSUM_COMPLETE)
2034 return (skb->csum = csum_add(skb->csum, csum));
2035
2036 return -ENOTSUPP;
2037}
2038
2039static const struct bpf_func_proto bpf_csum_update_proto = {
2040 .func = bpf_csum_update,
2041 .gpl_only = false,
2042 .ret_type = RET_INTEGER,
2043 .arg1_type = ARG_PTR_TO_CTX,
2044 .arg2_type = ARG_ANYTHING,
2045};
2046
2047BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
2048{
	/* Manipulate the CHECKSUM_UNNECESSARY level of the skb: increment,
	 * decrement or reset skb->csum_level, or query the current level
	 * (the query variant fails with -EACCES unless ip_summed is
	 * CHECKSUM_UNNECESSARY).
	 */
2053 switch (level) {
2054 case BPF_CSUM_LEVEL_INC:
2055 __skb_incr_checksum_unnecessary(skb);
2056 break;
2057 case BPF_CSUM_LEVEL_DEC:
2058 __skb_decr_checksum_unnecessary(skb);
2059 break;
2060 case BPF_CSUM_LEVEL_RESET:
2061 __skb_reset_checksum_unnecessary(skb);
2062 break;
2063 case BPF_CSUM_LEVEL_QUERY:
2064 return skb->ip_summed == CHECKSUM_UNNECESSARY ?
2065 skb->csum_level : -EACCES;
2066 default:
2067 return -EINVAL;
2068 }
2069
2070 return 0;
2071}
2072
2073static const struct bpf_func_proto bpf_csum_level_proto = {
2074 .func = bpf_csum_level,
2075 .gpl_only = false,
2076 .ret_type = RET_INTEGER,
2077 .arg1_type = ARG_PTR_TO_CTX,
2078 .arg2_type = ARG_ANYTHING,
2079};
2080
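/* Delivery backends for the redirect helpers: either loop the skb back into
 * the stack as if it had been received on the target device (BPF_F_INGRESS)
 * or transmit it out of that device, with a recursion limit to catch
 * redirect loops created by buggy programs.
 */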
2081static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
2082{
2083 return dev_forward_skb_nomtu(dev, skb);
2084}
2085
2086static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
2087 struct sk_buff *skb)
2088{
2089 int ret = ____dev_forward_skb(dev, skb, false);
2090
2091 if (likely(!ret)) {
2092 skb->dev = dev;
2093 ret = netif_rx(skb);
2094 }
2095
2096 return ret;
2097}
2098
2099static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
2100{
2101 int ret;
2102
2103 if (dev_xmit_recursion()) {
2104 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2105 kfree_skb(skb);
2106 return -ENETDOWN;
2107 }
2108
2109 skb->dev = dev;
2110 skb->tstamp = 0;
2111
2112 dev_xmit_recursion_inc();
2113 ret = dev_queue_xmit(skb);
2114 dev_xmit_recursion_dec();
2115
2116 return ret;
2117}
2118
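/* Redirect towards a device that does not transmit with an Ethernet-style
 * MAC header (see dev_is_mac_header_xmit() in __bpf_redirect()): strip any
 * L2 header so the skb starts at the network header before handing it to
 * the rx or tx path.
 */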
2119static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
2120 u32 flags)
2121{
2122 unsigned int mlen = skb_network_offset(skb);
2123
2124 if (mlen) {
2125 __skb_pull(skb, mlen);

		/* At ingress, the mac header has already been pulled once.
		 * At egress, skb_postpull_rcsum has to be done in case that
		 * the skb is originated from ingress (i.e. a forwarded skb)
		 * to ensure that rcsum starts at the network header.
		 */
2132 if (!skb_at_tc_ingress(skb))
2133 skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
2134 }
2135 skb_pop_mac_header(skb);
2136 skb_reset_mac_len(skb);
2137 return flags & BPF_F_INGRESS ?
2138 __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2139}
2140
2141static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
2142 u32 flags)
2143{
2144
2145 if (unlikely(skb->mac_header >= skb->network_header)) {
2146 kfree_skb(skb);
2147 return -ERANGE;
2148 }
2149
2150 bpf_push_mac_rcsum(skb);
2151 return flags & BPF_F_INGRESS ?
2152 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
2153}
2154
2155static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
2156 u32 flags)
2157{
2158 if (dev_is_mac_header_xmit(dev))
2159 return __bpf_redirect_common(skb, dev, flags);
2160 else
2161 return __bpf_redirect_no_mac(skb, dev, flags);
2162}
2163
2164#if IS_ENABLED(CONFIG_IPV6)
2165static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
2166 struct net_device *dev, struct bpf_nh_params *nh)
2167{
2168 u32 hh_len = LL_RESERVED_SPACE(dev);
2169 const struct in6_addr *nexthop;
2170 struct dst_entry *dst = NULL;
2171 struct neighbour *neigh;
2172
2173 if (dev_xmit_recursion()) {
2174 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2175 goto out_drop;
2176 }
2177
2178 skb->dev = dev;
2179 skb->tstamp = 0;
2180
2181 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2182 struct sk_buff *skb2;
2183
2184 skb2 = skb_realloc_headroom(skb, hh_len);
2185 if (unlikely(!skb2)) {
2186 kfree_skb(skb);
2187 return -ENOMEM;
2188 }
2189 if (skb->sk)
2190 skb_set_owner_w(skb2, skb->sk);
2191 consume_skb(skb);
2192 skb = skb2;
2193 }
2194
2195 rcu_read_lock_bh();
2196 if (!nh) {
2197 dst = skb_dst(skb);
2198 nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
2199 &ipv6_hdr(skb)->daddr);
2200 } else {
2201 nexthop = &nh->ipv6_nh;
2202 }
2203 neigh = ip_neigh_gw6(dev, nexthop);
2204 if (likely(!IS_ERR(neigh))) {
2205 int ret;
2206
2207 sock_confirm_neigh(skb, neigh);
2208 dev_xmit_recursion_inc();
2209 ret = neigh_output(neigh, skb, false);
2210 dev_xmit_recursion_dec();
2211 rcu_read_unlock_bh();
2212 return ret;
2213 }
2214 rcu_read_unlock_bh();
2215 if (dst)
2216 IP6_INC_STATS(dev_net(dst->dev),
2217 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
2218out_drop:
2219 kfree_skb(skb);
2220 return -ENETDOWN;
2221}
2222
2223static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2224 struct bpf_nh_params *nh)
2225{
2226 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
2227 struct net *net = dev_net(dev);
2228 int err, ret = NET_XMIT_DROP;
2229
2230 if (!nh) {
2231 struct dst_entry *dst;
2232 struct flowi6 fl6 = {
2233 .flowi6_flags = FLOWI_FLAG_ANYSRC,
2234 .flowi6_mark = skb->mark,
2235 .flowlabel = ip6_flowinfo(ip6h),
2236 .flowi6_oif = dev->ifindex,
2237 .flowi6_proto = ip6h->nexthdr,
2238 .daddr = ip6h->daddr,
2239 .saddr = ip6h->saddr,
2240 };
2241
2242 dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
2243 if (IS_ERR(dst))
2244 goto out_drop;
2245
2246 skb_dst_set(skb, dst);
2247 } else if (nh->nh_family != AF_INET6) {
2248 goto out_drop;
2249 }
2250
2251 err = bpf_out_neigh_v6(net, skb, dev, nh);
2252 if (unlikely(net_xmit_eval(err)))
2253 dev->stats.tx_errors++;
2254 else
2255 ret = NET_XMIT_SUCCESS;
2256 goto out_xmit;
2257out_drop:
2258 dev->stats.tx_errors++;
2259 kfree_skb(skb);
2260out_xmit:
2261 return ret;
2262}
2263#else
2264static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2265 struct bpf_nh_params *nh)
2266{
2267 kfree_skb(skb);
2268 return NET_XMIT_DROP;
2269}
2270#endif
2271
2272#if IS_ENABLED(CONFIG_INET)
2273static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
2274 struct net_device *dev, struct bpf_nh_params *nh)
2275{
2276 u32 hh_len = LL_RESERVED_SPACE(dev);
2277 struct neighbour *neigh;
2278 bool is_v6gw = false;
2279
2280 if (dev_xmit_recursion()) {
2281 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2282 goto out_drop;
2283 }
2284
2285 skb->dev = dev;
2286 skb->tstamp = 0;
2287
2288 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
2289 struct sk_buff *skb2;
2290
2291 skb2 = skb_realloc_headroom(skb, hh_len);
2292 if (unlikely(!skb2)) {
2293 kfree_skb(skb);
2294 return -ENOMEM;
2295 }
2296 if (skb->sk)
2297 skb_set_owner_w(skb2, skb->sk);
2298 consume_skb(skb);
2299 skb = skb2;
2300 }
2301
2302 rcu_read_lock_bh();
2303 if (!nh) {
2304 struct dst_entry *dst = skb_dst(skb);
2305 struct rtable *rt = container_of(dst, struct rtable, dst);
2306
2307 neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
2308 } else if (nh->nh_family == AF_INET6) {
2309 neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
2310 is_v6gw = true;
2311 } else if (nh->nh_family == AF_INET) {
2312 neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
2313 } else {
2314 rcu_read_unlock_bh();
2315 goto out_drop;
2316 }
2317
2318 if (likely(!IS_ERR(neigh))) {
2319 int ret;
2320
2321 sock_confirm_neigh(skb, neigh);
2322 dev_xmit_recursion_inc();
2323 ret = neigh_output(neigh, skb, is_v6gw);
2324 dev_xmit_recursion_dec();
2325 rcu_read_unlock_bh();
2326 return ret;
2327 }
2328 rcu_read_unlock_bh();
2329out_drop:
2330 kfree_skb(skb);
2331 return -ENETDOWN;
2332}
2333
2334static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2335 struct bpf_nh_params *nh)
2336{
2337 const struct iphdr *ip4h = ip_hdr(skb);
2338 struct net *net = dev_net(dev);
2339 int err, ret = NET_XMIT_DROP;
2340
2341 if (!nh) {
2342 struct flowi4 fl4 = {
2343 .flowi4_flags = FLOWI_FLAG_ANYSRC,
2344 .flowi4_mark = skb->mark,
2345 .flowi4_tos = RT_TOS(ip4h->tos),
2346 .flowi4_oif = dev->ifindex,
2347 .flowi4_proto = ip4h->protocol,
2348 .daddr = ip4h->daddr,
2349 .saddr = ip4h->saddr,
2350 };
2351 struct rtable *rt;
2352
2353 rt = ip_route_output_flow(net, &fl4, NULL);
2354 if (IS_ERR(rt))
2355 goto out_drop;
2356 if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
2357 ip_rt_put(rt);
2358 goto out_drop;
2359 }
2360
2361 skb_dst_set(skb, &rt->dst);
2362 }
2363
2364 err = bpf_out_neigh_v4(net, skb, dev, nh);
2365 if (unlikely(net_xmit_eval(err)))
2366 dev->stats.tx_errors++;
2367 else
2368 ret = NET_XMIT_SUCCESS;
2369 goto out_xmit;
2370out_drop:
2371 dev->stats.tx_errors++;
2372 kfree_skb(skb);
2373out_xmit:
2374 return ret;
2375}
2376#else
2377static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2378 struct bpf_nh_params *nh)
2379{
2380 kfree_skb(skb);
2381 return NET_XMIT_DROP;
2382}
2383#endif
2384
2385static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
2386 struct bpf_nh_params *nh)
2387{
2388 struct ethhdr *ethh = eth_hdr(skb);
2389
2390 if (unlikely(skb->mac_header >= skb->network_header))
2391 goto out;
2392 bpf_push_mac_rcsum(skb);
2393 if (is_multicast_ether_addr(ethh->h_dest))
2394 goto out;
2395
2396 skb_pull(skb, sizeof(*ethh));
2397 skb_unset_mac_header(skb);
2398 skb_reset_network_header(skb);
2399
2400 if (skb->protocol == htons(ETH_P_IP))
2401 return __bpf_redirect_neigh_v4(skb, dev, nh);
2402 else if (skb->protocol == htons(ETH_P_IPV6))
2403 return __bpf_redirect_neigh_v6(skb, dev, nh);
2404out:
2405 kfree_skb(skb);
2406 return -ENOTSUPP;
2407}
2408
2409
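/* Internal, non-exposed redirect flags. */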
2410enum {
2411 BPF_F_NEIGH = (1ULL << 1),
2412 BPF_F_PEER = (1ULL << 2),
2413 BPF_F_NEXTHOP = (1ULL << 3),
2414#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
2415};
2416
2417BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
2418{
2419 struct net_device *dev;
2420 struct sk_buff *clone;
2421 int ret;
2422
2423 if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
2424 return -EINVAL;
2425
2426 dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
2427 if (unlikely(!dev))
2428 return -EINVAL;
2429
2430 clone = skb_clone(skb, GFP_ATOMIC);
2431 if (unlikely(!clone))
2432 return -ENOMEM;
2433
2434
2435
2436
2437
2438
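	/* For direct write, we need to keep the invariant that the skbs
	 * we're dealing with need to be uncloned. Should uncloning fail
	 * here, we need to free the just generated clone to unclone once
	 * again.
	 */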
2439 ret = bpf_try_make_head_writable(skb);
2440 if (unlikely(ret)) {
2441 kfree_skb(clone);
2442 return -ENOMEM;
2443 }
2444
2445 return __bpf_redirect(clone, dev, flags);
2446}
2447
2448static const struct bpf_func_proto bpf_clone_redirect_proto = {
2449 .func = bpf_clone_redirect,
2450 .gpl_only = false,
2451 .ret_type = RET_INTEGER,
2452 .arg1_type = ARG_PTR_TO_CTX,
2453 .arg2_type = ARG_ANYTHING,
2454 .arg3_type = ARG_ANYTHING,
2455};
2456
2457DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
2458EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
2459
2460int skb_do_redirect(struct sk_buff *skb)
2461{
2462 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2463 struct net *net = dev_net(skb->dev);
2464 struct net_device *dev;
2465 u32 flags = ri->flags;
2466
2467 dev = dev_get_by_index_rcu(net, ri->tgt_index);
2468 ri->tgt_index = 0;
2469 ri->flags = 0;
2470 if (unlikely(!dev))
2471 goto out_drop;
2472 if (flags & BPF_F_PEER) {
2473 const struct net_device_ops *ops = dev->netdev_ops;
2474
2475 if (unlikely(!ops->ndo_get_peer_dev ||
2476 !skb_at_tc_ingress(skb)))
2477 goto out_drop;
2478 dev = ops->ndo_get_peer_dev(dev);
2479 if (unlikely(!dev ||
2480 !(dev->flags & IFF_UP) ||
2481 net_eq(net, dev_net(dev))))
2482 goto out_drop;
2483 skb->dev = dev;
2484 return -EAGAIN;
2485 }
2486 return flags & BPF_F_NEIGH ?
2487 __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
2488 &ri->nh : NULL) :
2489 __bpf_redirect(skb, dev, flags);
2490out_drop:
2491 kfree_skb(skb);
2492 return -EINVAL;
2493}
2494
2495BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
2496{
2497 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2498
2499 if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
2500 return TC_ACT_SHOT;
2501
2502 ri->flags = flags;
2503 ri->tgt_index = ifindex;
2504
2505 return TC_ACT_REDIRECT;
2506}
2507
2508static const struct bpf_func_proto bpf_redirect_proto = {
2509 .func = bpf_redirect,
2510 .gpl_only = false,
2511 .ret_type = RET_INTEGER,
2512 .arg1_type = ARG_ANYTHING,
2513 .arg2_type = ARG_ANYTHING,
2514};
2515
2516BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
2517{
2518 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2519
2520 if (unlikely(flags))
2521 return TC_ACT_SHOT;
2522
2523 ri->flags = BPF_F_PEER;
2524 ri->tgt_index = ifindex;
2525
2526 return TC_ACT_REDIRECT;
2527}
2528
2529static const struct bpf_func_proto bpf_redirect_peer_proto = {
2530 .func = bpf_redirect_peer,
2531 .gpl_only = false,
2532 .ret_type = RET_INTEGER,
2533 .arg1_type = ARG_ANYTHING,
2534 .arg2_type = ARG_ANYTHING,
2535};
2536
2537BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
2538 int, plen, u64, flags)
2539{
2540 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2541
2542 if (unlikely((plen && plen < sizeof(*params)) || flags))
2543 return TC_ACT_SHOT;
2544
2545 ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
2546 ri->tgt_index = ifindex;
2547
2548 BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
2549 if (plen)
2550 memcpy(&ri->nh, params, sizeof(ri->nh));
2551
2552 return TC_ACT_REDIRECT;
2553}
2554
2555static const struct bpf_func_proto bpf_redirect_neigh_proto = {
2556 .func = bpf_redirect_neigh,
2557 .gpl_only = false,
2558 .ret_type = RET_INTEGER,
2559 .arg1_type = ARG_ANYTHING,
2560 .arg2_type = ARG_PTR_TO_MEM_OR_NULL,
2561 .arg3_type = ARG_CONST_SIZE_OR_ZERO,
2562 .arg4_type = ARG_ANYTHING,
2563};
2564
2565BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
2566{
2567 msg->apply_bytes = bytes;
2568 return 0;
2569}
2570
2571static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
2572 .func = bpf_msg_apply_bytes,
2573 .gpl_only = false,
2574 .ret_type = RET_INTEGER,
2575 .arg1_type = ARG_PTR_TO_CTX,
2576 .arg2_type = ARG_ANYTHING,
2577};
2578
2579BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
2580{
2581 msg->cork_bytes = bytes;
2582 return 0;
2583}
2584
2585static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
2586 .func = bpf_msg_cork_bytes,
2587 .gpl_only = false,
2588 .ret_type = RET_INTEGER,
2589 .arg1_type = ARG_PTR_TO_CTX,
2590 .arg2_type = ARG_ANYTHING,
2591};
2592
2593BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
2594 u32, end, u64, flags)
2595{
2596 u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
2597 u32 first_sge, last_sge, i, shift, bytes_sg_total;
2598 struct scatterlist *sge;
2599 u8 *raw, *to, *from;
2600 struct page *page;
2601
2602 if (unlikely(flags || end <= start))
2603 return -EINVAL;
2604
2605
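	/* First find the starting scatterlist element */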
2606 i = msg->sg.start;
2607 do {
2608 offset += len;
2609 len = sk_msg_elem(msg, i)->length;
2610 if (start < offset + len)
2611 break;
2612 sk_msg_iter_var_next(i);
2613 } while (i != msg->sg.end);
2614
2615 if (unlikely(start >= offset + len))
2616 return -EINVAL;
2617
2618 first_sge = i;
2619
2620
2621
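	/* The start may point into the middle of the first sg element,
	 * so account for the in-element offset. If that element is not
	 * shared and already covers all requested bytes, the range is
	 * already linear and nothing needs to move.
	 */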
2622 bytes_sg_total = start - offset + bytes;
2623 if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
2624 goto out;
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
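	/* At this point we need to linearize multiple scatterlist
	 * elements or a single shared page. Either way we need to
	 * copy into a linear buffer exclusively owned by BPF. Then
	 * place the buffer in the scatterlist and fixup the original
	 * entries by removing the entries now in the linear buffer
	 * and shifting the remaining entries. For now we do not try
	 * to copy partial entries to avoid complexity of running out
	 * of sg_entry slots. The downside is reading a single byte
	 * will copy the entire sg entry.
	 */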
2636 do {
2637 copy += sk_msg_elem(msg, i)->length;
2638 sk_msg_iter_var_next(i);
2639 if (bytes_sg_total <= copy)
2640 break;
2641 } while (i != msg->sg.end);
2642 last_sge = i;
2643
2644 if (unlikely(bytes_sg_total > copy))
2645 return -EINVAL;
2646
2647 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2648 get_order(copy));
2649 if (unlikely(!page))
2650 return -ENOMEM;
2651
2652 raw = page_address(page);
2653 i = first_sge;
2654 do {
2655 sge = sk_msg_elem(msg, i);
2656 from = sg_virt(sge);
2657 len = sge->length;
2658 to = raw + poffset;
2659
2660 memcpy(to, from, len);
2661 poffset += len;
2662 sge->length = 0;
2663 put_page(sg_page(sge));
2664
2665 sk_msg_iter_var_next(i);
2666 } while (i != last_sge);
2667
2668 sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
2669
2670
2671
2672
2673
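	/* To repair the sg ring we need to shift the entries after the
	 * linearized region. If only a single entry was coalesced we can
	 * just replace it and be done, otherwise walk the ring and shift
	 * the remaining entries down.
	 */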
2674 WARN_ON_ONCE(last_sge == first_sge);
2675 shift = last_sge > first_sge ?
2676 last_sge - first_sge - 1 :
2677 NR_MSG_FRAG_IDS - first_sge + last_sge - 1;
2678 if (!shift)
2679 goto out;
2680
2681 i = first_sge;
2682 sk_msg_iter_var_next(i);
2683 do {
2684 u32 move_from;
2685
2686 if (i + shift >= NR_MSG_FRAG_IDS)
2687 move_from = i + shift - NR_MSG_FRAG_IDS;
2688 else
2689 move_from = i + shift;
2690 if (move_from == msg->sg.end)
2691 break;
2692
2693 msg->sg.data[i] = msg->sg.data[move_from];
2694 msg->sg.data[move_from].length = 0;
2695 msg->sg.data[move_from].page_link = 0;
2696 msg->sg.data[move_from].offset = 0;
2697 sk_msg_iter_var_next(i);
2698 } while (1);
2699
2700 msg->sg.end = msg->sg.end - shift > msg->sg.end ?
2701 msg->sg.end - shift + NR_MSG_FRAG_IDS :
2702 msg->sg.end - shift;
2703out:
2704 msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
2705 msg->data_end = msg->data + bytes;
2706 return 0;
2707}
2708
2709static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2710 .func = bpf_msg_pull_data,
2711 .gpl_only = false,
2712 .ret_type = RET_INTEGER,
2713 .arg1_type = ARG_PTR_TO_CTX,
2714 .arg2_type = ARG_ANYTHING,
2715 .arg3_type = ARG_ANYTHING,
2716 .arg4_type = ARG_ANYTHING,
2717};
2718
2719BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2720 u32, len, u64, flags)
2721{
2722 struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
2723 u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
2724 u8 *raw, *to, *from;
2725 struct page *page;
2726
2727 if (unlikely(flags))
2728 return -EINVAL;
2729
2730
2731 i = msg->sg.start;
2732 do {
2733 offset += l;
2734 l = sk_msg_elem(msg, i)->length;
2735
2736 if (start < offset + l)
2737 break;
2738 sk_msg_iter_var_next(i);
2739 } while (i != msg->sg.end);
2740
2741 if (start >= offset + l)
2742 return -EINVAL;
2743
2744 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2745
2746
2747
2748
2749
2750
2751
2752
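	/* If no slot is free we fall back to copying; otherwise we need
	 * at least one spare scatterlist element to push data into when
	 * start aligns with the beginning of an element, or two spare
	 * elements when start falls inside one.
	 */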
2753 if (!space || (space == 1 && start != offset))
2754 copy = msg->sg.data[i].length;
2755
2756 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2757 get_order(copy + len));
2758 if (unlikely(!page))
2759 return -ENOMEM;
2760
2761 if (copy) {
2762 int front, back;
2763
2764 raw = page_address(page);
2765
2766 psge = sk_msg_elem(msg, i);
2767 front = start - offset;
2768 back = psge->length - front;
2769 from = sg_virt(psge);
2770
2771 if (front)
2772 memcpy(raw, from, front);
2773
2774 if (back) {
2775 from += front;
2776 to = raw + front + len;
2777
2778 memcpy(to, from, back);
2779 }
2780
2781 put_page(sg_page(psge));
2782 } else if (start - offset) {
2783 psge = sk_msg_elem(msg, i);
2784 rsge = sk_msg_elem_cpy(msg, i);
2785
2786 psge->length = start - offset;
2787 rsge.length -= psge->length;
2788 rsge.offset += start;
2789
2790 sk_msg_iter_var_next(i);
2791 sg_unmark_end(psge);
2792 sg_unmark_end(&rsge);
2793 sk_msg_iter_next(msg, end);
2794 }
2795
2796
2797 new = i;
2798
2799
2800 if (!copy) {
2801 sge = sk_msg_elem_cpy(msg, i);
2802
2803 sk_msg_iter_var_next(i);
2804 sg_unmark_end(&sge);
2805 sk_msg_iter_next(msg, end);
2806
2807 nsge = sk_msg_elem_cpy(msg, i);
2808 if (rsge.length) {
2809 sk_msg_iter_var_next(i);
2810 nnsge = sk_msg_elem_cpy(msg, i);
2811 }
2812
2813 while (i != msg->sg.end) {
2814 msg->sg.data[i] = sge;
2815 sge = nsge;
2816 sk_msg_iter_var_next(i);
2817 if (rsge.length) {
2818 nsge = nnsge;
2819 nnsge = sk_msg_elem_cpy(msg, i);
2820 } else {
2821 nsge = sk_msg_elem_cpy(msg, i);
2822 }
2823 }
2824 }
2825
2826
2827 sk_mem_charge(msg->sk, len);
2828 msg->sg.size += len;
2829 __clear_bit(new, &msg->sg.copy);
2830 sg_set_page(&msg->sg.data[new], page, len + copy, 0);
2831 if (rsge.length) {
2832 get_page(sg_page(&rsge));
2833 sk_msg_iter_var_next(new);
2834 msg->sg.data[new] = rsge;
2835 }
2836
2837 sk_msg_compute_data_pointers(msg);
2838 return 0;
2839}
2840
2841static const struct bpf_func_proto bpf_msg_push_data_proto = {
2842 .func = bpf_msg_push_data,
2843 .gpl_only = false,
2844 .ret_type = RET_INTEGER,
2845 .arg1_type = ARG_PTR_TO_CTX,
2846 .arg2_type = ARG_ANYTHING,
2847 .arg3_type = ARG_ANYTHING,
2848 .arg4_type = ARG_ANYTHING,
2849};
2850
2851static void sk_msg_shift_left(struct sk_msg *msg, int i)
2852{
2853 int prev;
2854
2855 do {
2856 prev = i;
2857 sk_msg_iter_var_next(i);
2858 msg->sg.data[prev] = msg->sg.data[i];
2859 } while (i != msg->sg.end);
2860
2861 sk_msg_iter_prev(msg, end);
2862}
2863
2864static void sk_msg_shift_right(struct sk_msg *msg, int i)
2865{
2866 struct scatterlist tmp, sge;
2867
2868 sk_msg_iter_next(msg, end);
2869 sge = sk_msg_elem_cpy(msg, i);
2870 sk_msg_iter_var_next(i);
2871 tmp = sk_msg_elem_cpy(msg, i);
2872
2873 while (i != msg->sg.end) {
2874 msg->sg.data[i] = sge;
2875 sk_msg_iter_var_next(i);
2876 sge = tmp;
2877 tmp = sk_msg_elem_cpy(msg, i);
2878 }
2879}
2880
2881BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
2882 u32, len, u64, flags)
2883{
2884 u32 i = 0, l = 0, space, offset = 0;
2885 u64 last = start + len;
2886 int pop;
2887
2888 if (unlikely(flags))
2889 return -EINVAL;
2890
2891
2892 i = msg->sg.start;
2893 do {
2894 offset += l;
2895 l = sk_msg_elem(msg, i)->length;
2896
2897 if (start < offset + l)
2898 break;
2899 sk_msg_iter_var_next(i);
2900 } while (i != msg->sg.end);
2901
2902
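	/* Bounds checks: start and pop must be inside the message */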
2903 if (start >= offset + l || last >= msg->sg.size)
2904 return -EINVAL;
2905
2906 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2907
2908 pop = len;
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
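	/* Layout of the element holding 'start':
	 *
	 *  |----- a ----|-------- pop -------|----- b ----|
	 *  |____________________________________________| length
	 *
	 * a:   region at the front of the element to keep
	 * b:   region at the back of the element to keep when
	 *      length > a + pop
	 * pop: region to remove; decremented below per iteration
	 *
	 * If b is non-zero and no spare slot is available, allocate a new
	 * page and compact the a and b regions into it. If a slot is
	 * available, shift the ring to the right to make room for b,
	 * leaving a in place except for reducing its length.
	 */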
2930 if (start != offset) {
2931 struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
2932		int a = start - offset;
2933 int b = sge->length - pop - a;
2934
2935 sk_msg_iter_var_next(i);
2936
2937 if (pop < sge->length - a) {
2938 if (space) {
2939 sge->length = a;
2940 sk_msg_shift_right(msg, i);
2941 nsge = sk_msg_elem(msg, i);
2942 get_page(sg_page(sge));
2943 sg_set_page(nsge,
2944 sg_page(sge),
2945 b, sge->offset + pop + a);
2946 } else {
2947 struct page *page, *orig;
2948 u8 *to, *from;
2949
2950 page = alloc_pages(__GFP_NOWARN |
2951 __GFP_COMP | GFP_ATOMIC,
2952 get_order(a + b));
2953 if (unlikely(!page))
2954 return -ENOMEM;
2955
2956 sge->length = a;
2957 orig = sg_page(sge);
2958 from = sg_virt(sge);
2959 to = page_address(page);
2960 memcpy(to, from, a);
2961 memcpy(to + a, from + a + pop, b);
2962 sg_set_page(sge, page, a + b, 0);
2963 put_page(orig);
2964 }
2965 pop = 0;
2966 } else if (pop >= sge->length - a) {
2967 pop -= (sge->length - a);
2968 sge->length = a;
2969 }
2970 }
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
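	/* From here on the current element's offset equals start: the
	 * first element was either fully handled above (and we advanced
	 * to the next one) or pop already reached zero. If pop is smaller
	 * than the element just adjust its offset and length, otherwise
	 * drop the whole element and shift the ring left.
	 */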
2989 while (pop) {
2990 struct scatterlist *sge = sk_msg_elem(msg, i);
2991
2992 if (pop < sge->length) {
2993 sge->length -= pop;
2994 sge->offset += pop;
2995 pop = 0;
2996 } else {
2997 pop -= sge->length;
2998 sk_msg_shift_left(msg, i);
2999 }
3000 sk_msg_iter_var_next(i);
3001 }
3002
3003 sk_mem_uncharge(msg->sk, len - pop);
3004 msg->sg.size -= (len - pop);
3005 sk_msg_compute_data_pointers(msg);
3006 return 0;
3007}
3008
3009static const struct bpf_func_proto bpf_msg_pop_data_proto = {
3010 .func = bpf_msg_pop_data,
3011 .gpl_only = false,
3012 .ret_type = RET_INTEGER,
3013 .arg1_type = ARG_PTR_TO_CTX,
3014 .arg2_type = ARG_ANYTHING,
3015 .arg3_type = ARG_ANYTHING,
3016 .arg4_type = ARG_ANYTHING,
3017};
3018
3019#ifdef CONFIG_CGROUP_NET_CLASSID
3020BPF_CALL_0(bpf_get_cgroup_classid_curr)
3021{
3022 return __task_get_classid(current);
3023}
3024
3025static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
3026 .func = bpf_get_cgroup_classid_curr,
3027 .gpl_only = false,
3028 .ret_type = RET_INTEGER,
3029};
3030
3031BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb)
3032{
3033 struct sock *sk = skb_to_full_sk(skb);
3034
3035 if (!sk || !sk_fullsock(sk))
3036 return 0;
3037
3038 return sock_cgroup_classid(&sk->sk_cgrp_data);
3039}
3040
3041static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = {
3042 .func = bpf_skb_cgroup_classid,
3043 .gpl_only = false,
3044 .ret_type = RET_INTEGER,
3045 .arg1_type = ARG_PTR_TO_CTX,
3046};
3047#endif
3048
3049BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
3050{
3051 return task_get_classid(skb);
3052}
3053
3054static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
3055 .func = bpf_get_cgroup_classid,
3056 .gpl_only = false,
3057 .ret_type = RET_INTEGER,
3058 .arg1_type = ARG_PTR_TO_CTX,
3059};
3060
3061BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
3062{
3063 return dst_tclassid(skb);
3064}
3065
3066static const struct bpf_func_proto bpf_get_route_realm_proto = {
3067 .func = bpf_get_route_realm,
3068 .gpl_only = false,
3069 .ret_type = RET_INTEGER,
3070 .arg1_type = ARG_PTR_TO_CTX,
3071};
3072
3073BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
3074{
3075
3076
3077
3078
3079
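	/* If skb_clear_hash() was called due to mangling, we can
	 * trigger SW recalculation here. Later access to hash
	 * can then use the inline skb->hash via context directly
	 * instead of calling this helper again.
	 */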
3080 return skb_get_hash(skb);
3081}
3082
3083static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
3084 .func = bpf_get_hash_recalc,
3085 .gpl_only = false,
3086 .ret_type = RET_INTEGER,
3087 .arg1_type = ARG_PTR_TO_CTX,
3088};
3089
3090BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
3091{
3092
3093
3094
3095 skb_clear_hash(skb);
3096 return 0;
3097}
3098
3099static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
3100 .func = bpf_set_hash_invalid,
3101 .gpl_only = false,
3102 .ret_type = RET_INTEGER,
3103 .arg1_type = ARG_PTR_TO_CTX,
3104};
3105
3106BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
3107{
3108
3109
3110
3111
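	/* Set user specified hash as L4(+), so that it gets returned
	 * on skb_get_hash() call unless BPF prog later on triggers a
	 * skb_clear_hash().
	 */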
3112 __skb_set_sw_hash(skb, hash, true);
3113 return 0;
3114}
3115
3116static const struct bpf_func_proto bpf_set_hash_proto = {
3117 .func = bpf_set_hash,
3118 .gpl_only = false,
3119 .ret_type = RET_INTEGER,
3120 .arg1_type = ARG_PTR_TO_CTX,
3121 .arg2_type = ARG_ANYTHING,
3122};
3123
3124BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
3125 u16, vlan_tci)
3126{
3127 int ret;
3128
3129 if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
3130 vlan_proto != htons(ETH_P_8021AD)))
3131 vlan_proto = htons(ETH_P_8021Q);
3132
3133 bpf_push_mac_rcsum(skb);
3134 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
3135 bpf_pull_mac_rcsum(skb);
3136
3137 bpf_compute_data_pointers(skb);
3138 return ret;
3139}
3140
3141static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
3142 .func = bpf_skb_vlan_push,
3143 .gpl_only = false,
3144 .ret_type = RET_INTEGER,
3145 .arg1_type = ARG_PTR_TO_CTX,
3146 .arg2_type = ARG_ANYTHING,
3147 .arg3_type = ARG_ANYTHING,
3148};
3149
3150BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
3151{
3152 int ret;
3153
3154 bpf_push_mac_rcsum(skb);
3155 ret = skb_vlan_pop(skb);
3156 bpf_pull_mac_rcsum(skb);
3157
3158 bpf_compute_data_pointers(skb);
3159 return ret;
3160}
3161
3162static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
3163 .func = bpf_skb_vlan_pop,
3164 .gpl_only = false,
3165 .ret_type = RET_INTEGER,
3166 .arg1_type = ARG_PTR_TO_CTX,
3167};
3168
3169static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
3170{
3171
3172
3173
3174 skb_push(skb, len);
3175 memmove(skb->data, skb->data + len, off);
3176 memset(skb->data + off, 0, len);
3177
3178
3179
3180
3181
3182
3183 return 0;
3184}
3185
3186static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
3187{
3188
3189
3190
3191 if (unlikely(!pskb_may_pull(skb, off + len)))
3192 return -ENOMEM;
3193
3194 skb_postpull_rcsum(skb, skb->data + off, len);
3195 memmove(skb->data + len, skb->data, off);
3196 __skb_pull(skb, len);
3197
3198 return 0;
3199}
3200
3201static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
3202{
3203 bool trans_same = skb->transport_header == skb->network_header;
3204 int ret;
3205
3206
3207
3208
3209
3210 ret = bpf_skb_generic_push(skb, off, len);
3211 if (likely(!ret)) {
3212 skb->mac_header -= len;
3213 skb->network_header -= len;
3214 if (trans_same)
3215 skb->transport_header = skb->network_header;
3216 }
3217
3218 return ret;
3219}
3220
3221static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
3222{
3223 bool trans_same = skb->transport_header == skb->network_header;
3224 int ret;
3225
3226
3227 ret = bpf_skb_generic_pop(skb, off, len);
3228 if (likely(!ret)) {
3229 skb->mac_header += len;
3230 skb->network_header += len;
3231 if (trans_same)
3232 skb->transport_header = skb->network_header;
3233 }
3234
3235 return ret;
3236}
3237
3238static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
3239{
3240 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3241 u32 off = skb_mac_header_len(skb);
3242 int ret;
3243
3244 ret = skb_cow(skb, len_diff);
3245 if (unlikely(ret < 0))
3246 return ret;
3247
3248 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3249 if (unlikely(ret < 0))
3250 return ret;
3251
3252 if (skb_is_gso(skb)) {
3253 struct skb_shared_info *shinfo = skb_shinfo(skb);
3254
3255
3256 if (shinfo->gso_type & SKB_GSO_TCPV4) {
3257 shinfo->gso_type &= ~SKB_GSO_TCPV4;
3258 shinfo->gso_type |= SKB_GSO_TCPV6;
3259 }
3260 }
3261
3262 skb->protocol = htons(ETH_P_IPV6);
3263 skb_clear_hash(skb);
3264
3265 return 0;
3266}
3267
3268static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
3269{
3270 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
3271 u32 off = skb_mac_header_len(skb);
3272 int ret;
3273
3274 ret = skb_unclone(skb, GFP_ATOMIC);
3275 if (unlikely(ret < 0))
3276 return ret;
3277
3278 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3279 if (unlikely(ret < 0))
3280 return ret;
3281
3282 if (skb_is_gso(skb)) {
3283 struct skb_shared_info *shinfo = skb_shinfo(skb);
3284
3285
3286 if (shinfo->gso_type & SKB_GSO_TCPV6) {
3287 shinfo->gso_type &= ~SKB_GSO_TCPV6;
3288 shinfo->gso_type |= SKB_GSO_TCPV4;
3289 }
3290 }
3291
3292 skb->protocol = htons(ETH_P_IP);
3293 skb_clear_hash(skb);
3294
3295 return 0;
3296}
3297
3298static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
3299{
3300 __be16 from_proto = skb->protocol;
3301
3302 if (from_proto == htons(ETH_P_IP) &&
3303 to_proto == htons(ETH_P_IPV6))
3304 return bpf_skb_proto_4_to_6(skb);
3305
3306 if (from_proto == htons(ETH_P_IPV6) &&
3307 to_proto == htons(ETH_P_IP))
3308 return bpf_skb_proto_6_to_4(skb);
3309
3310 return -ENOTSUPP;
3311}
3312
3313BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
3314 u64, flags)
3315{
3316 int ret;
3317
3318 if (unlikely(flags))
3319 return -EINVAL;
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
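	/* This helper only performs the basic groundwork for switching
	 * an skb between ETH_P_IP and ETH_P_IPV6: it grows or shrinks
	 * the room for the network header and fixes up the GSO type.
	 * The BPF program is expected to rewrite the headers itself,
	 * e.g. via bpf_skb_store_bytes() and the checksum helpers.
	 */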
3338 ret = bpf_skb_proto_xlat(skb, proto);
3339 bpf_compute_data_pointers(skb);
3340 return ret;
3341}
3342
3343static const struct bpf_func_proto bpf_skb_change_proto_proto = {
3344 .func = bpf_skb_change_proto,
3345 .gpl_only = false,
3346 .ret_type = RET_INTEGER,
3347 .arg1_type = ARG_PTR_TO_CTX,
3348 .arg2_type = ARG_ANYTHING,
3349 .arg3_type = ARG_ANYTHING,
3350};
3351
3352BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
3353{
3354
3355 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
3356 !skb_pkt_type_ok(pkt_type)))
3357 return -EINVAL;
3358
3359 skb->pkt_type = pkt_type;
3360 return 0;
3361}
3362
3363static const struct bpf_func_proto bpf_skb_change_type_proto = {
3364 .func = bpf_skb_change_type,
3365 .gpl_only = false,
3366 .ret_type = RET_INTEGER,
3367 .arg1_type = ARG_PTR_TO_CTX,
3368 .arg2_type = ARG_ANYTHING,
3369};
3370
3371static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
3372{
3373 switch (skb->protocol) {
3374 case htons(ETH_P_IP):
3375 return sizeof(struct iphdr);
3376 case htons(ETH_P_IPV6):
3377 return sizeof(struct ipv6hdr);
3378 default:
3379 return ~0U;
3380 }
3381}
3382
3383#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
3384 BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3385
3386#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
3387 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
3388 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
3389 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
3390 BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
3391 BPF_F_ADJ_ROOM_ENCAP_L2( \
3392 BPF_ADJ_ROOM_ENCAP_L2_MASK))
3393
3394static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
3395 u64 flags)
3396{
3397 u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
3398 bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
3399 u16 mac_len = 0, inner_net = 0, inner_trans = 0;
3400 unsigned int gso_type = SKB_GSO_DODGY;
3401 int ret;
3402
3403 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3404
3405 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3406 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3407 return -ENOTSUPP;
3408 }
3409
3410 ret = skb_cow_head(skb, len_diff);
3411 if (unlikely(ret < 0))
3412 return ret;
3413
3414 if (encap) {
3415 if (skb->protocol != htons(ETH_P_IP) &&
3416 skb->protocol != htons(ETH_P_IPV6))
3417 return -ENOTSUPP;
3418
3419 if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
3420 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3421 return -EINVAL;
3422
3423 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
3424 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3425 return -EINVAL;
3426
3427 if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
3428 inner_mac_len < ETH_HLEN)
3429 return -EINVAL;
3430
3431 if (skb->encapsulation)
3432 return -EALREADY;
3433
3434 mac_len = skb->network_header - skb->mac_header;
3435 inner_net = skb->network_header;
3436 if (inner_mac_len > len_diff)
3437 return -EINVAL;
3438 inner_trans = skb->transport_header;
3439 }
3440
3441 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3442 if (unlikely(ret < 0))
3443 return ret;
3444
3445 if (encap) {
3446 skb->inner_mac_header = inner_net - inner_mac_len;
3447 skb->inner_network_header = inner_net;
3448 skb->inner_transport_header = inner_trans;
3449
3450 if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
3451 skb_set_inner_protocol(skb, htons(ETH_P_TEB));
3452 else
3453 skb_set_inner_protocol(skb, skb->protocol);
3454
3455 skb->encapsulation = 1;
3456 skb_set_network_header(skb, mac_len);
3457
3458 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3459 gso_type |= SKB_GSO_UDP_TUNNEL;
3460 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
3461 gso_type |= SKB_GSO_GRE;
3462 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3463 gso_type |= SKB_GSO_IPXIP6;
3464 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3465 gso_type |= SKB_GSO_IPXIP4;
3466
3467 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
3468 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
3469 int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
3470 sizeof(struct ipv6hdr) :
3471 sizeof(struct iphdr);
3472
3473 skb_set_transport_header(skb, mac_len + nh_len);
3474 }
3475
3476
3477 if (skb->protocol == htons(ETH_P_IP) &&
3478 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3479 skb->protocol = htons(ETH_P_IPV6);
3480 else if (skb->protocol == htons(ETH_P_IPV6) &&
3481 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3482 skb->protocol = htons(ETH_P_IP);
3483 }
3484
3485 if (skb_is_gso(skb)) {
3486 struct skb_shared_info *shinfo = skb_shinfo(skb);
3487
3488
3489 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3490 skb_decrease_gso_size(shinfo, len_diff);
3491
3492
3493 shinfo->gso_type |= gso_type;
3494 shinfo->gso_segs = 0;
3495 }
3496
3497 return 0;
3498}
3499
3500static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
3501 u64 flags)
3502{
3503 int ret;
3504
3505 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
3506 BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3507 return -EINVAL;
3508
3509 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3510
3511 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3512 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3513 return -ENOTSUPP;
3514 }
3515
3516 ret = skb_unclone(skb, GFP_ATOMIC);
3517 if (unlikely(ret < 0))
3518 return ret;
3519
3520 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3521 if (unlikely(ret < 0))
3522 return ret;
3523
3524 if (skb_is_gso(skb)) {
3525 struct skb_shared_info *shinfo = skb_shinfo(skb);
3526
3527
3528 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3529 skb_increase_gso_size(shinfo, len_diff);
3530
3531
3532 shinfo->gso_type |= SKB_GSO_DODGY;
3533 shinfo->gso_segs = 0;
3534 }
3535
3536 return 0;
3537}
3538
3539#define BPF_SKB_MAX_LEN SKB_MAX_ALLOC
3540
3541BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3542 u32, mode, u64, flags)
3543{
3544 u32 len_diff_abs = abs(len_diff);
3545 bool shrink = len_diff < 0;
3546 int ret = 0;
3547
3548 if (unlikely(flags || mode))
3549 return -EINVAL;
3550 if (unlikely(len_diff_abs > 0xfffU))
3551 return -EFAULT;
3552
3553 if (!shrink) {
3554 ret = skb_cow(skb, len_diff);
3555 if (unlikely(ret < 0))
3556 return ret;
3557 __skb_push(skb, len_diff_abs);
3558 memset(skb->data, 0, len_diff_abs);
3559 } else {
3560 if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
3561 return -ENOMEM;
3562 __skb_pull(skb, len_diff_abs);
3563 }
3564 if (tls_sw_has_ctx_rx(skb->sk)) {
3565 struct strp_msg *rxm = strp_msg(skb);
3566
3567 rxm->full_len += len_diff;
3568 }
3569 return ret;
3570}
3571
3572static const struct bpf_func_proto sk_skb_adjust_room_proto = {
3573 .func = sk_skb_adjust_room,
3574 .gpl_only = false,
3575 .ret_type = RET_INTEGER,
3576 .arg1_type = ARG_PTR_TO_CTX,
3577 .arg2_type = ARG_ANYTHING,
3578 .arg3_type = ARG_ANYTHING,
3579 .arg4_type = ARG_ANYTHING,
3580};
3581
3582BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3583 u32, mode, u64, flags)
3584{
3585 u32 len_cur, len_diff_abs = abs(len_diff);
3586 u32 len_min = bpf_skb_net_base_len(skb);
3587 u32 len_max = BPF_SKB_MAX_LEN;
3588 __be16 proto = skb->protocol;
3589 bool shrink = len_diff < 0;
3590 u32 off;
3591 int ret;
3592
3593 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
3594 BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
3595 return -EINVAL;
3596 if (unlikely(len_diff_abs > 0xfffU))
3597 return -EFAULT;
3598 if (unlikely(proto != htons(ETH_P_IP) &&
3599 proto != htons(ETH_P_IPV6)))
3600 return -ENOTSUPP;
3601
3602 off = skb_mac_header_len(skb);
3603 switch (mode) {
3604 case BPF_ADJ_ROOM_NET:
3605 off += bpf_skb_net_base_len(skb);
3606 break;
3607 case BPF_ADJ_ROOM_MAC:
3608 break;
3609 default:
3610 return -ENOTSUPP;
3611 }
3612
3613 len_cur = skb->len - skb_network_offset(skb);
3614 if ((shrink && (len_diff_abs >= len_cur ||
3615 len_cur - len_diff_abs < len_min)) ||
3616 (!shrink && (skb->len + len_diff_abs > len_max &&
3617 !skb_is_gso(skb))))
3618 return -ENOTSUPP;
3619
3620 ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
3621 bpf_skb_net_grow(skb, off, len_diff_abs, flags);
3622 if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
3623 __skb_reset_checksum_unnecessary(skb);
3624
3625 bpf_compute_data_pointers(skb);
3626 return ret;
3627}
3628
3629static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
3630 .func = bpf_skb_adjust_room,
3631 .gpl_only = false,
3632 .ret_type = RET_INTEGER,
3633 .arg1_type = ARG_PTR_TO_CTX,
3634 .arg2_type = ARG_ANYTHING,
3635 .arg3_type = ARG_ANYTHING,
3636 .arg4_type = ARG_ANYTHING,
3637};
3638
3639static u32 __bpf_skb_min_len(const struct sk_buff *skb)
3640{
3641 u32 min_len = skb_network_offset(skb);
3642
3643 if (skb_transport_header_was_set(skb))
3644 min_len = skb_transport_offset(skb);
3645 if (skb->ip_summed == CHECKSUM_PARTIAL)
3646 min_len = skb_checksum_start_offset(skb) +
3647 skb->csum_offset + sizeof(__sum16);
3648 return min_len;
3649}
3650
3651static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
3652{
3653 unsigned int old_len = skb->len;
3654 int ret;
3655
3656 ret = __skb_grow_rcsum(skb, new_len);
3657 if (!ret)
3658 memset(skb->data + old_len, 0, new_len - old_len);
3659 return ret;
3660}
3661
3662static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
3663{
3664 return __skb_trim_rcsum(skb, new_len);
3665}
3666
3667static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
3668 u64 flags)
3669{
3670 u32 max_len = BPF_SKB_MAX_LEN;
3671 u32 min_len = __bpf_skb_min_len(skb);
3672 int ret;
3673
3674 if (unlikely(flags || new_len > max_len || new_len < min_len))
3675 return -EINVAL;
3676 if (skb->encapsulation)
3677 return -ENOTSUPP;
3678
3679
3680
3681
3682
3683
3684
3685
3686
3687
3688
3689
3690
3691
3692
3693
3694
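	/* The basic idea of this helper is that it performs the needed
	 * work to either grow or trim an skb, and the eBPF program
	 * rewrites the rest via helpers like bpf_skb_store_bytes() and
	 * the checksum-replace helpers rather than passing a raw buffer
	 * here. This is a slow path operation intended for replies with
	 * control messages, so the skb is implicitly linearized and
	 * uncloned below, and its GSO state is reset.
	 */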
3695 ret = __bpf_try_make_writable(skb, skb->len);
3696 if (!ret) {
3697 if (new_len > skb->len)
3698 ret = bpf_skb_grow_rcsum(skb, new_len);
3699 else if (new_len < skb->len)
3700 ret = bpf_skb_trim_rcsum(skb, new_len);
3701 if (!ret && skb_is_gso(skb))
3702 skb_gso_reset(skb);
3703 }
3704 return ret;
3705}
3706
3707BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3708 u64, flags)
3709{
3710 int ret = __bpf_skb_change_tail(skb, new_len, flags);
3711
3712 bpf_compute_data_pointers(skb);
3713 return ret;
3714}
3715
3716static const struct bpf_func_proto bpf_skb_change_tail_proto = {
3717 .func = bpf_skb_change_tail,
3718 .gpl_only = false,
3719 .ret_type = RET_INTEGER,
3720 .arg1_type = ARG_PTR_TO_CTX,
3721 .arg2_type = ARG_ANYTHING,
3722 .arg3_type = ARG_ANYTHING,
3723};
3724
3725BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3726 u64, flags)
3727{
3728 return __bpf_skb_change_tail(skb, new_len, flags);
3729}
3730
3731static const struct bpf_func_proto sk_skb_change_tail_proto = {
3732 .func = sk_skb_change_tail,
3733 .gpl_only = false,
3734 .ret_type = RET_INTEGER,
3735 .arg1_type = ARG_PTR_TO_CTX,
3736 .arg2_type = ARG_ANYTHING,
3737 .arg3_type = ARG_ANYTHING,
3738};
3739
3740static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
3741 u64 flags)
3742{
3743 u32 max_len = BPF_SKB_MAX_LEN;
3744 u32 new_len = skb->len + head_room;
3745 int ret;
3746
3747 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
3748 new_len < skb->len))
3749 return -EINVAL;
3750
3751 ret = skb_cow(skb, head_room);
3752 if (likely(!ret)) {
3753
3754
3755
3756
3757
3758
3759
3760
3761
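		/* Idea for this helper is that we currently only
		 * allow to expand on mac header. This means that
		 * skb->protocol network header, etc, stay as is.
		 * Compared to bpf_skb_change_tail(), we're more
		 * flexible due to not needing to linearize or
		 * reset GSO. Intention for this helper is to be
		 * used by an L3 skb that needs to push mac header
		 * for redirection into L2 device.
		 */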
3762 __skb_push(skb, head_room);
3763 memset(skb->data, 0, head_room);
3764 skb_reset_mac_header(skb);
3765 skb_reset_mac_len(skb);
3766 }
3767
3768 return ret;
3769}
3770
3771BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3772 u64, flags)
3773{
3774 int ret = __bpf_skb_change_head(skb, head_room, flags);
3775
3776 bpf_compute_data_pointers(skb);
3777 return ret;
3778}
3779
3780static const struct bpf_func_proto bpf_skb_change_head_proto = {
3781 .func = bpf_skb_change_head,
3782 .gpl_only = false,
3783 .ret_type = RET_INTEGER,
3784 .arg1_type = ARG_PTR_TO_CTX,
3785 .arg2_type = ARG_ANYTHING,
3786 .arg3_type = ARG_ANYTHING,
3787};
3788
3789BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3790 u64, flags)
3791{
3792 return __bpf_skb_change_head(skb, head_room, flags);
3793}
3794
3795static const struct bpf_func_proto sk_skb_change_head_proto = {
3796 .func = sk_skb_change_head,
3797 .gpl_only = false,
3798 .ret_type = RET_INTEGER,
3799 .arg1_type = ARG_PTR_TO_CTX,
3800 .arg2_type = ARG_ANYTHING,
3801 .arg3_type = ARG_ANYTHING,
3802};

3803static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3804{
3805 return xdp_data_meta_unsupported(xdp) ? 0 :
3806 xdp->data - xdp->data_meta;
3807}
3808
3809BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
3810{
3811 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3812 unsigned long metalen = xdp_get_metalen(xdp);
3813 void *data_start = xdp_frame_end + metalen;
3814 void *data = xdp->data + offset;
3815
3816 if (unlikely(data < data_start ||
3817 data > xdp->data_end - ETH_HLEN))
3818 return -EINVAL;
3819
3820 if (metalen)
3821 memmove(xdp->data_meta + offset,
3822 xdp->data_meta, metalen);
3823 xdp->data_meta += offset;
3824 xdp->data = data;
3825
3826 return 0;
3827}
3828
3829static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3830 .func = bpf_xdp_adjust_head,
3831 .gpl_only = false,
3832 .ret_type = RET_INTEGER,
3833 .arg1_type = ARG_PTR_TO_CTX,
3834 .arg2_type = ARG_ANYTHING,
3835};
3836
3837BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
3838{
3839 void *data_hard_end = xdp_data_hard_end(xdp);
3840 void *data_end = xdp->data_end + offset;
3841
3842
3843 if (unlikely(data_end > data_hard_end))
3844 return -EINVAL;
3845
3846
3847 if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
3848 WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
3849 return -EINVAL;
3850 }
3851
3852 if (unlikely(data_end < xdp->data + ETH_HLEN))
3853 return -EINVAL;
3854
3855
3856 if (offset > 0)
3857 memset(xdp->data_end, 0, offset);
3858
3859 xdp->data_end = data_end;
3860
3861 return 0;
3862}
3863
3864static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
3865 .func = bpf_xdp_adjust_tail,
3866 .gpl_only = false,
3867 .ret_type = RET_INTEGER,
3868 .arg1_type = ARG_PTR_TO_CTX,
3869 .arg2_type = ARG_ANYTHING,
3870};
3871
3872BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
3873{
3874 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
3875 void *meta = xdp->data_meta + offset;
3876 unsigned long metalen = xdp->data - meta;
3877
3878 if (xdp_data_meta_unsupported(xdp))
3879 return -ENOTSUPP;
3880 if (unlikely(meta < xdp_frame_end ||
3881 meta > xdp->data))
3882 return -EINVAL;
3883 if (unlikely((metalen & (sizeof(__u32) - 1)) ||
3884 (metalen > 32)))
3885 return -EACCES;
3886
3887 xdp->data_meta = meta;
3888
3889 return 0;
3890}
3891
3892static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
3893 .func = bpf_xdp_adjust_meta,
3894 .gpl_only = false,
3895 .ret_type = RET_INTEGER,
3896 .arg1_type = ARG_PTR_TO_CTX,
3897 .arg2_type = ARG_ANYTHING,
3898};
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
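/* XDP_REDIRECT works by a three-step process, implemented in the functions
 * below:
 *
 * 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
 *    of the redirect and store it (along with some other metadata) in a
 *    per-CPU struct bpf_redirect_info.
 *
 * 2. When the program returns the XDP_REDIRECT return code, the driver will
 *    call xdp_do_redirect() which will use the information in struct
 *    bpf_redirect_info to actually enqueue the frame into a map type-specific
 *    packet buffer queue.
 *
 * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
 *    which will flush all the different bulk queues, thus completing the
 *    redirect.
 */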
3928void xdp_do_flush(void)
3929{
3930 __dev_flush();
3931 __cpu_map_flush();
3932 __xsk_map_flush();
3933}
3934EXPORT_SYMBOL_GPL(xdp_do_flush);
3935
3936void bpf_clear_redirect_map(struct bpf_map *map)
3937{
3938 struct bpf_redirect_info *ri;
3939 int cpu;
3940
3941 for_each_possible_cpu(cpu) {
3942 ri = per_cpu_ptr(&bpf_redirect_info, cpu);
3943
3944
3945
3946
3947
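		/* Avoid polluting remote cachelines due to writes if
		 * not needed. Once we pass this test, we need the
		 * cmpxchg() to make sure it hasn't been changed in
		 * the meantime by a remote CPU.
		 */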
3948 if (unlikely(READ_ONCE(ri->map) == map))
3949 cmpxchg(&ri->map, map, NULL);
3950 }
3951}
3952
3953int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
3954 struct bpf_prog *xdp_prog)
3955{
3956 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3957 enum bpf_map_type map_type = ri->map_type;
3958 void *fwd = ri->tgt_value;
3959 u32 map_id = ri->map_id;
3960 struct bpf_map *map;
3961 int err;
3962
3963 ri->map_id = 0;
3964 ri->map_type = BPF_MAP_TYPE_UNSPEC;
3965
3966 switch (map_type) {
3967 case BPF_MAP_TYPE_DEVMAP:
3968 fallthrough;
3969 case BPF_MAP_TYPE_DEVMAP_HASH:
3970 map = READ_ONCE(ri->map);
3971 if (unlikely(map)) {
3972 WRITE_ONCE(ri->map, NULL);
3973 err = dev_map_enqueue_multi(xdp, dev, map,
3974 ri->flags & BPF_F_EXCLUDE_INGRESS);
3975 } else {
3976 err = dev_map_enqueue(fwd, xdp, dev);
3977 }
3978 break;
3979 case BPF_MAP_TYPE_CPUMAP:
3980 err = cpu_map_enqueue(fwd, xdp, dev);
3981 break;
3982 case BPF_MAP_TYPE_XSKMAP:
3983 err = __xsk_map_redirect(fwd, xdp);
3984 break;
3985 case BPF_MAP_TYPE_UNSPEC:
3986 if (map_id == INT_MAX) {
3987 fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
3988 if (unlikely(!fwd)) {
3989 err = -EINVAL;
3990 break;
3991 }
3992 err = dev_xdp_enqueue(fwd, xdp, dev);
3993 break;
3994 }
3995 fallthrough;
3996 default:
3997 err = -EBADRQC;
3998 }
3999
4000 if (unlikely(err))
4001 goto err;
4002
4003 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4004 return 0;
4005err:
4006 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4007 return err;
4008}
4009EXPORT_SYMBOL_GPL(xdp_do_redirect);
4010
4011static int xdp_do_generic_redirect_map(struct net_device *dev,
4012 struct sk_buff *skb,
4013 struct xdp_buff *xdp,
4014 struct bpf_prog *xdp_prog,
4015 void *fwd,
4016 enum bpf_map_type map_type, u32 map_id)
4017{
4018 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4019 struct bpf_map *map;
4020 int err;
4021
4022 switch (map_type) {
4023 case BPF_MAP_TYPE_DEVMAP:
4024 fallthrough;
4025 case BPF_MAP_TYPE_DEVMAP_HASH:
4026 map = READ_ONCE(ri->map);
4027 if (unlikely(map)) {
4028 WRITE_ONCE(ri->map, NULL);
4029 err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
4030 ri->flags & BPF_F_EXCLUDE_INGRESS);
4031 } else {
4032 err = dev_map_generic_redirect(fwd, skb, xdp_prog);
4033 }
4034 if (unlikely(err))
4035 goto err;
4036 break;
4037 case BPF_MAP_TYPE_XSKMAP:
4038 err = xsk_generic_rcv(fwd, xdp);
4039 if (err)
4040 goto err;
4041 consume_skb(skb);
4042 break;
4043 default:
4044
4045 err = -EBADRQC;
4046 goto err;
4047 }
4048
4049 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4050 return 0;
4051err:
4052 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4053 return err;
4054}
4055
4056int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
4057 struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
4058{
4059 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4060 enum bpf_map_type map_type = ri->map_type;
4061 void *fwd = ri->tgt_value;
4062 u32 map_id = ri->map_id;
4063 int err;
4064
4065 ri->map_id = 0;
4066 ri->map_type = BPF_MAP_TYPE_UNSPEC;
4067
4068 if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
4069 fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
4070 if (unlikely(!fwd)) {
4071 err = -EINVAL;
4072 goto err;
4073 }
4074
4075 err = xdp_ok_fwd_dev(fwd, skb->len);
4076 if (unlikely(err))
4077 goto err;
4078
4079 skb->dev = fwd;
4080 _trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
4081 generic_xdp_tx(skb, xdp_prog);
4082 return 0;
4083 }
4084
4085 return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
4086err:
4087 _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
4088 return err;
4089}
4090
4091BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
4092{
4093 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4094
4095 if (unlikely(flags))
4096 return XDP_ABORTED;
4097
4098
4099
4100
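	/* Map type UNSPEC together with map_id == INT_MAX signals to
	 * xdp_do_redirect() that the target is a plain ifindex rather
	 * than an entry in a redirect map.
	 */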
4101 ri->tgt_index = ifindex;
4102 ri->map_id = INT_MAX;
4103 ri->map_type = BPF_MAP_TYPE_UNSPEC;
4104
4105 return XDP_REDIRECT;
4106}
4107
4108static const struct bpf_func_proto bpf_xdp_redirect_proto = {
4109 .func = bpf_xdp_redirect,
4110 .gpl_only = false,
4111 .ret_type = RET_INTEGER,
4112 .arg1_type = ARG_ANYTHING,
4113 .arg2_type = ARG_ANYTHING,
4114};
4115
4116BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
4117 u64, flags)
4118{
4119 return map->ops->map_redirect(map, ifindex, flags);
4120}
4121
4122static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
4123 .func = bpf_xdp_redirect_map,
4124 .gpl_only = false,
4125 .ret_type = RET_INTEGER,
4126 .arg1_type = ARG_CONST_MAP_PTR,
4127 .arg2_type = ARG_ANYTHING,
4128 .arg3_type = ARG_ANYTHING,
4129};
4130
4131static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
4132 unsigned long off, unsigned long len)
4133{
4134 void *ptr = skb_header_pointer(skb, off, len, dst_buff);
4135
4136 if (unlikely(!ptr))
4137 return len;
4138 if (ptr != dst_buff)
4139 memcpy(dst_buff, ptr, len);
4140
4141 return 0;
4142}
4143
4144BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
4145 u64, flags, void *, meta, u64, meta_size)
4146{
4147 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4148
4149 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4150 return -EINVAL;
4151 if (unlikely(!skb || skb_size > skb->len))
4152 return -EFAULT;
4153
4154 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
4155 bpf_skb_copy);
4156}
4157
4158static const struct bpf_func_proto bpf_skb_event_output_proto = {
4159 .func = bpf_skb_event_output,
4160 .gpl_only = true,
4161 .ret_type = RET_INTEGER,
4162 .arg1_type = ARG_PTR_TO_CTX,
4163 .arg2_type = ARG_CONST_MAP_PTR,
4164 .arg3_type = ARG_ANYTHING,
4165 .arg4_type = ARG_PTR_TO_MEM,
4166 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
4167};
4168
4169BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff)
4170
4171const struct bpf_func_proto bpf_skb_output_proto = {
4172 .func = bpf_skb_event_output,
4173 .gpl_only = true,
4174 .ret_type = RET_INTEGER,
4175 .arg1_type = ARG_PTR_TO_BTF_ID,
4176 .arg1_btf_id = &bpf_skb_output_btf_ids[0],
4177 .arg2_type = ARG_CONST_MAP_PTR,
4178 .arg3_type = ARG_ANYTHING,
4179 .arg4_type = ARG_PTR_TO_MEM,
4180 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
4181};
4182
4183static unsigned short bpf_tunnel_key_af(u64 flags)
4184{
4185 return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
4186}
4187
4188BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
4189 u32, size, u64, flags)
4190{
4191 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4192 u8 compat[sizeof(struct bpf_tunnel_key)];
4193 void *to_orig = to;
4194 int err;
4195
4196 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
4197 err = -EINVAL;
4198 goto err_clear;
4199 }
4200 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
4201 err = -EPROTO;
4202 goto err_clear;
4203 }
4204 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4205 err = -EINVAL;
4206 switch (size) {
4207 case offsetof(struct bpf_tunnel_key, tunnel_label):
4208 case offsetof(struct bpf_tunnel_key, tunnel_ext):
4209 goto set_compat;
4210 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
4211
4212
4213
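			/* Fixup deprecated structure layouts here, so we have
			 * a common path later on.
			 */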
4214 if (ip_tunnel_info_af(info) != AF_INET)
4215 goto err_clear;
4216set_compat:
4217 to = (struct bpf_tunnel_key *)compat;
4218 break;
4219 default:
4220 goto err_clear;
4221 }
4222 }
4223
4224 to->tunnel_id = be64_to_cpu(info->key.tun_id);
4225 to->tunnel_tos = info->key.tos;
4226 to->tunnel_ttl = info->key.ttl;
4227 to->tunnel_ext = 0;
4228
4229 if (flags & BPF_F_TUNINFO_IPV6) {
4230 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
4231 sizeof(to->remote_ipv6));
4232 to->tunnel_label = be32_to_cpu(info->key.label);
4233 } else {
4234 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
4235 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
4236 to->tunnel_label = 0;
4237 }
4238
4239 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
4240 memcpy(to_orig, to, size);
4241
4242 return 0;
4243err_clear:
4244 memset(to_orig, 0, size);
4245 return err;
4246}
4247
4248static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
4249 .func = bpf_skb_get_tunnel_key,
4250 .gpl_only = false,
4251 .ret_type = RET_INTEGER,
4252 .arg1_type = ARG_PTR_TO_CTX,
4253 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
4254 .arg3_type = ARG_CONST_SIZE,
4255 .arg4_type = ARG_ANYTHING,
4256};
4257
4258BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
4259{
4260 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4261 int err;
4262
4263 if (unlikely(!info ||
4264 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
4265 err = -ENOENT;
4266 goto err_clear;
4267 }
4268 if (unlikely(size < info->options_len)) {
4269 err = -ENOMEM;
4270 goto err_clear;
4271 }
4272
4273 ip_tunnel_info_opts_get(to, info);
4274 if (size > info->options_len)
4275 memset(to + info->options_len, 0, size - info->options_len);
4276
4277 return info->options_len;
4278err_clear:
4279 memset(to, 0, size);
4280 return err;
4281}
4282
4283static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
4284 .func = bpf_skb_get_tunnel_opt,
4285 .gpl_only = false,
4286 .ret_type = RET_INTEGER,
4287 .arg1_type = ARG_PTR_TO_CTX,
4288 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
4289 .arg3_type = ARG_CONST_SIZE,
4290};
4291
4292static struct metadata_dst __percpu *md_dst;
4293
4294BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
4295 const struct bpf_tunnel_key *, from, u32, size, u64, flags)
4296{
4297 struct metadata_dst *md = this_cpu_ptr(md_dst);
4298 u8 compat[sizeof(struct bpf_tunnel_key)];
4299 struct ip_tunnel_info *info;
4300
4301 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
4302 BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
4303 return -EINVAL;
4304 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4305 switch (size) {
4306 case offsetof(struct bpf_tunnel_key, tunnel_label):
4307 case offsetof(struct bpf_tunnel_key, tunnel_ext):
4308 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
4309
4310
4311
4312 memcpy(compat, from, size);
4313 memset(compat + size, 0, sizeof(compat) - size);
4314 from = (const struct bpf_tunnel_key *) compat;
4315 break;
4316 default:
4317 return -EINVAL;
4318 }
4319 }
4320 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
4321 from->tunnel_ext))
4322 return -EINVAL;
4323
4324 skb_dst_drop(skb);
4325 dst_hold((struct dst_entry *) md);
4326 skb_dst_set(skb, (struct dst_entry *) md);
4327
4328 info = &md->u.tun_info;
4329 memset(info, 0, sizeof(*info));
4330 info->mode = IP_TUNNEL_INFO_TX;
4331
4332 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
4333 if (flags & BPF_F_DONT_FRAGMENT)
4334 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
4335 if (flags & BPF_F_ZERO_CSUM_TX)
4336 info->key.tun_flags &= ~TUNNEL_CSUM;
4337 if (flags & BPF_F_SEQ_NUMBER)
4338 info->key.tun_flags |= TUNNEL_SEQ;
4339
4340 info->key.tun_id = cpu_to_be64(from->tunnel_id);
4341 info->key.tos = from->tunnel_tos;
4342 info->key.ttl = from->tunnel_ttl;
4343
4344 if (flags & BPF_F_TUNINFO_IPV6) {
4345 info->mode |= IP_TUNNEL_INFO_IPV6;
4346 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
4347 sizeof(from->remote_ipv6));
4348 info->key.label = cpu_to_be32(from->tunnel_label) &
4349 IPV6_FLOWLABEL_MASK;
4350 } else {
4351 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
4352 }
4353
4354 return 0;
4355}
4356
4357static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
4358 .func = bpf_skb_set_tunnel_key,
4359 .gpl_only = false,
4360 .ret_type = RET_INTEGER,
4361 .arg1_type = ARG_PTR_TO_CTX,
4362 .arg2_type = ARG_PTR_TO_MEM,
4363 .arg3_type = ARG_CONST_SIZE,
4364 .arg4_type = ARG_ANYTHING,
4365};
4366
4367BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
4368 const u8 *, from, u32, size)
4369{
4370 struct ip_tunnel_info *info = skb_tunnel_info(skb);
4371 const struct metadata_dst *md = this_cpu_ptr(md_dst);
4372
4373 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
4374 return -EINVAL;
4375 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
4376 return -ENOMEM;
4377
4378 ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
4379
4380 return 0;
4381}
4382
4383static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
4384 .func = bpf_skb_set_tunnel_opt,
4385 .gpl_only = false,
4386 .ret_type = RET_INTEGER,
4387 .arg1_type = ARG_PTR_TO_CTX,
4388 .arg2_type = ARG_PTR_TO_MEM,
4389 .arg3_type = ARG_CONST_SIZE,
4390};
4391
4392static const struct bpf_func_proto *
4393bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
4394{
4395 if (!md_dst) {
4396 struct metadata_dst __percpu *tmp;
4397
4398 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
4399 METADATA_IP_TUNNEL,
4400 GFP_KERNEL);
4401 if (!tmp)
4402