1#ifndef _RDS_RDS_H
2#define _RDS_RDS_H
3
4#include <net/sock.h>
5#include <linux/scatterlist.h>
6#include <linux/highmem.h>
7#include <rdma/rdma_cm.h>
8#include <linux/mutex.h>
9#include <linux/rds.h>
10
11#include "info.h"
12
13
14
15
/*
 * RDS protocol version numbers, encoded as (major << 8) | minor.
 */
#define RDS_PROTOCOL_3_0	0x0300
#define RDS_PROTOCOL_3_1	0x0301
#define RDS_PROTOCOL_VERSION	RDS_PROTOCOL_3_1

/* Split an encoded version into its major part and its low-byte minor part. */
#define RDS_PROTOCOL_MAJOR(v)	((v) >> 8)
#define RDS_PROTOCOL_MINOR(v)	((v) & 255)

/*
 * Build an encoded version from its two parts.  Both arguments are
 * parenthesized so that an expression argument such as "cond ? 1 : 0"
 * keeps its own grouping instead of being absorbed by the '|'.
 */
#define RDS_PROTOCOL(maj, min)	(((maj) << 8) | (min))
22
23
24
25
26
27
28
/* Port number constant for RDS. */
#define RDS_PORT	18634

/* Define KERNEL_HAS_ATOMIC64 whenever ATOMIC64_INIT is available. */
#if defined(ATOMIC64_INIT)
#define KERNEL_HAS_ATOMIC64
#endif
34
/*
 * rdsdebug() - debug logging helper.
 *
 * With DEBUG defined this expands to pr_debug() and prefixes the message
 * with the name of the calling function.  Without DEBUG it is an empty
 * inline function; its printf format attribute lets the compiler check the
 * arguments against the format string even though nothing is printed.
 */
#ifdef DEBUG
#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
#else
/* The format string is never written through, so it is taken as const. */
static inline void __attribute__ ((format (printf, 1, 2)))
rdsdebug(const char *fmt, ...)
{
}
#endif
44
45
/*
 * ceil(x, y) - divide x by y, rounding up.
 *
 * Both arguments are converted to unsigned long and evaluated exactly once.
 * The result is computed from the quotient and remainder rather than from
 * (x + y - 1) / y, because that sum wraps around when x is within y - 1 of
 * ULONG_MAX and then yields a result that is far too small.
 * y must be non-zero.
 */
#define ceil(x, y) \
	({ unsigned long __x = (x), __y = (y); __x / __y + (__x % __y != 0); })
48
/* Fragment size is 2^RDS_FRAG_SHIFT, expressed as an unsigned int. */
#define RDS_FRAG_SHIFT		12
#define RDS_FRAG_SIZE		(1U << RDS_FRAG_SHIFT)

/*
 * Congestion map geometry: a bitmap of 65536 bits, also expressed as a
 * count of unsigned longs, a count of pages, and the number of bits one
 * page holds.
 */
#define RDS_CONG_MAP_BYTES	(65536 / 8)
#define RDS_CONG_MAP_LONGS	(RDS_CONG_MAP_BYTES / sizeof(unsigned long))
#define RDS_CONG_MAP_PAGES	(PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
#define RDS_CONG_MAP_PAGE_BITS	(PAGE_SIZE * 8)
56
57struct rds_cong_map {
58 struct rb_node m_rb_node;
59 __be32 m_addr;
60 wait_queue_head_t m_waitq;
61 struct list_head m_conn_list;
62 unsigned long m_page_addrs[RDS_CONG_MAP_PAGES];
63};
64
65
66
67
68
69
70
71
/*
 * Connection states kept in rds_connection.c_state and read through
 * rds_conn_state(), rds_conn_up() and rds_conn_connecting().
 */
enum {
	RDS_CONN_DOWN		= 0,
	RDS_CONN_CONNECTING	= 1,
	RDS_CONN_DISCONNECTING	= 2,
	RDS_CONN_UP		= 3,
	RDS_CONN_ERROR		= 4,
};

/* NOTE(review): presumably bit numbers within rds_connection.c_flags - confirm. */
#define RDS_LL_SEND_FULL	0
#define RDS_RECONNECT_PENDING	1
83
84struct rds_connection {
85 struct hlist_node c_hash_node;
86 __be32 c_laddr;
87 __be32 c_faddr;
88 unsigned int c_loopback:1;
89 struct rds_connection *c_passive;
90
91 struct rds_cong_map *c_lcong;
92 struct rds_cong_map *c_fcong;
93
94 struct mutex c_send_lock;
95 struct rds_message *c_xmit_rm;
96 unsigned long c_xmit_sg;
97 unsigned int c_xmit_hdr_off;
98 unsigned int c_xmit_data_off;
99 unsigned int c_xmit_rdma_sent;
100
101 spinlock_t c_lock;
102 u64 c_next_tx_seq;
103 struct list_head c_send_queue;
104 struct list_head c_retrans;
105
106 u64 c_next_rx_seq;
107
108 struct rds_transport *c_trans;
109 void *c_transport_data;
110
111 atomic_t c_state;
112 unsigned long c_flags;
113 unsigned long c_reconnect_jiffies;
114 struct delayed_work c_send_w;
115 struct delayed_work c_recv_w;
116 struct delayed_work c_conn_w;
117 struct work_struct c_down_w;
118 struct mutex c_cm_lock;
119
120 struct list_head c_map_item;
121 unsigned long c_map_queued;
122 unsigned long c_map_offset;
123 unsigned long c_map_bytes;
124
125 unsigned int c_unacked_packets;
126 unsigned int c_unacked_bytes;
127
128
129 unsigned int c_version;
130};
131
/* Single-bit flag values (NOTE(review): presumably for rds_header.h_flags). */
#define RDS_FLAG_CONG_BITMAP	0x01
#define RDS_FLAG_ACK_REQUIRED	0x02
#define RDS_FLAG_RETRANSMITTED	0x04

/* Upper bound on an advertised credit value. */
#define RDS_MAX_ADV_CREDIT	127

/* Number of bytes reserved for extension headers in struct rds_header. */
#define RDS_HEADER_EXT_SPACE	16
141
142struct rds_header {
143 __be64 h_sequence;
144 __be64 h_ack;
145 __be32 h_len;
146 __be16 h_sport;
147 __be16 h_dport;
148 u8 h_flags;
149 u8 h_credit;
150 u8 h_padding[4];
151 __sum16 h_csum;
152
153 u8 h_exthdr[RDS_HEADER_EXT_SPACE];
154};
155
156
157
158
159#define RDS_EXTHDR_NONE 0
160
161
162
163
164
165
166
167
168
169
170
171#define RDS_EXTHDR_VERSION 1
172struct rds_ext_header_version {
173 __be32 h_version;
174};
175
176
177
178
179
180#define RDS_EXTHDR_RDMA 2
181struct rds_ext_header_rdma {
182 __be32 h_rdma_rkey;
183};
184
185
186
187
188
189
190#define RDS_EXTHDR_RDMA_DEST 3
191struct rds_ext_header_rdma_dest {
192 __be32 h_rdma_rkey;
193 __be32 h_rdma_offset;
194};
195
196#define __RDS_EXTHDR_MAX 16
197
198struct rds_incoming {
199 atomic_t i_refcount;
200 struct list_head i_item;
201 struct rds_connection *i_conn;
202 struct rds_header i_hdr;
203 unsigned long i_rx_jiffies;
204 __be32 i_saddr;
205
206 rds_rdma_cookie_t i_rdma_cookie;
207};
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238#define RDS_MSG_ON_SOCK 1
239#define RDS_MSG_ON_CONN 2
240#define RDS_MSG_HAS_ACK_SEQ 3
241#define RDS_MSG_ACK_REQUIRED 4
242#define RDS_MSG_RETRANSMITTED 5
243#define RDS_MSG_MAPPED 6
244#define RDS_MSG_PAGEVEC 7
245
246struct rds_message {
247 atomic_t m_refcount;
248 struct list_head m_sock_item;
249 struct list_head m_conn_item;
250 struct rds_incoming m_inc;
251 u64 m_ack_seq;
252 __be32 m_daddr;
253 unsigned long m_flags;
254
255
256
257
258
259
260 spinlock_t m_rs_lock;
261 struct rds_sock *m_rs;
262 struct rds_rdma_op *m_rdma_op;
263 rds_rdma_cookie_t m_rdma_cookie;
264 struct rds_mr *m_rdma_mr;
265 unsigned int m_nents;
266 unsigned int m_count;
267 struct scatterlist m_sg[0];
268};
269
270
271
272
273
274
275
276
277struct rds_notifier {
278 struct list_head n_list;
279 uint64_t n_user_token;
280 int n_status;
281};
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314struct rds_transport {
315 char t_name[TRANSNAMSIZ];
316 struct list_head t_item;
317 struct module *t_owner;
318 unsigned int t_prefer_loopback:1;
319
320 int (*laddr_check)(__be32 addr);
321 int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
322 void (*conn_free)(void *data);
323 int (*conn_connect)(struct rds_connection *conn);
324 void (*conn_shutdown)(struct rds_connection *conn);
325 void (*xmit_prepare)(struct rds_connection *conn);
326 void (*xmit_complete)(struct rds_connection *conn);
327 int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
328 unsigned int hdr_off, unsigned int sg, unsigned int off);
329 int (*xmit_cong_map)(struct rds_connection *conn,
330 struct rds_cong_map *map, unsigned long offset);
331 int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
332 int (*recv)(struct rds_connection *conn);
333 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
334 size_t size);
335 void (*inc_purge)(struct rds_incoming *inc);
336 void (*inc_free)(struct rds_incoming *inc);
337
338 int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
339 struct rdma_cm_event *event);
340 int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
341 void (*cm_connect_complete)(struct rds_connection *conn,
342 struct rdma_cm_event *event);
343
344 unsigned int (*stats_info_copy)(struct rds_info_iterator *iter,
345 unsigned int avail);
346 void (*exit)(void);
347 void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
348 struct rds_sock *rs, u32 *key_ret);
349 void (*sync_mr)(void *trans_private, int direction);
350 void (*free_mr)(void *trans_private, int invalidate);
351 void (*flush_mrs)(void);
352};
353
354struct rds_sock {
355 struct sock rs_sk;
356
357 u64 rs_user_addr;
358 u64 rs_user_bytes;
359
360
361
362
363
364 struct rb_node rs_bound_node;
365 __be32 rs_bound_addr;
366 __be32 rs_conn_addr;
367 __be16 rs_bound_port;
368 __be16 rs_conn_port;
369
370
371
372
373
374
375 struct rds_transport *rs_transport;
376
377
378
379
380
381 struct rds_connection *rs_conn;
382
383
384 int rs_congested;
385
386
387 spinlock_t rs_lock;
388 struct list_head rs_send_queue;
389 u32 rs_snd_bytes;
390 int rs_rcv_bytes;
391 struct list_head rs_notify_queue;
392
393
394
395
396
397
398 uint64_t rs_cong_mask;
399 uint64_t rs_cong_notify;
400 struct list_head rs_cong_list;
401 unsigned long rs_cong_track;
402
403
404
405
406
407 rwlock_t rs_recv_lock;
408 struct list_head rs_recv_queue;
409
410
411 struct list_head rs_item;
412
413
414 spinlock_t rs_rdma_lock;
415 struct rb_root rs_rdma_keys;
416
417
418 unsigned char rs_recverr,
419 rs_cong_monitor;
420};
421
422static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
423{
424 return container_of(sk, struct rds_sock, rs_sk);
425}
426static inline struct sock *rds_rs_to_sk(struct rds_sock *rs)
427{
428 return &rs->rs_sk;
429}
430
431
432
433
434
435
436static inline int rds_sk_sndbuf(struct rds_sock *rs)
437{
438 return rds_rs_to_sk(rs)->sk_sndbuf / 2;
439}
440static inline int rds_sk_rcvbuf(struct rds_sock *rs)
441{
442 return rds_rs_to_sk(rs)->sk_rcvbuf / 2;
443}
444
445struct rds_statistics {
446 uint64_t s_conn_reset;
447 uint64_t s_recv_drop_bad_checksum;
448 uint64_t s_recv_drop_old_seq;
449 uint64_t s_recv_drop_no_sock;
450 uint64_t s_recv_drop_dead_sock;
451 uint64_t s_recv_deliver_raced;
452 uint64_t s_recv_delivered;
453 uint64_t s_recv_queued;
454 uint64_t s_recv_immediate_retry;
455 uint64_t s_recv_delayed_retry;
456 uint64_t s_recv_ack_required;
457 uint64_t s_recv_rdma_bytes;
458 uint64_t s_recv_ping;
459 uint64_t s_send_queue_empty;
460 uint64_t s_send_queue_full;
461 uint64_t s_send_sem_contention;
462 uint64_t s_send_sem_queue_raced;
463 uint64_t s_send_immediate_retry;
464 uint64_t s_send_delayed_retry;
465 uint64_t s_send_drop_acked;
466 uint64_t s_send_ack_required;
467 uint64_t s_send_queued;
468 uint64_t s_send_rdma;
469 uint64_t s_send_rdma_bytes;
470 uint64_t s_send_pong;
471 uint64_t s_page_remainder_hit;
472 uint64_t s_page_remainder_miss;
473 uint64_t s_copy_to_user;
474 uint64_t s_copy_from_user;
475 uint64_t s_cong_update_queued;
476 uint64_t s_cong_update_received;
477 uint64_t s_cong_send_error;
478 uint64_t s_cong_send_blocked;
479};
480
481
482void rds_sock_addref(struct rds_sock *rs);
483void rds_sock_put(struct rds_sock *rs);
484void rds_wake_sk_sleep(struct rds_sock *rs);
485static inline void __rds_wake_sk_sleep(struct sock *sk)
486{
487 wait_queue_head_t *waitq = sk->sk_sleep;
488
489 if (!sock_flag(sk, SOCK_DEAD) && waitq)
490 wake_up(waitq);
491}
492extern wait_queue_head_t rds_poll_waitq;
493
494
495
496int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
497void rds_remove_bound(struct rds_sock *rs);
498struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
499
500
501int rds_cong_get_maps(struct rds_connection *conn);
502void rds_cong_add_conn(struct rds_connection *conn);
503void rds_cong_remove_conn(struct rds_connection *conn);
504void rds_cong_set_bit(struct rds_cong_map *map, __be16 port);
505void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port);
506int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs);
507void rds_cong_queue_updates(struct rds_cong_map *map);
508void rds_cong_map_updated(struct rds_cong_map *map, uint64_t);
509int rds_cong_updated_since(unsigned long *recent);
510void rds_cong_add_socket(struct rds_sock *);
511void rds_cong_remove_socket(struct rds_sock *);
512void rds_cong_exit(void);
513struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
514
515
516int __init rds_conn_init(void);
517void rds_conn_exit(void);
518struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
519 struct rds_transport *trans, gfp_t gfp);
520struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
521 struct rds_transport *trans, gfp_t gfp);
522void rds_conn_destroy(struct rds_connection *conn);
523void rds_conn_reset(struct rds_connection *conn);
524void rds_conn_drop(struct rds_connection *conn);
525void rds_for_each_conn_info(struct socket *sock, unsigned int len,
526 struct rds_info_iterator *iter,
527 struct rds_info_lengths *lens,
528 int (*visitor)(struct rds_connection *, void *),
529 size_t item_len);
530void __rds_conn_error(struct rds_connection *conn, const char *, ...)
531 __attribute__ ((format (printf, 2, 3)));
532#define rds_conn_error(conn, fmt...) \
533 __rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
534
535static inline int
536rds_conn_transition(struct rds_connection *conn, int old, int new)
537{
538 return atomic_cmpxchg(&conn->c_state, old, new) == old;
539}
540
541static inline int
542rds_conn_state(struct rds_connection *conn)
543{
544 return atomic_read(&conn->c_state);
545}
546
547static inline int
548rds_conn_up(struct rds_connection *conn)
549{
550 return atomic_read(&conn->c_state) == RDS_CONN_UP;
551}
552
553static inline int
554rds_conn_connecting(struct rds_connection *conn)
555{
556 return atomic_read(&conn->c_state) == RDS_CONN_CONNECTING;
557}
558
559
560struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
561struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
562 size_t total_len);
563struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
564void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
565 __be16 dport, u64 seq);
566int rds_message_add_extension(struct rds_header *hdr,
567 unsigned int type, const void *data, unsigned int len);
568int rds_message_next_extension(struct rds_header *hdr,
569 unsigned int *pos, void *buf, unsigned int *buflen);
570int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version);
571int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version);
572int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
573int rds_message_inc_copy_to_user(struct rds_incoming *inc,
574 struct iovec *first_iov, size_t size);
575void rds_message_inc_purge(struct rds_incoming *inc);
576void rds_message_inc_free(struct rds_incoming *inc);
577void rds_message_addref(struct rds_message *rm);
578void rds_message_put(struct rds_message *rm);
579void rds_message_wait(struct rds_message *rm);
580void rds_message_unmapped(struct rds_message *rm);
581
582static inline void rds_message_make_checksum(struct rds_header *hdr)
583{
584 hdr->h_csum = 0;
585 hdr->h_csum = ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2);
586}
587
588static inline int rds_message_verify_checksum(const struct rds_header *hdr)
589{
590 return !hdr->h_csum || ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2) == 0;
591}
592
593
594
595int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
596 gfp_t gfp);
597int rds_page_copy_user(struct page *page, unsigned long offset,
598 void __user *ptr, unsigned long bytes,
599 int to_user);
600#define rds_page_copy_to_user(page, offset, ptr, bytes) \
601 rds_page_copy_user(page, offset, ptr, bytes, 1)
602#define rds_page_copy_from_user(page, offset, ptr, bytes) \
603 rds_page_copy_user(page, offset, ptr, bytes, 0)
604void rds_page_exit(void);
605
606
607void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
608 __be32 saddr);
609void rds_inc_addref(struct rds_incoming *inc);
610void rds_inc_put(struct rds_incoming *inc);
611void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
612 struct rds_incoming *inc, gfp_t gfp, enum km_type km);
613int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
614 size_t size, int msg_flags);
615void rds_clear_recv_queue(struct rds_sock *rs);
616int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
617void rds_inc_info_copy(struct rds_incoming *inc,
618 struct rds_info_iterator *iter,
619 __be32 saddr, __be32 daddr, int flip);
620
621
622int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
623 size_t payload_len);
624void rds_send_reset(struct rds_connection *conn);
625int rds_send_xmit(struct rds_connection *conn);
626struct sockaddr_in;
627void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
628typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
629void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
630 is_acked_func is_acked);
631int rds_send_acked_before(struct rds_connection *conn, u64 seq);
632void rds_send_remove_from_sock(struct list_head *messages, int status);
633int rds_send_pong(struct rds_connection *conn, __be16 dport);
634struct rds_message *rds_send_get_message(struct rds_connection *,
635 struct rds_rdma_op *);
636
637
638void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
639
640
641DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
642#define rds_stats_inc_which(which, member) do { \
643 per_cpu(which, get_cpu()).member++; \
644 put_cpu(); \
645} while (0)
646#define rds_stats_inc(member) rds_stats_inc_which(rds_stats, member)
647#define rds_stats_add_which(which, member, count) do { \
648 per_cpu(which, get_cpu()).member += count; \
649 put_cpu(); \
650} while (0)
651#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
652int __init rds_stats_init(void);
653void rds_stats_exit(void);
654void rds_stats_info_copy(struct rds_info_iterator *iter,
655 uint64_t *values, char **names, size_t nr);
656
657
658int __init rds_sysctl_init(void);
659void rds_sysctl_exit(void);
660extern unsigned long rds_sysctl_sndbuf_min;
661extern unsigned long rds_sysctl_sndbuf_default;
662extern unsigned long rds_sysctl_sndbuf_max;
663extern unsigned long rds_sysctl_reconnect_min_jiffies;
664extern unsigned long rds_sysctl_reconnect_max_jiffies;
665extern unsigned int rds_sysctl_max_unacked_packets;
666extern unsigned int rds_sysctl_max_unacked_bytes;
667extern unsigned int rds_sysctl_ping_enable;
668extern unsigned long rds_sysctl_trace_flags;
669extern unsigned int rds_sysctl_trace_level;
670
671
672int __init rds_threads_init(void);
673void rds_threads_exit(void);
674extern struct workqueue_struct *rds_wq;
675void rds_connect_worker(struct work_struct *);
676void rds_shutdown_worker(struct work_struct *);
677void rds_send_worker(struct work_struct *);
678void rds_recv_worker(struct work_struct *);
679void rds_connect_complete(struct rds_connection *conn);
680
681
682int rds_trans_register(struct rds_transport *trans);
683void rds_trans_unregister(struct rds_transport *trans);
684struct rds_transport *rds_trans_get_preferred(__be32 addr);
685unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
686 unsigned int avail);
687int __init rds_trans_init(void);
688void rds_trans_exit(void);
689
690#endif
691