1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26#include <linux/kernel.h>
27#include <linux/spinlock.h>
28#include <linux/sched.h>
29#include <linux/mm.h>
30#include <linux/hash.h>
31#include <linux/init.h>
32#include <linux/fs.h>
33#include <linux/futex.h>
34#include <linux/highmem.h>
35#include <linux/time.h>
36#include <linux/pagemap.h>
37#include <linux/slab.h>
38#include <linux/poll.h>
39#include <linux/file.h>
40#include <linux/dcache.h>
41#include <asm/uaccess.h>
42
43
44
45
46#define FUTEX_HASHBITS 6
47
48extern void send_sigio(struct fown_struct *fown, int fd, int band);
49
50
51static struct vfsmount *futex_mnt;
52
53
54
55struct futex_q {
56 struct list_head list;
57 wait_queue_head_t waiters;
58
59 struct page *page;
60 unsigned int offset;
61
62 int fd;
63 struct file *filp;
64};
65
66
67static struct list_head futex_queues[1<<FUTEX_HASHBITS];
68static spinlock_t futex_lock = SPIN_LOCK_UNLOCKED;
69
70static inline struct list_head *hash_futex(struct page *page,
71 unsigned long offset)
72{
73 unsigned long h;
74
75
76 h = (unsigned long)page + offset;
77 return &futex_queues[hash_long(h, FUTEX_HASHBITS)];
78}
79
80
81static inline void tell_waiter(struct futex_q *q)
82{
83 wake_up_all(&q->waiters);
84 if (q->filp)
85 send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
86}
87
88static inline void unpin_page(struct page *page)
89{
90
91
92
93 page_cache_release(page);
94}
95
96static int futex_wake(struct list_head *head,
97 struct page *page,
98 unsigned int offset,
99 int num)
100{
101 struct list_head *i, *next;
102 int num_woken = 0;
103
104 spin_lock(&futex_lock);
105 list_for_each_safe(i, next, head) {
106 struct futex_q *this = list_entry(i, struct futex_q, list);
107
108 if (this->page == page && this->offset == offset) {
109 list_del_init(i);
110 tell_waiter(this);
111 num_woken++;
112 if (num_woken >= num) break;
113 }
114 }
115 spin_unlock(&futex_lock);
116 return num_woken;
117}
118
119
120static inline void queue_me(struct list_head *head,
121 struct futex_q *q,
122 struct page *page,
123 unsigned int offset,
124 int fd,
125 struct file *filp)
126{
127 q->page = page;
128 q->offset = offset;
129 q->fd = fd;
130 q->filp = filp;
131
132 spin_lock(&futex_lock);
133 list_add_tail(&q->list, head);
134 spin_unlock(&futex_lock);
135}
136
137
138static inline int unqueue_me(struct futex_q *q)
139{
140 int ret = 0;
141 spin_lock(&futex_lock);
142 if (!list_empty(&q->list)) {
143 list_del(&q->list);
144 ret = 1;
145 }
146 spin_unlock(&futex_lock);
147 return ret;
148}
149
150
151static struct page *pin_page(unsigned long page_start)
152{
153 struct mm_struct *mm = current->mm;
154 struct page *page;
155 int err;
156
157 down_read(&mm->mmap_sem);
158 err = get_user_pages(current, mm, page_start,
159 1 ,
160 0 ,
161 0 ,
162 &page,
163 NULL );
164 up_read(&mm->mmap_sem);
165
166 if (err < 0)
167 return ERR_PTR(err);
168 return page;
169}
170
171static int futex_wait(struct list_head *head,
172 struct page *page,
173 int offset,
174 int val,
175 int *uaddr,
176 unsigned long time)
177{
178 int curval;
179 struct futex_q q;
180 DECLARE_WAITQUEUE(wait, current);
181 int ret = 0;
182
183 set_current_state(TASK_INTERRUPTIBLE);
184 init_waitqueue_head(&q.waiters);
185 add_wait_queue(&q.waiters, &wait);
186 queue_me(head, &q, page, offset, -1, NULL);
187
188
189 if (get_user(curval, uaddr) != 0) {
190 ret = -EFAULT;
191 goto out;
192 }
193
194 if (curval != val) {
195 ret = -EWOULDBLOCK;
196 goto out;
197 }
198 time = schedule_timeout(time);
199 if (time == 0) {
200 ret = -ETIMEDOUT;
201 goto out;
202 }
203 if (signal_pending(current)) {
204 ret = -EINTR;
205 goto out;
206 }
207 out:
208 set_current_state(TASK_RUNNING);
209
210 if (!unqueue_me(&q))
211 return 0;
212 return ret;
213}
214
215static int futex_close(struct inode *inode, struct file *filp)
216{
217 struct futex_q *q = filp->private_data;
218
219 spin_lock(&futex_lock);
220 if (!list_empty(&q->list)) {
221 list_del(&q->list);
222
223 BUG_ON(waitqueue_active(&q->waiters));
224 }
225 spin_unlock(&futex_lock);
226 unpin_page(q->page);
227 kfree(filp->private_data);
228 return 0;
229}
230
231
232static unsigned int futex_poll(struct file *filp,
233 struct poll_table_struct *wait)
234{
235 struct futex_q *q = filp->private_data;
236 int ret = 0;
237
238 poll_wait(filp, &q->waiters, wait);
239 spin_lock(&futex_lock);
240 if (list_empty(&q->list))
241 ret = POLLIN | POLLRDNORM;
242 spin_unlock(&futex_lock);
243
244 return ret;
245}
246
247static struct file_operations futex_fops = {
248 .release = futex_close,
249 .poll = futex_poll,
250};
251
252
253
254static int futex_fd(struct list_head *head,
255 struct page *page,
256 int offset,
257 int signal)
258{
259 int fd;
260 struct futex_q *q;
261 struct file *filp;
262
263 if (signal < 0 || signal > _NSIG)
264 return -EINVAL;
265
266 fd = get_unused_fd();
267 if (fd < 0)
268 return fd;
269 filp = get_empty_filp();
270 if (!filp) {
271 put_unused_fd(fd);
272 return -ENFILE;
273 }
274 filp->f_op = &futex_fops;
275 filp->f_vfsmnt = mntget(futex_mnt);
276 filp->f_dentry = dget(futex_mnt->mnt_root);
277
278 if (signal) {
279 int ret;
280
281 ret = f_setown(filp, current->tgid, 1);
282 if (ret) {
283 put_unused_fd(fd);
284 put_filp(filp);
285 return ret;
286 }
287 filp->f_owner.signum = signal;
288 }
289
290 q = kmalloc(sizeof(*q), GFP_KERNEL);
291 if (!q) {
292 put_unused_fd(fd);
293 put_filp(filp);
294 return -ENOMEM;
295 }
296
297
298 filp->private_data = q;
299 init_waitqueue_head(&q->waiters);
300 queue_me(head, q, page, offset, fd, filp);
301
302
303 fd_install(fd, filp);
304 return fd;
305}
306
307asmlinkage int sys_futex(void *uaddr, int op, int val, struct timespec *utime)
308{
309 int ret;
310 unsigned long pos_in_page;
311 struct list_head *head;
312 struct page *page;
313 unsigned long time = MAX_SCHEDULE_TIMEOUT;
314
315 if (utime) {
316 struct timespec t;
317 if (copy_from_user(&t, utime, sizeof(t)) != 0)
318 return -EFAULT;
319 time = timespec_to_jiffies(&t) + 1;
320 }
321
322 pos_in_page = ((unsigned long)uaddr) % PAGE_SIZE;
323
324
325 if ((pos_in_page % __alignof__(int)) != 0
326 || pos_in_page + sizeof(int) > PAGE_SIZE)
327 return -EINVAL;
328
329
330 page = pin_page((unsigned long)uaddr - pos_in_page);
331 if (IS_ERR(page))
332 return PTR_ERR(page);
333
334 head = hash_futex(page, pos_in_page);
335 switch (op) {
336 case FUTEX_WAIT:
337 ret = futex_wait(head, page, pos_in_page, val, uaddr, time);
338 break;
339 case FUTEX_WAKE:
340 ret = futex_wake(head, page, pos_in_page, val);
341 break;
342 case FUTEX_FD:
343
344 ret = futex_fd(head, page, pos_in_page, val);
345 if (ret >= 0)
346
347 return ret;
348 break;
349 default:
350 ret = -EINVAL;
351 }
352 unpin_page(page);
353
354 return ret;
355}
356
357static struct super_block *
358futexfs_get_sb(struct file_system_type *fs_type,
359 int flags, char *dev_name, void *data)
360{
361 return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA);
362}
363
364static struct file_system_type futex_fs_type = {
365 .name = "futexfs",
366 .get_sb = futexfs_get_sb,
367};
368
369static int __init init(void)
370{
371 unsigned int i;
372
373 register_filesystem(&futex_fs_type);
374 futex_mnt = kern_mount(&futex_fs_type);
375
376 for (i = 0; i < ARRAY_SIZE(futex_queues); i++)
377 INIT_LIST_HEAD(&futex_queues[i]);
378 return 0;
379}
380__initcall(init);
381