// SPDX-License-Identifier: GPL-2.0-only
/*
 * Ram backed block device driver.
 */

#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/major.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/pagemap.h>
#include <linux/radix-tree.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/debugfs.h>

#include <linux/uaccess.h>

#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT - SECTOR_SHIFT)
#define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
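
/*
 * Each brd device keeps a radix tree (brd_pages) of the pages that hold the
 * block device's contents. A brd page's ->index is its offset into the
 * device in PAGE_SIZE units. This store is separate from, and sits below,
 * the kernel's page cache and buffer cache.
 */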
struct brd_device {
	int			brd_number;
	struct gendisk		*brd_disk;
	struct list_head	brd_list;

	/*
	 * Backing store of pages: this is the contents of the block device.
	 * brd_lock serializes insertions into brd_pages.
	 */
	spinlock_t		brd_lock;
	struct radix_tree_root	brd_pages;
	u64			brd_nr_pages;
};
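
/*
 * Look up and return a brd's page for a given sector.
 */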
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
{
	pgoff_t idx;
	struct page *page;

	/*
	 * The page lifetime is protected by the fact that we have opened the
	 * device node -- brd pages are never deleted while the device is in
	 * use, so no further locking or refcounting is needed here. The
	 * rcu_read_lock() satisfies the radix tree's documented rules for
	 * lookups that can run concurrently with insertions.
	 */
	rcu_read_lock();
	idx = sector >> PAGE_SECTORS_SHIFT; /* sector to page index */
	page = radix_tree_lookup(&brd->brd_pages, idx);
	rcu_read_unlock();

	BUG_ON(page && page->index != idx);

	return page;
}
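
/*
 * Look up and return a brd's page for a given sector, allocating and
 * inserting it first if necessary. Returns NULL if allocation fails.
 */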
static struct page *brd_insert_page(struct brd_device *brd, sector_t sector)
{
	pgoff_t idx;
	struct page *page;
	gfp_t gfp_flags;

	page = brd_lookup_page(brd, sector);
	if (page)
		return page;

	/*
	 * Must use NOIO because we don't want to recurse back into the
	 * block or filesystem layers from page reclaim.
	 */
	gfp_flags = GFP_NOIO | __GFP_ZERO | __GFP_HIGHMEM;
	page = alloc_page(gfp_flags);
	if (!page)
		return NULL;

	if (radix_tree_preload(GFP_NOIO)) {
		__free_page(page);
		return NULL;
	}

	spin_lock(&brd->brd_lock);
	idx = sector >> PAGE_SECTORS_SHIFT;
	page->index = idx;
	if (radix_tree_insert(&brd->brd_pages, idx, page)) {
		/* Lost a race with another inserter: use the existing page. */
		__free_page(page);
		page = radix_tree_lookup(&brd->brd_pages, idx);
		BUG_ON(!page);
		BUG_ON(page->index != idx);
	} else {
		brd->brd_nr_pages++;
	}
	spin_unlock(&brd->brd_lock);

	radix_tree_preload_end();

	return page;
}
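
/*
 * Free all backing store pages and the radix tree. Must only be called
 * when there are no other users of the device.
 */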
#define FREE_BATCH 16
static void brd_free_pages(struct brd_device *brd)
{
	unsigned long pos = 0;
	struct page *pages[FREE_BATCH];
	int nr_pages;

	do {
		int i;

		nr_pages = radix_tree_gang_lookup(&brd->brd_pages,
				(void **)pages, pos, FREE_BATCH);

		for (i = 0; i < nr_pages; i++) {
			void *ret;

			BUG_ON(pages[i]->index < pos);
			pos = pages[i]->index;
			ret = radix_tree_delete(&brd->brd_pages, pos);
			BUG_ON(!ret || ret != pages[i]);
			__free_page(pages[i]);
		}

		pos++;

		/*
		 * Freeing a very large ramdisk takes a while, so reschedule
		 * periodically to avoid stalling other kernel work.
		 */
		cond_resched();

		/*
		 * This assumes radix_tree_gang_lookup always returns as many
		 * pages as possible: a short batch means the tree holds no
		 * entries at or beyond pos, so the loop can terminate.
		 */
	} while (nr_pages == FREE_BATCH);
}
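
/*
 * copy_to_brd_setup must be called before copy_to_brd. It may sleep.
 */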
static int copy_to_brd_setup(struct brd_device *brd, sector_t sector, size_t n)
{
	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
	size_t copy;

	copy = min_t(size_t, n, PAGE_SIZE - offset);
	if (!brd_insert_page(brd, sector))
		return -ENOSPC;
	if (copy < n) {
		sector += copy >> SECTOR_SHIFT;
		if (!brd_insert_page(brd, sector))
			return -ENOSPC;
	}
	return 0;
}
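
/*
 * Copy n bytes from src to the brd starting at sector. Does not sleep.
 */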
static void copy_to_brd(struct brd_device *brd, const void *src,
			sector_t sector, size_t n)
{
	struct page *page;
	void *dst;
	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
	size_t copy;

	copy = min_t(size_t, n, PAGE_SIZE - offset);
	page = brd_lookup_page(brd, sector);
	BUG_ON(!page);

	dst = kmap_atomic(page);
	memcpy(dst + offset, src, copy);
	kunmap_atomic(dst);

	if (copy < n) {
		src += copy;
		sector += copy >> SECTOR_SHIFT;
		copy = n - copy;
		page = brd_lookup_page(brd, sector);
		BUG_ON(!page);

		dst = kmap_atomic(page);
		memcpy(dst, src, copy);
		kunmap_atomic(dst);
	}
}
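
/*
 * Copy n bytes to dst from the brd starting at sector. Does not sleep.
 * Sectors with no backing page read as zeroes.
 */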
static void copy_from_brd(void *dst, struct brd_device *brd,
			sector_t sector, size_t n)
{
	struct page *page;
	void *src;
	unsigned int offset = (sector & (PAGE_SECTORS-1)) << SECTOR_SHIFT;
	size_t copy;

	copy = min_t(size_t, n, PAGE_SIZE - offset);
	page = brd_lookup_page(brd, sector);
	if (page) {
		src = kmap_atomic(page);
		memcpy(dst, src + offset, copy);
		kunmap_atomic(src);
	} else
		memset(dst, 0, copy);

	if (copy < n) {
		dst += copy;
		sector += copy >> SECTOR_SHIFT;
		copy = n - copy;
		page = brd_lookup_page(brd, sector);
		if (page) {
			src = kmap_atomic(page);
			memcpy(dst, src, copy);
			kunmap_atomic(src);
		} else
			memset(dst, 0, copy);
	}
}
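
/*
 * Process a single segment of a bio.
 */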
static int brd_do_bvec(struct brd_device *brd, struct page *page,
			unsigned int len, unsigned int off, unsigned int op,
			sector_t sector)
{
	void *mem;
	int err = 0;

	if (op_is_write(op)) {
		err = copy_to_brd_setup(brd, sector, len);
		if (err)
			goto out;
	}

	mem = kmap_atomic(page);
	if (!op_is_write(op)) {
		copy_from_brd(mem + off, brd, sector, len);
		flush_dcache_page(page);
	} else {
		flush_dcache_page(page);
		copy_to_brd(brd, mem + off, sector, len);
	}
	kunmap_atomic(mem);

out:
	return err;
}

static blk_qc_t brd_submit_bio(struct bio *bio)
{
	struct brd_device *brd = bio->bi_bdev->bd_disk->private_data;
	sector_t sector = bio->bi_iter.bi_sector;
	struct bio_vec bvec;
	struct bvec_iter iter;

	bio_for_each_segment(bvec, bio, iter) {
		unsigned int len = bvec.bv_len;
		int err;

		/* Don't support un-aligned buffers */
		WARN_ON_ONCE((bvec.bv_offset & (SECTOR_SIZE - 1)) ||
				(len & (SECTOR_SIZE - 1)));

		err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
				  bio_op(bio), sector);
		if (err)
			goto io_error;
		sector += len >> SECTOR_SHIFT;
	}

	bio_endio(bio);
	return BLK_QC_T_NONE;
io_error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}
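
/*
 * rw_page entry point: synchronous I/O on a single page, used by the MM
 * layer (e.g. for swap). Transparent huge pages are not supported.
 */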
static int brd_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, unsigned int op)
{
	struct brd_device *brd = bdev->bd_disk->private_data;
	int err;

	if (PageTransHuge(page))
		return -ENOTSUPP;
	err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
	page_endio(page, op_is_write(op), err);
	return err;
}

static const struct block_device_operations brd_fops = {
	.owner =		THIS_MODULE,
	.submit_bio =		brd_submit_bio,
	.rw_page =		brd_rw_page,
};
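
/*
 * And now the module code and kernel interface.
 */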
static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
module_param(rd_nr, int, 0444);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");

unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
module_param(rd_size, ulong, 0444);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");

static int max_part = 1;
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Number of minors to reserve between devices");

MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
MODULE_ALIAS("rd");

#ifndef MODULE
/* Legacy boot option, non-modular: "ramdisk_size=" sets rd_size in KiB. */
static int __init ramdisk_size(char *str)
{
	rd_size = simple_strtol(str, NULL, 0);
	return 1;
}
__setup("ramdisk_size=", ramdisk_size);
#endif
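
/*
 * All allocated brd devices are kept on brd_devices, protected by
 * brd_devices_mutex. Per-device page counts are exposed under the
 * "ramdisk_pages" debugfs directory.
 */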
static LIST_HEAD(brd_devices);
static DEFINE_MUTEX(brd_devices_mutex);
static struct dentry *brd_debugfs_dir;
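
/*
 * Create and register the brd device with the given index. Called with
 * brd_devices_mutex held.
 */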
static int brd_alloc(int i)
{
	struct brd_device *brd;
	struct gendisk *disk;
	char buf[DISK_NAME_LEN];

	brd = kzalloc(sizeof(*brd), GFP_KERNEL);
	if (!brd)
		return -ENOMEM;
	brd->brd_number = i;
	spin_lock_init(&brd->brd_lock);
	INIT_RADIX_TREE(&brd->brd_pages, GFP_ATOMIC);

	snprintf(buf, DISK_NAME_LEN, "ram%d", i);
	if (!IS_ERR_OR_NULL(brd_debugfs_dir))
		debugfs_create_u64(buf, 0444, brd_debugfs_dir,
				&brd->brd_nr_pages);

	disk = brd->brd_disk = blk_alloc_disk(NUMA_NO_NODE);
	if (!disk)
		goto out_free_dev;

	disk->major = RAMDISK_MAJOR;
	disk->first_minor = i * max_part;
	disk->minors = max_part;
	disk->fops = &brd_fops;
	disk->private_data = brd;
	disk->flags = GENHD_FL_EXT_DEVT;
	strlcpy(disk->disk_name, buf, DISK_NAME_LEN);
	/* rd_size is in KiB; capacity is in 512-byte sectors */
	set_capacity(disk, rd_size * 2);

	/*
	 * Report PAGE_SIZE as the physical block size so partitioning
	 * tools align partitions to page boundaries.
	 */
	blk_queue_physical_block_size(disk->queue, PAGE_SIZE);

	/* Tell the block layer that this is not a rotational device */
	blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
	blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue);
	add_disk(disk);
	list_add_tail(&brd->brd_list, &brd_devices);

	return 0;

out_free_dev:
	kfree(brd);
	return -ENOMEM;
}
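
/*
 * Probe callback for __register_blkdev(): instantiate the backing device
 * on demand the first time an unclaimed RAMDISK minor is opened.
 */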
static void brd_probe(dev_t dev)
{
	int i = MINOR(dev) / max_part;
	struct brd_device *brd;

	mutex_lock(&brd_devices_mutex);
	list_for_each_entry(brd, &brd_devices, brd_list) {
		if (brd->brd_number == i)
			goto out_unlock;
	}

	brd_alloc(i);
out_unlock:
	mutex_unlock(&brd_devices_mutex);
}
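
/*
 * Tear down one brd device and release its backing store.
 */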
static void brd_del_one(struct brd_device *brd)
{
	list_del(&brd->brd_list);
	del_gendisk(brd->brd_disk);
	blk_cleanup_disk(brd->brd_disk);
	brd_free_pages(brd);
	kfree(brd);
}

static inline void brd_check_and_reset_par(void)
{
	if (unlikely(!max_part))
		max_part = 1;

	/*
	 * Make sure 'max_part' divides (1U << MINORBITS) exactly; otherwise
	 * two devices could be assigned the same dev_t when partitions are
	 * added. Round up to the next power of two if it does not.
	 */
	if ((1U << MINORBITS) % max_part != 0)
		max_part = 1UL << fls(max_part);

	if (max_part > DISK_MAX_PARTS) {
		pr_info("brd: max_part can't be larger than %d, resetting max_part to %d.\n",
			DISK_MAX_PARTS, DISK_MAX_PARTS);
		max_part = DISK_MAX_PARTS;
	}
}

static int __init brd_init(void)
{
	struct brd_device *brd, *next;
	int err, i;

	/*
	 * brd devices are instantiated in two ways:
	 *
	 * (1) rd_nr devices are created up front at module load; rd_nr
	 *     defaults to CONFIG_BLK_DEV_RAM_COUNT.
	 * (2) Additional devices are created on demand: opening a device
	 *     node with the RAMDISK major and a still-unused minor invokes
	 *     brd_probe(), which instantiates device MINOR(dev) / max_part.
	 */
	if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe))
		return -EIO;

	brd_check_and_reset_par();

	brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);

	mutex_lock(&brd_devices_mutex);
	for (i = 0; i < rd_nr; i++) {
		err = brd_alloc(i);
		if (err)
			goto out_free;
	}

	mutex_unlock(&brd_devices_mutex);

	pr_info("brd: module loaded\n");
	return 0;

out_free:
	debugfs_remove_recursive(brd_debugfs_dir);

	list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
		brd_del_one(brd);
	mutex_unlock(&brd_devices_mutex);
	unregister_blkdev(RAMDISK_MAJOR, "ramdisk");

	pr_info("brd: module NOT loaded !!!\n");
	return err;
}

static void __exit brd_exit(void)
{
	struct brd_device *brd, *next;

	debugfs_remove_recursive(brd_debugfs_dir);

	list_for_each_entry_safe(brd, next, &brd_devices, brd_list)
		brd_del_one(brd);

	unregister_blkdev(RAMDISK_MAJOR, "ramdisk");

	pr_info("brd: module unloaded\n");
}

module_init(brd_init);
module_exit(brd_exit);