1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
/* Block major number under which the real ubd disks are registered. */
#define MAJOR_NR UBD_MAJOR
/* Each unit owns 1 << UBD_SHIFT minors; first_minor is unit << UBD_SHIFT. */
#define UBD_SHIFT 4
22
23#include "linux/kernel.h"
24#include "linux/module.h"
25#include "linux/blkdev.h"
26#include "linux/hdreg.h"
27#include "linux/init.h"
28#include "linux/cdrom.h"
29#include "linux/proc_fs.h"
30#include "linux/ctype.h"
31#include "linux/capability.h"
32#include "linux/mm.h"
33#include "linux/vmalloc.h"
34#include "linux/blkpg.h"
35#include "linux/genhd.h"
36#include "linux/spinlock.h"
37#include "linux/platform_device.h"
38#include "linux/scatterlist.h"
39#include "asm/segment.h"
40#include "asm/uaccess.h"
41#include "asm/irq.h"
42#include "asm/types.h"
43#include "asm/tlbflush.h"
44#include "mem_user.h"
45#include "kern_util.h"
46#include "kern.h"
47#include "mconsole_kern.h"
48#include "init.h"
49#include "irq_user.h"
50#include "irq_kern.h"
51#include "ubd_user.h"
52#include "kern_util.h"
53#include "os.h"
54#include "mem.h"
55#include "mem_kern.h"
56#include "cow.h"
57
/* Direction of a single transfer handed to the I/O thread. */
enum ubd_req {
	UBD_READ,
	UBD_WRITE
};

/*
 * One chunk of I/O passed (by pointer) between the driver and the I/O
 * thread.  The field order is part of the contract with do_io() and must
 * not be rearranged.
 */
struct io_thread_req {
	/* Block-layer request this chunk belongs to. */
	struct request *req;
	/* Read or write. */
	enum ubd_req op;
	/* Host descriptors; do_io() picks fds[bit] per sector_mask bit. */
	int fds[2];
	/* Byte offsets added to "offset", indexed like fds[]. */
	unsigned long offsets[2];
	/* Byte offset of the chunk within the device. */
	unsigned long long offset;
	/* Chunk length in bytes. */
	unsigned long length;
	/* Kernel buffer the data is read into or written from. */
	char *buffer;
	/* Sector size in bytes used to split the chunk. */
	int sectorsize;
	/* One bit per sector of the chunk, selecting fds[0] or fds[1]. */
	unsigned long sector_mask;
	/* File offset of the bitmap words to write back, or -1 for none. */
	unsigned long long cow_offset;
	/* Bitmap words written at cow_offset after the data transfer. */
	unsigned long bitmap_words[2];
	/* Set non-zero by do_io() when the transfer failed. */
	int error;
};
74
/*
 * Prototypes for the non-static helpers that are defined further down in
 * this file, so that earlier code can call them.
 */
extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
			 char **backing_file_out, int *bitmap_offset_out,
			 unsigned long *bitmap_len_out, int *data_offset_out,
			 int *create_cow_out);
extern int create_cow_file(char *cow_file, char *backing_file,
			   struct openflags flags, int sectorsize,
			   int alignment, int *bitmap_offset_out,
			   unsigned long *bitmap_len_out,
			   int *data_offset_out);
extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
extern void do_io(struct io_thread_req *req);
86
/*
 * Test bit number "bit" in the byte array "data".  Bits are counted from
 * the least significant bit of data[0] upwards, eight per byte.
 * Returns 1 when the bit is set and 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return (data[index] & (1 << shift)) != 0;
}
97
/*
 * Set bit number "bit" in the byte array "data", using the same numbering
 * as ubd_test_bit(): least significant bit of data[0] first.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	data[index] |= (1 << shift);
}
108
109
/* Name used for both the platform driver and the per-unit platform devices. */
#define DRIVER_NAME "uml-blkdev"

/* Taken around changes to ubd_devs[] configuration and to fake_major. */
static DEFINE_MUTEX(ubd_lock);

/* Block device operations; the definitions are near the end of the file. */
static int ubd_open(struct inode * inode, struct file * filp);
static int ubd_release(struct inode * inode, struct file * file);
static int ubd_ioctl(struct inode * inode, struct file * file,
		     unsigned int cmd, unsigned long arg);
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);

/* Number of slots in ubd_devs[]; valid unit numbers are 0 .. MAX_DEV - 1. */
#define MAX_DEV (16)
121
/* Operations table installed as disk->fops for every ubd gendisk. */
static struct block_device_operations ubd_blops = {
	.owner = THIS_MODULE,
	.open = ubd_open,
	.release = ubd_release,
	.ioctl = ubd_ioctl,
	.getgeo = ubd_getgeo,
};
129
130
/* Optional extra major; still equal to MAJOR_NR when none was assigned. */
static int fake_major = MAJOR_NR;
/* Per-unit disks registered under MAJOR_NR. */
static struct gendisk *ubd_gendisk[MAX_DEV];
/* Per-unit disks registered under fake_major, when one is in use. */
static struct gendisk *fake_gendisk[MAX_DEV];
134
/*
 * Default host open flags: readable and writable, with .s set only when
 * CONFIG_BLK_DEV_UBD_SYNC is enabled.
 * NOTE(review): .c is set by create_cow_file() when creating a file, so it
 * presumably means "create"; the meaning of .cl is not visible here -
 * confirm against struct openflags.
 */
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif
/* Starting flags for every newly configured device; "ubd=sync" updates it. */
static struct openflags global_openflags = OPEN_FLAGS;
143
/* Copy-on-write state of a device that sits on top of a backing file. */
struct cow {
	/* Backing file name, or NULL when the device has no backing file. */
	char *file;
	/* Host descriptor of the backing file (opened read-only). */
	int fd;
	/* In-memory copy of the COW bitmap, vmalloc'd while the device is open. */
	unsigned long *bitmap;
	/* Size of the bitmap in bytes. */
	unsigned long bitmap_len;
	/* Byte offset of the bitmap within the COW file. */
	int bitmap_offset;
	/* Byte offset added to I/O that targets the COW file itself. */
	int data_offset;
};
154
/* Maximum number of scatterlist segments mapped for one request. */
#define MAX_SG 64

/* Per-unit driver state; one entry of ubd_devs[] per possible device. */
struct ubd {
	/* Link on the global "restart" list of stalled devices. */
	struct list_head restart;
	/* Name of the device's file; NULL means the unit is not configured. */
	char *file;
	/* Number of current openers (maintained by ubd_open/ubd_release). */
	int count;
	/* Host descriptor of "file" while the device is open. */
	int fd;
	/* Device size in bytes, rounded up to a 512-byte multiple. */
	__u64 size;
	/* Flags chosen at configuration time. */
	struct openflags boot_openflags;
	/* Flags in effect for the current open; reset from boot_openflags. */
	struct openflags openflags;
	/* Set by the 'c' flag: do not lock the file on the host. */
	unsigned shared:1;
	/* Set by the 'd' flag: never treat the file as a COW file. */
	unsigned no_cow:1;
	struct cow cow;
	struct platform_device pdev;
	struct request_queue *queue;
	/* Queue lock handed to blk_init_queue(). */
	spinlock_t lock;
	/* Segments of the request currently being submitted. */
	struct scatterlist sg[MAX_SG];
	/* Request currently being submitted, or NULL. */
	struct request *request;
	/* Next segment to submit and number of mapped segments in sg[]. */
	int start_sg, end_sg;
};
177
/* Initial value of struct cow: no backing file, no descriptor, no bitmap. */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1, \
	.bitmap =		NULL, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}

/*
 * Initial value of an unconfigured unit.  Members that are not named here
 * are zero-initialized.
 */
#define DEFAULT_UBD { \
	.file = 		NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.no_cow =               0, \
	.shared =		0, \
	.cow =			DEFAULT_COW, \
	.lock =			SPIN_LOCK_UNLOCKED, \
	.request =		NULL, \
	.start_sg =		0, \
	.end_sg =		0, \
}

/* State of every possible unit, indexed by unit number. */
struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
204
205
/* Non-zero once "fake_ide" was given on the command line. */
static int fake_ide = 0;
/* proc directories "ide" and "ide/ide0", created lazily by make_proc_ide(). */
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;
209
210static void make_proc_ide(void)
211{
212 proc_ide_root = proc_mkdir("ide", NULL);
213 proc_ide = proc_mkdir("ide0", proc_ide_root);
214}
215
/*
 * read_proc handler for the fake "media" entry: always reports "disk".
 *
 * page:  buffer that receives the text
 * start: out - set to page + off when data is returned
 * off:   offset into the text the caller wants to read from
 * count: maximum number of bytes the caller accepts
 * eof:   out - set to 1 when the remaining text fits within count
 * data:  unused
 *
 * Returns the number of bytes available at *start, or 0 when off is at or
 * past the end of the text (in which case *start is left untouched).
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
			       int *eof, void *data)
{
	static const char media[] = "disk\n";
	int remaining;

	strcpy(page, media);
	remaining = strlen(media);
	remaining -= off;

	if (remaining >= count) {
		remaining = count;
	} else {
		*eof = 1;
		if (remaining <= 0)
			return 0;
	}

	*start = page + off;
	return remaining;
}
232
233static void make_ide_entries(const char *dev_name)
234{
235 struct proc_dir_entry *dir, *ent;
236 char name[64];
237
238 if(proc_ide_root == NULL) make_proc_ide();
239
240 dir = proc_mkdir(dev_name, proc_ide);
241 if(!dir) return;
242
243 ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
244 if(!ent) return;
245 ent->data = NULL;
246 ent->read_proc = proc_ide_read_media;
247 ent->write_proc = NULL;
248 snprintf(name, sizeof(name), "ide0/%s", dev_name);
249 proc_symlink(dev_name, proc_ide_root, name);
250}
251
/* Command-line handler for "fake_ide": just records that it was requested. */
static int fake_ide_setup(char *str)
{
	fake_ide = 1;
	return 1;
}

__setup("fake_ide", fake_ide_setup);

__uml_help(fake_ide_setup,
"fake_ide\n"
" Create ide0 entries that map onto ubd devices.\n\n"
);
264
/*
 * Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single lower-case letter, where 'a' is
 * unit 0.  On success *ptr is advanced past the designator and the unit
 * number is returned; otherwise -1 is returned and *ptr is unchanged.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *end;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &end, 0);
		if (end == cur)
			return -1;
		*ptr = end;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
283
284
285
286
287
288static int ubd_setup_common(char *str, int *index_out, char **error_out)
289{
290 struct ubd *ubd_dev;
291 struct openflags flags = global_openflags;
292 char *backing_file;
293 int n, err = 0, i;
294
295 if(index_out) *index_out = -1;
296 n = *str;
297 if(n == '='){
298 char *end;
299 int major;
300
301 str++;
302 if(!strcmp(str, "sync")){
303 global_openflags = of_sync(global_openflags);
304 goto out1;
305 }
306
307 err = -EINVAL;
308 major = simple_strtoul(str, &end, 0);
309 if((*end != '\0') || (end == str)){
310 *error_out = "Didn't parse major number";
311 goto out1;
312 }
313
314 mutex_lock(&ubd_lock);
315 if(fake_major != MAJOR_NR){
316 *error_out = "Can't assign a fake major twice";
317 goto out1;
318 }
319
320 fake_major = major;
321
322 printk(KERN_INFO "Setting extra ubd major number to %d\n",
323 major);
324 err = 0;
325 out1:
326 mutex_unlock(&ubd_lock);
327 return err;
328 }
329
330 n = parse_unit(&str);
331 if(n < 0){
332 *error_out = "Couldn't parse device number";
333 return -EINVAL;
334 }
335 if(n >= MAX_DEV){
336 *error_out = "Device number out of range";
337 return 1;
338 }
339
340 err = -EBUSY;
341 mutex_lock(&ubd_lock);
342
343 ubd_dev = &ubd_devs[n];
344 if(ubd_dev->file != NULL){
345 *error_out = "Device is already configured";
346 goto out;
347 }
348
349 if (index_out)
350 *index_out = n;
351
352 err = -EINVAL;
353 for (i = 0; i < sizeof("rscd="); i++) {
354 switch (*str) {
355 case 'r':
356 flags.w = 0;
357 break;
358 case 's':
359 flags.s = 1;
360 break;
361 case 'd':
362 ubd_dev->no_cow = 1;
363 break;
364 case 'c':
365 ubd_dev->shared = 1;
366 break;
367 case '=':
368 str++;
369 goto break_loop;
370 default:
371 *error_out = "Expected '=' or flag letter "
372 "(r, s, c, or d)";
373 goto out;
374 }
375 str++;
376 }
377
378 if (*str == '=')
379 *error_out = "Too many flags specified";
380 else
381 *error_out = "Missing '='";
382 goto out;
383
384break_loop:
385 backing_file = strchr(str, ',');
386
387 if (backing_file == NULL)
388 backing_file = strchr(str, ':');
389
390 if(backing_file != NULL){
391 if(ubd_dev->no_cow){
392 *error_out = "Can't specify both 'd' and a cow file";
393 goto out;
394 }
395 else {
396 *backing_file = '\0';
397 backing_file++;
398 }
399 }
400 err = 0;
401 ubd_dev->file = str;
402 ubd_dev->cow.file = backing_file;
403 ubd_dev->boot_openflags = flags;
404out:
405 mutex_unlock(&ubd_lock);
406 return err;
407}
408
409static int ubd_setup(char *str)
410{
411 char *error;
412 int err;
413
414 err = ubd_setup_common(str, NULL, &error);
415 if(err)
416 printk(KERN_ERR "Failed to initialize device with \"%s\" : "
417 "%s\n", str, error);
418 return 1;
419}
420
421__setup("ubd", ubd_setup);
422__uml_help(ubd_setup,
423"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
424" This is used to associate a device with a file in the underlying\n"
425" filesystem. When specifying two filenames, the first one is the\n"
426" COW name and the second is the backing file name. As separator you can\n"
427" use either a ':' or a ',': the first one allows writing things like;\n"
428" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
429" while with a ',' the shell would not expand the 2nd '~'.\n"
430" When using only one filename, UML will detect whether to treat it like\n"
431" a COW file or a backing file. To override this detection, add the 'd'\n"
432" flag:\n"
433" ubd0d=BackingFile\n"
434" Usually, there is a filesystem in the file, but \n"
435" that's not required. Swap devices containing swap files can be\n"
436" specified like this. Also, a file which doesn't contain a\n"
437" filesystem can have its contents read in the virtual \n"
438" machine by running 'dd' on the device. <n> must be in the range\n"
439" 0 to 7. Appending an 'r' to the number will cause that device\n"
440" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
441" an 's' will cause data to be written to disk on the host immediately.\n"
442" 'c' will cause the device to be treated as being shared between multiple\n"
443" UMLs and file locking will be turned off - this is appropriate for a\n"
444" cluster filesystem and inappropriate at almost all other times.\n\n"
445);
446
/*
 * Command-line handler for options starting with "udb": only prints a
 * warning that the option is probably a misspelled "ubd".
 */
static int udb_setup(char *str)
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n", str);
	return 1;
}

__setup("udb", udb_setup);
__uml_help(udb_setup,
"udb\n"
" This option is here solely to catch ubd -> udb typos, which can be\n"
" to impossible to catch visually unless you specifically look for\n"
" them. The only result of any option starting with 'udb' is an error\n"
" in the boot output.\n\n"
);
462
/* Request function, defined below; needed by ubd_handler() and ubd_add(). */
static void do_ubd_request(struct request_queue * q);

/*
 * Descriptor used to exchange io_thread_req pointers with the I/O thread;
 * filled in by start_io_thread() and -1 until then.
 */
int thread_fd = -1;
467
/*
 * Complete "bytes" bytes of req with status "error" by forwarding to
 * blk_end_request() (note the swapped argument order).
 */
static void ubd_end_request(struct request *req, int bytes, int error)
{
	blk_end_request(req, error, bytes);
}
472
473
474
475static inline void ubd_finish(struct request *req, int bytes)
476{
477 if(bytes < 0){
478 ubd_end_request(req, 0, -EIO);
479 return;
480 }
481 ubd_end_request(req, bytes, 0);
482}
483
/*
 * Devices whose submission stalled (allocation failure or -EAGAIN on the
 * write to the I/O thread), linked through ubd->restart.
 */
static LIST_HEAD(restart);

/*
 * Completion handler: drains finished io_thread_req pointers from
 * thread_fd, accounts them against their block requests, and then restarts
 * every device parked on the restart list.
 */
static void ubd_handler(void)
{
	struct io_thread_req *req;
	struct request *rq;
	struct ubd *ubd;
	struct list_head *list, *next_ele;
	unsigned long flags;
	int n;

	while(1){
		/* Each completion arrives as one pointer-sized message. */
		n = os_read_file(thread_fd, &req,
				 sizeof(struct io_thread_req *));
		if(n != sizeof(req)){
			/* -EAGAIN: nothing more to read, leave the drain loop. */
			if(n == -EAGAIN)
				break;
			/*
			 * NOTE(review): this path returns without calling
			 * reactivate_fd() or walking the restart list -
			 * confirm that is intended.
			 */
			printk(KERN_ERR "spurious interrupt in ubd_handler, "
			       "err = %d\n", -n);
			return;
		}

		rq = req->req;
		/*
		 * NOTE(review): req->error is not examined here, so a chunk
		 * that failed in do_io() still counts as completed - verify.
		 */
		rq->nr_sectors -= req->length >> 9;
		/* The request is done once every chunk has been accounted. */
		if(rq->nr_sectors == 0)
			ubd_finish(rq, rq->hard_nr_sectors << 9);
		kfree(req);
	}
	reactivate_fd(thread_fd, UBD_IRQ);

	/* Give every stalled device another chance to submit its requests. */
	list_for_each_safe(list, next_ele, &restart){
		ubd = container_of(list, struct ubd, restart);
		list_del_init(&ubd->restart);
		spin_lock_irqsave(&ubd->lock, flags);
		do_ubd_request(ubd->queue);
		spin_unlock_irqrestore(&ubd->lock, flags);
	}
}
524
/* Interrupt handler for UBD_IRQ: processes completions via ubd_handler(). */
static irqreturn_t ubd_intr(int irq, void *dev)
{
	ubd_handler();
	return IRQ_HANDLED;
}
530
531
/* Process id of the I/O thread, or -1 when none is running. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started. */
void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
541
542static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
543{
544 char *file;
545
546 file = ubd_dev->cow.file ? ubd_dev->cow.file : ubd_dev->file;
547 return os_file_size(file, size_out);
548}
549
550static void ubd_close_dev(struct ubd *ubd_dev)
551{
552 os_close_file(ubd_dev->fd);
553 if(ubd_dev->cow.file == NULL)
554 return;
555
556 os_close_file(ubd_dev->cow.fd);
557 vfree(ubd_dev->cow.bitmap);
558 ubd_dev->cow.bitmap = NULL;
559}
560
561static int ubd_open_dev(struct ubd *ubd_dev)
562{
563 struct openflags flags;
564 char **back_ptr;
565 int err, create_cow, *create_ptr;
566 int fd;
567
568 ubd_dev->openflags = ubd_dev->boot_openflags;
569 create_cow = 0;
570 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
571 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
572
573 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
574 back_ptr, &ubd_dev->cow.bitmap_offset,
575 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
576 create_ptr);
577
578 if((fd == -ENOENT) && create_cow){
579 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
580 ubd_dev->openflags, 1 << 9, PAGE_SIZE,
581 &ubd_dev->cow.bitmap_offset,
582 &ubd_dev->cow.bitmap_len,
583 &ubd_dev->cow.data_offset);
584 if(fd >= 0){
585 printk(KERN_INFO "Creating \"%s\" as COW file for "
586 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
587 }
588 }
589
590 if(fd < 0){
591 printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
592 -fd);
593 return fd;
594 }
595 ubd_dev->fd = fd;
596
597 if(ubd_dev->cow.file != NULL){
598 blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long));
599
600 err = -ENOMEM;
601 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
602 if(ubd_dev->cow.bitmap == NULL){
603 printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
604 goto error;
605 }
606 flush_tlb_kernel_vm();
607
608 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
609 ubd_dev->cow.bitmap_offset,
610 ubd_dev->cow.bitmap_len);
611 if(err < 0)
612 goto error;
613
614 flags = ubd_dev->openflags;
615 flags.w = 0;
616 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
617 NULL, NULL, NULL, NULL);
618 if(err < 0) goto error;
619 ubd_dev->cow.fd = err;
620 }
621 return 0;
622 error:
623 os_close_file(ubd_dev->fd);
624 return err;
625}
626
/*
 * Release callback of the per-unit platform device: tears down the request
 * queue and resets the unit to its unconfigured default state.
 */
static void ubd_device_release(struct device *dev)
{
	struct ubd *ubd_dev = dev->driver_data;

	blk_cleanup_queue(ubd_dev->queue);
	/* Resetting the whole struct also clears ->file, freeing the slot. */
	*ubd_dev = ((struct ubd) DEFAULT_UBD);
}
634
635static int ubd_disk_register(int major, u64 size, int unit,
636 struct gendisk **disk_out)
637{
638 struct gendisk *disk;
639
640 disk = alloc_disk(1 << UBD_SHIFT);
641 if(disk == NULL)
642 return -ENOMEM;
643
644 disk->major = major;
645 disk->first_minor = unit << UBD_SHIFT;
646 disk->fops = &ubd_blops;
647 set_capacity(disk, size / 512);
648 if(major == MAJOR_NR)
649 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
650 else
651 sprintf(disk->disk_name, "ubd_fake%d", unit);
652
653
654 if (major == MAJOR_NR) {
655 ubd_devs[unit].pdev.id = unit;
656 ubd_devs[unit].pdev.name = DRIVER_NAME;
657 ubd_devs[unit].pdev.dev.release = ubd_device_release;
658 ubd_devs[unit].pdev.dev.driver_data = &ubd_devs[unit];
659 platform_device_register(&ubd_devs[unit].pdev);
660 disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
661 }
662
663 disk->private_data = &ubd_devs[unit];
664 disk->queue = ubd_devs[unit].queue;
665 add_disk(disk);
666
667 *disk_out = disk;
668 return 0;
669}
670
/*
 * Round a byte count up to the next multiple of the 512-byte sector size.
 * The argument is parenthesized so that any expression may be passed, and
 * the mask is built from an unsigned constant to avoid left-shifting a
 * negative value.  The result has type unsigned long long.
 */
#define ROUND_BLOCK(n) (((n) + ((1ULL << 9) - 1)) & ~((1ULL << 9) - 1))
672
673static int ubd_add(int n, char **error_out)
674{
675 struct ubd *ubd_dev = &ubd_devs[n];
676 int err = 0;
677
678 if(ubd_dev->file == NULL)
679 goto out;
680
681 err = ubd_file_size(ubd_dev, &ubd_dev->size);
682 if(err < 0){
683 *error_out = "Couldn't determine size of device's file";
684 goto out;
685 }
686
687 ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
688
689 INIT_LIST_HEAD(&ubd_dev->restart);
690 sg_init_table(ubd_dev->sg, MAX_SG);
691
692 err = -ENOMEM;
693 ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
694 if (ubd_dev->queue == NULL) {
695 *error_out = "Failed to initialize device queue";
696 goto out;
697 }
698 ubd_dev->queue->queuedata = ubd_dev;
699
700 blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
701 err = ubd_disk_register(MAJOR_NR, ubd_dev->size, n, &ubd_gendisk[n]);
702 if(err){
703 *error_out = "Failed to register device";
704 goto out_cleanup;
705 }
706
707 if(fake_major != MAJOR_NR)
708 ubd_disk_register(fake_major, ubd_dev->size, n,
709 &fake_gendisk[n]);
710
711
712
713
714
715 if (fake_ide)
716 make_ide_entries(ubd_gendisk[n]->disk_name);
717
718 err = 0;
719out:
720 return err;
721
722out_cleanup:
723 blk_cleanup_queue(ubd_dev->queue);
724 goto out;
725}
726
727static int ubd_config(char *str, char **error_out)
728{
729 int n, ret;
730
731
732
733
734
735 str = kstrdup(str, GFP_KERNEL);
736 if (str == NULL) {
737 *error_out = "Failed to allocate memory";
738 return -ENOMEM;
739 }
740
741 ret = ubd_setup_common(str, &n, error_out);
742 if (ret)
743 goto err_free;
744
745 if (n == -1) {
746 ret = 0;
747 goto err_free;
748 }
749
750 mutex_lock(&ubd_lock);
751 ret = ubd_add(n, error_out);
752 if (ret)
753 ubd_devs[n].file = NULL;
754 mutex_unlock(&ubd_lock);
755
756out:
757 return ret;
758
759err_free:
760 kfree(str);
761 goto out;
762}
763
/*
 * mconsole "get_config" handler: report the file (and, if present, the
 * backing file, separated by a comma) of the unit named by "name".
 *
 * The text is assembled in str/size with CONFIG_CHUNK, which also
 * accumulates into len.  Returns len, or -1 with *error_out set when the
 * unit number is invalid.
 * NOTE(review): the last CONFIG_CHUNK argument presumably marks the final
 * chunk - confirm against the macro definition.
 */
static int ubd_get_config(char *name, char *str, int size, char **error_out)
{
	struct ubd *ubd_dev;
	int n, len = 0;

	n = parse_unit(&name);
	if((n >= MAX_DEV) || (n < 0)){
		*error_out = "ubd_get_config : device number out of range";
		return -1;
	}

	ubd_dev = &ubd_devs[n];
	mutex_lock(&ubd_lock);

	/* An unconfigured unit reports an empty string. */
	if(ubd_dev->file == NULL){
		CONFIG_CHUNK(str, size, len, "", 1);
		goto out;
	}

	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);

	if(ubd_dev->cow.file != NULL){
		CONFIG_CHUNK(str, size, len, ",", 0);
		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
	}
	else CONFIG_CHUNK(str, size, len, "", 1);

 out:
	mutex_unlock(&ubd_lock);
	return len;
}
795
796static int ubd_id(char **str, int *start_out, int *end_out)
797{
798 int n;
799
800 n = parse_unit(str);
801 *start_out = 0;
802 *end_out = MAX_DEV - 1;
803 return n;
804}
805
806static int ubd_remove(int n, char **error_out)
807{
808 struct gendisk *disk = ubd_gendisk[n];
809 struct ubd *ubd_dev;
810 int err = -ENODEV;
811
812 mutex_lock(&ubd_lock);
813
814 ubd_dev = &ubd_devs[n];
815
816 if(ubd_dev->file == NULL)
817 goto out;
818
819
820 err = -EBUSY;
821 if(ubd_dev->count > 0)
822 goto out;
823
824 ubd_gendisk[n] = NULL;
825 if(disk != NULL){
826 del_gendisk(disk);
827 put_disk(disk);
828 }
829
830 if(fake_gendisk[n] != NULL){
831 del_gendisk(fake_gendisk[n]);
832 put_disk(fake_gendisk[n]);
833 fake_gendisk[n] = NULL;
834 }
835
836 err = 0;
837 platform_device_unregister(&ubd_dev->pdev);
838out:
839 mutex_unlock(&ubd_lock);
840 return err;
841}
842
843
844
845
/* mconsole device description that routes "ubd" commands to this driver. */
static struct mc_device ubd_mc = {
	.list		= LIST_HEAD_INIT(ubd_mc.list),
	.name 		= "ubd",
	.config 	= ubd_config,
	.get_config 	= ubd_get_config,
	.id		= ubd_id,
	.remove 	= ubd_remove,
};
854
/* Initcall: register the ubd device with the mconsole. */
static int __init ubd_mc_init(void)
{
	mconsole_register_dev(&ubd_mc);
	return 0;
}

__initcall(ubd_mc_init);
862
863static int __init ubd0_init(void)
864{
865 struct ubd *ubd_dev = &ubd_devs[0];
866
867 mutex_lock(&ubd_lock);
868 if(ubd_dev->file == NULL)
869 ubd_dev->file = "root_fs";
870 mutex_unlock(&ubd_lock);
871
872 return 0;
873}
874
875__initcall(ubd0_init);
876
877
/* Platform driver whose name matches the per-unit platform devices. */
static struct platform_driver ubd_driver = {
	.driver = {
		.name  = DRIVER_NAME,
	},
};
883
884static int __init ubd_init(void)
885{
886 char *error;
887 int i, err;
888
889 if (register_blkdev(MAJOR_NR, "ubd"))
890 return -1;
891
892 if (fake_major != MAJOR_NR) {
893 char name[sizeof("ubd_nnn\0")];
894
895 snprintf(name, sizeof(name), "ubd_%d", fake_major);
896 if (register_blkdev(fake_major, "ubd"))
897 return -1;
898 }
899 platform_driver_register(&ubd_driver);
900 mutex_lock(&ubd_lock);
901 for (i = 0; i < MAX_DEV; i++){
902 err = ubd_add(i, &error);
903 if(err)
904 printk(KERN_ERR "Failed to initialize ubd device %d :"
905 "%s\n", i, error);
906 }
907 mutex_unlock(&ubd_lock);
908 return 0;
909}
910
911late_initcall(ubd_init);
912
913static int __init ubd_driver_init(void){
914 unsigned long stack;
915 int err;
916
917
918 if(global_openflags.s){
919 printk(KERN_INFO "ubd: Synchronous mode\n");
920
921
922 }
923 stack = alloc_stack(0, 0);
924 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
925 &thread_fd);
926 if(io_pid < 0){
927 printk(KERN_ERR
928 "ubd : Failed to start I/O thread (errno = %d) - "
929 "falling back to synchronous I/O\n", -io_pid);
930 io_pid = -1;
931 return 0;
932 }
933 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
934 IRQF_DISABLED, "ubd", ubd_devs);
935 if(err != 0)
936 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
937 return 0;
938}
939
940device_initcall(ubd_driver_init);
941
942static int ubd_open(struct inode *inode, struct file *filp)
943{
944 struct gendisk *disk = inode->i_bdev->bd_disk;
945 struct ubd *ubd_dev = disk->private_data;
946 int err = 0;
947
948 if(ubd_dev->count == 0){
949 err = ubd_open_dev(ubd_dev);
950 if(err){
951 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
952 disk->disk_name, ubd_dev->file, -err);
953 goto out;
954 }
955 }
956 ubd_dev->count++;
957 set_disk_ro(disk, !ubd_dev->openflags.w);
958
959
960
961
962
963
964
965 out:
966 return err;
967}
968
969static int ubd_release(struct inode * inode, struct file * file)
970{
971 struct gendisk *disk = inode->i_bdev->bd_disk;
972 struct ubd *ubd_dev = disk->private_data;
973
974 if(--ubd_dev->count == 0)
975 ubd_close_dev(ubd_dev);
976 return 0;
977}
978
/*
 * Mark the sectors covered by a write in the in-memory COW bitmap and work
 * out which bitmap words have to be written back to the COW file.
 *
 * io_offset:     byte offset of the write within the device
 * length:        byte length of the write
 * cow_mask:      if non-NULL, bit i is set for every sector i of the write
 * cow_offset:    out - file offset of the two bitmap words to write back;
 *                left untouched when no bitmap bit changed
 * bitmap:        in-memory bitmap, updated in place
 * bitmap_offset: byte offset of the bitmap within the COW file
 * bitmap_words:  out - the two words to store at *cow_offset
 * bitmap_len:    length of the bitmap in bytes
 */
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
			  __u64 *cow_offset, unsigned long *bitmap,
			  __u64 bitmap_offset, unsigned long *bitmap_words,
			  __u64 bitmap_len)
{
	__u64 sector = io_offset >> 9;
	int i, update_bitmap = 0;

	for(i = 0; i < length >> 9; i++){
		if(cow_mask != NULL)
			ubd_set_bit(i, (unsigned char *) cow_mask);
		/* Sector already marked: no bitmap change needed for it. */
		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
			continue;

		update_bitmap = 1;
		ubd_set_bit(sector + i, (unsigned char *) bitmap);
	}

	if(!update_bitmap)
		return;

	/* Index of the bitmap word that holds the first sector's bit. */
	*cow_offset = sector / (sizeof(unsigned long) * 8);

	/*
	 * Two consecutive words are copied below, so when the first sector
	 * falls in the last word of the bitmap, step back by one word to
	 * keep bitmap[*cow_offset + 1] inside the bitmap.
	 */
	if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
			   sizeof(unsigned long) - 1))
		(*cow_offset)--;

	bitmap_words[0] = bitmap[*cow_offset];
	bitmap_words[1] = bitmap[*cow_offset + 1];

	/* Convert the word index into a byte offset within the COW file. */
	*cow_offset *= sizeof(unsigned long);
	*cow_offset += bitmap_offset;
}
1017
1018static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
1019 __u64 bitmap_offset, __u64 bitmap_len)
1020{
1021 __u64 sector = req->offset >> 9;
1022 int i;
1023
1024 if(req->length > (sizeof(req->sector_mask) * 8) << 9)
1025 panic("Operation too long");
1026
1027 if(req->op == UBD_READ) {
1028 for(i = 0; i < req->length >> 9; i++){
1029 if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1030 ubd_set_bit(i, (unsigned char *)
1031 &req->sector_mask);
1032 }
1033 }
1034 else cowify_bitmap(req->offset, req->length, &req->sector_mask,
1035 &req->cow_offset, bitmap, bitmap_offset,
1036 req->bitmap_words, bitmap_len);
1037}
1038
1039
1040static void prepare_request(struct request *req, struct io_thread_req *io_req,
1041 unsigned long long offset, int page_offset,
1042 int len, struct page *page)
1043{
1044 struct gendisk *disk = req->rq_disk;
1045 struct ubd *ubd_dev = disk->private_data;
1046
1047 io_req->req = req;
1048 io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
1049 ubd_dev->fd;
1050 io_req->fds[1] = ubd_dev->fd;
1051 io_req->cow_offset = -1;
1052 io_req->offset = offset;
1053 io_req->length = len;
1054 io_req->error = 0;
1055 io_req->sector_mask = 0;
1056
1057 io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
1058 io_req->offsets[0] = 0;
1059 io_req->offsets[1] = ubd_dev->cow.data_offset;
1060 io_req->buffer = page_address(page) + page_offset;
1061 io_req->sectorsize = 1 << 9;
1062
1063 if(ubd_dev->cow.file != NULL)
1064 cowify_req(io_req, ubd_dev->cow.bitmap,
1065 ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1066
1067}
1068
1069
/*
 * Request function of the ubd queues: splits each block request into its
 * scatterlist segments and hands every segment to the I/O thread as one
 * io_thread_req pointer written to thread_fd.
 *
 * Progress through the current request is kept in the device (request,
 * start_sg, end_sg), so that submission can stop when memory or pipe space
 * runs out and resume later from ubd_handler() via the restart list.
 */
static void do_ubd_request(struct request_queue *q)
{
	struct io_thread_req *io_req;
	struct request *req;
	int n, last_sectors;

	while(1){
		struct ubd *dev = q->queuedata;
		/* end_sg == 0 means no request is partially submitted. */
		if(dev->end_sg == 0){
			struct request *req = elv_next_request(q);
			if(req == NULL)
				return;

			dev->request = req;
			blkdev_dequeue_request(req);
			dev->start_sg = 0;
			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
		}

		req = dev->request;
		last_sectors = 0;
		while(dev->start_sg < dev->end_sg){
			struct scatterlist *sg = &dev->sg[dev->start_sg];

			/*
			 * Advance past the segment submitted in the previous
			 * iteration; this adds 0 for the first segment
			 * handled after entering or re-entering the function.
			 */
			req->sector += last_sectors;
			io_req = kmalloc(sizeof(struct io_thread_req),
					 GFP_ATOMIC);
			if(io_req == NULL){
				/* Out of memory: retry from ubd_handler(). */
				if(list_empty(&dev->restart))
					list_add(&dev->restart, &restart);
				return;
			}
			prepare_request(req, io_req,
					(unsigned long long) req->sector << 9,
					sg->offset, sg->length, sg_page(sg));

			last_sectors = sg->length >> 9;
			n = os_write_file(thread_fd, &io_req,
					  sizeof(struct io_thread_req *));
			if(n != sizeof(struct io_thread_req *)){
				/*
				 * -EAGAIN queues the device for a restart; any
				 * other error is only logged.
				 * NOTE(review): in the latter case the request
				 * is neither completed nor retried - confirm.
				 */
				if(n != -EAGAIN)
					printk("write to io thread failed, "
					       "errno = %d\n", -n);
				else if(list_empty(&dev->restart))
					list_add(&dev->restart, &restart);
				kfree(io_req);
				return;
			}

			dev->start_sg++;
		}
		/* All segments submitted; fetch a new request next time. */
		dev->end_sg = 0;
		dev->request = NULL;
	}
}
1125
1126static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1127{
1128 struct ubd *ubd_dev = bdev->bd_disk->private_data;
1129
1130 geo->heads = 128;
1131 geo->sectors = 32;
1132 geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1133 return 0;
1134}
1135
1136static int ubd_ioctl(struct inode * inode, struct file * file,
1137 unsigned int cmd, unsigned long arg)
1138{
1139 struct ubd *ubd_dev = inode->i_bdev->bd_disk->private_data;
1140 struct hd_driveid ubd_id = {
1141 .cyls = 0,
1142 .heads = 128,
1143 .sectors = 32,
1144 };
1145
1146 switch (cmd) {
1147 struct cdrom_volctrl volume;
1148 case HDIO_GET_IDENTITY:
1149 ubd_id.cyls = ubd_dev->size / (128 * 32 * 512);
1150 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1151 sizeof(ubd_id)))
1152 return -EFAULT;
1153 return 0;
1154
1155 case CDROMVOLREAD:
1156 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1157 return -EFAULT;
1158 volume.channel0 = 255;
1159 volume.channel1 = 255;
1160 volume.channel2 = 255;
1161 volume.channel3 = 255;
1162 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1163 return -EFAULT;
1164 return 0;
1165 }
1166 return -EINVAL;
1167}
1168
1169static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
1170{
1171 struct uml_stat buf1, buf2;
1172 int err;
1173
1174 if(from_cmdline == NULL)
1175 return 0;
1176 if(!strcmp(from_cmdline, from_cow))
1177 return 0;
1178
1179 err = os_stat_file(from_cmdline, &buf1);
1180 if(err < 0){
1181 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1182 return 0;
1183 }
1184 err = os_stat_file(from_cow, &buf2);
1185 if(err < 0){
1186 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1187 return 1;
1188 }
1189 if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1190 return 0;
1191
1192 printk("Backing file mismatch - \"%s\" requested,\n"
1193 "\"%s\" specified in COW header of \"%s\"\n",
1194 from_cmdline, from_cow, cow);
1195 return 1;
1196}
1197
1198static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1199{
1200 unsigned long modtime;
1201 unsigned long long actual;
1202 int err;
1203
1204 err = os_file_modtime(file, &modtime);
1205 if(err < 0){
1206 printk("Failed to get modification time of backing file "
1207 "\"%s\", err = %d\n", file, -err);
1208 return err;
1209 }
1210
1211 err = os_file_size(file, &actual);
1212 if(err < 0){
1213 printk("Failed to get size of backing file \"%s\", "
1214 "err = %d\n", file, -err);
1215 return err;
1216 }
1217
1218 if(actual != size){
1219
1220
1221 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1222 "file\n", (unsigned long long) size, actual);
1223 return -EINVAL;
1224 }
1225 if(modtime != mtime){
1226 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1227 "file\n", mtime, modtime);
1228 return -EINVAL;
1229 }
1230 return 0;
1231}
1232
/*
 * Read len bytes of COW bitmap from file offset "offset" of fd into buf.
 * Returns 0 on success or the negative error from the seek or the read.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1247
1248int open_ubd_file(char *file, struct openflags *openflags, int shared,
1249 char **backing_file_out, int *bitmap_offset_out,
1250 unsigned long *bitmap_len_out, int *data_offset_out,
1251 int *create_cow_out)
1252{
1253 time_t mtime;
1254 unsigned long long size;
1255 __u32 version, align;
1256 char *backing_file;
1257 int fd, err, sectorsize, asked_switch, mode = 0644;
1258
1259 fd = os_open_file(file, *openflags, mode);
1260 if (fd < 0) {
1261 if ((fd == -ENOENT) && (create_cow_out != NULL))
1262 *create_cow_out = 1;
1263 if (!openflags->w ||
1264 ((fd != -EROFS) && (fd != -EACCES)))
1265 return fd;
1266 openflags->w = 0;
1267 fd = os_open_file(file, *openflags, mode);
1268 if (fd < 0)
1269 return fd;
1270 }
1271
1272 if(shared)
1273 printk("Not locking \"%s\" on the host\n", file);
1274 else {
1275 err = os_lock_file(fd, openflags->w);
1276 if(err < 0){
1277 printk("Failed to lock '%s', err = %d\n", file, -err);
1278 goto out_close;
1279 }
1280 }
1281
1282
1283 if(backing_file_out == NULL)
1284 return fd;
1285
1286 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1287 &size, §orsize, &align, bitmap_offset_out);
1288 if(err && (*backing_file_out != NULL)){
1289 printk("Failed to read COW header from COW file \"%s\", "
1290 "errno = %d\n", file, -err);
1291 goto out_close;
1292 }
1293 if(err)
1294 return fd;
1295
1296 asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
1297
1298
1299 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
1300 printk("Switching backing file to '%s'\n", *backing_file_out);
1301 err = write_cow_header(file, fd, *backing_file_out,
1302 sectorsize, align, &size);
1303 if (err) {
1304 printk("Switch failed, errno = %d\n", -err);
1305 goto out_close;
1306 }
1307 } else {
1308 *backing_file_out = backing_file;
1309 err = backing_file_mismatch(*backing_file_out, size, mtime);
1310 if (err)
1311 goto out_close;
1312 }
1313
1314 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1315 bitmap_len_out, data_offset_out);
1316
1317 return fd;
1318 out_close:
1319 os_close_file(fd);
1320 return err;
1321}
1322
1323int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1324 int sectorsize, int alignment, int *bitmap_offset_out,
1325 unsigned long *bitmap_len_out, int *data_offset_out)
1326{
1327 int err, fd;
1328
1329 flags.c = 1;
1330 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
1331 if(fd < 0){
1332 err = fd;
1333 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1334 -err);
1335 goto out;
1336 }
1337
1338 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1339 bitmap_offset_out, bitmap_len_out,
1340 data_offset_out);
1341 if(!err)
1342 return fd;
1343 os_close_file(fd);
1344 out:
1345 return err;
1346}
1347
1348static int update_bitmap(struct io_thread_req *req)
1349{
1350 int n;
1351
1352 if(req->cow_offset == -1)
1353 return 0;
1354
1355 n = os_seek_file(req->fds[1], req->cow_offset);
1356 if(n < 0){
1357 printk("do_io - bitmap lseek failed : err = %d\n", -n);
1358 return 1;
1359 }
1360
1361 n = os_write_file(req->fds[1], &req->bitmap_words,
1362 sizeof(req->bitmap_words));
1363 if(n != sizeof(req->bitmap_words)){
1364 printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
1365 req->fds[1]);
1366 return 1;
1367 }
1368
1369 return 0;
1370}
1371
/*
 * Carry out one io_thread_req.
 *
 * The request is processed in runs of consecutive sectors whose
 * sector_mask bits are equal; the bit value selects which descriptor and
 * base offset (fds[bit], offsets[bit]) the run uses.  After all runs have
 * been transferred, the COW bitmap update (if any) is written.
 * req->error is set to 1 on any failure and otherwise to the result of
 * update_bitmap().
 */
void do_io(struct io_thread_req *req)
{
	char *buf;
	unsigned long len;
	int n, nsectors, start, end, bit;
	int err;
	__u64 off;

	nsectors = req->length / req->sectorsize;
	start = 0;
	do {
		/* Find the end of the run of sectors sharing this bit value. */
		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
		end = start;
		while((end < nsectors) &&
		      (ubd_test_bit(end, (unsigned char *)
				    &req->sector_mask) == bit))
			end++;

		off = req->offset + req->offsets[bit] +
			start * req->sectorsize;
		len = (end - start) * req->sectorsize;
		buf = &req->buffer[start * req->sectorsize];

		err = os_seek_file(req->fds[bit], off);
		if(err < 0){
			printk("do_io - lseek failed : err = %d\n", -err);
			req->error = 1;
			return;
		}
		if(req->op == UBD_READ){
			n = 0;
			/*
			 * Keep reading until the run is complete or a read
			 * returns 0; each pass skips what was read so far.
			 */
			do {
				buf = &buf[n];
				len -= n;
				n = os_read_file(req->fds[bit], buf, len);
				if (n < 0) {
					printk("do_io - read failed, err = %d "
					       "fd = %d\n", -n, req->fds[bit]);
					req->error = 1;
					return;
				}
			} while((n < len) && (n != 0));
			/* Zero-fill whatever the file could not supply. */
			if (n < len) memset(&buf[n], 0, len - n);
		} else {
			n = os_write_file(req->fds[bit], buf, len);
			if(n != len){
				printk("do_io - write failed err = %d "
				       "fd = %d\n", -n, req->fds[bit]);
				req->error = 1;
				return;
			}
		}

		start = end;
	} while(start < nsectors);

	req->error = update_bitmap(req);
}
1430
1431
1432
1433
/*
 * Descriptor on which the I/O thread receives request pointers and sends
 * them back once they are done; -1 until it is set up.
 */
int kernel_fd = -1;

/* Number of requests the I/O thread has processed. */
static int io_count = 0;

/*
 * Body of the I/O thread: forever read a request pointer from kernel_fd,
 * perform the I/O with do_io() and write the pointer back.  Failed or short
 * reads are logged and skipped.  Never returns in practice.
 */
int io_thread(void *arg)
{
	struct io_thread_req *req;
	int n;

	ignore_sigwinch_sig();
	for (;;) {
		n = os_read_file(kernel_fd, &req,
				 sizeof(struct io_thread_req *));
		if (n < 0) {
			printk("io_thread - read failed, fd = %d, "
			       "err = %d\n", kernel_fd, -n);
			continue;
		}
		if (n != sizeof(struct io_thread_req *)) {
			printk("io_thread - short read, fd = %d, "
			       "length = %d\n", kernel_fd, n);
			continue;
		}

		io_count++;
		do_io(req);

		n = os_write_file(kernel_fd, &req,
				  sizeof(struct io_thread_req *));
		if (n != sizeof(struct io_thread_req *))
			printk("io_thread - write failed, fd = %d, err = %d\n",
			       kernel_fd, -n);
	}

	return 0;
}
1469