1/*P:400 This contains run_guest() which actually calls into the Host<->Guest 2 * Switcher and analyzes the return, such as determining if the Guest wants the 3 * Host to do something. This file also contains useful helper routines. :*/ 4#include <linux/module.h> 5#include <linux/stringify.h> 6#include <linux/stddef.h> 7#include <linux/io.h> 8#include <linux/mm.h> 9#include <linux/vmalloc.h> 10#include <linux/cpu.h> 11#include <linux/freezer.h> 12#include <linux/highmem.h> 13#include <asm/paravirt.h> 14#include <asm/pgtable.h> 15#include <asm/uaccess.h> 16#include <asm/poll.h> 17#include <asm/asm-offsets.h> 18#include "lg.h" 19 20 21static struct vm_struct *switcher_vma; 22static struct page **switcher_page; 23 24/* This One Big lock protects all inter-guest data structures. */ 25DEFINE_MUTEX(lguest_lock); 26 27/*H:010 We need to set up the Switcher at a high virtual address. Remember the 28 * Switcher is a few hundred bytes of assembler code which actually changes the 29 * CPU to run the Guest, and then changes back to the Host when a trap or 30 * interrupt happens. 31 * 32 * The Switcher code must be at the same virtual address in the Guest as the 33 * Host since it will be running as the switchover occurs. 34 * 35 * Trying to map memory at a particular address is an unusual thing to do, so 36 * it's not a simple one-liner. */ 37static __init int map_switcher(void) 38{ 39 int i, err; 40 struct page **pagep; 41 42 /* 43 * Map the Switcher in to high memory. 44 * 45 * It turns out that if we choose the address 0xFFC00000 (4MB under the 46 * top virtual address), it makes setting up the page tables really 47 * easy. 48 */ 49 50 /* We allocate an array of struct page pointers. map_vm_area() wants 51 * this, rather than just an array of pages. */ 52 switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, 53 GFP_KERNEL); 54 if (!switcher_page) { 55 err = -ENOMEM; 56 goto out; 57 } 58 59 /* Now we actually allocate the pages. The Guest will see these pages, 60 * so we make sure they're zeroed. */ 61 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { 62 unsigned long addr = get_zeroed_page(GFP_KERNEL); 63 if (!addr) { 64 err = -ENOMEM; 65 goto free_some_pages; 66 } 67 switcher_page[i] = virt_to_page(addr); 68 } 69 70 /* First we check that the Switcher won't overlap the fixmap area at 71 * the top of memory. It's currently nowhere near, but it could have 72 * very strange effects if it ever happened. */ 73 if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ 74 err = -ENOMEM; 75 printk("lguest: mapping switcher would thwack fixmap\n"); 76 goto free_pages; 77 } 78 79 /* Now we reserve the "virtual memory area" we want: 0xFFC00000 80 * (SWITCHER_ADDR). We might not get it in theory, but in practice 81 * it's worked so far. The end address needs +1 because __get_vm_area 82 * allocates an extra guard page, so we need space for that. */ 83 switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, 84 VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR 85 + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); 86 if (!switcher_vma) { 87 err = -ENOMEM; 88 printk("lguest: could not map switcher pages high\n"); 89 goto free_pages; 90 } 91 92 /* This code actually sets up the pages we've allocated to appear at 93 * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the 94 * kind of pages we're mapping (kernel pages), and a pointer to our 95 * array of struct pages. It increments that pointer, but we don't 96 * care. */ 97 pagep = switcher_page; 98 err = map_vm_area(switcher_vma, PAGE_KERNEL, &pagep); 99 if (err) { 100 printk("lguest: map_vm_area failed: %i\n", err); 101 goto free_vma; 102 } 103 104 /* Now the Switcher is mapped at the right address, we can't fail! 105 * Copy in the compiled-in Switcher code (from <arch>_switcher.S). */ 106 memcpy(switcher_vma->addr, start_switcher_text, 107 end_switcher_text - start_switcher_text); 108 109 printk(KERN_INFO "lguest: mapped switcher at %p\n", 110 switcher_vma->addr); 111 /* And we succeeded... */ 112 return 0; 113 114free_vma: 115 vunmap(switcher_vma->addr); 116free_pages: 117 i = TOTAL_SWITCHER_PAGES; 118free_some_pages: 119 for (--i; i >= 0; i--) 120 __free_pages(switcher_page[i], 0); 121 kfree(switcher_page); 122out: 123 return err; 124} 125/*:*/ 126 127/* Cleaning up the mapping when the module is unloaded is almost... 128 * too easy. */ 129static void unmap_switcher(void) 130{ 131 unsigned int i; 132 133 /* vunmap() undoes *both* map_vm_area() and __get_vm_area(). */ 134 vunmap(switcher_vma->addr); 135 /* Now we just need to free the pages we copied the switcher into */ 136 for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) 137 __free_pages(switcher_page[i], 0); 138 kfree(switcher_page); 139} 140 141/*H:032 142 * Dealing With Guest Memory. 143 * 144 * Before we go too much further into the Host, we need to grok the routines 145 * we use to deal with Guest memory. 146 * 147 * When the Guest gives us (what it thinks is) a physical address, we can use 148 * the normal copy_from_user() & copy_to_user() on the corresponding place in 149 * the memory region allocated by the Launcher. 150 * 151 * But we can't trust the Guest: it might be trying to access the Launcher 152 * code. We have to check that the range is below the pfn_limit the Launcher 153 * gave us. We have to make sure that addr + len doesn't give us a false 154 * positive by overflowing, too. */ 155bool lguest_address_ok(const struct lguest *lg, 156 unsigned long addr, unsigned long len) 157{ 158 return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); 159} 160 161/* This routine copies memory from the Guest. Here we can see how useful the 162 * kill_lguest() routine we met in the Launcher can be: we return a random 163 * value (all zeroes) instead of needing to return an error. */ 164void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) 165{ 166 if (!lguest_address_ok(cpu->lg, addr, bytes) 167 || copy_from_user(b, cpu->lg->mem_base + addr, bytes) != 0) { 168 /* copy_from_user should do this, but as we rely on it... */ 169 memset(b, 0, bytes); 170 kill_guest(cpu, "bad read address %#lx len %u", addr, bytes); 171 } 172} 173 174/* This is the write (copy into Guest) version. */ 175void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, 176 unsigned bytes) 177{ 178 if (!lguest_address_ok(cpu->lg, addr, bytes) 179 || copy_to_user(cpu->lg->mem_base + addr, b, bytes) != 0) 180 kill_guest(cpu, "bad write address %#lx len %u", addr, bytes); 181} 182/*:*/ 183 184/*H:030 Let's jump straight to the the main loop which runs the Guest. 185 * Remember, this is called by the Launcher reading /dev/lguest, and we keep 186 * going around and around until something interesting happens. */ 187int run_guest(struct lg_cpu *cpu, unsigned long __user *user) 188{ 189 /* We stop running once the Guest is dead. */ 190 while (!cpu->lg->dead) { 191 /* First we run any hypercalls the Guest wants done. */ 192 if (cpu->hcall) 193 do_hypercalls(cpu); 194 195 /* It's possible the Guest did a NOTIFY hypercall to the 196 * Launcher, in which case we return from the read() now. */ 197 if (cpu->pending_notify) { 198 if (put_user(cpu->pending_notify, user)) 199 return -EFAULT; 200 return sizeof(cpu->pending_notify); 201 } 202 203 /* Check for signals */ 204 if (signal_pending(current)) 205 return -ERESTARTSYS; 206 207 /* If Waker set break_out, return to Launcher. */ 208 if (cpu->break_out) 209 return -EAGAIN; 210 211 /* Check if there are any interrupts which can be delivered now: 212 * if so, this sets up the hander to be executed when we next 213 * run the Guest. */ 214 maybe_do_interrupt(cpu); 215 216 /* All long-lived kernel loops need to check with this horrible 217 * thing called the freezer. If the Host is trying to suspend, 218 * it stops us. */ 219 try_to_freeze(); 220 221 /* Just make absolutely sure the Guest is still alive. One of 222 * those hypercalls could have been fatal, for example. */ 223 if (cpu->lg->dead) 224 break; 225 226 /* If the Guest asked to be stopped, we sleep. The Guest's 227 * clock timer or LHREQ_BREAK from the Waker will wake us. */ 228 if (cpu->halted) { 229 set_current_state(TASK_INTERRUPTIBLE); 230 schedule(); 231 continue; 232 } 233 234 /* OK, now we're ready to jump into the Guest. First we put up 235 * the "Do Not Disturb" sign: */ 236 local_irq_disable(); 237 238 /* Actually run the Guest until something happens. */ 239 lguest_arch_run_guest(cpu); 240 241 /* Now we're ready to be interrupted or moved to other CPUs */ 242 local_irq_enable(); 243 244 /* Now we deal with whatever happened to the Guest. */ 245 lguest_arch_handle_trap(cpu); 246 } 247 248 /* Special case: Guest is 'dead' but wants a reboot. */ 249 if (cpu->lg->dead == ERR_PTR(-ERESTART)) 250 return -ERESTART; 251 252 /* The Guest is dead => "No such file or directory" */ 253 return -ENOENT; 254} 255 256/*H:000 257 * Welcome to the Host! 258 * 259 * By this point your brain has been tickled by the Guest code and numbed by 260 * the Launcher code; prepare for it to be stretched by the Host code. This is 261 * the heart. Let's begin at the initialization routine for the Host's lg 262 * module. 263 */ 264static int __init init(void) 265{ 266 int err; 267 268 /* Lguest can't run under Xen, VMI or itself. It does Tricky Stuff. */ 269 if (paravirt_enabled()) { 270 printk("lguest is afraid of being a guest\n"); 271 return -EPERM; 272 } 273 274 /* First we put the Switcher up in very high virtual memory. */ 275 err = map_switcher(); 276 if (err) 277 goto out; 278 279 /* Now we set up the pagetable implementation for the Guests. */ 280 err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES); 281 if (err) 282 goto unmap; 283 284 /* We might need to reserve an interrupt vector. */ 285 err = init_interrupts(); 286 if (err) 287 goto free_pgtables; 288 289 /* /dev/lguest needs to be registered. */ 290 err = lguest_device_init(); 291 if (err) 292 goto free_interrupts; 293 294 /* Finally we do some architecture-specific setup. */ 295 lguest_arch_host_init(); 296 297 /* All good! */ 298 return 0; 299 300free_interrupts: 301 free_interrupts(); 302free_pgtables: 303 free_pagetables(); 304unmap: 305 unmap_switcher(); 306out: 307 return err; 308} 309 310/* Cleaning up is just the same code, backwards. With a little French. */ 311static void __exit fini(void) 312{ 313 lguest_device_remove(); 314 free_interrupts(); 315 free_pagetables(); 316 unmap_switcher(); 317 318 lguest_arch_host_fini(); 319} 320/*:*/ 321 322/* The Host side of lguest can be a module. This is a nice way for people to 323 * play with it. */ 324module_init(init); 325module_exit(fini); 326MODULE_LICENSE("GPL"); 327MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); 328