/*
 *  arch/s390/mm/pgtable.c
 *
 *    Copyright IBM Corp. 2007
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

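/*
 * Descriptive note (derived from the code below): a region/segment
 * table spans 1 << ALLOC_ORDER pages. A single 4K page holds
 * TABLES_PER_PAGE page table fragments, tracked by the low page->flags
 * bits covered by FRAG_MASK; SECOND_HALVES marks the bits that belong
 * to the second fragment of each pair, used for the shadow page table
 * (noexec) or the page status table entries (pgstes). With pgstes
 * enabled each page table is followed by an equally sized pgste block,
 * which clear_table_pgstes() initializes to zero.
 */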
#ifndef CONFIG_64BIT
#define ALLOC_ORDER     1
#define TABLES_PER_PAGE 4
#define FRAG_MASK       15UL
#define SECOND_HALVES   10UL

void clear_table_pgstes(unsigned long *table)
{
        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
        memset(table + 256, 0, PAGE_SIZE/4);
        clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
        memset(table + 768, 0, PAGE_SIZE/4);
}

#else
#define ALLOC_ORDER     2
#define TABLES_PER_PAGE 2
#define FRAG_MASK       3UL
#define SECOND_HALVES   2UL

void clear_table_pgstes(unsigned long *table)
{
        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
        memset(table + 256, 0, PAGE_SIZE/2);
}

#endif

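/*
 * Allocate a region/segment table. For noexec mms a shadow table is
 * allocated as well; its physical address is stored in page->index so
 * that crst_table_free() and disable_noexec() can find it later. The
 * backing page is linked into mm->context.crst_list under
 * mm->page_table_lock.
 */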
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
{
        struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

        if (!page)
                return NULL;
        page->index = 0;
        if (noexec) {
                struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
                if (!shadow) {
                        __free_pages(page, ALLOC_ORDER);
                        return NULL;
                }
                page->index = page_to_phys(shadow);
        }
        spin_lock(&mm->page_table_lock);
        list_add(&page->lru, &mm->context.crst_list);
        spin_unlock(&mm->page_table_lock);
        return (unsigned long *) page_to_phys(page);
}

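/*
 * Unlink a region/segment table from mm->context.crst_list and free it,
 * together with its shadow table if one was allocated.
 */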
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
        unsigned long *shadow = get_shadow_table(table);
        struct page *page = virt_to_page(table);

        spin_lock(&mm->page_table_lock);
        list_del(&page->lru);
        spin_unlock(&mm->page_table_lock);
        if (shadow)
                free_pages((unsigned long) shadow, ALLOC_ORDER);
        free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
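/*
 * Grow the address space to at least "limit" by stacking additional
 * region tables on top of the current top level. Each pass through the
 * loop adds one level (region-third, then region-second), adjusts
 * asce_limit/asce_bits accordingly and finally reloads the address
 * space via update_mm().
 */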
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
        unsigned long *table, *pgd;
        unsigned long entry;

        BUG_ON(limit > (1UL << 53));
repeat:
        table = crst_table_alloc(mm, mm->context.noexec);
        if (!table)
                return -ENOMEM;
        spin_lock(&mm->page_table_lock);
        if (mm->context.asce_limit < limit) {
                pgd = (unsigned long *) mm->pgd;
                if (mm->context.asce_limit <= (1UL << 31)) {
                        entry = _REGION3_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                } else {
                        entry = _REGION2_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 53;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION2;
                }
                crst_table_init(table, entry);
                pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
                mm->pgd = (pgd_t *) table;
                table = NULL;
        }
        spin_unlock(&mm->page_table_lock);
        if (table)
                crst_table_free(mm, table);
        if (mm->context.asce_limit < limit)
                goto repeat;
        update_mm(mm, current);
        return 0;
}

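/*
 * Shrink the address space back down to "limit": flush the TLB for the
 * mm, then pop region tables off the top until asce_limit fits, freeing
 * each popped table, and reload the address space via update_mm().
 */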
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
        pgd_t *pgd;

        if (mm->context.asce_limit <= limit)
                return;
        __tlb_flush_mm(mm);
        while (mm->context.asce_limit > limit) {
                pgd = mm->pgd;
                switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
                case _REGION_ENTRY_TYPE_R2:
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                        break;
                case _REGION_ENTRY_TYPE_R3:
                        mm->context.asce_limit = 1UL << 31;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_SEGMENT;
                        break;
                default:
                        BUG();
                }
                mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
                crst_table_free(mm, (unsigned long *) pgd);
        }
        update_mm(mm, current);
}
#endif

/*
 * page table entry allocation/free routines.
 */
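/*
 * Descriptive note (derived from the code below): each 4K page on
 * mm->context.pgtable_list holds up to TABLES_PER_PAGE page table
 * fragments. The low page->flags bits (FRAG_MASK) record which
 * fragments are in use; with noexec or pgstes enabled a page table
 * takes two adjacent fragments (bits = 3UL), one for the ptes and one
 * for the shadow table resp. the pgstes. Fully used pages are moved to
 * the tail of the list, pages with free fragments stay at the front.
 */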
unsigned long *page_table_alloc(struct mm_struct *mm)
{
        struct page *page;
        unsigned long *table;
        unsigned long bits;

        bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
        spin_lock(&mm->page_table_lock);
        page = NULL;
        if (!list_empty(&mm->context.pgtable_list)) {
                page = list_first_entry(&mm->context.pgtable_list,
                                        struct page, lru);
                if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
                        page = NULL;
        }
        if (!page) {
                spin_unlock(&mm->page_table_lock);
                page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
                if (!page)
                        return NULL;
                pgtable_page_ctor(page);
                page->flags &= ~FRAG_MASK;
                table = (unsigned long *) page_to_phys(page);
                if (mm->context.pgstes)
                        clear_table_pgstes(table);
                else
                        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
                spin_lock(&mm->page_table_lock);
                list_add(&page->lru, &mm->context.pgtable_list);
        }
        table = (unsigned long *) page_to_phys(page);
        while (page->flags & bits) {
                table += 256;
                bits <<= 1;
        }
        page->flags |= bits;
        if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
                list_move_tail(&page->lru, &mm->context.pgtable_list);
        spin_unlock(&mm->page_table_lock);
        return table;
}

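/*
 * Release a page table fragment: clear its bits in page->flags and free
 * the backing 4K page once its last fragment is gone.
 */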
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
        struct page *page;
        unsigned long bits;

        bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
        bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        spin_lock(&mm->page_table_lock);
        page->flags ^= bits;
        if (page->flags & FRAG_MASK) {
                /* Page now has some free pgtable fragments. */
                list_move(&page->lru, &mm->context.pgtable_list);
                page = NULL;
        } else
                /* All fragments of the 4K page have been freed. */
                list_del(&page->lru);
        spin_unlock(&mm->page_table_lock);
        if (page) {
                pgtable_page_dtor(page);
                __free_page(page);
        }
}

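/*
 * Switch off the execute-protection emulation for an mm: free all
 * shadow region/segment tables, mark the shadow halves of the page
 * table pages as unused, clear mm->context.noexec and reload the mm
 * via update_mm().
 */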
void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{
        struct page *page;

        spin_lock(&mm->page_table_lock);
        /* Free shadow region and segment tables. */
        list_for_each_entry(page, &mm->context.crst_list, lru)
                if (page->index) {
                        free_pages((unsigned long) page->index, ALLOC_ORDER);
                        page->index = 0;
                }
        /* "Free" second halves of page tables. */
        list_for_each_entry(page, &mm->context.pgtable_list, lru)
                page->flags &= ~SECOND_HALVES;
        spin_unlock(&mm->page_table_lock);
        mm->context.noexec = 0;
        update_mm(mm, tsk);
}

/*
 * Switch on pgstes for the current userspace process (for KVM).
 */
int s390_enable_sie(void)
{
        struct task_struct *tsk = current;
        struct mm_struct *mm, *old_mm;

        /* Do we have pgstes? If yes, we are done. */
        if (tsk->mm->context.pgstes)
                return 0;

        /* Let's check if we are allowed to replace the mm. */
        task_lock(tsk);
        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
            tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
                task_unlock(tsk);
                return -EINVAL;
        }
        task_unlock(tsk);

        /* We copy the mm with pgstes enabled. */
        tsk->mm->context.pgstes = 1;
        mm = dup_mm(tsk);
        tsk->mm->context.pgstes = 0;
        if (!mm)
                return -ENOMEM;

        /* Now let's check again if somebody attached ptrace etc. */
        task_lock(tsk);
        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
            tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
                mmput(mm);
                task_unlock(tsk);
                return -EINVAL;
        }

        /* Ok, we are alone. No ptrace, no threads, etc. */
        old_mm = tsk->mm;
        tsk->mm = tsk->active_mm = mm;
        preempt_disable();
        update_mm(mm, tsk);
        cpu_set(smp_processor_id(), mm->cpu_vm_mask);
        preempt_enable();
        task_unlock(tsk);
        mmput(old_mm);
        return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);