linux/arch/s390/mm/gup.c
/*
 *  Lockless get_user_pages_fast for s390
 *
 *  Copyright IBM Corp. 2010
 *  Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>

/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long mask;
        pte_t *ptep, pte;
        struct page *page;

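        /*
         * Bits that must be clear in a pte for the fast path: invalid and
         * special ptes always bail out, and for a write the pte must not
         * be protected (_PAGE_RO).
         */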
        mask = (write ? _PAGE_RO : 0) | _PAGE_INVALID | _PAGE_SPECIAL;

        ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
        do {
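                /*
                 * Snapshot the pte once; barrier() keeps gcc from reloading
                 * *ptep so that all checks below see the same value.
                 */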
                pte = *ptep;
                barrier();
                if ((pte_val(pte) & mask) != 0)
                        return 0;
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
                page = pte_page(pte);
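                /*
                 * Take a speculative reference on the page and then make
                 * sure the pte did not change under us; if it did, drop
                 * the reference and let the slow path handle the address.
                 */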
                if (!page_cache_get_speculative(page))
                        return 0;
                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                        put_page(page);
                        return 0;
                }
                pages[*nr] = page;
                (*nr)++;

        } while (ptep++, addr += PAGE_SIZE, addr != end);

        return 1;
}

static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long mask, result;
        struct page *head, *page;
        int refs;

        result = write ? 0 : _SEGMENT_ENTRY_RO;
        mask = result | _SEGMENT_ENTRY_INV;
        if ((pmd_val(pmd) & mask) != result)
                return 0;
        VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));

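        /*
         * The huge pmd maps a contiguous block of pages: record every page
         * in the result array first, then take all the references on the
         * head page in a single step below.
         */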
        refs = 0;
        head = pmd_page(pmd);
        page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
                (*nr)++;
                page++;
                refs++;
        } while (addr += PAGE_SIZE, addr != end);

        if (!page_cache_add_speculative(head, refs)) {
                *nr -= refs;
                return 0;
        }

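        /*
         * Re-check the pmd: if it changed while the references were taken,
         * undo the pins and force the slow path.
         */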
        if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
                *nr -= refs;
                while (refs--)
                        put_page(head);
                return 0;
        }

        return 1;
}


static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long next;
        pmd_t *pmdp, pmd;

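        /*
         * The page table levels may be folded: on 64 bit a real
         * region-third-table entry is dereferenced to reach the segment
         * (pmd) table, otherwise the pud entry is reused as the pmd entry.
         */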
        pmdp = (pmd_t *) pudp;
#ifdef CONFIG_64BIT
        if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
                pmdp = (pmd_t *) pud_deref(pud);
        pmdp += pmd_index(addr);
#endif
        do {
                pmd = *pmdp;
                barrier();
                next = pmd_addr_end(addr, end);
                if (pmd_none(pmd))
                        return 0;
                if (unlikely(pmd_huge(pmd))) {
                        if (!gup_huge_pmd(pmdp, pmd, addr, next,
                                          write, pages, nr))
                                return 0;
                } else if (!gup_pte_range(pmdp, pmd, addr, next,
                                          write, pages, nr))
                        return 0;
        } while (pmdp++, addr = next, addr != end);

        return 1;
}

static inline int gup_pud_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long next;
        pud_t *pudp, pud;

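        /*
         * Same folding as in gup_pmd_range, one level up: a region-second
         * entry points to a separate region-third (pud) table.
         */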
        pudp = (pud_t *) pgdp;
#ifdef CONFIG_64BIT
        if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
                pudp = (pud_t *) pgd_deref(pgd);
        pudp += pud_index(addr);
#endif
        do {
                pud = *pudp;
                barrier();
                next = pud_addr_end(addr, end);
                if (pud_none(pud))
                        return 0;
                if (!gup_pmd_range(pudp, pud, addr, next, write, pages, nr))
                        return 0;
        } while (pudp++, addr = next, addr != end);

        return 1;
}

/**
 * get_user_pages_fast() - pin user pages in memory
 * @start:      starting user address
 * @nr_pages:   number of pages from start to pin
 * @write:      whether pages will be written to
 * @pages:      array that receives pointers to the pages pinned.
 *              Should be at least nr_pages long.
 *
 * Attempt to pin user pages in memory without taking mm->mmap_sem.
 * If not successful, it will fall back to taking the lock and
 * calling get_user_pages().
 *
 * Returns number of pages pinned. This may be fewer than the number
 * requested. If nr_pages is 0 or negative, returns 0. If no pages
 * were pinned, returns -errno.
 */
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr, len, end;
        unsigned long next;
        pgd_t *pgdp, pgd;
        int nr = 0;

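        /*
         * Round start down to a page boundary and work out the end of the
         * range; a range that wraps cannot be handled here and goes
         * straight to the slow path.
         */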
        start &= PAGE_MASK;
        addr = start;
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;
        if (end < start)
                goto slow_irqon;

        /*
         * local_irq_disable() doesn't prevent pagetable teardown, but does
         * prevent the pagetables from being freed on s390.
         *
         * So long as we atomically load page table pointers versus teardown,
         * we can follow the address down to the page and take a ref on it.
         */
        local_irq_disable();
        pgdp = pgd_offset(mm, addr);
        do {
                pgd = *pgdp;
                barrier();
                next = pgd_addr_end(addr, end);
                if (pgd_none(pgd))
                        goto slow;
                if (!gup_pud_range(pgdp, pgd, addr, next, write, pages, &nr))
                        goto slow;
        } while (pgdp++, addr = next, addr != end);
        local_irq_enable();

        VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
        return nr;

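        /*
         * Slow path: the lockless walk could not pin everything. Keep the
         * pages pinned so far and hand the remainder to get_user_pages()
         * under mmap_sem.
         */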
        {
                int ret;
slow:
                local_irq_enable();
slow_irqon:
                /* Try to get the remaining pages with get_user_pages */
                start += nr << PAGE_SHIFT;
                pages += nr;

                down_read(&mm->mmap_sem);
                ret = get_user_pages(current, mm, start,
                        (end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
                up_read(&mm->mmap_sem);

                /*
                 * Careful with the return value: if the fast path pinned
                 * some pages, report those even if get_user_pages() failed.
                 */
                if (nr > 0) {
                        if (ret < 0)
                                ret = nr;
                        else
                                ret += nr;
                }

                return ret;
        }
}
