linux/drivers/gpu/drm/i915/i915_gem.c
<<
>>
Prefs
   1/*
   2 * Copyright © 2008 Intel Corporation
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice (including the next
  12 * paragraph) shall be included in all copies or substantial portions of the
  13 * Software.
  14 *
  15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21 * IN THE SOFTWARE.
  22 *
  23 * Authors:
  24 *    Eric Anholt <eric@anholt.net>
  25 *
  26 */
  27
  28#include "drmP.h"
  29#include "drm.h"
  30#include "i915_drm.h"
  31#include "i915_drv.h"
  32#include "i915_trace.h"
  33#include "intel_drv.h"
  34#include <linux/shmem_fs.h>
  35#include <linux/slab.h>
  36#include <linux/swap.h>
  37#include <linux/pci.h>
  38
  39static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
  40static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
  41static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
  42static __must_check int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj,
  43                                                          bool write);
  44static __must_check int i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
  45                                                                  uint64_t offset,
  46                                                                  uint64_t size);
  47static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj);
  48static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
  49                                                    unsigned alignment,
  50                                                    bool map_and_fenceable);
  51static void i915_gem_clear_fence_reg(struct drm_device *dev,
  52                                     struct drm_i915_fence_reg *reg);
  53static int i915_gem_phys_pwrite(struct drm_device *dev,
  54                                struct drm_i915_gem_object *obj,
  55                                struct drm_i915_gem_pwrite *args,
  56                                struct drm_file *file);
  57static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
  58
  59static int i915_gem_inactive_shrink(struct shrinker *shrinker,
  60                                    struct shrink_control *sc);
  61
  62/* some bookkeeping */
  63static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
  64                                  size_t size)
  65{
  66        dev_priv->mm.object_count++;
  67        dev_priv->mm.object_memory += size;
  68}
  69
  70static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
  71                                     size_t size)
  72{
  73        dev_priv->mm.object_count--;
  74        dev_priv->mm.object_memory -= size;
  75}
  76
  77static int
  78i915_gem_wait_for_error(struct drm_device *dev)
  79{
  80        struct drm_i915_private *dev_priv = dev->dev_private;
  81        struct completion *x = &dev_priv->error_completion;
  82        unsigned long flags;
  83        int ret;
  84
  85        if (!atomic_read(&dev_priv->mm.wedged))
  86                return 0;
  87
  88        ret = wait_for_completion_interruptible(x);
  89        if (ret)
  90                return ret;
  91
  92        if (atomic_read(&dev_priv->mm.wedged)) {
  93                /* GPU is hung, bump the completion count to account for
  94                 * the token we just consumed so that we never hit zero and
  95                 * end up waiting upon a subsequent completion event that
  96                 * will never happen.
  97                 */
  98                spin_lock_irqsave(&x->wait.lock, flags);
  99                x->done++;
 100                spin_unlock_irqrestore(&x->wait.lock, flags);
 101        }
 102        return 0;
 103}
 104
 105int i915_mutex_lock_interruptible(struct drm_device *dev)
 106{
 107        int ret;
 108
 109        ret = i915_gem_wait_for_error(dev);
 110        if (ret)
 111                return ret;
 112
 113        ret = mutex_lock_interruptible(&dev->struct_mutex);
 114        if (ret)
 115                return ret;
 116
 117        WARN_ON(i915_verify_lists(dev));
 118        return 0;
 119}
 120
 121static inline bool
 122i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
 123{
 124        return obj->gtt_space && !obj->active && obj->pin_count == 0;
 125}
 126
 127void i915_gem_do_init(struct drm_device *dev,
 128                      unsigned long start,
 129                      unsigned long mappable_end,
 130                      unsigned long end)
 131{
 132        drm_i915_private_t *dev_priv = dev->dev_private;
 133
 134        drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
 135
 136        dev_priv->mm.gtt_start = start;
 137        dev_priv->mm.gtt_mappable_end = mappable_end;
 138        dev_priv->mm.gtt_end = end;
 139        dev_priv->mm.gtt_total = end - start;
 140        dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
 141
 142        /* Take over this portion of the GTT */
 143        intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE);
 144}
 145
 146int
 147i915_gem_init_ioctl(struct drm_device *dev, void *data,
 148                    struct drm_file *file)
 149{
 150        struct drm_i915_gem_init *args = data;
 151
 152        if (args->gtt_start >= args->gtt_end ||
 153            (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
 154                return -EINVAL;
 155
 156        mutex_lock(&dev->struct_mutex);
 157        i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
 158        mutex_unlock(&dev->struct_mutex);
 159
 160        return 0;
 161}
 162
 163int
 164i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 165                            struct drm_file *file)
 166{
 167        struct drm_i915_private *dev_priv = dev->dev_private;
 168        struct drm_i915_gem_get_aperture *args = data;
 169        struct drm_i915_gem_object *obj;
 170        size_t pinned;
 171
 172        if (!(dev->driver->driver_features & DRIVER_GEM))
 173                return -ENODEV;
 174
 175        pinned = 0;
 176        mutex_lock(&dev->struct_mutex);
 177        list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
 178                pinned += obj->gtt_space->size;
 179        mutex_unlock(&dev->struct_mutex);
 180
 181        args->aper_size = dev_priv->mm.gtt_total;
 182        args->aper_available_size = args->aper_size - pinned;
 183
 184        return 0;
 185}
 186
 187static int
 188i915_gem_create(struct drm_file *file,
 189                struct drm_device *dev,
 190                uint64_t size,
 191                uint32_t *handle_p)
 192{
 193        struct drm_i915_gem_object *obj;
 194        int ret;
 195        u32 handle;
 196
 197        size = roundup(size, PAGE_SIZE);
 198        if (size == 0)
 199                return -EINVAL;
 200
 201        /* Allocate the new object */
 202        obj = i915_gem_alloc_object(dev, size);
 203        if (obj == NULL)
 204                return -ENOMEM;
 205
 206        ret = drm_gem_handle_create(file, &obj->base, &handle);
 207        if (ret) {
 208                drm_gem_object_release(&obj->base);
 209                i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
 210                kfree(obj);
 211                return ret;
 212        }
 213
 214        /* drop reference from allocate - handle holds it now */
 215        drm_gem_object_unreference(&obj->base);
 216        trace_i915_gem_object_create(obj);
 217
 218        *handle_p = handle;
 219        return 0;
 220}
 221
 222int
 223i915_gem_dumb_create(struct drm_file *file,
 224                     struct drm_device *dev,
 225                     struct drm_mode_create_dumb *args)
 226{
 227        /* have to work out size/pitch and return them */
 228        args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
 229        args->size = args->pitch * args->height;
 230        return i915_gem_create(file, dev,
 231                               args->size, &args->handle);
 232}
 233
 234int i915_gem_dumb_destroy(struct drm_file *file,
 235                          struct drm_device *dev,
 236                          uint32_t handle)
 237{
 238        return drm_gem_handle_delete(file, handle);
 239}
 240
 241/**
 242 * Creates a new mm object and returns a handle to it.
 243 */
 244int
 245i915_gem_create_ioctl(struct drm_device *dev, void *data,
 246                      struct drm_file *file)
 247{
 248        struct drm_i915_gem_create *args = data;
 249        return i915_gem_create(file, dev,
 250                               args->size, &args->handle);
 251}
 252
 253static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 254{
 255        drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
 256
 257        return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
 258                obj->tiling_mode != I915_TILING_NONE;
 259}
 260
 261static inline void
 262slow_shmem_copy(struct page *dst_page,
 263                int dst_offset,
 264                struct page *src_page,
 265                int src_offset,
 266                int length)
 267{
 268        char *dst_vaddr, *src_vaddr;
 269
 270        dst_vaddr = kmap(dst_page);
 271        src_vaddr = kmap(src_page);
 272
 273        memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
 274
 275        kunmap(src_page);
 276        kunmap(dst_page);
 277}
 278
 279static inline void
 280slow_shmem_bit17_copy(struct page *gpu_page,
 281                      int gpu_offset,
 282                      struct page *cpu_page,
 283                      int cpu_offset,
 284                      int length,
 285                      int is_read)
 286{
 287        char *gpu_vaddr, *cpu_vaddr;
 288
 289        /* Use the unswizzled path if this page isn't affected. */
 290        if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
 291                if (is_read)
 292                        return slow_shmem_copy(cpu_page, cpu_offset,
 293                                               gpu_page, gpu_offset, length);
 294                else
 295                        return slow_shmem_copy(gpu_page, gpu_offset,
 296                                               cpu_page, cpu_offset, length);
 297        }
 298
 299        gpu_vaddr = kmap(gpu_page);
 300        cpu_vaddr = kmap(cpu_page);
 301
 302        /* Copy the data, XORing A6 with A17 (1). The user already knows he's
 303         * XORing with the other bits (A9 for Y, A9 and A10 for X)
 304         */
 305        while (length > 0) {
 306                int cacheline_end = ALIGN(gpu_offset + 1, 64);
 307                int this_length = min(cacheline_end - gpu_offset, length);
 308                int swizzled_gpu_offset = gpu_offset ^ 64;
 309
 310                if (is_read) {
 311                        memcpy(cpu_vaddr + cpu_offset,
 312                               gpu_vaddr + swizzled_gpu_offset,
 313                               this_length);
 314                } else {
 315                        memcpy(gpu_vaddr + swizzled_gpu_offset,
 316                               cpu_vaddr + cpu_offset,
 317                               this_length);
 318                }
 319                cpu_offset += this_length;
 320                gpu_offset += this_length;
 321                length -= this_length;
 322        }
 323
 324        kunmap(cpu_page);
 325        kunmap(gpu_page);
 326}
 327
 328/**
 329 * This is the fast shmem pread path, which attempts to copy_from_user directly
 330 * from the backing pages of the object to the user's address space.  On a
 331 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
 332 */
 333static int
 334i915_gem_shmem_pread_fast(struct drm_device *dev,
 335                          struct drm_i915_gem_object *obj,
 336                          struct drm_i915_gem_pread *args,
 337                          struct drm_file *file)
 338{
 339        struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 340        ssize_t remain;
 341        loff_t offset;
 342        char __user *user_data;
 343        int page_offset, page_length;
 344
 345        user_data = (char __user *) (uintptr_t) args->data_ptr;
 346        remain = args->size;
 347
 348        offset = args->offset;
 349
 350        while (remain > 0) {
 351                struct page *page;
 352                char *vaddr;
 353                int ret;
 354
 355                /* Operation in this page
 356                 *
 357                 * page_offset = offset within page
 358                 * page_length = bytes to copy for this page
 359                 */
 360                page_offset = offset_in_page(offset);
 361                page_length = remain;
 362                if ((page_offset + remain) > PAGE_SIZE)
 363                        page_length = PAGE_SIZE - page_offset;
 364
 365                page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 366                if (IS_ERR(page))
 367                        return PTR_ERR(page);
 368
 369                vaddr = kmap_atomic(page);
 370                ret = __copy_to_user_inatomic(user_data,
 371                                              vaddr + page_offset,
 372                                              page_length);
 373                kunmap_atomic(vaddr);
 374
 375                mark_page_accessed(page);
 376                page_cache_release(page);
 377                if (ret)
 378                        return -EFAULT;
 379
 380                remain -= page_length;
 381                user_data += page_length;
 382                offset += page_length;
 383        }
 384
 385        return 0;
 386}
 387
 388/**
 389 * This is the fallback shmem pread path, which allocates temporary storage
 390 * in kernel space to copy_to_user into outside of the struct_mutex, so we
 391 * can copy out of the object's backing pages while holding the struct mutex
 392 * and not take page faults.
 393 */
 394static int
 395i915_gem_shmem_pread_slow(struct drm_device *dev,
 396                          struct drm_i915_gem_object *obj,
 397                          struct drm_i915_gem_pread *args,
 398                          struct drm_file *file)
 399{
 400        struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 401        struct mm_struct *mm = current->mm;
 402        struct page **user_pages;
 403        ssize_t remain;
 404        loff_t offset, pinned_pages, i;
 405        loff_t first_data_page, last_data_page, num_pages;
 406        int shmem_page_offset;
 407        int data_page_index, data_page_offset;
 408        int page_length;
 409        int ret;
 410        uint64_t data_ptr = args->data_ptr;
 411        int do_bit17_swizzling;
 412
 413        remain = args->size;
 414
 415        /* Pin the user pages containing the data.  We can't fault while
 416         * holding the struct mutex, yet we want to hold it while
 417         * dereferencing the user data.
 418         */
 419        first_data_page = data_ptr / PAGE_SIZE;
 420        last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 421        num_pages = last_data_page - first_data_page + 1;
 422
 423        user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
 424        if (user_pages == NULL)
 425                return -ENOMEM;
 426
 427        mutex_unlock(&dev->struct_mutex);
 428        down_read(&mm->mmap_sem);
 429        pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
 430                                      num_pages, 1, 0, user_pages, NULL);
 431        up_read(&mm->mmap_sem);
 432        mutex_lock(&dev->struct_mutex);
 433        if (pinned_pages < num_pages) {
 434                ret = -EFAULT;
 435                goto out;
 436        }
 437
 438        ret = i915_gem_object_set_cpu_read_domain_range(obj,
 439                                                        args->offset,
 440                                                        args->size);
 441        if (ret)
 442                goto out;
 443
 444        do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 445
 446        offset = args->offset;
 447
 448        while (remain > 0) {
 449                struct page *page;
 450
 451                /* Operation in this page
 452                 *
 453                 * shmem_page_offset = offset within page in shmem file
 454                 * data_page_index = page number in get_user_pages return
 455                 * data_page_offset = offset with data_page_index page.
 456                 * page_length = bytes to copy for this page
 457                 */
 458                shmem_page_offset = offset_in_page(offset);
 459                data_page_index = data_ptr / PAGE_SIZE - first_data_page;
 460                data_page_offset = offset_in_page(data_ptr);
 461
 462                page_length = remain;
 463                if ((shmem_page_offset + page_length) > PAGE_SIZE)
 464                        page_length = PAGE_SIZE - shmem_page_offset;
 465                if ((data_page_offset + page_length) > PAGE_SIZE)
 466                        page_length = PAGE_SIZE - data_page_offset;
 467
 468                page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 469                if (IS_ERR(page)) {
 470                        ret = PTR_ERR(page);
 471                        goto out;
 472                }
 473
 474                if (do_bit17_swizzling) {
 475                        slow_shmem_bit17_copy(page,
 476                                              shmem_page_offset,
 477                                              user_pages[data_page_index],
 478                                              data_page_offset,
 479                                              page_length,
 480                                              1);
 481                } else {
 482                        slow_shmem_copy(user_pages[data_page_index],
 483                                        data_page_offset,
 484                                        page,
 485                                        shmem_page_offset,
 486                                        page_length);
 487                }
 488
 489                mark_page_accessed(page);
 490                page_cache_release(page);
 491
 492                remain -= page_length;
 493                data_ptr += page_length;
 494                offset += page_length;
 495        }
 496
 497out:
 498        for (i = 0; i < pinned_pages; i++) {
 499                SetPageDirty(user_pages[i]);
 500                mark_page_accessed(user_pages[i]);
 501                page_cache_release(user_pages[i]);
 502        }
 503        drm_free_large(user_pages);
 504
 505        return ret;
 506}
 507
 508/**
 509 * Reads data from the object referenced by handle.
 510 *
 511 * On error, the contents of *data are undefined.
 512 */
 513int
 514i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 515                     struct drm_file *file)
 516{
 517        struct drm_i915_gem_pread *args = data;
 518        struct drm_i915_gem_object *obj;
 519        int ret = 0;
 520
 521        if (args->size == 0)
 522                return 0;
 523
 524        if (!access_ok(VERIFY_WRITE,
 525                       (char __user *)(uintptr_t)args->data_ptr,
 526                       args->size))
 527                return -EFAULT;
 528
 529        ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
 530                                       args->size);
 531        if (ret)
 532                return -EFAULT;
 533
 534        ret = i915_mutex_lock_interruptible(dev);
 535        if (ret)
 536                return ret;
 537
 538        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 539        if (&obj->base == NULL) {
 540                ret = -ENOENT;
 541                goto unlock;
 542        }
 543
 544        /* Bounds check source.  */
 545        if (args->offset > obj->base.size ||
 546            args->size > obj->base.size - args->offset) {
 547                ret = -EINVAL;
 548                goto out;
 549        }
 550
 551        trace_i915_gem_object_pread(obj, args->offset, args->size);
 552
 553        ret = i915_gem_object_set_cpu_read_domain_range(obj,
 554                                                        args->offset,
 555                                                        args->size);
 556        if (ret)
 557                goto out;
 558
 559        ret = -EFAULT;
 560        if (!i915_gem_object_needs_bit17_swizzle(obj))
 561                ret = i915_gem_shmem_pread_fast(dev, obj, args, file);
 562        if (ret == -EFAULT)
 563                ret = i915_gem_shmem_pread_slow(dev, obj, args, file);
 564
 565out:
 566        drm_gem_object_unreference(&obj->base);
 567unlock:
 568        mutex_unlock(&dev->struct_mutex);
 569        return ret;
 570}
 571
 572/* This is the fast write path which cannot handle
 573 * page faults in the source data
 574 */
 575
 576static inline int
 577fast_user_write(struct io_mapping *mapping,
 578                loff_t page_base, int page_offset,
 579                char __user *user_data,
 580                int length)
 581{
 582        char *vaddr_atomic;
 583        unsigned long unwritten;
 584
 585        vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
 586        unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
 587                                                      user_data, length);
 588        io_mapping_unmap_atomic(vaddr_atomic);
 589        return unwritten;
 590}
 591
 592/* Here's the write path which can sleep for
 593 * page faults
 594 */
 595
 596static inline void
 597slow_kernel_write(struct io_mapping *mapping,
 598                  loff_t gtt_base, int gtt_offset,
 599                  struct page *user_page, int user_offset,
 600                  int length)
 601{
 602        char __iomem *dst_vaddr;
 603        char *src_vaddr;
 604
 605        dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
 606        src_vaddr = kmap(user_page);
 607
 608        memcpy_toio(dst_vaddr + gtt_offset,
 609                    src_vaddr + user_offset,
 610                    length);
 611
 612        kunmap(user_page);
 613        io_mapping_unmap(dst_vaddr);
 614}
 615
 616/**
 617 * This is the fast pwrite path, where we copy the data directly from the
 618 * user into the GTT, uncached.
 619 */
 620static int
 621i915_gem_gtt_pwrite_fast(struct drm_device *dev,
 622                         struct drm_i915_gem_object *obj,
 623                         struct drm_i915_gem_pwrite *args,
 624                         struct drm_file *file)
 625{
 626        drm_i915_private_t *dev_priv = dev->dev_private;
 627        ssize_t remain;
 628        loff_t offset, page_base;
 629        char __user *user_data;
 630        int page_offset, page_length;
 631
 632        user_data = (char __user *) (uintptr_t) args->data_ptr;
 633        remain = args->size;
 634
 635        offset = obj->gtt_offset + args->offset;
 636
 637        while (remain > 0) {
 638                /* Operation in this page
 639                 *
 640                 * page_base = page offset within aperture
 641                 * page_offset = offset within page
 642                 * page_length = bytes to copy for this page
 643                 */
 644                page_base = offset & PAGE_MASK;
 645                page_offset = offset_in_page(offset);
 646                page_length = remain;
 647                if ((page_offset + remain) > PAGE_SIZE)
 648                        page_length = PAGE_SIZE - page_offset;
 649
 650                /* If we get a fault while copying data, then (presumably) our
 651                 * source page isn't available.  Return the error and we'll
 652                 * retry in the slow path.
 653                 */
 654                if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
 655                                    page_offset, user_data, page_length))
 656                        return -EFAULT;
 657
 658                remain -= page_length;
 659                user_data += page_length;
 660                offset += page_length;
 661        }
 662
 663        return 0;
 664}
 665
 666/**
 667 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 668 * the memory and maps it using kmap_atomic for copying.
 669 *
 670 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 671 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 672 */
 673static int
 674i915_gem_gtt_pwrite_slow(struct drm_device *dev,
 675                         struct drm_i915_gem_object *obj,
 676                         struct drm_i915_gem_pwrite *args,
 677                         struct drm_file *file)
 678{
 679        drm_i915_private_t *dev_priv = dev->dev_private;
 680        ssize_t remain;
 681        loff_t gtt_page_base, offset;
 682        loff_t first_data_page, last_data_page, num_pages;
 683        loff_t pinned_pages, i;
 684        struct page **user_pages;
 685        struct mm_struct *mm = current->mm;
 686        int gtt_page_offset, data_page_offset, data_page_index, page_length;
 687        int ret;
 688        uint64_t data_ptr = args->data_ptr;
 689
 690        remain = args->size;
 691
 692        /* Pin the user pages containing the data.  We can't fault while
 693         * holding the struct mutex, and all of the pwrite implementations
 694         * want to hold it while dereferencing the user data.
 695         */
 696        first_data_page = data_ptr / PAGE_SIZE;
 697        last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 698        num_pages = last_data_page - first_data_page + 1;
 699
 700        user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
 701        if (user_pages == NULL)
 702                return -ENOMEM;
 703
 704        mutex_unlock(&dev->struct_mutex);
 705        down_read(&mm->mmap_sem);
 706        pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
 707                                      num_pages, 0, 0, user_pages, NULL);
 708        up_read(&mm->mmap_sem);
 709        mutex_lock(&dev->struct_mutex);
 710        if (pinned_pages < num_pages) {
 711                ret = -EFAULT;
 712                goto out_unpin_pages;
 713        }
 714
 715        ret = i915_gem_object_set_to_gtt_domain(obj, true);
 716        if (ret)
 717                goto out_unpin_pages;
 718
 719        ret = i915_gem_object_put_fence(obj);
 720        if (ret)
 721                goto out_unpin_pages;
 722
 723        offset = obj->gtt_offset + args->offset;
 724
 725        while (remain > 0) {
 726                /* Operation in this page
 727                 *
 728                 * gtt_page_base = page offset within aperture
 729                 * gtt_page_offset = offset within page in aperture
 730                 * data_page_index = page number in get_user_pages return
 731                 * data_page_offset = offset with data_page_index page.
 732                 * page_length = bytes to copy for this page
 733                 */
 734                gtt_page_base = offset & PAGE_MASK;
 735                gtt_page_offset = offset_in_page(offset);
 736                data_page_index = data_ptr / PAGE_SIZE - first_data_page;
 737                data_page_offset = offset_in_page(data_ptr);
 738
 739                page_length = remain;
 740                if ((gtt_page_offset + page_length) > PAGE_SIZE)
 741                        page_length = PAGE_SIZE - gtt_page_offset;
 742                if ((data_page_offset + page_length) > PAGE_SIZE)
 743                        page_length = PAGE_SIZE - data_page_offset;
 744
 745                slow_kernel_write(dev_priv->mm.gtt_mapping,
 746                                  gtt_page_base, gtt_page_offset,
 747                                  user_pages[data_page_index],
 748                                  data_page_offset,
 749                                  page_length);
 750
 751                remain -= page_length;
 752                offset += page_length;
 753                data_ptr += page_length;
 754        }
 755
 756out_unpin_pages:
 757        for (i = 0; i < pinned_pages; i++)
 758                page_cache_release(user_pages[i]);
 759        drm_free_large(user_pages);
 760
 761        return ret;
 762}
 763
 764/**
 765 * This is the fast shmem pwrite path, which attempts to directly
 766 * copy_from_user into the kmapped pages backing the object.
 767 */
 768static int
 769i915_gem_shmem_pwrite_fast(struct drm_device *dev,
 770                           struct drm_i915_gem_object *obj,
 771                           struct drm_i915_gem_pwrite *args,
 772                           struct drm_file *file)
 773{
 774        struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 775        ssize_t remain;
 776        loff_t offset;
 777        char __user *user_data;
 778        int page_offset, page_length;
 779
 780        user_data = (char __user *) (uintptr_t) args->data_ptr;
 781        remain = args->size;
 782
 783        offset = args->offset;
 784        obj->dirty = 1;
 785
 786        while (remain > 0) {
 787                struct page *page;
 788                char *vaddr;
 789                int ret;
 790
 791                /* Operation in this page
 792                 *
 793                 * page_offset = offset within page
 794                 * page_length = bytes to copy for this page
 795                 */
 796                page_offset = offset_in_page(offset);
 797                page_length = remain;
 798                if ((page_offset + remain) > PAGE_SIZE)
 799                        page_length = PAGE_SIZE - page_offset;
 800
 801                page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 802                if (IS_ERR(page))
 803                        return PTR_ERR(page);
 804
 805                vaddr = kmap_atomic(page);
 806                ret = __copy_from_user_inatomic(vaddr + page_offset,
 807                                                user_data,
 808                                                page_length);
 809                kunmap_atomic(vaddr);
 810
 811                set_page_dirty(page);
 812                mark_page_accessed(page);
 813                page_cache_release(page);
 814
 815                /* If we get a fault while copying data, then (presumably) our
 816                 * source page isn't available.  Return the error and we'll
 817                 * retry in the slow path.
 818                 */
 819                if (ret)
 820                        return -EFAULT;
 821
 822                remain -= page_length;
 823                user_data += page_length;
 824                offset += page_length;
 825        }
 826
 827        return 0;
 828}
 829
 830/**
 831 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 832 * the memory and maps it using kmap_atomic for copying.
 833 *
 834 * This avoids taking mmap_sem for faulting on the user's address while the
 835 * struct_mutex is held.
 836 */
 837static int
 838i915_gem_shmem_pwrite_slow(struct drm_device *dev,
 839                           struct drm_i915_gem_object *obj,
 840                           struct drm_i915_gem_pwrite *args,
 841                           struct drm_file *file)
 842{
 843        struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 844        struct mm_struct *mm = current->mm;
 845        struct page **user_pages;
 846        ssize_t remain;
 847        loff_t offset, pinned_pages, i;
 848        loff_t first_data_page, last_data_page, num_pages;
 849        int shmem_page_offset;
 850        int data_page_index,  data_page_offset;
 851        int page_length;
 852        int ret;
 853        uint64_t data_ptr = args->data_ptr;
 854        int do_bit17_swizzling;
 855
 856        remain = args->size;
 857
 858        /* Pin the user pages containing the data.  We can't fault while
 859         * holding the struct mutex, and all of the pwrite implementations
 860         * want to hold it while dereferencing the user data.
 861         */
 862        first_data_page = data_ptr / PAGE_SIZE;
 863        last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
 864        num_pages = last_data_page - first_data_page + 1;
 865
 866        user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
 867        if (user_pages == NULL)
 868                return -ENOMEM;
 869
 870        mutex_unlock(&dev->struct_mutex);
 871        down_read(&mm->mmap_sem);
 872        pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
 873                                      num_pages, 0, 0, user_pages, NULL);
 874        up_read(&mm->mmap_sem);
 875        mutex_lock(&dev->struct_mutex);
 876        if (pinned_pages < num_pages) {
 877                ret = -EFAULT;
 878                goto out;
 879        }
 880
 881        ret = i915_gem_object_set_to_cpu_domain(obj, 1);
 882        if (ret)
 883                goto out;
 884
 885        do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 886
 887        offset = args->offset;
 888        obj->dirty = 1;
 889
 890        while (remain > 0) {
 891                struct page *page;
 892
 893                /* Operation in this page
 894                 *
 895                 * shmem_page_offset = offset within page in shmem file
 896                 * data_page_index = page number in get_user_pages return
 897                 * data_page_offset = offset with data_page_index page.
 898                 * page_length = bytes to copy for this page
 899                 */
 900                shmem_page_offset = offset_in_page(offset);
 901                data_page_index = data_ptr / PAGE_SIZE - first_data_page;
 902                data_page_offset = offset_in_page(data_ptr);
 903
 904                page_length = remain;
 905                if ((shmem_page_offset + page_length) > PAGE_SIZE)
 906                        page_length = PAGE_SIZE - shmem_page_offset;
 907                if ((data_page_offset + page_length) > PAGE_SIZE)
 908                        page_length = PAGE_SIZE - data_page_offset;
 909
 910                page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
 911                if (IS_ERR(page)) {
 912                        ret = PTR_ERR(page);
 913                        goto out;
 914                }
 915
 916                if (do_bit17_swizzling) {
 917                        slow_shmem_bit17_copy(page,
 918                                              shmem_page_offset,
 919                                              user_pages[data_page_index],
 920                                              data_page_offset,
 921                                              page_length,
 922                                              0);
 923                } else {
 924                        slow_shmem_copy(page,
 925                                        shmem_page_offset,
 926                                        user_pages[data_page_index],
 927                                        data_page_offset,
 928                                        page_length);
 929                }
 930
 931                set_page_dirty(page);
 932                mark_page_accessed(page);
 933                page_cache_release(page);
 934
 935                remain -= page_length;
 936                data_ptr += page_length;
 937                offset += page_length;
 938        }
 939
 940out:
 941        for (i = 0; i < pinned_pages; i++)
 942                page_cache_release(user_pages[i]);
 943        drm_free_large(user_pages);
 944
 945        return ret;
 946}
 947
 948/**
 949 * Writes data to the object referenced by handle.
 950 *
 951 * On error, the contents of the buffer that were to be modified are undefined.
 952 */
 953int
 954i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 955                      struct drm_file *file)
 956{
 957        struct drm_i915_gem_pwrite *args = data;
 958        struct drm_i915_gem_object *obj;
 959        int ret;
 960
 961        if (args->size == 0)
 962                return 0;
 963
 964        if (!access_ok(VERIFY_READ,
 965                       (char __user *)(uintptr_t)args->data_ptr,
 966                       args->size))
 967                return -EFAULT;
 968
 969        ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
 970                                      args->size);
 971        if (ret)
 972                return -EFAULT;
 973
 974        ret = i915_mutex_lock_interruptible(dev);
 975        if (ret)
 976                return ret;
 977
 978        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
 979        if (&obj->base == NULL) {
 980                ret = -ENOENT;
 981                goto unlock;
 982        }
 983
 984        /* Bounds check destination. */
 985        if (args->offset > obj->base.size ||
 986            args->size > obj->base.size - args->offset) {
 987                ret = -EINVAL;
 988                goto out;
 989        }
 990
 991        trace_i915_gem_object_pwrite(obj, args->offset, args->size);
 992
 993        /* We can only do the GTT pwrite on untiled buffers, as otherwise
 994         * it would end up going through the fenced access, and we'll get
 995         * different detiling behavior between reading and writing.
 996         * pread/pwrite currently are reading and writing from the CPU
 997         * perspective, requiring manual detiling by the client.
 998         */
 999        if (obj->phys_obj)
1000                ret = i915_gem_phys_pwrite(dev, obj, args, file);
1001        else if (obj->gtt_space &&
1002                 obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
1003                ret = i915_gem_object_pin(obj, 0, true);
1004                if (ret)
1005                        goto out;
1006
1007                ret = i915_gem_object_set_to_gtt_domain(obj, true);
1008                if (ret)
1009                        goto out_unpin;
1010
1011                ret = i915_gem_object_put_fence(obj);
1012                if (ret)
1013                        goto out_unpin;
1014
1015                ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
1016                if (ret == -EFAULT)
1017                        ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);
1018
1019out_unpin:
1020                i915_gem_object_unpin(obj);
1021        } else {
1022                ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1023                if (ret)
1024                        goto out;
1025
1026                ret = -EFAULT;
1027                if (!i915_gem_object_needs_bit17_swizzle(obj))
1028                        ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
1029                if (ret == -EFAULT)
1030                        ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);
1031        }
1032
1033out:
1034        drm_gem_object_unreference(&obj->base);
1035unlock:
1036        mutex_unlock(&dev->struct_mutex);
1037        return ret;
1038}
1039
1040/**
1041 * Called when user space prepares to use an object with the CPU, either
1042 * through the mmap ioctl's mapping or a GTT mapping.
1043 */
1044int
1045i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1046                          struct drm_file *file)
1047{
1048        struct drm_i915_gem_set_domain *args = data;
1049        struct drm_i915_gem_object *obj;
1050        uint32_t read_domains = args->read_domains;
1051        uint32_t write_domain = args->write_domain;
1052        int ret;
1053
1054        if (!(dev->driver->driver_features & DRIVER_GEM))
1055                return -ENODEV;
1056
1057        /* Only handle setting domains to types used by the CPU. */
1058        if (write_domain & I915_GEM_GPU_DOMAINS)
1059                return -EINVAL;
1060
1061        if (read_domains & I915_GEM_GPU_DOMAINS)
1062                return -EINVAL;
1063
1064        /* Having something in the write domain implies it's in the read
1065         * domain, and only that read domain.  Enforce that in the request.
1066         */
1067        if (write_domain != 0 && read_domains != write_domain)
1068                return -EINVAL;
1069
1070        ret = i915_mutex_lock_interruptible(dev);
1071        if (ret)
1072                return ret;
1073
1074        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1075        if (&obj->base == NULL) {
1076                ret = -ENOENT;
1077                goto unlock;
1078        }
1079
1080        if (read_domains & I915_GEM_DOMAIN_GTT) {
1081                ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1082
1083                /* Silently promote "you're not bound, there was nothing to do"
1084                 * to success, since the client was just asking us to
1085                 * make sure everything was done.
1086                 */
1087                if (ret == -EINVAL)
1088                        ret = 0;
1089        } else {
1090                ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1091        }
1092
1093        drm_gem_object_unreference(&obj->base);
1094unlock:
1095        mutex_unlock(&dev->struct_mutex);
1096        return ret;
1097}
1098
1099/**
1100 * Called when user space has done writes to this buffer
1101 */
1102int
1103i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1104                         struct drm_file *file)
1105{
1106        struct drm_i915_gem_sw_finish *args = data;
1107        struct drm_i915_gem_object *obj;
1108        int ret = 0;
1109
1110        if (!(dev->driver->driver_features & DRIVER_GEM))
1111                return -ENODEV;
1112
1113        ret = i915_mutex_lock_interruptible(dev);
1114        if (ret)
1115                return ret;
1116
1117        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1118        if (&obj->base == NULL) {
1119                ret = -ENOENT;
1120                goto unlock;
1121        }
1122
1123        /* Pinned buffers may be scanout, so flush the cache */
1124        if (obj->pin_count)
1125                i915_gem_object_flush_cpu_write_domain(obj);
1126
1127        drm_gem_object_unreference(&obj->base);
1128unlock:
1129        mutex_unlock(&dev->struct_mutex);
1130        return ret;
1131}
1132
1133/**
1134 * Maps the contents of an object, returning the address it is mapped
1135 * into.
1136 *
1137 * While the mapping holds a reference on the contents of the object, it doesn't
1138 * imply a ref on the object itself.
1139 */
1140int
1141i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1142                    struct drm_file *file)
1143{
1144        struct drm_i915_private *dev_priv = dev->dev_private;
1145        struct drm_i915_gem_mmap *args = data;
1146        struct drm_gem_object *obj;
1147        unsigned long addr;
1148
1149        if (!(dev->driver->driver_features & DRIVER_GEM))
1150                return -ENODEV;
1151
1152        obj = drm_gem_object_lookup(dev, file, args->handle);
1153        if (obj == NULL)
1154                return -ENOENT;
1155
1156        if (obj->size > dev_priv->mm.gtt_mappable_end) {
1157                drm_gem_object_unreference_unlocked(obj);
1158                return -E2BIG;
1159        }
1160
1161        down_write(&current->mm->mmap_sem);
1162        addr = do_mmap(obj->filp, 0, args->size,
1163                       PROT_READ | PROT_WRITE, MAP_SHARED,
1164                       args->offset);
1165        up_write(&current->mm->mmap_sem);
1166        drm_gem_object_unreference_unlocked(obj);
1167        if (IS_ERR((void *)addr))
1168                return addr;
1169
1170        args->addr_ptr = (uint64_t) addr;
1171
1172        return 0;
1173}
1174
1175/**
1176 * i915_gem_fault - fault a page into the GTT
1177 * vma: VMA in question
1178 * vmf: fault info
1179 *
1180 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1181 * from userspace.  The fault handler takes care of binding the object to
1182 * the GTT (if needed), allocating and programming a fence register (again,
1183 * only if needed based on whether the old reg is still valid or the object
1184 * is tiled) and inserting a new PTE into the faulting process.
1185 *
1186 * Note that the faulting process may involve evicting existing objects
1187 * from the GTT and/or fence registers to make room.  So performance may
1188 * suffer if the GTT working set is large or there are few fence registers
1189 * left.
1190 */
1191int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1192{
1193        struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
1194        struct drm_device *dev = obj->base.dev;
1195        drm_i915_private_t *dev_priv = dev->dev_private;
1196        pgoff_t page_offset;
1197        unsigned long pfn;
1198        int ret = 0;
1199        bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1200
1201        /* We don't use vmf->pgoff since that has the fake offset */
1202        page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1203                PAGE_SHIFT;
1204
1205        ret = i915_mutex_lock_interruptible(dev);
1206        if (ret)
1207                goto out;
1208
1209        trace_i915_gem_object_fault(obj, page_offset, true, write);
1210
1211        /* Now bind it into the GTT if needed */
1212        if (!obj->map_and_fenceable) {
1213                ret = i915_gem_object_unbind(obj);
1214                if (ret)
1215                        goto unlock;
1216        }
1217        if (!obj->gtt_space) {
1218                ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1219                if (ret)
1220                        goto unlock;
1221
1222                ret = i915_gem_object_set_to_gtt_domain(obj, write);
1223                if (ret)
1224                        goto unlock;
1225        }
1226
1227        if (obj->tiling_mode == I915_TILING_NONE)
1228                ret = i915_gem_object_put_fence(obj);
1229        else
1230                ret = i915_gem_object_get_fence(obj, NULL);
1231        if (ret)
1232                goto unlock;
1233
1234        if (i915_gem_object_is_inactive(obj))
1235                list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1236
1237        obj->fault_mappable = true;
1238
1239        pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
1240                page_offset;
1241
1242        /* Finally, remap it using the new GTT offset */
1243        ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1244unlock:
1245        mutex_unlock(&dev->struct_mutex);
1246out:
1247        switch (ret) {
1248        case -EIO:
1249        case -EAGAIN:
1250                /* Give the error handler a chance to run and move the
1251                 * objects off the GPU active list. Next time we service the
1252                 * fault, we should be able to transition the page into the
1253                 * GTT without touching the GPU (and so avoid further
1254                 * EIO/EGAIN). If the GPU is wedged, then there is no issue
1255                 * with coherency, just lost writes.
1256                 */
1257                set_need_resched();
1258        case 0:
1259        case -ERESTARTSYS:
1260        case -EINTR:
1261                return VM_FAULT_NOPAGE;
1262        case -ENOMEM:
1263                return VM_FAULT_OOM;
1264        default:
1265                return VM_FAULT_SIGBUS;
1266        }
1267}
1268
1269/**
1270 * i915_gem_release_mmap - remove physical page mappings
1271 * @obj: obj in question
1272 *
1273 * Preserve the reservation of the mmapping with the DRM core code, but
1274 * relinquish ownership of the pages back to the system.
1275 *
1276 * It is vital that we remove the page mapping if we have mapped a tiled
1277 * object through the GTT and then lose the fence register due to
1278 * resource pressure. Similarly if the object has been moved out of the
1279 * aperture, than pages mapped into userspace must be revoked. Removing the
1280 * mapping will then trigger a page fault on the next user access, allowing
1281 * fixup by i915_gem_fault().
1282 */
1283void
1284i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1285{
1286        if (!obj->fault_mappable)
1287                return;
1288
1289        if (obj->base.dev->dev_mapping)
1290                unmap_mapping_range(obj->base.dev->dev_mapping,
1291                                    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
1292                                    obj->base.size, 1);
1293
1294        obj->fault_mappable = false;
1295}
1296
1297static uint32_t
1298i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1299{
1300        uint32_t gtt_size;
1301
1302        if (INTEL_INFO(dev)->gen >= 4 ||
1303            tiling_mode == I915_TILING_NONE)
1304                return size;
1305
1306        /* Previous chips need a power-of-two fence region when tiling */
1307        if (INTEL_INFO(dev)->gen == 3)
1308                gtt_size = 1024*1024;
1309        else
1310                gtt_size = 512*1024;
1311
1312        while (gtt_size < size)
1313                gtt_size <<= 1;
1314
1315        return gtt_size;
1316}
1317
1318/**
1319 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1320 * @obj: object to check
1321 *
1322 * Return the required GTT alignment for an object, taking into account
1323 * potential fence register mapping.
1324 */
1325static uint32_t
1326i915_gem_get_gtt_alignment(struct drm_device *dev,
1327                           uint32_t size,
1328                           int tiling_mode)
1329{
1330        /*
1331         * Minimum alignment is 4k (GTT page size), but might be greater
1332         * if a fence register is needed for the object.
1333         */
1334        if (INTEL_INFO(dev)->gen >= 4 ||
1335            tiling_mode == I915_TILING_NONE)
1336                return 4096;
1337
1338        /*
1339         * Previous chips need to be aligned to the size of the smallest
1340         * fence register that can contain the object.
1341         */
1342        return i915_gem_get_gtt_size(dev, size, tiling_mode);
1343}
1344
1345/**
1346 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
1347 *                                       unfenced object
1348 * @dev: the device
1349 * @size: size of the object
1350 * @tiling_mode: tiling mode of the object
1351 *
1352 * Return the required GTT alignment for an object, only taking into account
1353 * unfenced tiled surface requirements.
1354 */
1355uint32_t
1356i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
1357                                    uint32_t size,
1358                                    int tiling_mode)
1359{
1360        /*
1361         * Minimum alignment is 4k (GTT page size) for sane hw.
1362         */
1363        if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1364            tiling_mode == I915_TILING_NONE)
1365                return 4096;
1366
1367        /* Previous hardware however needs to be aligned to a power-of-two
1368         * tile height. The simplest method for determining this is to reuse
1369         * the power-of-tile object size.
1370         */
1371        return i915_gem_get_gtt_size(dev, size, tiling_mode);
1372}
1373
1374int
1375i915_gem_mmap_gtt(struct drm_file *file,
1376                  struct drm_device *dev,
1377                  uint32_t handle,
1378                  uint64_t *offset)
1379{
1380        struct drm_i915_private *dev_priv = dev->dev_private;
1381        struct drm_i915_gem_object *obj;
1382        int ret;
1383
1384        if (!(dev->driver->driver_features & DRIVER_GEM))
1385                return -ENODEV;
1386
1387        ret = i915_mutex_lock_interruptible(dev);
1388        if (ret)
1389                return ret;
1390
1391        obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1392        if (&obj->base == NULL) {
1393                ret = -ENOENT;
1394                goto unlock;
1395        }
1396
1397        if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1398                ret = -E2BIG;
1399                goto out;
1400        }
1401
1402        if (obj->madv != I915_MADV_WILLNEED) {
1403                DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1404                ret = -EINVAL;
1405                goto out;
1406        }
1407
1408        if (!obj->base.map_list.map) {
1409                ret = drm_gem_create_mmap_offset(&obj->base);
1410                if (ret)
1411                        goto out;
1412        }
1413
1414        *offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1415
1416out:
1417        drm_gem_object_unreference(&obj->base);
1418unlock:
1419        mutex_unlock(&dev->struct_mutex);
1420        return ret;
1421}
1422
1423/**
1424 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1425 * @dev: DRM device
1426 * @data: GTT mapping ioctl data
1427 * @file: GEM object info
1428 *
1429 * Simply returns the fake offset to userspace so it can mmap it.
1430 * The mmap call will end up in drm_gem_mmap(), which will set things
1431 * up so we can get faults in the handler above.
1432 *
1433 * The fault handler will take care of binding the object into the GTT
1434 * (since it may have been evicted to make room for something), allocating
1435 * a fence register, and mapping the appropriate aperture address into
1436 * userspace.
1437 */
1438int
1439i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1440                        struct drm_file *file)
1441{
1442        struct drm_i915_gem_mmap_gtt *args = data;
1443
1444        if (!(dev->driver->driver_features & DRIVER_GEM))
1445                return -ENODEV;
1446
1447        return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
1448}
1449
1450
1451static int
1452i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1453                              gfp_t gfpmask)
1454{
1455        int page_count, i;
1456        struct address_space *mapping;
1457        struct inode *inode;
1458        struct page *page;
1459
1460        /* Get the list of pages out of our struct file.  They'll be pinned
1461         * at this point until we release them.
1462         */
1463        page_count = obj->base.size / PAGE_SIZE;
1464        BUG_ON(obj->pages != NULL);
1465        obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
1466        if (obj->pages == NULL)
1467                return -ENOMEM;
1468
1469        inode = obj->base.filp->f_path.dentry->d_inode;
1470        mapping = inode->i_mapping;
1471        gfpmask |= mapping_gfp_mask(mapping);
1472
1473        for (i = 0; i < page_count; i++) {
1474                page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
1475                if (IS_ERR(page))
1476                        goto err_pages;
1477
1478                obj->pages[i] = page;
1479        }
1480
1481        if (i915_gem_object_needs_bit17_swizzle(obj))
1482                i915_gem_object_do_bit_17_swizzle(obj);
1483
1484        return 0;
1485
1486err_pages:
1487        while (i--)
1488                page_cache_release(obj->pages[i]);
1489
1490        drm_free_large(obj->pages);
1491        obj->pages = NULL;
1492        return PTR_ERR(page);
1493}
1494
1495static void
1496i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1497{
1498        int page_count = obj->base.size / PAGE_SIZE;
1499        int i;
1500
1501        BUG_ON(obj->madv == __I915_MADV_PURGED);
1502
1503        if (i915_gem_object_needs_bit17_swizzle(obj))
1504                i915_gem_object_save_bit_17_swizzle(obj);
1505
1506        if (obj->madv == I915_MADV_DONTNEED)
1507                obj->dirty = 0;
1508
1509        for (i = 0; i < page_count; i++) {
1510                if (obj->dirty)
1511                        set_page_dirty(obj->pages[i]);
1512
1513                if (obj->madv == I915_MADV_WILLNEED)
1514                        mark_page_accessed(obj->pages[i]);
1515
1516                page_cache_release(obj->pages[i]);
1517        }
1518        obj->dirty = 0;
1519
1520        drm_free_large(obj->pages);
1521        obj->pages = NULL;
1522}
1523
1524void
1525i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1526                               struct intel_ring_buffer *ring,
1527                               u32 seqno)
1528{
1529        struct drm_device *dev = obj->base.dev;
1530        struct drm_i915_private *dev_priv = dev->dev_private;
1531
1532        BUG_ON(ring == NULL);
1533        obj->ring = ring;
1534
1535        /* Add a reference if we're newly entering the active list. */
1536        if (!obj->active) {
1537                drm_gem_object_reference(&obj->base);
1538                obj->active = 1;
1539        }
1540
1541        /* Move from whatever list we were on to the tail of execution. */
1542        list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
1543        list_move_tail(&obj->ring_list, &ring->active_list);
1544
1545        obj->last_rendering_seqno = seqno;
1546        if (obj->fenced_gpu_access) {
1547                struct drm_i915_fence_reg *reg;
1548
1549                BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);
1550
1551                obj->last_fenced_seqno = seqno;
1552                obj->last_fenced_ring = ring;
1553
1554                reg = &dev_priv->fence_regs[obj->fence_reg];
1555                list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
1556        }
1557}
1558
1559static void
1560i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
1561{
1562        list_del_init(&obj->ring_list);
1563        obj->last_rendering_seqno = 0;
1564}
1565
1566static void
1567i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
1568{
1569        struct drm_device *dev = obj->base.dev;
1570        drm_i915_private_t *dev_priv = dev->dev_private;
1571
1572        BUG_ON(!obj->active);
1573        list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
1574
1575        i915_gem_object_move_off_active(obj);
1576}
1577
1578static void
1579i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
1580{
1581        struct drm_device *dev = obj->base.dev;
1582        struct drm_i915_private *dev_priv = dev->dev_private;
1583
1584        if (obj->pin_count != 0)
1585                list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
1586        else
1587                list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
1588
1589        BUG_ON(!list_empty(&obj->gpu_write_list));
1590        BUG_ON(!obj->active);
1591        obj->ring = NULL;
1592
1593        i915_gem_object_move_off_active(obj);
1594        obj->fenced_gpu_access = false;
1595
1596        obj->active = 0;
1597        obj->pending_gpu_write = false;
1598        drm_gem_object_unreference(&obj->base);
1599
1600        WARN_ON(i915_verify_lists(dev));
1601}
1602
1603/* Immediately discard the backing storage */
1604static void
1605i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1606{
1607        struct inode *inode;
1608
1609        /* Our goal here is to return as much of the memory as
1610         * is possible back to the system as we are called from OOM.
1611         * To do this we must instruct the shmfs to drop all of its
1612         * backing pages, *now*.
1613         */
1614        inode = obj->base.filp->f_path.dentry->d_inode;
1615        shmem_truncate_range(inode, 0, (loff_t)-1);
1616
1617        obj->madv = __I915_MADV_PURGED;
1618}
1619
1620static inline int
1621i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1622{
1623        return obj->madv == I915_MADV_DONTNEED;
1624}
1625
1626static void
1627i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
1628                               uint32_t flush_domains)
1629{
1630        struct drm_i915_gem_object *obj, *next;
1631
1632        list_for_each_entry_safe(obj, next,
1633                                 &ring->gpu_write_list,
1634                                 gpu_write_list) {
1635                if (obj->base.write_domain & flush_domains) {
1636                        uint32_t old_write_domain = obj->base.write_domain;
1637
1638                        obj->base.write_domain = 0;
1639                        list_del_init(&obj->gpu_write_list);
1640                        i915_gem_object_move_to_active(obj, ring,
1641                                                       i915_gem_next_request_seqno(ring));
1642
1643                        trace_i915_gem_object_change_domain(obj,
1644                                                            obj->base.read_domains,
1645                                                            old_write_domain);
1646                }
1647        }
1648}
1649
1650int
1651i915_add_request(struct intel_ring_buffer *ring,
1652                 struct drm_file *file,
1653                 struct drm_i915_gem_request *request)
1654{
1655        drm_i915_private_t *dev_priv = ring->dev->dev_private;
1656        uint32_t seqno;
1657        int was_empty;
1658        int ret;
1659
1660        BUG_ON(request == NULL);
1661
1662        ret = ring->add_request(ring, &seqno);
1663        if (ret)
1664            return ret;
1665
1666        trace_i915_gem_request_add(ring, seqno);
1667
1668        request->seqno = seqno;
1669        request->ring = ring;
1670        request->emitted_jiffies = jiffies;
1671        was_empty = list_empty(&ring->request_list);
1672        list_add_tail(&request->list, &ring->request_list);
1673
1674        if (file) {
1675                struct drm_i915_file_private *file_priv = file->driver_priv;
1676
1677                spin_lock(&file_priv->mm.lock);
1678                request->file_priv = file_priv;
1679                list_add_tail(&request->client_list,
1680                              &file_priv->mm.request_list);
1681                spin_unlock(&file_priv->mm.lock);
1682        }
1683
1684        ring->outstanding_lazy_request = false;
1685
1686        if (!dev_priv->mm.suspended) {
1687                if (i915_enable_hangcheck) {
1688                        mod_timer(&dev_priv->hangcheck_timer,
1689                                  jiffies +
1690                                  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
1691                }
1692                if (was_empty)
1693                        queue_delayed_work(dev_priv->wq,
1694                                           &dev_priv->mm.retire_work, HZ);
1695        }
1696        return 0;
1697}
1698
1699static inline void
1700i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1701{
1702        struct drm_i915_file_private *file_priv = request->file_priv;
1703
1704        if (!file_priv)
1705                return;
1706
1707        spin_lock(&file_priv->mm.lock);
1708        if (request->file_priv) {
1709                list_del(&request->client_list);
1710                request->file_priv = NULL;
1711        }
1712        spin_unlock(&file_priv->mm.lock);
1713}
1714
1715static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
1716                                      struct intel_ring_buffer *ring)
1717{
1718        while (!list_empty(&ring->request_list)) {
1719                struct drm_i915_gem_request *request;
1720
1721                request = list_first_entry(&ring->request_list,
1722                                           struct drm_i915_gem_request,
1723                                           list);
1724
1725                list_del(&request->list);
1726                i915_gem_request_remove_from_client(request);
1727                kfree(request);
1728        }
1729
1730        while (!list_empty(&ring->active_list)) {
1731                struct drm_i915_gem_object *obj;
1732
1733                obj = list_first_entry(&ring->active_list,
1734                                       struct drm_i915_gem_object,
1735                                       ring_list);
1736
1737                obj->base.write_domain = 0;
1738                list_del_init(&obj->gpu_write_list);
1739                i915_gem_object_move_to_inactive(obj);
1740        }
1741}
1742
1743static void i915_gem_reset_fences(struct drm_device *dev)
1744{
1745        struct drm_i915_private *dev_priv = dev->dev_private;
1746        int i;
1747
1748        for (i = 0; i < dev_priv->num_fence_regs; i++) {
1749                struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1750                struct drm_i915_gem_object *obj = reg->obj;
1751
1752                if (!obj)
1753                        continue;
1754
1755                if (obj->tiling_mode)
1756                        i915_gem_release_mmap(obj);
1757
1758                reg->obj->fence_reg = I915_FENCE_REG_NONE;
1759                reg->obj->fenced_gpu_access = false;
1760                reg->obj->last_fenced_seqno = 0;
1761                reg->obj->last_fenced_ring = NULL;
1762                i915_gem_clear_fence_reg(dev, reg);
1763        }
1764}
1765
1766void i915_gem_reset(struct drm_device *dev)
1767{
1768        struct drm_i915_private *dev_priv = dev->dev_private;
1769        struct drm_i915_gem_object *obj;
1770        int i;
1771
1772        for (i = 0; i < I915_NUM_RINGS; i++)
1773                i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);
1774
1775        /* Remove anything from the flushing lists. The GPU cache is likely
1776         * to be lost on reset along with the data, so simply move the
1777         * lost bo to the inactive list.
1778         */
1779        while (!list_empty(&dev_priv->mm.flushing_list)) {
1780                obj = list_first_entry(&dev_priv->mm.flushing_list,
1781                                      struct drm_i915_gem_object,
1782                                      mm_list);
1783
1784                obj->base.write_domain = 0;
1785                list_del_init(&obj->gpu_write_list);
1786                i915_gem_object_move_to_inactive(obj);
1787        }
1788
1789        /* Move everything out of the GPU domains to ensure we do any
1790         * necessary invalidation upon reuse.
1791         */
1792        list_for_each_entry(obj,
1793                            &dev_priv->mm.inactive_list,
1794                            mm_list)
1795        {
1796                obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1797        }
1798
1799        /* The fence registers are invalidated so clear them out */
1800        i915_gem_reset_fences(dev);
1801}
1802
1803/**
1804 * This function clears the request list as sequence numbers are passed.
1805 */
1806static void
1807i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
1808{
1809        uint32_t seqno;
1810        int i;
1811
1812        if (list_empty(&ring->request_list))
1813                return;
1814
1815        WARN_ON(i915_verify_lists(ring->dev));
1816
1817        seqno = ring->get_seqno(ring);
1818
1819        for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
1820                if (seqno >= ring->sync_seqno[i])
1821                        ring->sync_seqno[i] = 0;
1822
1823        while (!list_empty(&ring->request_list)) {
1824                struct drm_i915_gem_request *request;
1825
1826                request = list_first_entry(&ring->request_list,
1827                                           struct drm_i915_gem_request,
1828                                           list);
1829
1830                if (!i915_seqno_passed(seqno, request->seqno))
1831                        break;
1832
1833                trace_i915_gem_request_retire(ring, request->seqno);
1834
1835                list_del(&request->list);
1836                i915_gem_request_remove_from_client(request);
1837                kfree(request);
1838        }
1839
1840        /* Move any buffers on the active list that are no longer referenced
1841         * by the ringbuffer to the flushing/inactive lists as appropriate.
1842         */
1843        while (!list_empty(&ring->active_list)) {
1844                struct drm_i915_gem_object *obj;
1845
1846                obj = list_first_entry(&ring->active_list,
1847                                      struct drm_i915_gem_object,
1848                                      ring_list);
1849
1850                if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
1851                        break;
1852
1853                if (obj->base.write_domain != 0)
1854                        i915_gem_object_move_to_flushing(obj);
1855                else
1856                        i915_gem_object_move_to_inactive(obj);
1857        }
1858
1859        if (unlikely(ring->trace_irq_seqno &&
1860                     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1861                ring->irq_put(ring);
1862                ring->trace_irq_seqno = 0;
1863        }
1864
1865        WARN_ON(i915_verify_lists(ring->dev));
1866}
1867
1868void
1869i915_gem_retire_requests(struct drm_device *dev)
1870{
1871        drm_i915_private_t *dev_priv = dev->dev_private;
1872        int i;
1873
1874        if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1875            struct drm_i915_gem_object *obj, *next;
1876
1877            /* We must be careful that during unbind() we do not
1878             * accidentally infinitely recurse into retire requests.
1879             * Currently:
1880             *   retire -> free -> unbind -> wait -> retire_ring
1881             */
1882            list_for_each_entry_safe(obj, next,
1883                                     &dev_priv->mm.deferred_free_list,
1884                                     mm_list)
1885                    i915_gem_free_object_tail(obj);
1886        }
1887
1888        for (i = 0; i < I915_NUM_RINGS; i++)
1889                i915_gem_retire_requests_ring(&dev_priv->ring[i]);
1890}
1891
1892static void
1893i915_gem_retire_work_handler(struct work_struct *work)
1894{
1895        drm_i915_private_t *dev_priv;
1896        struct drm_device *dev;
1897        bool idle;
1898        int i;
1899
1900        dev_priv = container_of(work, drm_i915_private_t,
1901                                mm.retire_work.work);
1902        dev = dev_priv->dev;
1903
1904        /* Come back later if the device is busy... */
1905        if (!mutex_trylock(&dev->struct_mutex)) {
1906                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1907                return;
1908        }
1909
1910        i915_gem_retire_requests(dev);
1911
1912        /* Send a periodic flush down the ring so we don't hold onto GEM
1913         * objects indefinitely.
1914         */
1915        idle = true;
1916        for (i = 0; i < I915_NUM_RINGS; i++) {
1917                struct intel_ring_buffer *ring = &dev_priv->ring[i];
1918
1919                if (!list_empty(&ring->gpu_write_list)) {
1920                        struct drm_i915_gem_request *request;
1921                        int ret;
1922
1923                        ret = i915_gem_flush_ring(ring,
1924                                                  0, I915_GEM_GPU_DOMAINS);
1925                        request = kzalloc(sizeof(*request), GFP_KERNEL);
1926                        if (ret || request == NULL ||
1927                            i915_add_request(ring, NULL, request))
1928                            kfree(request);
1929                }
1930
1931                idle &= list_empty(&ring->request_list);
1932        }
1933
1934        if (!dev_priv->mm.suspended && !idle)
1935                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1936
1937        mutex_unlock(&dev->struct_mutex);
1938}
1939
1940/**
1941 * Waits for a sequence number to be signaled, and cleans up the
1942 * request and object lists appropriately for that event.
1943 */
1944int
1945i915_wait_request(struct intel_ring_buffer *ring,
1946                  uint32_t seqno)
1947{
1948        drm_i915_private_t *dev_priv = ring->dev->dev_private;
1949        u32 ier;
1950        int ret = 0;
1951
1952        BUG_ON(seqno == 0);
1953
1954        if (atomic_read(&dev_priv->mm.wedged)) {
1955                struct completion *x = &dev_priv->error_completion;
1956                bool recovery_complete;
1957                unsigned long flags;
1958
1959                /* Give the error handler a chance to run. */
1960                spin_lock_irqsave(&x->wait.lock, flags);
1961                recovery_complete = x->done > 0;
1962                spin_unlock_irqrestore(&x->wait.lock, flags);
1963
1964                return recovery_complete ? -EIO : -EAGAIN;
1965        }
1966
1967        if (seqno == ring->outstanding_lazy_request) {
1968                struct drm_i915_gem_request *request;
1969
1970                request = kzalloc(sizeof(*request), GFP_KERNEL);
1971                if (request == NULL)
1972                        return -ENOMEM;
1973
1974                ret = i915_add_request(ring, NULL, request);
1975                if (ret) {
1976                        kfree(request);
1977                        return ret;
1978                }
1979
1980                seqno = request->seqno;
1981        }
1982
1983        if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
1984                if (HAS_PCH_SPLIT(ring->dev))
1985                        ier = I915_READ(DEIER) | I915_READ(GTIER);
1986                else
1987                        ier = I915_READ(IER);
1988                if (!ier) {
1989                        DRM_ERROR("something (likely vbetool) disabled "
1990                                  "interrupts, re-enabling\n");
1991                        ring->dev->driver->irq_preinstall(ring->dev);
1992                        ring->dev->driver->irq_postinstall(ring->dev);
1993                }
1994
1995                trace_i915_gem_request_wait_begin(ring, seqno);
1996
1997                ring->waiting_seqno = seqno;
1998                if (ring->irq_get(ring)) {
1999                        if (dev_priv->mm.interruptible)
2000                                ret = wait_event_interruptible(ring->irq_queue,
2001                                                               i915_seqno_passed(ring->get_seqno(ring), seqno)
2002                                                               || atomic_read(&dev_priv->mm.wedged));
2003                        else
2004                                wait_event(ring->irq_queue,
2005                                           i915_seqno_passed(ring->get_seqno(ring), seqno)
2006                                           || atomic_read(&dev_priv->mm.wedged));
2007
2008                        ring->irq_put(ring);
2009                } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
2010                                                             seqno) ||
2011                                           atomic_read(&dev_priv->mm.wedged), 3000))
2012                        ret = -EBUSY;
2013                ring->waiting_seqno = 0;
2014
2015                trace_i915_gem_request_wait_end(ring, seqno);
2016        }
2017        if (atomic_read(&dev_priv->mm.wedged))
2018                ret = -EAGAIN;
2019
2020        if (ret && ret != -ERESTARTSYS)
2021                DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
2022                          __func__, ret, seqno, ring->get_seqno(ring),
2023                          dev_priv->next_seqno);
2024
2025        /* Directly dispatch request retiring.  While we have the work queue
2026         * to handle this, the waiter on a request often wants an associated
2027         * buffer to have made it to the inactive list, and we would need
2028         * a separate wait queue to handle that.
2029         */
2030        if (ret == 0)
2031                i915_gem_retire_requests_ring(ring);
2032
2033        return ret;
2034}
2035
2036/**
2037 * Ensures that all rendering to the object has completed and the object is
2038 * safe to unbind from the GTT or access from the CPU.
2039 */
2040int
2041i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
2042{
2043        int ret;
2044
2045        /* This function only exists to support waiting for existing rendering,
2046         * not for emitting required flushes.
2047         */
2048        BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);
2049
2050        /* If there is rendering queued on the buffer being evicted, wait for
2051         * it.
2052         */
2053        if (obj->active) {
2054                ret = i915_wait_request(obj->ring, obj->last_rendering_seqno);
2055                if (ret)
2056                        return ret;
2057        }
2058
2059        return 0;
2060}
2061
2062static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2063{
2064        u32 old_write_domain, old_read_domains;
2065
2066        /* Act a barrier for all accesses through the GTT */
2067        mb();
2068
2069        /* Force a pagefault for domain tracking on next user access */
2070        i915_gem_release_mmap(obj);
2071
2072        if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2073                return;
2074
2075        old_read_domains = obj->base.read_domains;
2076        old_write_domain = obj->base.write_domain;
2077
2078        obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2079        obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2080
2081        trace_i915_gem_object_change_domain(obj,
2082                                            old_read_domains,
2083                                            old_write_domain);
2084}
2085
2086/**
2087 * Unbinds an object from the GTT aperture.
2088 */
2089int
2090i915_gem_object_unbind(struct drm_i915_gem_object *obj)
2091{
2092        int ret = 0;
2093
2094        if (obj->gtt_space == NULL)
2095                return 0;
2096
2097        if (obj->pin_count != 0) {
2098                DRM_ERROR("Attempting to unbind pinned buffer\n");
2099                return -EINVAL;
2100        }
2101
2102        ret = i915_gem_object_finish_gpu(obj);
2103        if (ret == -ERESTARTSYS)
2104                return ret;
2105        /* Continue on if we fail due to EIO, the GPU is hung so we
2106         * should be safe and we need to cleanup or else we might
2107         * cause memory corruption through use-after-free.
2108         */
2109
2110        i915_gem_object_finish_gtt(obj);
2111
2112        /* Move the object to the CPU domain to ensure that
2113         * any possible CPU writes while it's not in the GTT
2114         * are flushed when we go to remap it.
2115         */
2116        if (ret == 0)
2117                ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2118        if (ret == -ERESTARTSYS)
2119                return ret;
2120        if (ret) {
2121                /* In the event of a disaster, abandon all caches and
2122                 * hope for the best.
2123                 */
2124                i915_gem_clflush_object(obj);
2125                obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2126        }
2127
2128        /* release the fence reg _after_ flushing */
2129        ret = i915_gem_object_put_fence(obj);
2130        if (ret == -ERESTARTSYS)
2131                return ret;
2132
2133        trace_i915_gem_object_unbind(obj);
2134
2135        i915_gem_gtt_unbind_object(obj);
2136        i915_gem_object_put_pages_gtt(obj);
2137
2138        list_del_init(&obj->gtt_list);
2139        list_del_init(&obj->mm_list);
2140        /* Avoid an unnecessary call to unbind on rebind. */
2141        obj->map_and_fenceable = true;
2142
2143        drm_mm_put_block(obj->gtt_space);
2144        obj->gtt_space = NULL;
2145        obj->gtt_offset = 0;
2146
2147        if (i915_gem_object_is_purgeable(obj))
2148                i915_gem_object_truncate(obj);
2149
2150        return ret;
2151}
2152
2153int
2154i915_gem_flush_ring(struct intel_ring_buffer *ring,
2155                    uint32_t invalidate_domains,
2156                    uint32_t flush_domains)
2157{
2158        int ret;
2159
2160        if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
2161                return 0;
2162
2163        trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);
2164
2165        ret = ring->flush(ring, invalidate_domains, flush_domains);
2166        if (ret)
2167                return ret;
2168
2169        if (flush_domains & I915_GEM_GPU_DOMAINS)
2170                i915_gem_process_flushing_list(ring, flush_domains);
2171
2172        return 0;
2173}
2174
2175static int i915_ring_idle(struct intel_ring_buffer *ring)
2176{
2177        int ret;
2178
2179        if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
2180                return 0;
2181
2182        if (!list_empty(&ring->gpu_write_list)) {
2183                ret = i915_gem_flush_ring(ring,
2184                                    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2185                if (ret)
2186                        return ret;
2187        }
2188
2189        return i915_wait_request(ring, i915_gem_next_request_seqno(ring));
2190}
2191
2192int
2193i915_gpu_idle(struct drm_device *dev)
2194{
2195        drm_i915_private_t *dev_priv = dev->dev_private;
2196        int ret, i;
2197
2198        /* Flush everything onto the inactive list. */
2199        for (i = 0; i < I915_NUM_RINGS; i++) {
2200                ret = i915_ring_idle(&dev_priv->ring[i]);
2201                if (ret)
2202                        return ret;
2203        }
2204
2205        return 0;
2206}
2207
2208static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
2209                                       struct intel_ring_buffer *pipelined)
2210{
2211        struct drm_device *dev = obj->base.dev;
2212        drm_i915_private_t *dev_priv = dev->dev_private;
2213        u32 size = obj->gtt_space->size;
2214        int regnum = obj->fence_reg;
2215        uint64_t val;
2216
2217        val = (uint64_t)((obj->gtt_offset + size - 4096) &
2218                         0xfffff000) << 32;
2219        val |= obj->gtt_offset & 0xfffff000;
2220        val |= (uint64_t)((obj->stride / 128) - 1) <<
2221                SANDYBRIDGE_FENCE_PITCH_SHIFT;
2222
2223        if (obj->tiling_mode == I915_TILING_Y)
2224                val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2225        val |= I965_FENCE_REG_VALID;
2226
2227        if (pipelined) {
2228                int ret = intel_ring_begin(pipelined, 6);
2229                if (ret)
2230                        return ret;
2231
2232                intel_ring_emit(pipelined, MI_NOOP);
2233                intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2234                intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
2235                intel_ring_emit(pipelined, (u32)val);
2236                intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
2237                intel_ring_emit(pipelined, (u32)(val >> 32));
2238                intel_ring_advance(pipelined);
2239        } else
2240                I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);
2241
2242        return 0;
2243}
2244
2245static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
2246                                struct intel_ring_buffer *pipelined)
2247{
2248        struct drm_device *dev = obj->base.dev;
2249        drm_i915_private_t *dev_priv = dev->dev_private;
2250        u32 size = obj->gtt_space->size;
2251        int regnum = obj->fence_reg;
2252        uint64_t val;
2253
2254        val = (uint64_t)((obj->gtt_offset + size - 4096) &
2255                    0xfffff000) << 32;
2256        val |= obj->gtt_offset & 0xfffff000;
2257        val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2258        if (obj->tiling_mode == I915_TILING_Y)
2259                val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2260        val |= I965_FENCE_REG_VALID;
2261
2262        if (pipelined) {
2263                int ret = intel_ring_begin(pipelined, 6);
2264                if (ret)
2265                        return ret;
2266
2267                intel_ring_emit(pipelined, MI_NOOP);
2268                intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
2269                intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
2270                intel_ring_emit(pipelined, (u32)val);
2271                intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
2272                intel_ring_emit(pipelined, (u32)(val >> 32));
2273                intel_ring_advance(pipelined);
2274        } else
2275                I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);
2276
2277        return 0;
2278}
2279
2280static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
2281                                struct intel_ring_buffer *pipelined)
2282{
2283        struct drm_device *dev = obj->base.dev;
2284        drm_i915_private_t *dev_priv = dev->dev_private;
2285        u32 size = obj->gtt_space->size;
2286        u32 fence_reg, val, pitch_val;
2287        int tile_width;
2288
2289        if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
2290                 (size & -size) != size ||
2291                 (obj->gtt_offset & (size - 1)),
2292                 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
2293                 obj->gtt_offset, obj->map_and_fenceable, size))
2294                return -EINVAL;
2295
2296        if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
2297                tile_width = 128;
2298        else
2299                tile_width = 512;
2300
2301        /* Note: pitch better be a power of two tile widths */
2302        pitch_val = obj->stride / tile_width;
2303        pitch_val = ffs(pitch_val) - 1;
2304
2305        val = obj->gtt_offset;
2306        if (obj->tiling_mode == I915_TILING_Y)
2307                val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2308        val |= I915_FENCE_SIZE_BITS(size);
2309        val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2310        val |= I830_FENCE_REG_VALID;
2311
2312        fence_reg = obj->fence_reg;
2313        if (fence_reg < 8)
2314                fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2315        else
2316                fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2317
2318        if (pipelined) {
2319                int ret = intel_ring_begin(pipelined, 4);
2320                if (ret)
2321                        return ret;
2322
2323                intel_ring_emit(pipelined, MI_NOOP);
2324                intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2325                intel_ring_emit(pipelined, fence_reg);
2326                intel_ring_emit(pipelined, val);
2327                intel_ring_advance(pipelined);
2328        } else
2329                I915_WRITE(fence_reg, val);
2330
2331        return 0;
2332}
2333
2334static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
2335                                struct intel_ring_buffer *pipelined)
2336{
2337        struct drm_device *dev = obj->base.dev;
2338        drm_i915_private_t *dev_priv = dev->dev_private;
2339        u32 size = obj->gtt_space->size;
2340        int regnum = obj->fence_reg;
2341        uint32_t val;
2342        uint32_t pitch_val;
2343
2344        if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
2345                 (size & -size) != size ||
2346                 (obj->gtt_offset & (size - 1)),
2347                 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
2348                 obj->gtt_offset, size))
2349                return -EINVAL;
2350
2351        pitch_val = obj->stride / 128;
2352        pitch_val = ffs(pitch_val) - 1;
2353
2354        val = obj->gtt_offset;
2355        if (obj->tiling_mode == I915_TILING_Y)
2356                val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2357        val |= I830_FENCE_SIZE_BITS(size);
2358        val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2359        val |= I830_FENCE_REG_VALID;
2360
2361        if (pipelined) {
2362                int ret = intel_ring_begin(pipelined, 4);
2363                if (ret)
2364                        return ret;
2365
2366                intel_ring_emit(pipelined, MI_NOOP);
2367                intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
2368                intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
2369                intel_ring_emit(pipelined, val);
2370                intel_ring_advance(pipelined);
2371        } else
2372                I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);
2373
2374        return 0;
2375}
2376
2377static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno)
2378{
2379        return i915_seqno_passed(ring->get_seqno(ring), seqno);
2380}
2381
2382static int
2383i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
2384                            struct intel_ring_buffer *pipelined)
2385{
2386        int ret;
2387
2388        if (obj->fenced_gpu_access) {
2389                if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
2390                        ret = i915_gem_flush_ring(obj->last_fenced_ring,
2391                                                  0, obj->base.write_domain);
2392                        if (ret)
2393                                return ret;
2394                }
2395
2396                obj->fenced_gpu_access = false;
2397        }
2398
2399        if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) {
2400                if (!ring_passed_seqno(obj->last_fenced_ring,
2401                                       obj->last_fenced_seqno)) {
2402                        ret = i915_wait_request(obj->last_fenced_ring,
2403                                                obj->last_fenced_seqno);
2404                        if (ret)
2405                                return ret;
2406                }
2407
2408                obj->last_fenced_seqno = 0;
2409                obj->last_fenced_ring = NULL;
2410        }
2411
2412        /* Ensure that all CPU reads are completed before installing a fence
2413         * and all writes before removing the fence.
2414         */
2415        if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
2416                mb();
2417
2418        return 0;
2419}
2420
2421int
2422i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
2423{
2424        int ret;
2425
2426        if (obj->tiling_mode)
2427                i915_gem_release_mmap(obj);
2428
2429        ret = i915_gem_object_flush_fence(obj, NULL);
2430        if (ret)
2431                return ret;
2432
2433        if (obj->fence_reg != I915_FENCE_REG_NONE) {
2434                struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2435                i915_gem_clear_fence_reg(obj->base.dev,
2436                                         &dev_priv->fence_regs[obj->fence_reg]);
2437
2438                obj->fence_reg = I915_FENCE_REG_NONE;
2439        }
2440
2441        return 0;
2442}
2443
2444static struct drm_i915_fence_reg *
2445i915_find_fence_reg(struct drm_device *dev,
2446                    struct intel_ring_buffer *pipelined)
2447{
2448        struct drm_i915_private *dev_priv = dev->dev_private;
2449        struct drm_i915_fence_reg *reg, *first, *avail;
2450        int i;
2451
2452        /* First try to find a free reg */
2453        avail = NULL;
2454        for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2455                reg = &dev_priv->fence_regs[i];
2456                if (!reg->obj)
2457                        return reg;
2458
2459                if (!reg->obj->pin_count)
2460                        avail = reg;
2461        }
2462
2463        if (avail == NULL)
2464                return NULL;
2465
2466        /* None available, try to steal one or wait for a user to finish */
2467        avail = first = NULL;
2468        list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
2469                if (reg->obj->pin_count)
2470                        continue;
2471
2472                if (first == NULL)
2473                        first = reg;
2474
2475                if (!pipelined ||
2476                    !reg->obj->last_fenced_ring ||
2477                    reg->obj->last_fenced_ring == pipelined) {
2478                        avail = reg;
2479                        break;
2480                }
2481        }
2482
2483        if (avail == NULL)
2484                avail = first;
2485
2486        return avail;
2487}
2488
/**
 * i915_gem_object_get_fence - set up a fence reg for an object
 * @obj: object to map through a fence reg
 * @pipelined: ring on which to queue the change, or NULL for CPU access
 *             (currently overridden to NULL on entry, see the XXX below)
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 *
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 *
 * Returns 0 on success, -ENOSPC if i915_find_fence_reg() finds no register,
 * or the error reported by flushing/waiting on a fence or by the
 * generation-specific register write.
 */
int
i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
                          struct intel_ring_buffer *pipelined)
{
        struct drm_device *dev = obj->base.dev;
        struct drm_i915_private *dev_priv = dev->dev_private;
        struct drm_i915_fence_reg *reg;
        int ret;

        /* XXX disable pipelining. There are bugs. Shocking. */
        pipelined = NULL;

        /* Just update our place in the LRU if our fence is getting reused. */
        if (obj->fence_reg != I915_FENCE_REG_NONE) {
                reg = &dev_priv->fence_regs[obj->fence_reg];
                list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);

                /* The object kept its register but its tiling changed:
                 * flush the fence, then reprogram the register at "update".
                 */
                if (obj->tiling_changed) {
                        ret = i915_gem_object_flush_fence(obj, pipelined);
                        if (ret)
                                return ret;

                        if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
                                pipelined = NULL;

                        if (pipelined) {
                                reg->setup_seqno =
                                        i915_gem_next_request_seqno(pipelined);
                                obj->last_fenced_seqno = reg->setup_seqno;
                                obj->last_fenced_ring = pipelined;
                        }

                        goto update;
                }

                if (!pipelined) {
                        /* CPU access: if a setup seqno is still recorded for
                         * this register, wait until the ring has passed it
                         * before clearing the marker.
                         */
                        if (reg->setup_seqno) {
                                if (!ring_passed_seqno(obj->last_fenced_ring,
                                                       reg->setup_seqno)) {
                                        ret = i915_wait_request(obj->last_fenced_ring,
                                                                reg->setup_seqno);
                                        if (ret)
                                                return ret;
                                }

                                reg->setup_seqno = 0;
                        }
                } else if (obj->last_fenced_ring &&
                           obj->last_fenced_ring != pipelined) {
                        /* Last fenced on a different ring than requested. */
                        ret = i915_gem_object_flush_fence(obj, pipelined);
                        if (ret)
                                return ret;
                }

                return 0;
        }

        /* No register held yet: ask for one (it may still have an owner). */
        reg = i915_find_fence_reg(dev, pipelined);
        if (reg == NULL)
                return -ENOSPC;

        ret = i915_gem_object_flush_fence(obj, pipelined);
        if (ret)
                return ret;

        if (reg->obj) {
                /* The register currently belongs to another object; detach
                 * it, holding a reference on that object while we do so.
                 */
                struct drm_i915_gem_object *old = reg->obj;

                drm_gem_object_reference(&old->base);

                if (old->tiling_mode)
                        i915_gem_release_mmap(old);

                ret = i915_gem_object_flush_fence(old, pipelined);
                if (ret) {
                        drm_gem_object_unreference(&old->base);
                        return ret;
                }

                if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
                        pipelined = NULL;

                old->fence_reg = I915_FENCE_REG_NONE;
                old->last_fenced_ring = pipelined;
                old->last_fenced_seqno =
                        pipelined ? i915_gem_next_request_seqno(pipelined) : 0;

                drm_gem_object_unreference(&old->base);
        } else if (obj->last_fenced_seqno == 0)
                pipelined = NULL;

        /* Record the new ownership on both the register and the object. */
        reg->obj = obj;
        list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
        obj->fence_reg = reg - dev_priv->fence_regs;
        obj->last_fenced_ring = pipelined;

        reg->setup_seqno =
                pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
        obj->last_fenced_seqno = reg->setup_seqno;

update:
        obj->tiling_changed = false;
        /* Write the register through the generation-specific helper. There
         * is no default case: for any other gen value, ret keeps the 0 left
         * by the last successful flush above.
         */
        switch (INTEL_INFO(dev)->gen) {
        case 7:
        case 6:
                ret = sandybridge_write_fence_reg(obj, pipelined);
                break;
        case 5:
        case 4:
                ret = i965_write_fence_reg(obj, pipelined);
                break;
        case 3:
                ret = i915_write_fence_reg(obj, pipelined);
                break;
        case 2:
                ret = i830_write_fence_reg(obj, pipelined);
                break;
        }

        return ret;
}
2625
2626/**
2627 * i915_gem_clear_fence_reg - clear out fence register info
2628 * @obj: object to clear
2629 *
2630 * Zeroes out the fence register itself and clears out the associated
2631 * data structures in dev_priv and obj.
2632 */
2633static void
2634i915_gem_clear_fence_reg(struct drm_device *dev,
2635                         struct drm_i915_fence_reg *reg)
2636{
2637        drm_i915_private_t *dev_priv = dev->dev_private;
2638        uint32_t fence_reg = reg - dev_priv->fence_regs;
2639
2640        switch (INTEL_INFO(dev)->gen) {
2641        case 7:
2642        case 6:
2643                I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
2644                break;
2645        case 5:
2646        case 4:
2647                I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
2648                break;
2649        case 3:
2650                if (fence_reg >= 8)
2651                        fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
2652                else
2653        case 2:
2654                        fence_reg = FENCE_REG_830_0 + fence_reg * 4;
2655
2656                I915_WRITE(fence_reg, 0);
2657                break;
2658        }
2659
2660        list_del_init(&reg->lru_list);
2661        reg->obj = NULL;
2662        reg->setup_seqno = 0;
2663}
2664
/**
 * Finds free space in the GTT aperture and binds the object there.
 *
 * @obj: object to bind; must have madv == I915_MADV_WILLNEED
 * @alignment: requested GTT alignment, or 0 to use the fence alignment
 *             (when @map_and_fenceable) or the unfenced alignment
 * @map_and_fenceable: if true, search only below gtt_mappable_end and
 *                     allocate the fence size rather than the object size
 *
 * When no suitable hole exists, or page allocation / GTT binding fails,
 * other objects are evicted and the search is retried.
 *
 * Returns 0 on success, -EINVAL for a purgeable object or an alignment
 * incompatible with fencing, -E2BIG if the object exceeds the aperture,
 * -ENOMEM if pages cannot be obtained, or the error from eviction or
 * binding.
 */
static int
i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
                            unsigned alignment,
                            bool map_and_fenceable)
{
        struct drm_device *dev = obj->base.dev;
        drm_i915_private_t *dev_priv = dev->dev_private;
        struct drm_mm_node *free_space;
        /* First page-allocation attempt: do not retry hard, do not warn. */
        gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
        u32 size, fence_size, fence_alignment, unfenced_alignment;
        bool mappable, fenceable;
        int ret;

        if (obj->madv != I915_MADV_WILLNEED) {
                DRM_ERROR("Attempting to bind a purgeable object\n");
                return -EINVAL;
        }

        fence_size = i915_gem_get_gtt_size(dev,
                                           obj->base.size,
                                           obj->tiling_mode);
        fence_alignment = i915_gem_get_gtt_alignment(dev,
                                                     obj->base.size,
                                                     obj->tiling_mode);
        unfenced_alignment =
                i915_gem_get_unfenced_gtt_alignment(dev,
                                                    obj->base.size,
                                                    obj->tiling_mode);

        if (alignment == 0)
                alignment = map_and_fenceable ? fence_alignment :
                                                unfenced_alignment;
        /* A fenceable binding must be aligned to the fence alignment. */
        if (map_and_fenceable && alignment & (fence_alignment - 1)) {
                DRM_ERROR("Invalid object alignment requested %u\n", alignment);
                return -EINVAL;
        }

        size = map_and_fenceable ? fence_size : obj->base.size;

        /* If the object is bigger than the entire aperture, reject it early
         * before evicting everything in a vain attempt to find space.
         */
        if (obj->base.size >
            (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
                DRM_ERROR("Attempting to bind an object larger than the aperture\n");
                return -E2BIG;
        }

        /* Retry point after each successful eviction below. */
 search_free:
        if (map_and_fenceable)
                free_space =
                        drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
                                                    size, alignment, 0,
                                                    dev_priv->mm.gtt_mappable_end,
                                                    0);
        else
                free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
                                                size, alignment, 0);

        if (free_space != NULL) {
                if (map_and_fenceable)
                        obj->gtt_space =
                                drm_mm_get_block_range_generic(free_space,
                                                               size, alignment, 0,
                                                               dev_priv->mm.gtt_mappable_end,
                                                               0);
                else
                        obj->gtt_space =
                                drm_mm_get_block(free_space, size, alignment);
        }
        if (obj->gtt_space == NULL) {
                /* If the gtt is empty and we're still having trouble
                 * fitting our object in, we're out of memory.
                 */
                ret = i915_gem_evict_something(dev, size, alignment,
                                               map_and_fenceable);
                if (ret)
                        return ret;

                goto search_free;
        }

        ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
        if (ret) {
                /* Give the GTT block back before retrying or failing. */
                drm_mm_put_block(obj->gtt_space);
                obj->gtt_space = NULL;

                if (ret == -ENOMEM) {
                        /* first try to reclaim some memory by clearing the GTT */
                        ret = i915_gem_evict_everything(dev, false);
                        if (ret) {
                                /* now try to shrink everyone else */
                                if (gfpmask) {
                                        /* One more pass with the gfp
                                         * restrictions dropped.
                                         */
                                        gfpmask = 0;
                                        goto search_free;
                                }

                                return -ENOMEM;
                        }

                        goto search_free;
                }

                return ret;
        }

        ret = i915_gem_gtt_bind_object(obj);
        if (ret) {
                /* Undo the page and block allocation, then retry only if
                 * evicting everything succeeded.
                 */
                i915_gem_object_put_pages_gtt(obj);
                drm_mm_put_block(obj->gtt_space);
                obj->gtt_space = NULL;

                if (i915_gem_evict_everything(dev, false))
                        return ret;

                goto search_free;
        }

        list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
        list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

        /* Assert that the object is not currently in any GPU domain. As it
         * wasn't in the GTT, there shouldn't be any way it could have been in
         * a GPU cache
         */
        BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
        BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);

        obj->gtt_offset = obj->gtt_space->start;

        /* Fenceable: the block has exactly the fence size and its start is
         * aligned to the fence alignment.
         */
        fenceable =
                obj->gtt_space->size == fence_size &&
                (obj->gtt_space->start & (fence_alignment - 1)) == 0;

        /* Mappable: the whole object ends at or below gtt_mappable_end. */
        mappable =
                obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;

        obj->map_and_fenceable = mappable && fenceable;

        trace_i915_gem_object_bind(obj, map_and_fenceable);
        return 0;
}
2810
2811void
2812i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2813{
2814        /* If we don't have a page list set up, then we're not pinned
2815         * to GPU, and we can ignore the cache flush because it'll happen
2816         * again at bind time.
2817         */
2818        if (obj->pages == NULL)
2819                return;
2820
2821        /* If the GPU is snooping the contents of the CPU cache,
2822         * we do not need to manually clear the CPU cache lines.  However,
2823         * the caches are only snooped when the render cache is
2824         * flushed/invalidated.  As we always have to emit invalidations
2825         * and flushes when moving into and out of the RENDER domain, correct
2826         * snooping behaviour occurs naturally as the result of our domain
2827         * tracking.
2828         */
2829        if (obj->cache_level != I915_CACHE_NONE)
2830                return;
2831
2832        trace_i915_gem_object_clflush(obj);
2833
2834        drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2835}
2836
2837/** Flushes any GPU write domain for the object if it's dirty. */
2838static int
2839i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
2840{
2841        if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
2842                return 0;
2843
2844        /* Queue the GPU write cache flushing we need. */
2845        return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
2846}
2847
2848/** Flushes the GTT write domain for the object if it's dirty. */
2849static void
2850i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
2851{
2852        uint32_t old_write_domain;
2853
2854        if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
2855                return;
2856
2857        /* No actual flushing is required for the GTT write domain.  Writes
2858         * to it immediately go to main memory as far as we know, so there's
2859         * no chipset flush.  It also doesn't land in render cache.
2860         *
2861         * However, we do have to enforce the order so that all writes through
2862         * the GTT land before any writes to the device, such as updates to
2863         * the GATT itself.
2864         */
2865        wmb();
2866
2867        old_write_domain = obj->base.write_domain;
2868        obj->base.write_domain = 0;
2869
2870        trace_i915_gem_object_change_domain(obj,
2871                                            obj->base.read_domains,
2872                                            old_write_domain);
2873}
2874
2875/** Flushes the CPU write domain for the object if it's dirty. */
2876static void
2877i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2878{
2879        uint32_t old_write_domain;
2880
2881        if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
2882                return;
2883
2884        i915_gem_clflush_object(obj);
2885        intel_gtt_chipset_flush();
2886        old_write_domain = obj->base.write_domain;
2887        obj->base.write_domain = 0;
2888
2889        trace_i915_gem_object_change_domain(obj,
2890                                            obj->base.read_domains,
2891                                            old_write_domain);
2892}
2893
2894/**
2895 * Moves a single object to the GTT read, and possibly write domain.
2896 *
2897 * This function returns when the move is complete, including waiting on
2898 * flushes to occur.
2899 */
2900int
2901i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
2902{
2903        uint32_t old_write_domain, old_read_domains;
2904        int ret;
2905
2906        /* Not valid to be called on unbound objects. */
2907        if (obj->gtt_space == NULL)
2908                return -EINVAL;
2909
2910        if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
2911                return 0;
2912
2913        ret = i915_gem_object_flush_gpu_write_domain(obj);
2914        if (ret)
2915                return ret;
2916
2917        if (obj->pending_gpu_write || write) {
2918                ret = i915_gem_object_wait_rendering(obj);
2919                if (ret)
2920                        return ret;
2921        }
2922
2923        i915_gem_object_flush_cpu_write_domain(obj);
2924
2925        old_write_domain = obj->base.write_domain;
2926        old_read_domains = obj->base.read_domains;
2927
2928        /* It should now be out of any other write domains, and we can update
2929         * the domain values for our changes.
2930         */
2931        BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2932        obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2933        if (write) {
2934                obj->base.read_domains = I915_GEM_DOMAIN_GTT;
2935                obj->base.write_domain = I915_GEM_DOMAIN_GTT;
2936                obj->dirty = 1;
2937        }
2938
2939        trace_i915_gem_object_change_domain(obj,
2940                                            old_read_domains,
2941                                            old_write_domain);
2942
2943        return 0;
2944}
2945
2946int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
2947                                    enum i915_cache_level cache_level)
2948{
2949        int ret;
2950
2951        if (obj->cache_level == cache_level)
2952                return 0;
2953
2954        if (obj->pin_count) {
2955                DRM_DEBUG("can not change the cache level of pinned objects\n");
2956                return -EBUSY;
2957        }
2958
2959        if (obj->gtt_space) {
2960                ret = i915_gem_object_finish_gpu(obj);
2961                if (ret)
2962                        return ret;
2963
2964                i915_gem_object_finish_gtt(obj);
2965
2966                /* Before SandyBridge, you could not use tiling or fence
2967                 * registers with snooped memory, so relinquish any fences
2968                 * currently pointing to our region in the aperture.
2969                 */
2970                if (INTEL_INFO(obj->base.dev)->gen < 6) {
2971                        ret = i915_gem_object_put_fence(obj);
2972                        if (ret)
2973                                return ret;
2974                }
2975
2976                i915_gem_gtt_rebind_object(obj, cache_level);
2977        }
2978
2979        if (cache_level == I915_CACHE_NONE) {
2980                u32 old_read_domains, old_write_domain;
2981
2982                /* If we're coming from LLC cached, then we haven't
2983                 * actually been tracking whether the data is in the
2984                 * CPU cache or not, since we only allow one bit set
2985                 * in obj->write_domain and have been skipping the clflushes.
2986                 * Just set it to the CPU cache for now.
2987                 */
2988                WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
2989                WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
2990
2991                old_read_domains = obj->base.read_domains;
2992                old_write_domain = obj->base.write_domain;
2993
2994                obj->base.read_domains = I915_GEM_DOMAIN_CPU;
2995                obj->base.write_domain = I915_GEM_DOMAIN_CPU;
2996
2997                trace_i915_gem_object_change_domain(obj,
2998                                                    old_read_domains,
2999                                                    old_write_domain);
3000        }
3001
3002        obj->cache_level = cache_level;
3003        return 0;
3004}
3005
3006/*
3007 * Prepare buffer for display plane (scanout, cursors, etc).
3008 * Can be called from an uninterruptible phase (modesetting) and allows
3009 * any flushes to be pipelined (for pageflips).
3010 *
3011 * For the display plane, we want to be in the GTT but out of any write
3012 * domains. So in many ways this looks like set_to_gtt_domain() apart from the
3013 * ability to pipeline the waits, pinning and any additional subtleties
3014 * that may differentiate the display plane from ordinary buffers.
3015 */
3016int
3017i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3018                                     u32 alignment,
3019                                     struct intel_ring_buffer *pipelined)
3020{
3021        u32 old_read_domains, old_write_domain;
3022        int ret;
3023
3024        ret = i915_gem_object_flush_gpu_write_domain(obj);
3025        if (ret)
3026                return ret;
3027
3028        if (pipelined != obj->ring) {
3029                ret = i915_gem_object_wait_rendering(obj);
3030                if (ret == -ERESTARTSYS)
3031                        return ret;
3032        }
3033
3034        /* The display engine is not coherent with the LLC cache on gen6.  As
3035         * a result, we make sure that the pinning that is about to occur is
3036         * done with uncached PTEs. This is lowest common denominator for all
3037         * chipsets.
3038         *
3039         * However for gen6+, we could do better by using the GFDT bit instead
3040         * of uncaching, which would allow us to flush all the LLC-cached data
3041         * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3042         */
3043        ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
3044        if (ret)
3045                return ret;
3046
3047        /* As the user may map the buffer once pinned in the display plane
3048         * (e.g. libkms for the bootup splash), we have to ensure that we
3049         * always use map_and_fenceable for all scanout buffers.
3050         */
3051        ret = i915_gem_object_pin(obj, alignment, true);
3052        if (ret)
3053                return ret;
3054
3055        i915_gem_object_flush_cpu_write_domain(obj);
3056
3057        old_write_domain = obj->base.write_domain;
3058        old_read_domains = obj->base.read_domains;
3059
3060        /* It should now be out of any other write domains, and we can update
3061         * the domain values for our changes.
3062         */
3063        BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3064        obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3065
3066        trace_i915_gem_object_change_domain(obj,
3067                                            old_read_domains,
3068                                            old_write_domain);
3069
3070        return 0;
3071}
3072
3073int
3074i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
3075{
3076        int ret;
3077
3078        if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
3079                return 0;
3080
3081        if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3082                ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
3083                if (ret)
3084                        return ret;
3085        }
3086
3087        ret = i915_gem_object_wait_rendering(obj);
3088        if (ret)
3089                return ret;
3090
3091        /* Ensure that we invalidate the GPU's caches and TLBs. */
3092        obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
3093        return 0;
3094}
3095
3096/**
3097 * Moves a single object to the CPU read, and possibly write domain.
3098 *
3099 * This function returns when the move is complete, including waiting on
3100 * flushes to occur.
3101 */
3102static int
3103i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3104{
3105        uint32_t old_write_domain, old_read_domains;
3106        int ret;
3107
3108        if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3109                return 0;
3110
3111        ret = i915_gem_object_flush_gpu_write_domain(obj);
3112        if (ret)
3113                return ret;
3114
3115        ret = i915_gem_object_wait_rendering(obj);
3116        if (ret)
3117                return ret;
3118
3119        i915_gem_object_flush_gtt_write_domain(obj);
3120
3121        /* If we have a partially-valid cache of the object in the CPU,
3122         * finish invalidating it and free the per-page flags.
3123         */
3124        i915_gem_object_set_to_full_cpu_read_domain(obj);
3125
3126        old_write_domain = obj->base.write_domain;
3127        old_read_domains = obj->base.read_domains;
3128
3129        /* Flush the CPU cache if it's still invalid. */
3130        if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3131                i915_gem_clflush_object(obj);
3132
3133                obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3134        }
3135
3136        /* It should now be out of any other write domains, and we can update
3137         * the domain values for our changes.
3138         */
3139        BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3140
3141        /* If we're writing through the CPU, then the GPU read domains will
3142         * need to be invalidated at next use.
3143         */
3144        if (write) {
3145                obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3146                obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3147        }
3148
3149        trace_i915_gem_object_change_domain(obj,
3150                                            old_read_domains,
3151                                            old_write_domain);
3152
3153        return 0;
3154}
3155
3156/**
3157 * Moves the object from a partially CPU read to a full one.
3158 *
3159 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3160 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3161 */
3162static void
3163i915_gem_object_set_to_full_cpu_read_domain(struct drm_i915_gem_object *obj)
3164{
3165        if (!obj->page_cpu_valid)
3166                return;
3167
3168        /* If we're partially in the CPU read domain, finish moving it in.
3169         */
3170        if (obj->base.read_domains & I915_GEM_DOMAIN_CPU) {
3171                int i;
3172
3173                for (i = 0; i <= (obj->base.size - 1) / PAGE_SIZE; i++) {
3174                        if (obj->page_cpu_valid[i])
3175                                continue;
3176                        drm_clflush_pages(obj->pages + i, 1);
3177                }
3178        }
3179
3180        /* Free the page_cpu_valid mappings which are now stale, whether
3181         * or not we've got I915_GEM_DOMAIN_CPU.
3182         */
3183        kfree(obj->page_cpu_valid);
3184        obj->page_cpu_valid = NULL;
3185}
3186
3187/**
3188 * Set the CPU read domain on a range of the object.
3189 *
3190 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3191 * not entirely valid.  The page_cpu_valid member of the object flags which
3192 * pages have been flushed, and will be respected by
3193 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3194 * of the whole object.
3195 *
3196 * This function returns when the move is complete, including waiting on
3197 * flushes to occur.
3198 */
3199static int
3200i915_gem_object_set_cpu_read_domain_range(struct drm_i915_gem_object *obj,
3201                                          uint64_t offset, uint64_t size)
3202{
3203        uint32_t old_read_domains;
3204        int i, ret;
3205
3206        if (offset == 0 && size == obj->base.size)
3207                return i915_gem_object_set_to_cpu_domain(obj, 0);
3208
3209        ret = i915_gem_object_flush_gpu_write_domain(obj);
3210        if (ret)
3211                return ret;
3212
3213        ret = i915_gem_object_wait_rendering(obj);
3214        if (ret)
3215                return ret;
3216
3217        i915_gem_object_flush_gtt_write_domain(obj);
3218
3219        /* If we're already fully in the CPU read domain, we're done. */
3220        if (obj->page_cpu_valid == NULL &&
3221            (obj->base.read_domains & I915_GEM_DOMAIN_CPU) != 0)
3222                return 0;
3223
3224        /* Otherwise, create/clear the per-page CPU read domain flag if we're
3225         * newly adding I915_GEM_DOMAIN_CPU
3226         */
3227        if (obj->page_cpu_valid == NULL) {
3228                obj->page_cpu_valid = kzalloc(obj->base.size / PAGE_SIZE,
3229                                              GFP_KERNEL);
3230                if (obj->page_cpu_valid == NULL)
3231                        return -ENOMEM;
3232        } else if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
3233                memset(obj->page_cpu_valid, 0, obj->base.size / PAGE_SIZE);
3234
3235        /* Flush the cache on any pages that are still invalid from the CPU's
3236         * perspective.
3237         */
3238        for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3239             i++) {
3240                if (obj->page_cpu_valid[i])
3241                        continue;
3242
3243                drm_clflush_pages(obj->pages + i, 1);
3244
3245                obj->page_cpu_valid[i] = 1;
3246        }
3247
3248        /* It should now be out of any other write domains, and we can update
3249         * the domain values for our changes.
3250         */
3251        BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3252
3253        old_read_domains = obj->base.read_domains;
3254        obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3255
3256        trace_i915_gem_object_change_domain(obj,
3257                                            old_read_domains,
3258                                            obj->base.write_domain);
3259
3260        return 0;
3261}
3262
3263/* Throttle our rendering by waiting until the ring has completed our requests
3264 * emitted over 20 msec ago.
3265 *
3266 * Note that if we were to use the current jiffies each time around the loop,
3267 * we wouldn't escape the function with any frames outstanding if the time to
3268 * render a frame was over 20ms.
3269 *
3270 * This should get us reasonable parallelism between CPU and GPU but also
3271 * relatively low latency when blocking on a particular request to finish.
3272 */
3273static int
3274i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3275{
3276        struct drm_i915_private *dev_priv = dev->dev_private;
3277        struct drm_i915_file_private *file_priv = file->driver_priv;
3278        unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3279        struct drm_i915_gem_request *request;
3280        struct intel_ring_buffer *ring = NULL;
3281        u32 seqno = 0;
3282        int ret;
3283
3284        if (atomic_read(&dev_priv->mm.wedged))
3285                return -EIO;
3286
3287        spin_lock(&file_priv->mm.lock);
3288        list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3289                if (time_after_eq(request->emitted_jiffies, recent_enough))
3290                        break;
3291
3292                ring = request->ring;
3293                seqno = request->seqno;
3294        }
3295        spin_unlock(&file_priv->mm.lock);
3296
3297        if (seqno == 0)
3298                return 0;
3299
3300        ret = 0;
3301        if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3302                /* And wait for the seqno passing without holding any locks and
3303                 * causing extra latency for others. This is safe as the irq
3304                 * generation is designed to be run atomically and so is
3305                 * lockless.
3306                 */
3307                if (ring->irq_get(ring)) {
3308                        ret = wait_event_interruptible(ring->irq_queue,
3309                                                       i915_seqno_passed(ring->get_seqno(ring), seqno)
3310                                                       || atomic_read(&dev_priv->mm.wedged));
3311                        ring->irq_put(ring);
3312
3313                        if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
3314                                ret = -EIO;
3315                } else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
3316                                                             seqno) ||
3317                                    atomic_read(&dev_priv->mm.wedged), 3000)) {
3318                        ret = -EBUSY;
3319                }
3320        }
3321
3322        if (ret == 0)
3323                queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3324
3325        return ret;
3326}
3327
3328int
3329i915_gem_object_pin(struct drm_i915_gem_object *obj,
3330                    uint32_t alignment,
3331                    bool map_and_fenceable)
3332{
3333        struct drm_device *dev = obj->base.dev;
3334        struct drm_i915_private *dev_priv = dev->dev_private;
3335        int ret;
3336
3337        BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
3338        WARN_ON(i915_verify_lists(dev));
3339
3340        if (obj->gtt_space != NULL) {
3341                if ((alignment && obj->gtt_offset & (alignment - 1)) ||
3342                    (map_and_fenceable && !obj->map_and_fenceable)) {
3343                        WARN(obj->pin_count,
3344                             "bo is already pinned with incorrect alignment:"
3345                             " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
3346                             " obj->map_and_fenceable=%d\n",
3347                             obj->gtt_offset, alignment,
3348                             map_and_fenceable,
3349                             obj->map_and_fenceable);
3350                        ret = i915_gem_object_unbind(obj);
3351                        if (ret)
3352                                return ret;
3353                }
3354        }
3355
3356        if (obj->gtt_space == NULL) {
3357                ret = i915_gem_object_bind_to_gtt(obj, alignment,
3358                                                  map_and_fenceable);
3359                if (ret)
3360                        return ret;
3361        }
3362
3363        if (obj->pin_count++ == 0) {
3364                if (!obj->active)
3365                        list_move_tail(&obj->mm_list,
3366                                       &dev_priv->mm.pinned_list);
3367        }
3368        obj->pin_mappable |= map_and_fenceable;
3369
3370        WARN_ON(i915_verify_lists(dev));
3371        return 0;
3372}
3373
3374void
3375i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3376{
3377        struct drm_device *dev = obj->base.dev;
3378        drm_i915_private_t *dev_priv = dev->dev_private;
3379
3380        WARN_ON(i915_verify_lists(dev));
3381        BUG_ON(obj->pin_count == 0);
3382        BUG_ON(obj->gtt_space == NULL);
3383
3384        if (--obj->pin_count == 0) {
3385                if (!obj->active)
3386                        list_move_tail(&obj->mm_list,
3387                                       &dev_priv->mm.inactive_list);
3388                obj->pin_mappable = false;
3389        }
3390        WARN_ON(i915_verify_lists(dev));
3391}
3392
3393int
3394i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3395                   struct drm_file *file)
3396{
3397        struct drm_i915_gem_pin *args = data;
3398        struct drm_i915_gem_object *obj;
3399        int ret;
3400
3401        ret = i915_mutex_lock_interruptible(dev);
3402        if (ret)
3403                return ret;
3404
3405        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3406        if (&obj->base == NULL) {
3407                ret = -ENOENT;
3408                goto unlock;
3409        }
3410
3411        if (obj->madv != I915_MADV_WILLNEED) {
3412                DRM_ERROR("Attempting to pin a purgeable buffer\n");
3413                ret = -EINVAL;
3414                goto out;
3415        }
3416
3417        if (obj->pin_filp != NULL && obj->pin_filp != file) {
3418                DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3419                          args->handle);
3420                ret = -EINVAL;
3421                goto out;
3422        }
3423
3424        obj->user_pin_count++;
3425        obj->pin_filp = file;
3426        if (obj->user_pin_count == 1) {
3427                ret = i915_gem_object_pin(obj, args->alignment, true);
3428                if (ret)
3429                        goto out;
3430        }
3431
3432        /* XXX - flush the CPU caches for pinned objects
3433         * as the X server doesn't manage domains yet
3434         */
3435        i915_gem_object_flush_cpu_write_domain(obj);
3436        args->offset = obj->gtt_offset;
3437out:
3438        drm_gem_object_unreference(&obj->base);
3439unlock:
3440        mutex_unlock(&dev->struct_mutex);
3441        return ret;
3442}
3443
3444int
3445i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3446                     struct drm_file *file)
3447{
3448        struct drm_i915_gem_pin *args = data;
3449        struct drm_i915_gem_object *obj;
3450        int ret;
3451
3452        ret = i915_mutex_lock_interruptible(dev);
3453        if (ret)
3454                return ret;
3455
3456        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3457        if (&obj->base == NULL) {
3458                ret = -ENOENT;
3459                goto unlock;
3460        }
3461
3462        if (obj->pin_filp != file) {
3463                DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3464                          args->handle);
3465                ret = -EINVAL;
3466                goto out;
3467        }
3468        obj->user_pin_count--;
3469        if (obj->user_pin_count == 0) {
3470                obj->pin_filp = NULL;
3471                i915_gem_object_unpin(obj);
3472        }
3473
3474out:
3475        drm_gem_object_unreference(&obj->base);
3476unlock:
3477        mutex_unlock(&dev->struct_mutex);
3478        return ret;
3479}
3480
3481int
3482i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3483                    struct drm_file *file)
3484{
3485        struct drm_i915_gem_busy *args = data;
3486        struct drm_i915_gem_object *obj;
3487        int ret;
3488
3489        ret = i915_mutex_lock_interruptible(dev);
3490        if (ret)
3491                return ret;
3492
3493        obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3494        if (&obj->base == NULL) {
3495                ret = -ENOENT;
3496                goto unlock;
3497        }
3498
3499        /* Count all active objects as busy, even if they are currently not used
3500         * by the gpu. Users of this interface expect objects to eventually
3501         * become non-busy without any further actions, therefore emit any
3502         * necessary flushes here.
3503         */
3504        args->busy = obj->active;
3505        if (args->busy) {
3506                /* Unconditionally flush objects, even when the gpu still uses this
3507                 * object. Userspace calling this function indicates that it wants to
3508                 * use this buffer rather sooner than later, so issuing the required
3509                 * flush earlier is beneficial.
3510                 */
3511                if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
3512                        ret = i915_gem_flush_ring(obj->ring,
3513                                                  0, obj->base.write_domain);
3514                } else if (obj->ring->outstanding_lazy_request ==
3515                           obj->last_rendering_seqno) {
3516                        struct drm_i915_gem_request *request;
3517
3518                        /* This ring is not being cleared by active usage,
3519                         * so emit a request to do so.
3520                         */
3521                        request = kzalloc(sizeof(*request), GFP_KERNEL);
3522                        if (request) {
3523                                ret = i915_add_request(obj->ring, NULL, request);
3524                                if (ret)
3525                                        kfree(request);
3526                        } else
3527                                ret = -ENOMEM;
3528                }
3529
3530                /* Update the active list for the hardware's current position.
3531                 * Otherwise this only updates on a delayed timer or when irqs
3532                 * are actually unmasked, and our working set ends up being
3533                 * larger than required.
3534                 */
3535                i915_gem_retire_requests_ring(obj->ring);
3536
3537                args->busy = obj->active;
3538        }
3539
3540        drm_gem_object_unreference(&obj->base);
3541unlock:
3542        mutex_unlock(&dev->struct_mutex);
3543        return ret;
3544}
3545
/*
 * Throttle ioctl: forwards to i915_gem_ring_throttle() for the calling file
 * and returns its result. The ioctl payload is not used.
 */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret = i915_gem_ring_throttle(dev, file_priv);

	return ret;
}
3552
3553int
3554i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3555                       struct drm_file *file_priv)
3556{
3557        struct drm_i915_gem_madvise *args = data;
3558        struct drm_i915_gem_object *obj;
3559        int ret;
3560
3561        switch (args->madv) {
3562        case I915_MADV_DONTNEED:
3563        case I915_MADV_WILLNEED:
3564            break;
3565        default:
3566            return -EINVAL;
3567        }
3568
3569        ret = i915_mutex_lock_interruptible(dev);
3570        if (ret)
3571                return ret;
3572
3573        obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3574        if (&obj->base == NULL) {
3575                ret = -ENOENT;
3576                goto unlock;
3577        }
3578
3579        if (obj->pin_count) {
3580                ret = -EINVAL;
3581                goto out;
3582        }
3583
3584        if (obj->madv != __I915_MADV_PURGED)
3585                obj->madv = args->madv;
3586
3587        /* if the object is no longer bound, discard its backing storage */
3588        if (i915_gem_object_is_purgeable(obj) &&
3589            obj->gtt_space == NULL)
3590                i915_gem_object_truncate(obj);
3591
3592        args->retained = obj->madv != __I915_MADV_PURGED;
3593
3594out:
3595        drm_gem_object_unreference(&obj->base);
3596unlock:
3597        mutex_unlock(&dev->struct_mutex);
3598        return ret;
3599}
3600
3601struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3602                                                  size_t size)
3603{
3604        struct drm_i915_private *dev_priv = dev->dev_private;
3605        struct drm_i915_gem_object *obj;
3606        struct address_space *mapping;
3607
3608        obj = kzalloc(sizeof(*obj), GFP_KERNEL);
3609        if (obj == NULL)
3610                return NULL;
3611
3612        if (drm_gem_object_init(dev, &obj->base, size) != 0) {
3613                kfree(obj);
3614                return NULL;
3615        }
3616
3617        mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
3618        mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE);
3619
3620        i915_gem_info_add_obj(dev_priv, size);
3621
3622        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3623        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3624
3625        if (IS_GEN6(dev) || IS_GEN7(dev)) {
3626                /* On Gen6, we can have the GPU use the LLC (the CPU
3627                 * cache) for about a 10% performance improvement
3628                 * compared to uncached.  Graphics requests other than
3629                 * display scanout are coherent with the CPU in
3630                 * accessing this cache.  This means in this mode we
3631                 * don't need to clflush on the CPU side, and on the
3632                 * GPU side we only need to flush internal caches to
3633                 * get data visible to the CPU.
3634                 *
3635                 * However, we maintain the display planes as UC, and so
3636                 * need to rebind when first used as such.
3637                 */
3638                obj->cache_level = I915_CACHE_LLC;
3639        } else
3640                obj->cache_level = I915_CACHE_NONE;
3641
3642        obj->base.driver_private = NULL;
3643        obj->fence_reg = I915_FENCE_REG_NONE;
3644        INIT_LIST_HEAD(&obj->mm_list);
3645        INIT_LIST_HEAD(&obj->gtt_list);
3646        INIT_LIST_HEAD(&obj->ring_list);
3647        INIT_LIST_HEAD(&obj->exec_list);
3648        INIT_LIST_HEAD(&obj->gpu_write_list);
3649        obj->madv = I915_MADV_WILLNEED;
3650        /* Avoid an unnecessary call to unbind on the first bind. */
3651        obj->map_and_fenceable = true;
3652
3653        return obj;
3654}
3655
/*
 * Object-init hook that must never be reached: it unconditionally BUG()s.
 * The trailing return only satisfies the int return type.
 */
int i915_gem_init_object(struct drm_gem_object *obj)
{
	BUG();
	return 0;
}
3662
3663static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
3664{
3665        struct drm_device *dev = obj->base.dev;
3666        drm_i915_private_t *dev_priv = dev->dev_private;
3667        int ret;
3668
3669        ret = i915_gem_object_unbind(obj);
3670        if (ret == -ERESTARTSYS) {
3671                list_move(&obj->mm_list,
3672                          &dev_priv->mm.deferred_free_list);
3673                return;
3674        }
3675
3676        trace_i915_gem_object_destroy(obj);
3677
3678        if (obj->base.map_list.map)
3679                drm_gem_free_mmap_offset(&obj->base);
3680
3681        drm_gem_object_release(&obj->base);
3682        i915_gem_info_remove_obj(dev_priv, obj->base.size);
3683
3684        kfree(obj->page_cpu_valid);
3685        kfree(obj->bit_17);
3686        kfree(obj);
3687}
3688
3689void i915_gem_free_object(struct drm_gem_object *gem_obj)
3690{
3691        struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
3692        struct drm_device *dev = obj->base.dev;
3693
3694        while (obj->pin_count > 0)
3695                i915_gem_object_unpin(obj);
3696
3697        if (obj->phys_obj)
3698                i915_gem_detach_phys_object(dev, obj);
3699
3700        i915_gem_free_object_tail(obj);
3701}
3702
3703int
3704i915_gem_idle(struct drm_device *dev)
3705{
3706        drm_i915_private_t *dev_priv = dev->dev_private;
3707        int ret;
3708
3709        mutex_lock(&dev->struct_mutex);
3710
3711        if (dev_priv->mm.suspended) {
3712                mutex_unlock(&dev->struct_mutex);
3713                return 0;
3714        }
3715
3716        ret = i915_gpu_idle(dev);
3717        if (ret) {
3718                mutex_unlock(&dev->struct_mutex);
3719                return ret;
3720        }
3721
3722        /* Under UMS, be paranoid and evict. */
3723        if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
3724                ret = i915_gem_evict_inactive(dev, false);
3725                if (ret) {
3726                        mutex_unlock(&dev->struct_mutex);
3727                        return ret;
3728                }
3729        }
3730
3731        i915_gem_reset_fences(dev);
3732
3733        /* Hack!  Don't let anybody do execbuf while we don't control the chip.
3734         * We need to replace this with a semaphore, or something.
3735         * And not confound mm.suspended!
3736         */
3737        dev_priv->mm.suspended = 1;
3738        del_timer_sync(&dev_priv->hangcheck_timer);
3739
3740        i915_kernel_lost_context(dev);
3741        i915_gem_cleanup_ringbuffer(dev);
3742
3743        mutex_unlock(&dev->struct_mutex);
3744
3745        /* Cancel the retire work handler, which should be idle now. */
3746        cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3747
3748        return 0;
3749}
3750
3751int
3752i915_gem_init_ringbuffer(struct drm_device *dev)
3753{
3754        drm_i915_private_t *dev_priv = dev->dev_private;
3755        int ret;
3756
3757        ret = intel_init_render_ring_buffer(dev);
3758        if (ret)
3759                return ret;
3760
3761        if (HAS_BSD(dev)) {
3762                ret = intel_init_bsd_ring_buffer(dev);
3763                if (ret)
3764                        goto cleanup_render_ring;
3765        }
3766
3767        if (HAS_BLT(dev)) {
3768                ret = intel_init_blt_ring_buffer(dev);
3769                if (ret)
3770                        goto cleanup_bsd_ring;
3771        }
3772
3773        dev_priv->next_seqno = 1;
3774
3775        return 0;
3776
3777cleanup_bsd_ring:
3778        intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
3779cleanup_render_ring:
3780        intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3781        return ret;
3782}
3783
3784void
3785i915_gem_cleanup_ringbuffer(struct drm_device *dev)
3786{
3787        drm_i915_private_t *dev_priv = dev->dev_private;
3788        int i;
3789
3790        for (i = 0; i < I915_NUM_RINGS; i++)
3791                intel_cleanup_ring_buffer(&dev_priv->ring[i]);
3792}
3793
3794int
3795i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
3796                       struct drm_file *file_priv)
3797{
3798        drm_i915_private_t *dev_priv = dev->dev_private;
3799        int ret, i;
3800
3801        if (drm_core_check_feature(dev, DRIVER_MODESET))
3802                return 0;
3803
3804        if (atomic_read(&dev_priv->mm.wedged)) {
3805                DRM_ERROR("Reenabling wedged hardware, good luck\n");
3806                atomic_set(&dev_priv->mm.wedged, 0);
3807        }
3808
3809        mutex_lock(&dev->struct_mutex);
3810        dev_priv->mm.suspended = 0;
3811
3812        ret = i915_gem_init_ringbuffer(dev);
3813        if (ret != 0) {
3814                mutex_unlock(&dev->struct_mutex);
3815                return ret;
3816        }
3817
3818        BUG_ON(!list_empty(&dev_priv->mm.active_list));
3819        BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
3820        BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
3821        for (i = 0; i < I915_NUM_RINGS; i++) {
3822                BUG_ON(!list_empty(&dev_priv->ring[i].active_list));
3823                BUG_ON(!list_empty(&dev_priv->ring[i].request_list));
3824        }
3825        mutex_unlock(&dev->struct_mutex);
3826
3827        ret = drm_irq_install(dev);
3828        if (ret)
3829                goto cleanup_ringbuffer;
3830
3831        return 0;
3832
3833cleanup_ringbuffer:
3834        mutex_lock(&dev->struct_mutex);
3835        i915_gem_cleanup_ringbuffer(dev);
3836        dev_priv->mm.suspended = 1;
3837        mutex_unlock(&dev->struct_mutex);
3838
3839        return ret;
3840}
3841
3842int
3843i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
3844                       struct drm_file *file_priv)
3845{
3846        if (drm_core_check_feature(dev, DRIVER_MODESET))
3847                return 0;
3848
3849        drm_irq_uninstall(dev);
3850        return i915_gem_idle(dev);
3851}
3852
3853void
3854i915_gem_lastclose(struct drm_device *dev)
3855{
3856        int ret;
3857
3858        if (drm_core_check_feature(dev, DRIVER_MODESET))
3859                return;
3860
3861        ret = i915_gem_idle(dev);
3862        if (ret)
3863                DRM_ERROR("failed to idle hardware: %d\n", ret);
3864}
3865
3866static void
3867init_ring_lists(struct intel_ring_buffer *ring)
3868{
3869        INIT_LIST_HEAD(&ring->active_list);
3870        INIT_LIST_HEAD(&ring->request_list);
3871        INIT_LIST_HEAD(&ring->gpu_write_list);
3872}
3873
3874void
3875i915_gem_load(struct drm_device *dev)
3876{
3877        int i;
3878        drm_i915_private_t *dev_priv = dev->dev_private;
3879
3880        INIT_LIST_HEAD(&dev_priv->mm.active_list);
3881        INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
3882        INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
3883        INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
3884        INIT_LIST_HEAD(&dev_priv->mm.fence_list);
3885        INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
3886        INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
3887        for (i = 0; i < I915_NUM_RINGS; i++)
3888                init_ring_lists(&dev_priv->ring[i]);
3889        for (i = 0; i < I915_MAX_NUM_FENCES; i++)
3890                INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
3891        INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
3892                          i915_gem_retire_work_handler);
3893        init_completion(&dev_priv->error_completion);
3894
3895        /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
3896        if (IS_GEN3(dev)) {
3897                u32 tmp = I915_READ(MI_ARB_STATE);
3898                if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
3899                        /* arb state is a masked write, so set bit + bit in mask */
3900                        tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
3901                        I915_WRITE(MI_ARB_STATE, tmp);
3902                }
3903        }
3904
3905        dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
3906
3907        /* Old X drivers will take 0-2 for front, back, depth buffers */
3908        if (!drm_core_check_feature(dev, DRIVER_MODESET))
3909                dev_priv->fence_reg_start = 3;
3910
3911        if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
3912                dev_priv->num_fence_regs = 16;
3913        else
3914                dev_priv->num_fence_regs = 8;
3915
3916        /* Initialize fence registers to zero */
3917        for (i = 0; i < dev_priv->num_fence_regs; i++) {
3918                i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]);
3919        }
3920
3921        i915_gem_detect_bit_6_swizzle(dev);
3922        init_waitqueue_head(&dev_priv->pending_flip_queue);
3923
3924        dev_priv->mm.interruptible = true;
3925
3926        dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
3927        dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
3928        register_shrinker(&dev_priv->mm.inactive_shrinker);
3929}
3930
3931/*
3932 * Create a physically contiguous memory object for this object
3933 * e.g. for cursor + overlay regs
3934 */
3935static int i915_gem_init_phys_object(struct drm_device *dev,
3936                                     int id, int size, int align)
3937{
3938        drm_i915_private_t *dev_priv = dev->dev_private;
3939        struct drm_i915_gem_phys_object *phys_obj;
3940        int ret;
3941
3942        if (dev_priv->mm.phys_objs[id - 1] || !size)
3943                return 0;
3944
3945        phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
3946        if (!phys_obj)
3947                return -ENOMEM;
3948
3949        phys_obj->id = id;
3950
3951        phys_obj->handle = drm_pci_alloc(dev, size, align);
3952        if (!phys_obj->handle) {
3953                ret = -ENOMEM;
3954                goto kfree_obj;
3955        }
3956#ifdef CONFIG_X86
3957        set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3958#endif
3959
3960        dev_priv->mm.phys_objs[id - 1] = phys_obj;
3961
3962        return 0;
3963kfree_obj:
3964        kfree(phys_obj);
3965        return ret;
3966}
3967
3968static void i915_gem_free_phys_object(struct drm_device *dev, int id)
3969{
3970        drm_i915_private_t *dev_priv = dev->dev_private;
3971        struct drm_i915_gem_phys_object *phys_obj;
3972
3973        if (!dev_priv->mm.phys_objs[id - 1])
3974                return;
3975
3976        phys_obj = dev_priv->mm.phys_objs[id - 1];
3977        if (phys_obj->cur_obj) {
3978                i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
3979        }
3980
3981#ifdef CONFIG_X86
3982        set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
3983#endif
3984        drm_pci_free(dev, phys_obj->handle);
3985        kfree(phys_obj);
3986        dev_priv->mm.phys_objs[id - 1] = NULL;
3987}
3988
3989void i915_gem_free_all_phys_object(struct drm_device *dev)
3990{
3991        int i;
3992
3993        for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
3994                i915_gem_free_phys_object(dev, i);
3995}
3996
3997void i915_gem_detach_phys_object(struct drm_device *dev,
3998                                 struct drm_i915_gem_object *obj)
3999{
4000        struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4001        char *vaddr;
4002        int i;
4003        int page_count;
4004
4005        if (!obj->phys_obj)
4006                return;
4007        vaddr = obj->phys_obj->handle->vaddr;
4008
4009        page_count = obj->base.size / PAGE_SIZE;
4010        for (i = 0; i < page_count; i++) {
4011                struct page *page = shmem_read_mapping_page(mapping, i);
4012                if (!IS_ERR(page)) {
4013                        char *dst = kmap_atomic(page);
4014                        memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
4015                        kunmap_atomic(dst);
4016
4017                        drm_clflush_pages(&page, 1);
4018
4019                        set_page_dirty(page);
4020                        mark_page_accessed(page);
4021                        page_cache_release(page);
4022                }
4023        }
4024        intel_gtt_chipset_flush();
4025
4026        obj->phys_obj->cur_obj = NULL;
4027        obj->phys_obj = NULL;
4028}
4029
4030int
4031i915_gem_attach_phys_object(struct drm_device *dev,
4032                            struct drm_i915_gem_object *obj,
4033                            int id,
4034                            int align)
4035{
4036        struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
4037        drm_i915_private_t *dev_priv = dev->dev_private;
4038        int ret = 0;
4039        int page_count;
4040        int i;
4041
4042        if (id > I915_MAX_PHYS_OBJECT)
4043                return -EINVAL;
4044
4045        if (obj->phys_obj) {
4046                if (obj->phys_obj->id == id)
4047                        return 0;
4048                i915_gem_detach_phys_object(dev, obj);
4049        }
4050
4051        /* create a new object */
4052        if (!dev_priv->mm.phys_objs[id - 1]) {
4053                ret = i915_gem_init_phys_object(dev, id,
4054                                                obj->base.size, align);
4055                if (ret) {
4056                        DRM_ERROR("failed to init phys object %d size: %zu\n",
4057                                  id, obj->base.size);
4058                        return ret;
4059                }
4060        }
4061
4062        /* bind to the object */
4063        obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
4064        obj->phys_obj->cur_obj = obj;
4065
4066        page_count = obj->base.size / PAGE_SIZE;
4067
4068        for (i = 0; i < page_count; i++) {
4069                struct page *page;
4070                char *dst, *src;
4071
4072                page = shmem_read_mapping_page(mapping, i);
4073                if (IS_ERR(page))
4074                        return PTR_ERR(page);
4075
4076                src = kmap_atomic(page);
4077                dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4078                memcpy(dst, src, PAGE_SIZE);
4079                kunmap_atomic(src);
4080
4081                mark_page_accessed(page);
4082                page_cache_release(page);
4083        }
4084
4085        return 0;
4086}
4087
4088static int
4089i915_gem_phys_pwrite(struct drm_device *dev,
4090                     struct drm_i915_gem_object *obj,
4091                     struct drm_i915_gem_pwrite *args,
4092                     struct drm_file *file_priv)
4093{
4094        void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
4095        char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;
4096
4097        if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
4098                unsigned long unwritten;
4099
4100                /* The physical object once assigned is fixed for the lifetime
4101                 * of the obj, so we can safely drop the lock and continue
4102                 * to access vaddr.
4103                 */
4104                mutex_unlock(&dev->struct_mutex);
4105                unwritten = copy_from_user(vaddr, user_data, args->size);
4106                mutex_lock(&dev->struct_mutex);
4107                if (unwritten)
4108                        return -EFAULT;
4109        }
4110
4111        intel_gtt_chipset_flush();
4112        return 0;
4113}
4114
4115void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4116{
4117        struct drm_i915_file_private *file_priv = file->driver_priv;
4118
4119        /* Clean up our request list when the client is going away, so that
4120         * later retire_requests won't dereference our soon-to-be-gone
4121         * file_priv.
4122         */
4123        spin_lock(&file_priv->mm.lock);
4124        while (!list_empty(&file_priv->mm.request_list)) {
4125                struct drm_i915_gem_request *request;
4126
4127                request = list_first_entry(&file_priv->mm.request_list,
4128                                           struct drm_i915_gem_request,
4129                                           client_list);
4130                list_del(&request->client_list);
4131                request->file_priv = NULL;
4132        }
4133        spin_unlock(&file_priv->mm.lock);
4134}
4135
4136static int
4137i915_gpu_is_active(struct drm_device *dev)
4138{
4139        drm_i915_private_t *dev_priv = dev->dev_private;
4140        int lists_empty;
4141
4142        lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
4143                      list_empty(&dev_priv->mm.active_list);
4144
4145        return !lists_empty;
4146}
4147
4148static int
4149i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
4150{
4151        struct drm_i915_private *dev_priv =
4152                container_of(shrinker,
4153                             struct drm_i915_private,
4154                             mm.inactive_shrinker);
4155        struct drm_device *dev = dev_priv->dev;
4156        struct drm_i915_gem_object *obj, *next;
4157        int nr_to_scan = sc->nr_to_scan;
4158        int cnt;
4159
4160        if (!mutex_trylock(&dev->struct_mutex))
4161                return 0;
4162
4163        /* "fast-path" to count number of available objects */
4164        if (nr_to_scan == 0) {
4165                cnt = 0;
4166                list_for_each_entry(obj,
4167                                    &dev_priv->mm.inactive_list,
4168                                    mm_list)
4169                        cnt++;
4170                mutex_unlock(&dev->struct_mutex);
4171                return cnt / 100 * sysctl_vfs_cache_pressure;
4172        }
4173
4174rescan:
4175        /* first scan for clean buffers */
4176        i915_gem_retire_requests(dev);
4177
4178        list_for_each_entry_safe(obj, next,
4179                                 &dev_priv->mm.inactive_list,
4180                                 mm_list) {
4181                if (i915_gem_object_is_purgeable(obj)) {
4182                        if (i915_gem_object_unbind(obj) == 0 &&
4183                            --nr_to_scan == 0)
4184                                break;
4185                }
4186        }
4187
4188        /* second pass, evict/count anything still on the inactive list */
4189        cnt = 0;
4190        list_for_each_entry_safe(obj, next,
4191                                 &dev_priv->mm.inactive_list,
4192                                 mm_list) {
4193                if (nr_to_scan &&
4194                    i915_gem_object_unbind(obj) == 0)
4195                        nr_to_scan--;
4196                else
4197                        cnt++;
4198        }
4199
4200        if (nr_to_scan && i915_gpu_is_active(dev)) {
4201                /*
4202                 * We are desperate for pages, so as a last resort, wait
4203                 * for the GPU to finish and discard whatever we can.
4204                 * This has a dramatic impact to reduce the number of
4205                 * OOM-killer events whilst running the GPU aggressively.
4206                 */
4207                if (i915_gpu_idle(dev) == 0)
4208                        goto rescan;
4209        }
4210        mutex_unlock(&dev->struct_mutex);
4211        return cnt / 100 * sysctl_vfs_cache_pressure;
4212}
4213
lxr.linux.no kindly hosted by Redpill Linpro AS, provider of Linux consulting and operations services since 1995.