Index: head/sys/dev/drm2/i915/i915_gem.c
===================================================================
--- head/sys/dev/drm2/i915/i915_gem.c (revision 287173)
+++ head/sys/dev/drm2/i915/i915_gem.c (revision 287174)
@@ -1,4362 +1,4376 @@
-/*-
+/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 * Eric Anholt
 *
 * Copyright (c) 2011 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Konstantin Belousov under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #define __user #define __force #define __iomem #define __must_check #define to_user_ptr(x) ((void *)(uintptr_t)(x)) #define offset_in_page(x) ((x) & PAGE_MASK) #define page_to_phys(x) VM_PAGE_TO_PHYS(x) static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, unsigned alignment, bool map_and_fenceable); static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file); static void i915_gem_write_fence(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj); static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, struct drm_i915_fence_reg *fence, bool enable); static void i915_gem_lowmem(void *arg); static void i915_gem_object_truncate(struct drm_i915_gem_object *obj); static int i915_gem_object_get_pages_range(struct drm_i915_gem_object *obj, off_t start, off_t end); static void i915_gem_object_put_pages_range(struct drm_i915_gem_object *obj, off_t start, off_t end); static vm_page_t i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex, bool *fresh); MALLOC_DEFINE(DRM_I915_GEM, "i915gem", "Allocations from i915 gem"); long i915_gem_wired_pages_cnt; static bool cpu_cache_is_coherent(struct drm_device *dev, enum i915_cache_level level) { return HAS_LLC(dev) || level != I915_CACHE_NONE; } static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return true; return obj->pin_display; } static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) { if (obj->tiling_mode) i915_gem_release_mmap(obj); /* As we do not have an associated fence register, we will force * a tiling change if we ever need to acquire one. */ obj->fence_dirty = false; obj->fence_reg = I915_FENCE_REG_NONE; } /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) { dev_priv->mm.object_count++; dev_priv->mm.object_memory += size; } static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv, size_t size) { dev_priv->mm.object_count--; dev_priv->mm.object_memory -= size; } static int i915_gem_wait_for_error(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int ret; if (!atomic_load_acq_int(&dev_priv->mm.wedged)) return (0); mtx_lock(&dev_priv->error_completion_lock); while (dev_priv->error_completion == 0) { ret = -msleep(&dev_priv->error_completion, &dev_priv->error_completion_lock, PCATCH, "915wco", 0); if (ret == -ERESTART) ret = -ERESTARTSYS; if (ret != 0) { mtx_unlock(&dev_priv->error_completion_lock); return (ret); } } mtx_unlock(&dev_priv->error_completion_lock); if (atomic_load_acq_int(&dev_priv->mm.wedged)) { /* GPU is hung, bump the completion count to account for * the token we just consumed so that we never hit zero and * end up waiting upon a subsequent completion event that * will never happen. */ mtx_lock(&dev_priv->error_completion_lock); dev_priv->error_completion++; mtx_unlock(&dev_priv->error_completion_lock); } return 0; } int i915_mutex_lock_interruptible(struct drm_device *dev) { int ret; ret = i915_gem_wait_for_error(dev); if (ret) return ret; /* * interruptible shall it be. 
might indeed be if dev_lock is * changed to sx */ ret = -sx_xlock_sig(&dev->dev_struct_lock); if (ret) return ret; return 0; } -static bool +static inline bool i915_gem_object_is_inactive(struct drm_i915_gem_object *obj) { - - return !obj->active; + return obj->gtt_space && !obj->active; } int i915_gem_init_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_init *args = data; drm_i915_private_t *dev_priv = dev->dev_private; int ret; if (drm_core_check_feature(dev, DRIVER_MODESET)) return -ENODEV; if (args->gtt_start >= args->gtt_end || (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1)) return -EINVAL; if (mtx_initialized(&dev_priv->mm.gtt_space.unused_lock)) return -EBUSY; /* GEM with user mode setting was never supported on ilk and later. */ if (INTEL_INFO(dev)->gen >= 5) return -ENODEV; /* * XXXKIB. The second-time initialization should be guarded * against. */ DRM_LOCK(dev); ret = i915_gem_init_global_gtt(dev, args->gtt_start, args->gtt_end, args->gtt_end); DRM_UNLOCK(dev); return ret; } int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_get_aperture *args = data; struct drm_i915_gem_object *obj; size_t pinned; pinned = 0; DRM_LOCK(dev); list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) if (obj->pin_count) pinned += obj->gtt_space->size; DRM_UNLOCK(dev); args->aper_size = dev_priv->mm.gtt_total; args->aper_available_size = args->aper_size - pinned; return 0; } static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, uint32_t *handle_p) { struct drm_i915_gem_object *obj; int ret; u32 handle; size = roundup(size, PAGE_SIZE); if (size == 0) return -EINVAL; /* Allocate the new object */ obj = i915_gem_alloc_object(dev, size); if (obj == NULL) return -ENOMEM; ret = drm_gem_handle_create(file, &obj->base, &handle); if (ret) { drm_gem_object_release(&obj->base); i915_gem_info_remove_obj(dev->dev_private, obj->base.size); free(obj, DRM_I915_GEM); return ret; } /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference(&obj->base); CTR2(KTR_DRM, "object_create %p %x", obj, size); *handle_p = handle; return 0; } int i915_gem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args) { /* have to work out size/pitch and return them */ args->pitch = roundup2(args->width * ((args->bpp + 7) / 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, args->size, &args->handle); } int i915_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev, uint32_t handle) { return drm_gem_handle_delete(file, handle); } /** * Creates a new mm object and returns a handle to it. */ int i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_create *args = data; return i915_gem_create(file, dev, args->size, &args->handle); } static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj) { drm_i915_private_t *dev_priv = obj->base.dev->dev_private; return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && obj->tiling_mode != I915_TILING_NONE; } static inline int __copy_to_user_inatomic(void __user *to, const void *from, unsigned n) { return (copyout_nofault(from, to, n) != 0 ? n : 0); } static inline unsigned long __copy_from_user_inatomic_nocache(void *to, const void __user *from, unsigned long n) { /* * XXXKIB. 
Equivalent Linux function is implemented using * MOVNTI for aligned moves. For unaligned head and tail, * normal move is performed. As such, it is not incorrect, if * only somewhat slower, to use normal copyin. All uses * except shmem_pwrite_fast() have the destination mapped WC. */ return ((copyin_nofault(__DECONST(void *, from), to, n) != 0 ? n : 0)); } static inline int fault_in_multipages_readable(const char __user *uaddr, int size) { char c; int ret = 0; const char __user *end = uaddr + size - 1; if (unlikely(size == 0)) return ret; while (uaddr <= end) { ret = -copyin(uaddr, &c, 1); if (ret != 0) return -EFAULT; uaddr += PAGE_SIZE; } /* Check whether the range spilled into the next page. */ if (((unsigned long)uaddr & ~PAGE_MASK) == ((unsigned long)end & ~PAGE_MASK)) { ret = -copyin(end, &c, 1); } return ret; } static inline int fault_in_multipages_writeable(char __user *uaddr, int size) { int ret = 0; char __user *end = uaddr + size - 1; if (unlikely(size == 0)) return ret; /* * Writing zeroes into userspace here is OK, because we know that if * the zero gets there, we'll be overwriting it. */ while (uaddr <= end) { ret = subyte(uaddr, 0); if (ret != 0) return -EFAULT; uaddr += PAGE_SIZE; } /* Check whether the range spilled into the next page. */ if (((unsigned long)uaddr & ~PAGE_MASK) == ((unsigned long)end & ~PAGE_MASK)) ret = subyte(end, 0); return ret; } static inline int __copy_to_user_swizzled(char __user *cpu_vaddr, const char *gpu_vaddr, int gpu_offset, int length) { int ret, cpu_offset = 0; while (length > 0) { int cacheline_end = roundup2(gpu_offset + 1, 64); int this_length = min(cacheline_end - gpu_offset, length); int swizzled_gpu_offset = gpu_offset ^ 64; ret = __copy_to_user(cpu_vaddr + cpu_offset, gpu_vaddr + swizzled_gpu_offset, this_length); if (ret) return ret + length; cpu_offset += this_length; gpu_offset += this_length; length -= this_length; } return 0; } static inline int __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, const char __user *cpu_vaddr, int length) { int ret, cpu_offset = 0; while (length > 0) { int cacheline_end = roundup2(gpu_offset + 1, 64); int this_length = min(cacheline_end - gpu_offset, length); int swizzled_gpu_offset = gpu_offset ^ 64; ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset, cpu_vaddr + cpu_offset, this_length); if (ret) return ret + length; cpu_offset += this_length; gpu_offset += this_length; length -= this_length; } return 0; } /* Per-page copy function for the shmem pread fastpath. * Flushes invalid cachelines before reading the target if * needs_clflush is set. */ static int shmem_pread_fast(vm_page_t page, int shmem_page_offset, int page_length, char __user *user_data, bool page_do_bit17_swizzling, bool needs_clflush) { char *vaddr; struct sf_buf *sf; int ret; if (unlikely(page_do_bit17_swizzling)) return -EINVAL; sched_pin(); sf = sf_buf_alloc(page, SFB_NOWAIT | SFB_CPUPRIVATE); if (sf == NULL) { sched_unpin(); return (-EFAULT); } vaddr = (char *)sf_buf_kva(sf); if (needs_clflush) drm_clflush_virt_range(vaddr + shmem_page_offset, page_length); ret = __copy_to_user_inatomic(user_data, vaddr + shmem_page_offset, page_length); sf_buf_free(sf); sched_unpin(); return ret ? -EFAULT : 0; } static void shmem_clflush_swizzled_range(char *addr, unsigned long length, bool swizzled) { if (unlikely(swizzled)) { unsigned long start = (unsigned long) addr; unsigned long end = (unsigned long) addr + length; /* For swizzling simply ensure that we always flush both * channels. Lame, but simple and it works. 
Swizzled * pwrite/pread is far from a hotpath - current userspace * doesn't use it at all. */ start = rounddown2(start, 128); end = roundup2(end, 128); drm_clflush_virt_range((void *)start, end - start); } else { drm_clflush_virt_range(addr, length); } } /* Only difference to the fast-path function is that this can handle bit17 * and uses non-atomic copy and kmap functions. */ static int shmem_pread_slow(vm_page_t page, int shmem_page_offset, int page_length, char __user *user_data, bool page_do_bit17_swizzling, bool needs_clflush) { char *vaddr; struct sf_buf *sf; int ret; sf = sf_buf_alloc(page, 0); vaddr = (char *)sf_buf_kva(sf); if (needs_clflush) shmem_clflush_swizzled_range(vaddr + shmem_page_offset, page_length, page_do_bit17_swizzling); if (page_do_bit17_swizzling) ret = __copy_to_user_swizzled(user_data, vaddr, shmem_page_offset, page_length); else ret = __copy_to_user(user_data, vaddr + shmem_page_offset, page_length); sf_buf_free(sf); return ret ? - EFAULT : 0; } static int i915_gem_shmem_pread(struct drm_device *dev, struct drm_i915_gem_object *obj, struct drm_i915_gem_pread *args, struct drm_file *file) { char __user *user_data; ssize_t remain, sremain; off_t offset, soffset; int shmem_page_offset, page_length, ret = 0; int obj_do_bit17_swizzling, page_do_bit17_swizzling; int prefaulted = 0; int needs_clflush = 0; user_data = to_user_ptr(args->data_ptr); sremain = remain = args->size; obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { /* If we're not in the cpu read domain, set ourself into the gtt * read domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will dirty the data * anyway again before the next pread happens. */ needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level); ret = i915_gem_object_set_to_gtt_domain(obj, false); if (ret) return ret; } soffset = offset = args->offset; ret = i915_gem_object_get_pages_range(obj, soffset, soffset + sremain); if (ret) return ret; i915_gem_object_pin_pages(obj); VM_OBJECT_WLOCK(obj->base.vm_obj); for (vm_page_t page = vm_page_find_least(obj->base.vm_obj, OFF_TO_IDX(offset));; page = vm_page_next(page)) { VM_OBJECT_WUNLOCK(obj->base.vm_obj); if (remain <= 0) break; /* Operation in this page * * shmem_page_offset = offset within page in shmem file * page_length = bytes to copy for this page */ shmem_page_offset = offset_in_page(offset); page_length = remain; if ((shmem_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - shmem_page_offset; page_do_bit17_swizzling = obj_do_bit17_swizzling && (page_to_phys(page) & (1 << 17)) != 0; ret = shmem_pread_fast(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, needs_clflush); if (ret == 0) goto next_page; DRM_UNLOCK(dev); if (likely(!i915_prefault_disable) && !prefaulted) { ret = fault_in_multipages_writeable(user_data, remain); /* Userspace is tricking us, but we've already clobbered * its pages with the prefault and promised to write the * data up to the first fault. Hence ignore any errors * and just continue. 
*/ (void)ret; prefaulted = 1; } ret = shmem_pread_slow(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, needs_clflush); DRM_LOCK(dev); next_page: vm_page_reference(page); if (ret) goto out; remain -= page_length; user_data += page_length; offset += page_length; VM_OBJECT_WLOCK(obj->base.vm_obj); } out: i915_gem_object_unpin_pages(obj); i915_gem_object_put_pages_range(obj, soffset, soffset + sremain); return ret; } /** * Reads data from the object referenced by handle. * * On error, the contents of *data are undefined. */ int i915_gem_pread_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_pread *args = data; struct drm_i915_gem_object *obj; int ret = 0; if (args->size == 0) return 0; if (!useracc(to_user_ptr(args->data_ptr), args->size, VM_PROT_WRITE)) return -EFAULT; ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { ret = -ENOENT; goto unlock; } /* Bounds check source. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; goto out; } #if 1 KIB_NOTYET(); #else /* prime objects have no backing filp to GEM pread/pwrite * pages from. */ if (!obj->base.filp) { ret = -EINVAL; goto out; } #endif CTR3(KTR_DRM, "pread %p %jx %jx", obj, args->offset, args->size); ret = i915_gem_shmem_pread(dev, obj, args, file); out: drm_gem_object_unreference(&obj->base); unlock: DRM_UNLOCK(dev); return ret; } /* This is the fast write path which cannot handle * page faults in the source data */ static inline int fast_user_write(struct drm_device *dev, off_t page_base, int page_offset, char __user *user_data, int length) { void __iomem *vaddr_atomic; void *vaddr; unsigned long unwritten; vaddr_atomic = pmap_mapdev_attr(dev->agp->base + page_base, length, PAT_WRITE_COMBINING); /* We can use the cpu mem copy function because this is X86. */ vaddr = (char __force*)vaddr_atomic + page_offset; unwritten = __copy_from_user_inatomic_nocache(vaddr, user_data, length); pmap_unmapdev((vm_offset_t)vaddr_atomic, length); return unwritten; } /** * This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. */ static int i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { ssize_t remain; off_t offset, page_base; char __user *user_data; int page_offset, page_length, ret; ret = i915_gem_object_pin(obj, 0, true); /* XXXKIB ret = i915_gem_obj_ggtt_pin(obj, 0, true, true); */ if (ret) goto out; ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) goto out_unpin; ret = i915_gem_object_put_fence(obj); if (ret) goto out_unpin; user_data = to_user_ptr(args->data_ptr); remain = args->size; offset = obj->gtt_offset + args->offset; while (remain > 0) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ page_base = offset & ~PAGE_MASK; page_offset = offset_in_page(offset); page_length = remain; if ((page_offset + remain) > PAGE_SIZE) page_length = PAGE_SIZE - page_offset; /* If we get a fault while copying data, then (presumably) our * source page isn't available. Return the error and we'll * retry in the slow path. 
*/ if (fast_user_write(dev, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; goto out_unpin; } remain -= page_length; user_data += page_length; offset += page_length; } out_unpin: i915_gem_object_unpin(obj); out: return ret; } /* Per-page copy function for the shmem pwrite fastpath. * Flushes invalid cachelines before writing to the target if * needs_clflush_before is set and flushes out any written cachelines after * writing if needs_clflush is set. */ static int shmem_pwrite_fast(vm_page_t page, int shmem_page_offset, int page_length, char __user *user_data, bool page_do_bit17_swizzling, bool needs_clflush_before, bool needs_clflush_after) { char *vaddr; struct sf_buf *sf; int ret; if (unlikely(page_do_bit17_swizzling)) return -EINVAL; sched_pin(); sf = sf_buf_alloc(page, SFB_NOWAIT | SFB_CPUPRIVATE); if (sf == NULL) { sched_unpin(); return (-EFAULT); } vaddr = (char *)sf_buf_kva(sf); if (needs_clflush_before) drm_clflush_virt_range(vaddr + shmem_page_offset, page_length); ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset, user_data, page_length); if (needs_clflush_after) drm_clflush_virt_range(vaddr + shmem_page_offset, page_length); sf_buf_free(sf); sched_unpin(); return ret ? -EFAULT : 0; } /* Only difference to the fast-path function is that this can handle bit17 * and uses non-atomic copy and kmap functions. */ static int shmem_pwrite_slow(vm_page_t page, int shmem_page_offset, int page_length, char __user *user_data, bool page_do_bit17_swizzling, bool needs_clflush_before, bool needs_clflush_after) { char *vaddr; struct sf_buf *sf; int ret; sf = sf_buf_alloc(page, 0); vaddr = (char *)sf_buf_kva(sf); if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) shmem_clflush_swizzled_range(vaddr + shmem_page_offset, page_length, page_do_bit17_swizzling); if (page_do_bit17_swizzling) ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, user_data, page_length); else ret = __copy_from_user(vaddr + shmem_page_offset, user_data, page_length); if (needs_clflush_after) shmem_clflush_swizzled_range(vaddr + shmem_page_offset, page_length, page_do_bit17_swizzling); sf_buf_free(sf); return ret ? -EFAULT : 0; } static int i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { ssize_t remain, sremain; off_t offset, soffset; char __user *user_data; int shmem_page_offset, page_length, ret = 0; int obj_do_bit17_swizzling, page_do_bit17_swizzling; int hit_slowpath = 0; int needs_clflush_after = 0; int needs_clflush_before = 0; user_data = to_user_ptr(args->data_ptr); sremain = remain = args->size; obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { /* If we're not in the cpu write domain, set ourself into the gtt * write domain and manually flush cachelines (if required). This * optimizes for the case when the gpu will use the data * right away and we therefore have to clflush anyway. */ needs_clflush_after = cpu_write_needs_clflush(obj); ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) return ret; } /* Same trick applies to invalidate partially written cachelines read * before writing. 
*/ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) needs_clflush_before = !cpu_cache_is_coherent(dev, obj->cache_level); soffset = offset = args->offset; ret = i915_gem_object_get_pages_range(obj, soffset, soffset + sremain); if (ret) return ret; i915_gem_object_pin_pages(obj); obj->dirty = 1; VM_OBJECT_WLOCK(obj->base.vm_obj); for (vm_page_t page = vm_page_find_least(obj->base.vm_obj, OFF_TO_IDX(offset));; page = vm_page_next(page)) { VM_OBJECT_WUNLOCK(obj->base.vm_obj); int partial_cacheline_write; if (remain <= 0) break; /* Operation in this page * * shmem_page_offset = offset within page in shmem file * page_length = bytes to copy for this page */ shmem_page_offset = offset_in_page(offset); page_length = remain; if ((shmem_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - shmem_page_offset; /* If we don't overwrite a cacheline completely we need to be * careful to have up-to-date data by first clflushing. Don't * overcomplicate things and flush the entire patch. */ partial_cacheline_write = needs_clflush_before && ((shmem_page_offset | page_length) & (cpu_clflush_line_size - 1)); page_do_bit17_swizzling = obj_do_bit17_swizzling && (page_to_phys(page) & (1 << 17)) != 0; ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, needs_clflush_after); if (ret == 0) goto next_page; hit_slowpath = 1; DRM_UNLOCK(dev); ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, needs_clflush_after); DRM_LOCK(dev); next_page: vm_page_dirty(page); vm_page_reference(page); if (ret) goto out; remain -= page_length; user_data += page_length; offset += page_length; VM_OBJECT_WLOCK(obj->base.vm_obj); } out: i915_gem_object_unpin_pages(obj); i915_gem_object_put_pages_range(obj, soffset, soffset + sremain); if (hit_slowpath) { /* * Fixup: Flush cpu caches in case we didn't flush the dirty * cachelines in-line while writing and the object moved * out of the cpu write domain while we've dropped the lock. */ if (!needs_clflush_after && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { i915_gem_clflush_object(obj); i915_gem_chipset_flush(dev); } } if (needs_clflush_after) i915_gem_chipset_flush(dev); return ret; } /** * Writes data to the object referenced by handle. * * On error, the contents of the buffer that were to be modified are undefined. */ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_pwrite *args = data; struct drm_i915_gem_object *obj; int ret; if (args->size == 0) return 0; if (!useracc(to_user_ptr(args->data_ptr), args->size, VM_PROT_READ)) return -EFAULT; if (likely(!i915_prefault_disable)) { ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr), args->size); if (ret) return -EFAULT; } ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { ret = -ENOENT; goto unlock; } /* Bounds check destination. */ if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; goto out; } #if 1 KIB_NOTYET(); #else /* prime objects have no backing filp to GEM pread/pwrite * pages from. 
*/ if (!obj->base.filp) { ret = -EINVAL; goto out; } #endif CTR3(KTR_DRM, "pwrite %p %jx %jx", obj, args->offset, args->size); ret = -EFAULT; /* We can only do the GTT pwrite on untiled buffers, as otherwise * it would end up going through the fenced access, and we'll get * different detiling behavior between reading and writing. * pread/pwrite currently are reading and writing from the CPU * perspective, requiring manual detiling by the client. */ if (obj->phys_obj) { ret = i915_gem_phys_pwrite(dev, obj, args, file); goto out; } if (obj->tiling_mode == I915_TILING_NONE && obj->base.write_domain != I915_GEM_DOMAIN_CPU && cpu_write_needs_clflush(obj)) { ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file); /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between * textures). Fallback to the shmem path in that case. */ } if (ret == -EFAULT || ret == -ENOSPC) ret = i915_gem_shmem_pwrite(dev, obj, args, file); out: drm_gem_object_unreference(&obj->base); unlock: DRM_UNLOCK(dev); return ret; } static int i915_gem_check_wedge(struct drm_i915_private *dev_priv) { DRM_LOCK_ASSERT(dev_priv->dev); if (atomic_load_acq_int(&dev_priv->mm.wedged) != 0) { bool recovery_complete; /* Give the error handler a chance to run. */ mtx_lock(&dev_priv->error_completion_lock); recovery_complete = (&dev_priv->error_completion) > 0; mtx_unlock(&dev_priv->error_completion_lock); return (recovery_complete ? -EIO : -EAGAIN); } return 0; } /* * Compare seqno against outstanding lazy request. Emit a request if they are * equal. */ static int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) { int ret; DRM_LOCK_ASSERT(ring->dev); ret = 0; if (seqno == ring->outstanding_lazy_request) { struct drm_i915_gem_request *request; request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO); ret = i915_add_request(ring, NULL, request); if (ret != 0) { free(request, DRM_I915_GEM); return ret; } MPASS(seqno == request->seqno); } return ret; } static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno, bool interruptible) { drm_i915_private_t *dev_priv = ring->dev->dev_private; int ret = 0, flags; if (i915_seqno_passed(ring->get_seqno(ring), seqno)) return 0; CTR2(KTR_DRM, "request_wait_begin %s %d", ring->name, seqno); mtx_lock(&dev_priv->irq_lock); if (!ring->irq_get(ring)) { mtx_unlock(&dev_priv->irq_lock); return -ENODEV; } flags = interruptible ? PCATCH : 0; while (!i915_seqno_passed(ring->get_seqno(ring), seqno) && !atomic_load_acq_int(&dev_priv->mm.wedged) && ret == 0) { ret = -msleep(ring, &dev_priv->irq_lock, flags, "915gwr", 0); if (ret == -ERESTART) ret = -ERESTARTSYS; } ring->irq_put(ring); mtx_unlock(&dev_priv->irq_lock); CTR3(KTR_DRM, "request_wait_end %s %d %d", ring->name, seqno, ret); return ret; } /** * Waits for a sequence number to be signaled, and cleans up the * request and object lists appropriately for that event. */ int i915_wait_request(struct intel_ring_buffer *ring, uint32_t seqno) { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; int ret; KASSERT(seqno != 0, ("Zero seqno")); ret = i915_gem_check_wedge(dev_priv); if (ret) return ret; ret = i915_gem_check_olr(ring, seqno); if (ret) return ret; ret = __wait_seqno(ring, seqno, dev_priv->mm.interruptible); if (atomic_load_acq_int(&dev_priv->mm.wedged)) ret = -EAGAIN; return ret; } /** * Ensures that all rendering to the object has completed and the object is * safe to unbind from the GTT or access from the CPU. 
*/ static __must_check int i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) { int ret; KASSERT((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0, ("In GPU write domain")); CTR5(KTR_DRM, "object_wait_rendering %p %s %x %d %d", obj, obj->ring != NULL ? obj->ring->name : "none", obj->gtt_offset, obj->active, obj->last_rendering_seqno); if (obj->active) { ret = i915_wait_request(obj->ring, obj->last_rendering_seqno); if (ret != 0) return (ret); i915_gem_retire_requests_ring(obj->ring); } return 0; } int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_set_domain *args = data; struct drm_i915_gem_object *obj; uint32_t read_domains = args->read_domains; uint32_t write_domain = args->write_domain; int ret; - if ((write_domain & I915_GEM_GPU_DOMAINS) != 0 || - (read_domains & I915_GEM_GPU_DOMAINS) != 0 || - (write_domain != 0 && read_domains != write_domain)) + /* Only handle setting domains to types used by the CPU. */ + if (write_domain & I915_GEM_GPU_DOMAINS) return -EINVAL; + if (read_domains & I915_GEM_GPU_DOMAINS) + return -EINVAL; + + /* Having something in the write domain implies it's in the read + * domain, and only that read domain. Enforce that in the request. + */ + if (write_domain != 0 && read_domains != write_domain) + return -EINVAL; + ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { ret = -ENOENT; goto unlock; } if (read_domains & I915_GEM_DOMAIN_GTT) { ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); /* Silently promote "you're not bound, there was nothing to do" * to success, since the client was just asking us to * make sure everything was done. */ if (ret == -EINVAL) ret = 0; } else { ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); } drm_gem_object_unreference(&obj->base); unlock: DRM_UNLOCK(dev); return ret; } /** * Called when user space has done writes to this buffer */ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_sw_finish *args = data; struct drm_i915_gem_object *obj; int ret = 0; ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { ret = -ENOENT; goto unlock; } /* Pinned buffers may be scanout, so flush the cache */ if (obj->pin_count) i915_gem_object_flush_cpu_write_domain(obj); drm_gem_object_unreference(&obj->base); unlock: DRM_UNLOCK(dev); return ret; } /** * Maps the contents of an object, returning the address it is mapped * into. * * While the mapping holds a reference on the contents of the object, it doesn't * imply a ref on the object itself. 
*/ int i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_mmap *args = data; struct drm_gem_object *obj; struct proc *p; vm_map_t map; vm_offset_t addr; vm_size_t size; int error, rv; obj = drm_gem_object_lookup(dev, file, args->handle); if (obj == NULL) return -ENOENT; error = 0; if (args->size == 0) goto out; p = curproc; map = &p->p_vmspace->vm_map; size = round_page(args->size); PROC_LOCK(p); if (map->size + size > lim_cur_proc(p, RLIMIT_VMEM)) { PROC_UNLOCK(p); error = -ENOMEM; goto out; } PROC_UNLOCK(p); addr = 0; vm_object_reference(obj->vm_obj); rv = vm_map_find(map, obj->vm_obj, args->offset, &addr, args->size, 0, VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, MAP_INHERIT_SHARE); if (rv != KERN_SUCCESS) { vm_object_deallocate(obj->vm_obj); error = -vm_mmap_to_errno(rv); } else { args->addr_ptr = (uint64_t)addr; } out: drm_gem_object_unreference(obj); return (error); } static int i915_gem_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { *color = 0; /* XXXKIB */ return (0); } /** * i915_gem_fault - fault a page into the GTT * vma: VMA in question * vmf: fault info * * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped * from userspace. The fault handler takes care of binding the object to * the GTT (if needed), allocating and programming a fence register (again, * only if needed based on whether the old reg is still valid or the object * is tiled) and inserting a new PTE into the faulting process. * * Note that the faulting process may involve evicting existing objects * from the GTT and/or fence registers to make room. So performance may * suffer if the GTT working set is large or there are few fence registers * left. */ int i915_intr_pf; static int i915_gem_pager_fault(vm_object_t vm_obj, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct drm_gem_object *gem_obj; struct drm_i915_gem_object *obj; struct drm_device *dev; drm_i915_private_t *dev_priv; vm_page_t page, oldpage; int cause, ret; bool write; gem_obj = vm_obj->handle; obj = to_intel_bo(gem_obj); dev = obj->base.dev; dev_priv = dev->dev_private; #if 0 write = (prot & VM_PROT_WRITE) != 0; #else write = true; #endif vm_object_pip_add(vm_obj, 1); /* * Remove the placeholder page inserted by vm_fault() from the * object before dropping the object lock. If * i915_gem_release_mmap() is active in parallel on this gem * object, then it owns the drm device sx and might find the * placeholder already. Then, since the page is busy, * i915_gem_release_mmap() sleeps waiting for the busy state * of the page cleared. We will be not able to acquire drm * device lock until i915_gem_release_mmap() is able to make a * progress. */ if (*mres != NULL) { oldpage = *mres; vm_page_lock(oldpage); vm_page_remove(oldpage); vm_page_unlock(oldpage); *mres = NULL; } else oldpage = NULL; VM_OBJECT_WUNLOCK(vm_obj); retry: cause = ret = 0; page = NULL; if (i915_intr_pf) { ret = i915_mutex_lock_interruptible(dev); if (ret != 0) { cause = 10; goto out; } } else DRM_LOCK(dev); /* * Since the object lock was dropped, other thread might have * faulted on the same GTT address and instantiated the * mapping for the page. Recheck. 
*/ VM_OBJECT_WLOCK(vm_obj); page = vm_page_lookup(vm_obj, OFF_TO_IDX(offset)); if (page != NULL) { if (vm_page_busied(page)) { DRM_UNLOCK(dev); vm_page_lock(page); VM_OBJECT_WUNLOCK(vm_obj); vm_page_busy_sleep(page, "915pee"); goto retry; } goto have_page; } else VM_OBJECT_WUNLOCK(vm_obj); /* Now bind it into the GTT if needed */ if (!obj->map_and_fenceable) { ret = i915_gem_object_unbind(obj); if (ret != 0) { cause = 20; goto unlock; } } if (!obj->gtt_space) { ret = i915_gem_object_bind_to_gtt(obj, 0, true); if (ret != 0) { cause = 30; goto unlock; } ret = i915_gem_object_set_to_gtt_domain(obj, write); if (ret != 0) { cause = 40; goto unlock; } } if (!obj->has_global_gtt_mapping) i915_gem_gtt_bind_object(obj, obj->cache_level); ret = i915_gem_object_get_fence(obj); if (ret != 0) { cause = 50; goto unlock; } if (i915_gem_object_is_inactive(obj)) list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); obj->fault_mappable = true; VM_OBJECT_WLOCK(vm_obj); page = PHYS_TO_VM_PAGE(dev->agp->base + obj->gtt_offset + offset); KASSERT((page->flags & PG_FICTITIOUS) != 0, ("physical address %#jx not fictitious", (uintmax_t)(dev->agp->base + obj->gtt_offset + offset))); if (page == NULL) { VM_OBJECT_WUNLOCK(vm_obj); cause = 60; ret = -EFAULT; goto unlock; } KASSERT((page->flags & PG_FICTITIOUS) != 0, ("not fictitious %p", page)); KASSERT(page->wire_count == 1, ("wire_count not 1 %p", page)); if (vm_page_busied(page)) { DRM_UNLOCK(dev); vm_page_lock(page); VM_OBJECT_WUNLOCK(vm_obj); vm_page_busy_sleep(page, "915pbs"); goto retry; } if (vm_page_insert(page, vm_obj, OFF_TO_IDX(offset))) { DRM_UNLOCK(dev); VM_OBJECT_WUNLOCK(vm_obj); VM_WAIT; goto retry; } page->valid = VM_PAGE_BITS_ALL; have_page: *mres = page; vm_page_xbusy(page); CTR4(KTR_DRM, "fault %p %jx %x phys %x", gem_obj, offset, prot, page->phys_addr); DRM_UNLOCK(dev); if (oldpage != NULL) { vm_page_lock(oldpage); vm_page_free(oldpage); vm_page_unlock(oldpage); } vm_object_pip_wakeup(vm_obj); return (VM_PAGER_OK); unlock: DRM_UNLOCK(dev); out: KASSERT(ret != 0, ("i915_gem_pager_fault: wrong return")); CTR5(KTR_DRM, "fault_fail %p %jx %x err %d %d", gem_obj, offset, prot, -ret, cause); if (ret == -EAGAIN || ret == -EIO || ret == -EINTR) { kern_yield(PRI_USER); goto retry; } VM_OBJECT_WLOCK(vm_obj); vm_object_pip_wakeup(vm_obj); return (VM_PAGER_ERROR); } static void i915_gem_pager_dtor(void *handle) { struct drm_gem_object *obj; struct drm_device *dev; obj = handle; dev = obj->dev; DRM_LOCK(dev); drm_gem_free_mmap_offset(obj); i915_gem_release_mmap(to_intel_bo(obj)); drm_gem_object_unreference(obj); DRM_UNLOCK(dev); } struct cdev_pager_ops i915_gem_pager_ops = { .cdev_pg_fault = i915_gem_pager_fault, .cdev_pg_ctor = i915_gem_pager_ctor, .cdev_pg_dtor = i915_gem_pager_dtor }; /** * i915_gem_release_mmap - remove physical page mappings * @obj: obj in question * * Preserve the reservation of the mmapping with the DRM core code, but * relinquish ownership of the pages back to the system. * * It is vital that we remove the page mapping if we have mapped a tiled * object through the GTT and then lose the fence register due to * resource pressure. Similarly if the object has been moved out of the * aperture, than pages mapped into userspace must be revoked. Removing the * mapping will then trigger a page fault on the next user access, allowing * fixup by i915_gem_fault(). 
*/ void i915_gem_release_mmap(struct drm_i915_gem_object *obj) { vm_object_t devobj; vm_page_t page; int i, page_count; if (!obj->fault_mappable) return; CTR3(KTR_DRM, "release_mmap %p %x %x", obj, obj->gtt_offset, OFF_TO_IDX(obj->base.size)); devobj = cdev_pager_lookup(obj); if (devobj != NULL) { page_count = OFF_TO_IDX(obj->base.size); VM_OBJECT_WLOCK(devobj); retry: for (i = 0; i < page_count; i++) { page = vm_page_lookup(devobj, i); if (page == NULL) continue; if (vm_page_sleep_if_busy(page, "915unm")) goto retry; cdev_pager_free_page(devobj, page); } VM_OBJECT_WUNLOCK(devobj); vm_object_deallocate(devobj); } obj->fault_mappable = false; } static uint32_t i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode) { uint32_t gtt_size; if (INTEL_INFO(dev)->gen >= 4 || tiling_mode == I915_TILING_NONE) return size; /* Previous chips need a power-of-two fence region when tiling */ if (INTEL_INFO(dev)->gen == 3) gtt_size = 1024*1024; else gtt_size = 512*1024; while (gtt_size < size) gtt_size <<= 1; return gtt_size; } /** * i915_gem_get_gtt_alignment - return required GTT alignment for an object * @obj: object to check * * Return the required GTT alignment for an object, taking into account * potential fence register mapping. */ static uint32_t i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size, int tiling_mode) { /* * Minimum alignment is 4k (GTT page size), but might be greater * if a fence register is needed for the object. */ if (INTEL_INFO(dev)->gen >= 4 || tiling_mode == I915_TILING_NONE) return 4096; /* * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. */ return i915_gem_get_gtt_size(dev, size, tiling_mode); } /** * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an * unfenced object * @dev: the device * @size: size of the object * @tiling_mode: tiling mode of the object * * Return the required GTT alignment for an object, only taking into account * unfenced tiled surface requirements. */ uint32_t i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev, uint32_t size, int tiling_mode) { - if (tiling_mode == I915_TILING_NONE) - return 4096; - /* * Minimum alignment is 4k (GTT page size) for sane hw. */ - if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev)) + if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) || + tiling_mode == I915_TILING_NONE) return 4096; /* Previous hardware however needs to be aligned to a power-of-two * tile height. The simplest method for determining this is to reuse * the power-of-tile object size. 
*/ return i915_gem_get_gtt_size(dev, size, tiling_mode); } int i915_gem_mmap_gtt(struct drm_file *file, struct drm_device *dev, uint32_t handle, uint64_t *offset) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; int ret; ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle)); if (&obj->base == NULL) { ret = -ENOENT; goto unlock; } if (obj->base.size > dev_priv->mm.gtt_mappable_end) { ret = -E2BIG; goto out; } if (obj->madv != I915_MADV_WILLNEED) { DRM_ERROR("Attempting to mmap a purgeable buffer\n"); ret = -EINVAL; goto out; } ret = drm_gem_create_mmap_offset(&obj->base); if (ret) goto out; *offset = DRM_GEM_MAPPING_OFF(obj->base.map_list.key) | DRM_GEM_MAPPING_KEY; out: drm_gem_object_unreference(&obj->base); unlock: DRM_UNLOCK(dev); return ret; } /** * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing * @dev: DRM device * @data: GTT mapping ioctl data * @file: GEM object info * * Simply returns the fake offset to userspace so it can mmap it. * The mmap call will end up in drm_gem_mmap(), which will set things * up so we can get faults in the handler above. * * The fault handler will take care of binding the object into the GTT * (since it may have been evicted to make room for something), allocating * a fence register, and mapping the appropriate aperture address into * userspace. */ int i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_mmap_gtt *args = data; return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset); } /* Immediately discard the backing storage */ static void i915_gem_object_truncate(struct drm_i915_gem_object *obj) { vm_object_t vm_obj; vm_obj = obj->base.vm_obj; VM_OBJECT_WLOCK(vm_obj); vm_object_page_remove(vm_obj, 0, 0, false); VM_OBJECT_WUNLOCK(vm_obj); drm_gem_free_mmap_offset(&obj->base); obj->madv = I915_MADV_PURGED_INTERNAL; } static inline int i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) { return obj->madv == I915_MADV_DONTNEED; } static void i915_gem_object_put_pages_range_locked(struct drm_i915_gem_object *obj, vm_pindex_t si, vm_pindex_t ei) { vm_object_t vm_obj; vm_page_t page; vm_pindex_t i; vm_obj = obj->base.vm_obj; VM_OBJECT_ASSERT_LOCKED(vm_obj); for (i = si, page = vm_page_lookup(vm_obj, i); i < ei; page = vm_page_next(page), i++) { KASSERT(page->pindex == i, ("pindex %jx %jx", (uintmax_t)page->pindex, (uintmax_t)i)); vm_page_lock(page); vm_page_unwire(page, PQ_INACTIVE); if (page->wire_count == 0) atomic_add_long(&i915_gem_wired_pages_cnt, -1); vm_page_unlock(page); } } #define GEM_PARANOID_CHECK_GTT 0 #if GEM_PARANOID_CHECK_GTT static void i915_gem_assert_pages_not_mapped(struct drm_device *dev, vm_page_t *ma, int page_count) { struct drm_i915_private *dev_priv; vm_paddr_t pa; unsigned long start, end; u_int i; int j; dev_priv = dev->dev_private; start = OFF_TO_IDX(dev_priv->mm.gtt_start); end = OFF_TO_IDX(dev_priv->mm.gtt_end); for (i = start; i < end; i++) { pa = intel_gtt_read_pte_paddr(i); for (j = 0; j < page_count; j++) { if (pa == VM_PAGE_TO_PHYS(ma[j])) { panic("Page %p in GTT pte index %d pte %x", ma[i], i, intel_gtt_read_pte(i)); } } } } #endif static void i915_gem_object_put_pages_range(struct drm_i915_gem_object *obj, off_t start, off_t end) { vm_object_t vm_obj; vm_obj = obj->base.vm_obj; VM_OBJECT_WLOCK(vm_obj); i915_gem_object_put_pages_range_locked(obj, OFF_TO_IDX(trunc_page(start)), OFF_TO_IDX(round_page(end))); VM_OBJECT_WUNLOCK(vm_obj); 
} static void i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) { vm_page_t page; int page_count, i; KASSERT(obj->madv != I915_MADV_PURGED_INTERNAL, ("Purged object")); if (obj->tiling_mode != I915_TILING_NONE) i915_gem_object_save_bit_17_swizzle(obj); if (obj->madv == I915_MADV_DONTNEED) obj->dirty = 0; page_count = obj->base.size / PAGE_SIZE; VM_OBJECT_WLOCK(obj->base.vm_obj); #if GEM_PARANOID_CHECK_GTT i915_gem_assert_pages_not_mapped(obj->base.dev, obj->pages, page_count); #endif for (i = 0; i < page_count; i++) { page = obj->pages[i]; if (obj->dirty) vm_page_dirty(page); if (obj->madv == I915_MADV_WILLNEED) vm_page_reference(page); vm_page_lock(page); vm_page_unwire(obj->pages[i], PQ_ACTIVE); vm_page_unlock(page); atomic_add_long(&i915_gem_wired_pages_cnt, -1); } VM_OBJECT_WUNLOCK(obj->base.vm_obj); obj->dirty = 0; free(obj->pages, DRM_I915_GEM); obj->pages = NULL; } static int i915_gpu_is_active(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; return (!list_empty(&dev_priv->mm.flushing_list) || !list_empty(&dev_priv->mm.active_list)); } static void i915_gem_lowmem(void *arg) { struct drm_device *dev; struct drm_i915_private *dev_priv; struct drm_i915_gem_object *obj, *next; int cnt, cnt_fail, cnt_total; dev = arg; dev_priv = dev->dev_private; if (!sx_try_xlock(&dev->dev_struct_lock)) return; CTR0(KTR_DRM, "gem_lowmem"); rescan: /* first scan for clean buffers */ i915_gem_retire_requests(dev); cnt_total = cnt_fail = cnt = 0; list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, mm_list) { if (i915_gem_object_is_purgeable(obj)) { if (i915_gem_object_unbind(obj) != 0) cnt_total++; } else cnt_total++; } /* second pass, evict/count anything still on the inactive list */ list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, mm_list) { if (i915_gem_object_unbind(obj) == 0) cnt++; else cnt_fail++; } if (cnt_fail > cnt_total / 100 && i915_gpu_is_active(dev)) { /* * We are desperate for pages, so as a last resort, wait * for the GPU to finish and discard whatever we can. * This has a dramatic impact to reduce the number of * OOM-killer events whilst running the GPU aggressively. 
*/ if (i915_gpu_idle(dev) == 0) goto rescan; } DRM_UNLOCK(dev); } static int i915_gem_object_get_pages_range(struct drm_i915_gem_object *obj, off_t start, off_t end) { vm_object_t vm_obj; vm_page_t page; vm_pindex_t si, ei, i; bool need_swizzle, fresh; need_swizzle = i915_gem_object_needs_bit17_swizzle(obj) != 0; vm_obj = obj->base.vm_obj; si = OFF_TO_IDX(trunc_page(start)); ei = OFF_TO_IDX(round_page(end)); VM_OBJECT_WLOCK(vm_obj); for (i = si; i < ei; i++) { page = i915_gem_wire_page(vm_obj, i, &fresh); if (page == NULL) goto failed; if (need_swizzle && fresh) i915_gem_object_do_bit_17_swizzle_page(obj, page); } VM_OBJECT_WUNLOCK(vm_obj); return (0); failed: i915_gem_object_put_pages_range_locked(obj, si, i); VM_OBJECT_WUNLOCK(vm_obj); return (-EIO); } static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, int flags) { vm_object_t vm_obj; vm_page_t page; vm_pindex_t i, page_count; int res; KASSERT(obj->pages == NULL, ("Obj already has pages")); page_count = OFF_TO_IDX(obj->base.size); obj->pages = malloc(page_count * sizeof(vm_page_t), DRM_I915_GEM, M_WAITOK); res = i915_gem_object_get_pages_range(obj, 0, obj->base.size); if (res != 0) { free(obj->pages, DRM_I915_GEM); obj->pages = NULL; return (res); } vm_obj = obj->base.vm_obj; VM_OBJECT_WLOCK(vm_obj); for (i = 0, page = vm_page_lookup(vm_obj, 0); i < page_count; i++, page = vm_page_next(page)) { KASSERT(page->pindex == i, ("pindex %jx %jx", (uintmax_t)page->pindex, (uintmax_t)i)); obj->pages[i] = page; } VM_OBJECT_WUNLOCK(vm_obj); return (0); } void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring, uint32_t seqno) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_fence_reg *reg; KASSERT(ring != NULL, ("NULL ring")); obj->ring = ring; /* Add a reference if we're newly entering the active list. */ if (!obj->active) { drm_gem_object_reference(&obj->base); obj->active = 1; } /* Move from whatever list we were on to the tail of execution. 
*/ list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); list_move_tail(&obj->ring_list, &ring->active_list); obj->last_rendering_seqno = seqno; if (obj->fenced_gpu_access) { obj->last_fenced_seqno = seqno; /* Bump MRU to take account of the delayed flush */ if (obj->fence_reg != I915_FENCE_REG_NONE) { reg = &dev_priv->fence_regs[obj->fence_reg]; list_move_tail(®->lru_list, &dev_priv->mm.fence_list); } } } static void i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) { list_del_init(&obj->ring_list); obj->last_rendering_seqno = 0; obj->last_fenced_seqno = 0; } static void i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; KASSERT(obj->active, ("Object not active")); list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list); i915_gem_object_move_off_active(obj); } static void i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); KASSERT(list_empty(&obj->gpu_write_list), ("On gpu_write_list")); KASSERT(obj->active, ("Object not active")); obj->ring = NULL; i915_gem_object_move_off_active(obj); obj->fenced_gpu_access = false; obj->active = 0; obj->pending_gpu_write = false; drm_gem_object_unreference(&obj->base); #if 1 KIB_NOTYET(); #else WARN_ON(i915_verify_lists(dev)); #endif } static u32 i915_gem_get_seqno(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; u32 seqno = dev_priv->next_seqno; /* reserve 0 for non-seqno */ if (++dev_priv->next_seqno == 0) dev_priv->next_seqno = 1; return seqno; } u32 i915_gem_next_request_seqno(struct intel_ring_buffer *ring) { if (ring->outstanding_lazy_request == 0) ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev); return ring->outstanding_lazy_request; } int i915_add_request(struct intel_ring_buffer *ring, struct drm_file *file, struct drm_i915_gem_request *request) { drm_i915_private_t *dev_priv = ring->dev->dev_private; struct drm_i915_file_private *file_priv; uint32_t seqno; u32 request_ring_position; int was_empty; int ret; KASSERT(request != NULL, ("NULL request in add")); DRM_LOCK_ASSERT(ring->dev); seqno = i915_gem_next_request_seqno(ring); request_ring_position = intel_ring_get_tail(ring); ret = ring->add_request(ring, &seqno); if (ret != 0) return ret; CTR2(KTR_DRM, "request_add %s %d", ring->name, seqno); request->seqno = seqno; request->ring = ring; request->tail = request_ring_position; request->emitted_jiffies = ticks; was_empty = list_empty(&ring->request_list); list_add_tail(&request->list, &ring->request_list); if (file) { file_priv = file->driver_priv; mtx_lock(&file_priv->mm.lck); request->file_priv = file_priv; list_add_tail(&request->client_list, &file_priv->mm.request_list); mtx_unlock(&file_priv->mm.lck); } ring->outstanding_lazy_request = 0; if (!dev_priv->mm.suspended) { if (i915_enable_hangcheck) { callout_schedule(&dev_priv->hangcheck_timer, DRM_I915_HANGCHECK_PERIOD); } if (was_empty) taskqueue_enqueue_timeout(dev_priv->tq, &dev_priv->mm.retire_task, hz); } return 0; } static inline void i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) { struct drm_i915_file_private *file_priv = request->file_priv; if (!file_priv) return; DRM_LOCK_ASSERT(request->ring->dev); mtx_lock(&file_priv->mm.lck); if (request->file_priv) { list_del(&request->client_list); request->file_priv = NULL; } 
mtx_unlock(&file_priv->mm.lck); } static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, struct intel_ring_buffer *ring) { if (ring->dev != NULL) DRM_LOCK_ASSERT(ring->dev); while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; request = list_first_entry(&ring->request_list, struct drm_i915_gem_request, list); list_del(&request->list); i915_gem_request_remove_from_client(request); free(request, DRM_I915_GEM); } while (!list_empty(&ring->active_list)) { struct drm_i915_gem_object *obj; obj = list_first_entry(&ring->active_list, struct drm_i915_gem_object, ring_list); obj->base.write_domain = 0; list_del_init(&obj->gpu_write_list); i915_gem_object_move_to_inactive(obj); } } static void i915_gem_reset_fences(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int i; for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; i915_gem_write_fence(dev, i, NULL); if (reg->obj) i915_gem_object_fence_lost(reg->obj); reg->pin_count = 0; reg->obj = NULL; INIT_LIST_HEAD(®->lru_list); } INIT_LIST_HEAD(&dev_priv->mm.fence_list); } void i915_gem_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct intel_ring_buffer *ring; int i; for_each_ring(ring, dev_priv, i) i915_gem_reset_ring_lists(dev_priv, ring); /* Remove anything from the flushing lists. The GPU cache is likely * to be lost on reset along with the data, so simply move the * lost bo to the inactive list. */ while (!list_empty(&dev_priv->mm.flushing_list)) { obj = list_first_entry(&dev_priv->mm.flushing_list, struct drm_i915_gem_object, mm_list); obj->base.write_domain = 0; list_del_init(&obj->gpu_write_list); i915_gem_object_move_to_inactive(obj); } /* Move everything out of the GPU domains to ensure we do any * necessary invalidation upon reuse. */ list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) { obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; } /* The fence registers are invalidated so clear them out */ i915_gem_reset_fences(dev); } /** * This function clears the request list as sequence numbers are passed. */ void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { uint32_t seqno; int i; if (list_empty(&ring->request_list)) return; seqno = ring->get_seqno(ring); CTR2(KTR_DRM, "retire_request_ring %s %d", ring->name, seqno); for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) if (seqno >= ring->sync_seqno[i]) ring->sync_seqno[i] = 0; while (!list_empty(&ring->request_list)) { struct drm_i915_gem_request *request; request = list_first_entry(&ring->request_list, struct drm_i915_gem_request, list); if (!i915_seqno_passed(seqno, request->seqno)) break; CTR2(KTR_DRM, "retire_request_seqno_passed %s %d", ring->name, seqno); ring->last_retired_head = request->tail; list_del(&request->list); i915_gem_request_remove_from_client(request); free(request, DRM_I915_GEM); } /* Move any buffers on the active list that are no longer referenced * by the ringbuffer to the flushing/inactive lists as appropriate. 
*/ while (!list_empty(&ring->active_list)) { struct drm_i915_gem_object *obj; obj = list_first_entry(&ring->active_list, struct drm_i915_gem_object, ring_list); if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) break; if (obj->base.write_domain != 0) i915_gem_object_move_to_flushing(obj); else i915_gem_object_move_to_inactive(obj); } if (ring->trace_irq_seqno && i915_seqno_passed(seqno, ring->trace_irq_seqno)) { struct drm_i915_private *dev_priv = ring->dev->dev_private; mtx_lock(&dev_priv->irq_lock); ring->irq_put(ring); mtx_unlock(&dev_priv->irq_lock); ring->trace_irq_seqno = 0; } } void i915_gem_retire_requests(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; int i; for_each_ring(ring, dev_priv, i) i915_gem_retire_requests_ring(ring); } static void i915_gem_process_flushing_list(struct intel_ring_buffer *ring, uint32_t flush_domains) { struct drm_i915_gem_object *obj, *next; uint32_t old_write_domain; list_for_each_entry_safe(obj, next, &ring->gpu_write_list, gpu_write_list) { if (obj->base.write_domain & flush_domains) { old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; list_del_init(&obj->gpu_write_list); i915_gem_object_move_to_active(obj, ring, i915_gem_next_request_seqno(ring)); CTR3(KTR_DRM, "object_change_domain process_flush %p %x %x", obj, obj->base.read_domains, old_write_domain); } } } int i915_gem_flush_ring(struct intel_ring_buffer *ring, uint32_t invalidate_domains, uint32_t flush_domains) { int ret; if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) return 0; CTR3(KTR_DRM, "ring_flush %s %x %x", ring->name, invalidate_domains, flush_domains); ret = ring->flush(ring, invalidate_domains, flush_domains); if (ret) return ret; if (flush_domains & I915_GEM_GPU_DOMAINS) i915_gem_process_flushing_list(ring, flush_domains); return 0; } static void i915_gem_retire_task_handler(void *arg, int pending) { drm_i915_private_t *dev_priv; struct drm_device *dev; struct intel_ring_buffer *ring; bool idle; int i; dev_priv = arg; dev = dev_priv->dev; /* Come back later if the device is busy... */ if (!sx_try_xlock(&dev->dev_struct_lock)) { taskqueue_enqueue_timeout(dev_priv->tq, &dev_priv->mm.retire_task, hz); return; } CTR0(KTR_DRM, "retire_task"); i915_gem_retire_requests(dev); /* Send a periodic flush down the ring so we don't hold onto GEM * objects indefinitely. 
*/ idle = true; for_each_ring(ring, dev_priv, i) { struct intel_ring_buffer *ring = &dev_priv->rings[i]; if (!list_empty(&ring->gpu_write_list)) { struct drm_i915_gem_request *request; int ret; ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS); request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO); if (ret || request == NULL || i915_add_request(ring, NULL, request)) free(request, DRM_I915_GEM); } idle &= list_empty(&ring->request_list); } if (!dev_priv->mm.suspended && !idle) taskqueue_enqueue_timeout(dev_priv->tq, &dev_priv->mm.retire_task, hz); DRM_UNLOCK(dev); } int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_ring_buffer *to) { struct intel_ring_buffer *from = obj->ring; u32 seqno; int ret, idx; if (from == NULL || to == from) return 0; if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) return i915_gem_object_wait_rendering(obj); idx = intel_ring_sync_index(from, to); seqno = obj->last_rendering_seqno; if (seqno <= from->sync_seqno[idx]) return 0; if (seqno == from->outstanding_lazy_request) { struct drm_i915_gem_request *request; request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO); ret = i915_add_request(from, NULL, request); if (ret) { free(request, DRM_I915_GEM); return ret; } seqno = request->seqno; } ret = to->sync_to(to, from, seqno); if (!ret) from->sync_seqno[idx] = seqno; return ret; } static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) { u32 old_write_domain, old_read_domains; /* Act a barrier for all accesses through the GTT */ mb(); /* Force a pagefault for domain tracking on next user access */ i915_gem_release_mmap(obj); if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) return; old_read_domains = obj->base.read_domains; old_write_domain = obj->base.write_domain; obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; CTR3(KTR_DRM, "object_change_domain finish gtt %p %x %x", obj, old_read_domains, old_write_domain); } /** * Unbinds an object from the GTT aperture. 
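/*
 * Aside: i915_gem_object_sync() above skips the inter-ring semaphore wait
 * when the ring pair has already synchronised past the object's last seqno,
 * and records the new seqno once the wait has been emitted.  A deliberately
 * stripped-down model of that bookkeeping ("sketch_" names are hypothetical;
 * the driver keeps one sync_seqno slot per ring pair and only updates it
 * after sync_to() succeeds):
 */
#include <stdbool.h>
#include <stdint.h>

struct sketch_sync_state {
        uint32_t sync_seqno;    /* last seqno already waited for on this pair */
};

static bool
sketch_semaphore_needed(struct sketch_sync_state *pair, uint32_t seqno)
{
        /* Nothing to do if an equal or later seqno was already waited on. */
        if (seqno <= pair->sync_seqno)
                return (false);
        pair->sync_seqno = seqno;
        return (true);
}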
*/ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { drm_i915_private_t *dev_priv = obj->base.dev->dev_private; int ret = 0; if (obj->gtt_space == NULL) return 0; if (obj->pin_count) { DRM_ERROR("Attempting to unbind pinned buffer\n"); return -EINVAL; } ret = i915_gem_object_finish_gpu(obj); if (ret == -ERESTARTSYS || ret == -EINTR) return ret; i915_gem_object_finish_gtt(obj); if (ret == 0) ret = i915_gem_object_set_to_cpu_domain(obj, 1); if (ret == -ERESTARTSYS || ret == -EINTR) return ret; if (ret != 0) { i915_gem_clflush_object(obj); obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } /* release the fence reg _after_ flushing */ ret = i915_gem_object_put_fence(obj); if (ret) return ret; if (obj->has_global_gtt_mapping) i915_gem_gtt_unbind_object(obj); if (obj->has_aliasing_ppgtt_mapping) { i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj); obj->has_aliasing_ppgtt_mapping = 0; } i915_gem_gtt_finish_object(obj); i915_gem_object_put_pages_gtt(obj); list_del_init(&obj->gtt_list); list_del_init(&obj->mm_list); obj->map_and_fenceable = true; drm_mm_put_block(obj->gtt_space); obj->gtt_space = NULL; obj->gtt_offset = 0; if (i915_gem_object_is_purgeable(obj)) i915_gem_object_truncate(obj); CTR1(KTR_DRM, "object_unbind %p", obj); return ret; } static int i915_ring_idle(struct intel_ring_buffer *ring) { int ret; if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) return 0; if (!list_empty(&ring->gpu_write_list)) { ret = i915_gem_flush_ring(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); if (ret != 0) return ret; } return (i915_wait_request(ring, i915_gem_next_request_seqno(ring))); } int i915_gpu_idle(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; int ret, i; /* Flush everything onto the inactive list. */ for_each_ring(ring, dev_priv, i) { ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); if (ret) return ret; ret = i915_ring_idle(ring); if (ret) return ret; /* Is the device fubar? 
*/ if (!list_empty(&ring->gpu_write_list)) return -EBUSY; } return 0; } static void sandybridge_write_fence_reg(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { drm_i915_private_t *dev_priv = dev->dev_private; uint64_t val; if (obj) { u32 size = obj->gtt_space->size; val = (uint64_t)((obj->gtt_offset + size - 4096) & 0xfffff000) << 32; val |= obj->gtt_offset & 0xfffff000; val |= (uint64_t)((obj->stride / 128) - 1) << SANDYBRIDGE_FENCE_PITCH_SHIFT; if (obj->tiling_mode == I915_TILING_Y) val |= 1 << I965_FENCE_TILING_Y_SHIFT; val |= I965_FENCE_REG_VALID; } else val = 0; I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + reg * 8, val); POSTING_READ(FENCE_REG_SANDYBRIDGE_0 + reg * 8); } static void i965_write_fence_reg(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { drm_i915_private_t *dev_priv = dev->dev_private; uint64_t val; if (obj) { u32 size = obj->gtt_space->size; val = (uint64_t)((obj->gtt_offset + size - 4096) & 0xfffff000) << 32; val |= obj->gtt_offset & 0xfffff000; val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT; if (obj->tiling_mode == I915_TILING_Y) val |= 1 << I965_FENCE_TILING_Y_SHIFT; val |= I965_FENCE_REG_VALID; } else val = 0; I915_WRITE64(FENCE_REG_965_0 + reg * 8, val); POSTING_READ(FENCE_REG_965_0 + reg * 8); } static void i915_write_fence_reg(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { drm_i915_private_t *dev_priv = dev->dev_private; u32 val; if (obj) { u32 size = obj->gtt_space->size; int pitch_val; int tile_width; if ((obj->gtt_offset & ~I915_FENCE_START_MASK) || (size & -size) != size || (obj->gtt_offset & (size - 1))) printf( "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", obj->gtt_offset, obj->map_and_fenceable, size); if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) tile_width = 128; else tile_width = 512; /* Note: pitch better be a power of two tile widths */ pitch_val = obj->stride / tile_width; pitch_val = ffs(pitch_val) - 1; val = obj->gtt_offset; if (obj->tiling_mode == I915_TILING_Y) val |= 1 << I830_FENCE_TILING_Y_SHIFT; val |= I915_FENCE_SIZE_BITS(size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; } else val = 0; if (reg < 8) reg = FENCE_REG_830_0 + reg * 4; else reg = FENCE_REG_945_8 + (reg - 8) * 4; I915_WRITE(reg, val); POSTING_READ(reg); } static void i830_write_fence_reg(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { drm_i915_private_t *dev_priv = dev->dev_private; uint32_t val; if (obj) { u32 size = obj->gtt_space->size; uint32_t pitch_val; if ((obj->gtt_offset & ~I830_FENCE_START_MASK) || (size & -size) != size || (obj->gtt_offset & (size - 1))) printf( "object 0x%08x not 512K or pot-size 0x%08x aligned\n", obj->gtt_offset, size); pitch_val = obj->stride / 128; pitch_val = ffs(pitch_val) - 1; val = obj->gtt_offset; if (obj->tiling_mode == I915_TILING_Y) val |= 1 << I830_FENCE_TILING_Y_SHIFT; val |= I830_FENCE_SIZE_BITS(size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; } else val = 0; I915_WRITE(FENCE_REG_830_0 + reg * 4, val); POSTING_READ(FENCE_REG_830_0 + reg * 4); } static void i915_gem_write_fence(struct drm_device *dev, int reg, struct drm_i915_gem_object *obj) { switch (INTEL_INFO(dev)->gen) { case 7: case 6: sandybridge_write_fence_reg(dev, reg, obj); break; case 5: case 4: i965_write_fence_reg(dev, reg, obj); break; case 3: i915_write_fence_reg(dev, reg, obj); break; case 2: i830_write_fence_reg(dev, reg, obj); break; default: break; } } static inline int 
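/*
 * Aside: the pre-965 fence setup above requires a power-of-two fence size
 * and encodes the pitch as a power-of-two exponent via ffs().  The two
 * idioms, pulled out into a stand-alone sketch ("sketch_" names are
 * hypothetical, for illustration only):
 */
#include <stdbool.h>
#include <strings.h>            /* ffs() */

static bool
sketch_size_is_pot(unsigned int size)
{
        /* Same test as above: a power of two satisfies (size & -size) == size. */
        return (size != 0 && (size & -size) == size);
}

static int
sketch_pitch_val(unsigned int stride, unsigned int tile_width)
{
        /* pitch_val = log2(stride / tile_width), e.g. 4096 / 512 = 8 -> 3. */
        return (ffs(stride / tile_width) - 1);
}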
fence_number(struct drm_i915_private *dev_priv, struct drm_i915_fence_reg *fence) { return fence - dev_priv->fence_regs; } static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, struct drm_i915_fence_reg *fence, bool enable) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; int fence_reg = fence_number(dev_priv, fence); i915_gem_write_fence(dev, fence_reg, enable ? obj : NULL); if (enable) { obj->fence_reg = fence_reg; fence->obj = obj; list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); } else { obj->fence_reg = I915_FENCE_REG_NONE; fence->obj = NULL; list_del_init(&fence->lru_list); } } static int i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) { int ret; if (obj->fenced_gpu_access) { if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); if (ret) return ret; } obj->fenced_gpu_access = false; } if (obj->last_fenced_seqno) { ret = i915_wait_request(obj->ring, obj->last_fenced_seqno); if (ret) return ret; obj->last_fenced_seqno = 0; } /* Ensure that all CPU reads are completed before installing a fence * and all writes before removing the fence. */ if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) mb(); return 0; } int i915_gem_object_put_fence(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = obj->base.dev->dev_private; int ret; ret = i915_gem_object_flush_fence(obj); if (ret) return ret; if (obj->fence_reg == I915_FENCE_REG_NONE) return 0; i915_gem_object_update_fence(obj, &dev_priv->fence_regs[obj->fence_reg], false); i915_gem_object_fence_lost(obj); return 0; } static struct drm_i915_fence_reg * i915_find_fence_reg(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_fence_reg *reg, *avail; int i; /* First try to find a free reg */ avail = NULL; for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) { reg = &dev_priv->fence_regs[i]; if (!reg->obj) return reg; if (!reg->pin_count) avail = reg; } if (avail == NULL) return NULL; /* None available, try to steal one or wait for a user to finish */ list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { if (reg->pin_count) continue; return reg; } return NULL; } /** * i915_gem_object_get_fence - set up fencing for an object * @obj: object to map through a fence reg * * When mapping objects through the GTT, userspace wants to be able to write * to them without having to worry about swizzling if the object is tiled. * This function walks the fence regs looking for a free one for @obj, * stealing one if it can't find any. * * It then sets up the reg based on the object's properties: address, pitch * and tiling format. * * For an untiled surface, this removes any existing fence. */ int i915_gem_object_get_fence(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; bool enable = obj->tiling_mode != I915_TILING_NONE; struct drm_i915_fence_reg *reg; int ret; /* Have we updated the tiling parameters upon the object and so * will need to serialise the write to the associated fence register? */ if (obj->fence_dirty) { ret = i915_gem_object_flush_fence(obj); if (ret) return ret; } /* Just update our place in the LRU if our fence is getting reused. 
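/*
 * Aside: i915_find_fence_reg() above prefers a completely free fence
 * register and otherwise steals one that is not pinned.  A compact
 * stand-alone model of that policy over a plain array ("sketch_" types are
 * hypothetical, illustration only -- the driver walks a real LRU list
 * rather than scanning an array):
 */
#include <stdbool.h>

struct sketch_fence {
        bool    in_use;         /* has an object attached */
        bool    pinned;         /* may not be stolen right now */
};

static int
sketch_find_fence(struct sketch_fence *regs, int nregs)
{
        int i, steal = -1;

        for (i = 0; i < nregs; i++) {
                if (!regs[i].in_use)
                        return (i);             /* free register wins outright */
                if (!regs[i].pinned && steal == -1)
                        steal = i;              /* remember a stealable fallback */
        }
        return (steal);                         /* -1 maps to -EDEADLK in the caller */
}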
*/ if (obj->fence_reg != I915_FENCE_REG_NONE) { reg = &dev_priv->fence_regs[obj->fence_reg]; if (!obj->fence_dirty) { list_move_tail(®->lru_list, &dev_priv->mm.fence_list); return 0; } } else if (enable) { reg = i915_find_fence_reg(dev); if (reg == NULL) return -EDEADLK; if (reg->obj) { struct drm_i915_gem_object *old = reg->obj; ret = i915_gem_object_flush_fence(old); if (ret) return ret; i915_gem_object_fence_lost(old); } } else return 0; i915_gem_object_update_fence(obj, reg, enable); obj->fence_dirty = false; return 0; } /** * Finds free space in the GTT aperture and binds the object there. */ static int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, unsigned alignment, bool map_and_fenceable) { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_mm_node *free_space; u32 size, fence_size, fence_alignment, unfenced_alignment; bool mappable, fenceable; int ret; if (obj->madv != I915_MADV_WILLNEED) { DRM_ERROR("Attempting to bind a purgeable object\n"); return -EINVAL; } fence_size = i915_gem_get_gtt_size(dev, obj->base.size, obj->tiling_mode); fence_alignment = i915_gem_get_gtt_alignment(dev, obj->base.size, obj->tiling_mode); unfenced_alignment = i915_gem_get_unfenced_gtt_alignment(dev, obj->base.size, obj->tiling_mode); if (alignment == 0) alignment = map_and_fenceable ? fence_alignment : unfenced_alignment; if (map_and_fenceable && alignment & (fence_alignment - 1)) { DRM_ERROR("Invalid object alignment requested %u\n", alignment); return -EINVAL; } size = map_and_fenceable ? fence_size : obj->base.size; /* If the object is bigger than the entire aperture, reject it early * before evicting everything in a vain attempt to find space. */ if (obj->base.size > (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) { DRM_ERROR("Attempting to bind an object larger than the aperture\n"); return -E2BIG; } search_free: if (map_and_fenceable) free_space = drm_mm_search_free_in_range( &dev_priv->mm.gtt_space, size, alignment, 0, dev_priv->mm.gtt_mappable_end, 0); else free_space = drm_mm_search_free(&dev_priv->mm.gtt_space, size, alignment, 0); if (free_space != NULL) { if (map_and_fenceable) obj->gtt_space = drm_mm_get_block_range_generic( free_space, size, alignment, 0, 0, dev_priv->mm.gtt_mappable_end, 1); else obj->gtt_space = drm_mm_get_block_generic(free_space, size, alignment, 0, 1); } if (obj->gtt_space == NULL) { ret = i915_gem_evict_something(dev, size, alignment, map_and_fenceable); if (ret != 0) return ret; goto search_free; } ret = i915_gem_object_get_pages_gtt(obj, 0); if (ret) { drm_mm_put_block(obj->gtt_space); obj->gtt_space = NULL; /* * i915_gem_object_get_pages_gtt() cannot return * ENOMEM, since we use vm_page_grab(). 
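/*
 * Aside: for a map_and_fenceable binding the requested alignment must be a
 * multiple of the (power-of-two) fence alignment, which bind_to_gtt() above
 * tests with a simple mask.  Stand-alone illustration (the "sketch_" name is
 * hypothetical):
 */
#include <stdbool.h>
#include <stdint.h>

static bool
sketch_alignment_ok(uint32_t alignment, uint32_t fence_alignment)
{
        /*
         * alignment == 0 means "use the default"; otherwise it must be a
         * multiple of fence_alignment, i.e. no low bits set under the mask.
         */
        return ((alignment & (fence_alignment - 1)) == 0);
}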
*/ return ret; } ret = i915_gem_gtt_prepare_object(obj); if (ret) { i915_gem_object_put_pages_gtt(obj); drm_mm_put_block(obj->gtt_space); obj->gtt_space = NULL; if (i915_gem_evict_everything(dev, false)) return ret; goto search_free; } if (!dev_priv->mm.aliasing_ppgtt) i915_gem_gtt_bind_object(obj, obj->cache_level); list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list); list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); KASSERT((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0, ("Object in gpu read domain")); KASSERT((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0, ("Object in gpu write domain")); obj->gtt_offset = obj->gtt_space->start; fenceable = obj->gtt_space->size == fence_size && (obj->gtt_space->start & (fence_alignment - 1)) == 0; mappable = obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; obj->map_and_fenceable = mappable && fenceable; CTR4(KTR_DRM, "object_bind %p %x %x %d", obj, obj->gtt_offset, obj->base.size, map_and_fenceable); return 0; } void i915_gem_clflush_object(struct drm_i915_gem_object *obj) { /* If we don't have a page list set up, then we're not pinned * to GPU, and we can ignore the cache flush because it'll happen * again at bind time. */ if (obj->pages == NULL) return; /* If the GPU is snooping the contents of the CPU cache, * we do not need to manually clear the CPU cache lines. However, * the caches are only snooped when the render cache is * flushed/invalidated. As we always have to emit invalidations * and flushes when moving into and out of the RENDER domain, correct * snooping behaviour occurs naturally as the result of our domain * tracking. */ if (obj->cache_level != I915_CACHE_NONE) return; CTR1(KTR_DRM, "object_clflush %p", obj); drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); } /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) { uint32_t old_write_domain; if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) return; /* No actual flushing is required for the GTT write domain. Writes * to it immediately go to main memory as far as we know, so there's * no chipset flush. It also doesn't land in render cache. * * However, we do have to enforce the order so that all writes through * the GTT land before any writes to the device, such as updates to * the GATT itself. */ wmb(); old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; CTR3(KTR_DRM, "object_change_domain flush gtt_write %p %x %x", obj, obj->base.read_domains, old_write_domain); } /** Flushes the CPU write domain for the object if it's dirty. */ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) { uint32_t old_write_domain; if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; i915_gem_clflush_object(obj); intel_gtt_chipset_flush(); old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; CTR3(KTR_DRM, "object_change_domain flush_cpu_write %p %x %x", obj, obj->base.read_domains, old_write_domain); } static int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) { if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) return (0); return (i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain)); } /** * Moves a single object to the GTT read, and possibly write domain. * * This function returns when the move is complete, including waiting on * flushes to occur. 
*/ int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { drm_i915_private_t *dev_priv = obj->base.dev->dev_private; uint32_t old_write_domain, old_read_domains; int ret; /* Not valid to be called on unbound objects. */ if (obj->gtt_space == NULL) return -EINVAL; if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) return 0; ret = i915_gem_object_flush_gpu_write_domain(obj); if (ret) - return (ret); + return ret; if (obj->pending_gpu_write || write) { ret = i915_gem_object_wait_rendering(obj); if (ret) return (ret); } i915_gem_object_flush_cpu_write_domain(obj); old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) == 0, ("In GTT write domain")); obj->base.read_domains |= I915_GEM_DOMAIN_GTT; if (write) { obj->base.read_domains = I915_GEM_DOMAIN_GTT; obj->base.write_domain = I915_GEM_DOMAIN_GTT; obj->dirty = 1; } CTR3(KTR_DRM, "object_change_domain set_to_gtt %p %x %x", obj, old_read_domains, old_write_domain); /* And bump the LRU for this access */ if (i915_gem_object_is_inactive(obj)) list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); return 0; } int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; int ret; if (obj->cache_level == cache_level) return 0; if (obj->pin_count) { DRM_DEBUG("can not change the cache level of pinned objects\n"); return -EBUSY; } if (obj->gtt_space) { ret = i915_gem_object_finish_gpu(obj); if (ret) return ret; i915_gem_object_finish_gtt(obj); /* Before SandyBridge, you could not use tiling or fence * registers with snooped memory, so relinquish any fences * currently pointing to our region in the aperture. */ if (INTEL_INFO(obj->base.dev)->gen < 6) { ret = i915_gem_object_put_fence(obj); if (ret) return ret; } if (obj->has_global_gtt_mapping) i915_gem_gtt_bind_object(obj, cache_level); if (obj->has_aliasing_ppgtt_mapping) i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, obj, cache_level); } if (cache_level == I915_CACHE_NONE) { u32 old_read_domains, old_write_domain; /* If we're coming from LLC cached, then we haven't * actually been tracking whether the data is in the * CPU cache or not, since we only allow one bit set * in obj->write_domain and have been skipping the clflushes. * Just set it to the CPU cache for now. */ KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) == 0, ("obj %p in CPU write domain", obj)); KASSERT((obj->base.read_domains & ~I915_GEM_DOMAIN_CPU) == 0, ("obj %p in CPU read domain", obj)); old_read_domains = obj->base.read_domains; old_write_domain = obj->base.write_domain; obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; CTR3(KTR_DRM, "object_change_domain set_cache_level %p %x %x", obj, old_read_domains, old_write_domain); } obj->cache_level = cache_level; return 0; } static bool is_pin_display(struct drm_i915_gem_object *obj) { /* There are 3 sources that pin objects: * 1. The display engine (scanouts, sprites, cursors); * 2. Reservations for execbuffer; * 3. The user. * * We can ignore reservations as we hold the struct_mutex and * are only called outside of the reservation path. 
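/*
 * Aside: the domain bookkeeping in set_to_gtt_domain() above reduces to two
 * bitmask updates: a read-only move adds GTT to the read domains, while a
 * writable move makes GTT the sole read and write domain.  Stand-alone
 * sketch (the SK_DOMAIN_GTT value is a made-up placeholder; the driver's
 * I915_GEM_DOMAIN_* constants differ):
 */
#include <stdbool.h>
#include <stdint.h>

#define SK_DOMAIN_GTT   0x02    /* hypothetical placeholder bit */

struct sketch_domains {
        uint32_t        read_domains;
        uint32_t        write_domain;
};

static void
sketch_move_to_gtt(struct sketch_domains *d, bool write)
{
        d->read_domains |= SK_DOMAIN_GTT;
        if (write) {
                d->read_domains = SK_DOMAIN_GTT;
                d->write_domain = SK_DOMAIN_GTT;
        }
}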
The user * can only increment pin_count once, and so if after * subtracting the potential reference by the user, any pin_count * remains, it must be due to another use by the display engine. */ return obj->pin_count - !!obj->user_pin_count; } int i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, struct intel_ring_buffer *pipelined) { u32 old_read_domains, old_write_domain; int ret; ret = i915_gem_object_flush_gpu_write_domain(obj); if (ret) return ret; if (pipelined != obj->ring) { ret = i915_gem_object_sync(obj, pipelined); if (ret) return ret; } /* Mark the pin_display early so that we account for the * display coherency whilst setting up the cache domains. */ obj->pin_display = true; /* The display engine is not coherent with the LLC cache on gen6. As * a result, we make sure that the pinning that is about to occur is * done with uncached PTEs. This is lowest common denominator for all * chipsets. * * However for gen6+, we could do better by using the GFDT bit instead * of uncaching, which would allow us to flush all the LLC-cached data * with that bit in the PTE to main memory with just one PIPE_CONTROL. */ ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE); if (ret) goto err_unpin_display; /* As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we * always use map_and_fenceable for all scanout buffers. */ ret = i915_gem_object_pin(obj, alignment, true); if (ret) goto err_unpin_display; i915_gem_object_flush_cpu_write_domain(obj); old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) == 0, ("obj %p in GTT write domain", obj)); obj->base.read_domains |= I915_GEM_DOMAIN_GTT; CTR3(KTR_DRM, "object_change_domain pin_to_display_plan %p %x %x", obj, old_read_domains, obj->base.write_domain); return 0; err_unpin_display: obj->pin_display = is_pin_display(obj); return ret; } void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj) { i915_gem_object_unpin(obj); obj->pin_display = is_pin_display(obj); } int i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) { int ret; if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) return 0; if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); if (ret) return ret; } ret = i915_gem_object_wait_rendering(obj); if (ret) return ret; /* Ensure that we invalidate the GPU's caches and TLBs. */ obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS; return 0; } +/** + * Moves a single object to the CPU read, and possibly write domain. + * + * This function returns when the move is complete, including waiting on + * flushes to occur. + */ int i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) { uint32_t old_write_domain, old_read_domains; int ret; if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return 0; ret = i915_gem_object_flush_gpu_write_domain(obj); if (ret) return ret; if (write || obj->pending_gpu_write) { ret = i915_gem_object_wait_rendering(obj); if (ret) return ret; } i915_gem_object_flush_gtt_write_domain(obj); old_write_domain = obj->base.write_domain; old_read_domains = obj->base.read_domains; /* Flush the CPU cache if it's still invalid. 
*/ if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) { i915_gem_clflush_object(obj); obj->base.read_domains |= I915_GEM_DOMAIN_CPU; } /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ KASSERT((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) == 0, ("In cpu write domain")); /* If we're writing through the CPU, then the GPU read domains will * need to be invalidated at next use. */ if (write) { obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; } CTR3(KTR_DRM, "object_change_domain set_to_cpu %p %x %x", obj, old_read_domains, old_write_domain); return 0; } /* Throttle our rendering by waiting until the ring has completed our requests * emitted over 20 msec ago. * * Note that if we were to use the current jiffies each time around the loop, * we wouldn't escape the function with any frames outstanding if the time to * render a frame was over 20ms. * * This should get us reasonable parallelism between CPU and GPU but also * relatively low latency when blocking on a particular request to finish. */ static int i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_file_private *file_priv = file->driver_priv; unsigned long recent_enough = ticks - (20 * hz / 1000); struct drm_i915_gem_request *request; struct intel_ring_buffer *ring = NULL; u32 seqno = 0; int ret; if (atomic_load_acq_int(&dev_priv->mm.wedged)) return -EIO; mtx_lock(&file_priv->mm.lck); list_for_each_entry(request, &file_priv->mm.request_list, client_list) { if (time_after_eq(request->emitted_jiffies, recent_enough)) break; ring = request->ring; seqno = request->seqno; } mtx_unlock(&file_priv->mm.lck); if (seqno == 0) return 0; ret = __wait_seqno(ring, seqno, true); if (ret == 0) taskqueue_enqueue_timeout(dev_priv->tq, &dev_priv->mm.retire_task, 0); return ret; } int i915_gem_object_pin(struct drm_i915_gem_object *obj, uint32_t alignment, bool map_and_fenceable) { int ret; if (obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT) return -EBUSY; if (obj->gtt_space != NULL) { if ((alignment && obj->gtt_offset & (alignment - 1)) || (map_and_fenceable && !obj->map_and_fenceable)) { DRM_DEBUG("bo is already pinned with incorrect alignment:" " offset=%x, req.alignment=%x, req.map_and_fenceable=%d," " obj->map_and_fenceable=%d\n", obj->gtt_offset, alignment, map_and_fenceable, obj->map_and_fenceable); ret = i915_gem_object_unbind(obj); if (ret) return ret; } } if (obj->gtt_space == NULL) { ret = i915_gem_object_bind_to_gtt(obj, alignment, map_and_fenceable); if (ret) return ret; } if (!obj->has_global_gtt_mapping && map_and_fenceable) i915_gem_gtt_bind_object(obj, obj->cache_level); obj->pin_count++; obj->pin_mappable |= map_and_fenceable; return 0; } void i915_gem_object_unpin(struct drm_i915_gem_object *obj) { KASSERT(obj->pin_count != 0, ("zero pin count")); KASSERT(obj->gtt_space != NULL, ("No gtt mapping")); if (--obj->pin_count == 0) obj->pin_mappable = false; } int i915_gem_pin_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_pin *args = data; struct drm_i915_gem_object *obj; struct drm_gem_object *gobj; int ret; ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; gobj = drm_gem_object_lookup(dev, file, args->handle); if (gobj == NULL) { ret = -ENOENT; goto unlock; } obj = to_intel_bo(gobj); if (obj->madv != I915_MADV_WILLNEED) { DRM_ERROR("Attempting to pin a purgeable buffer\n"); ret = -EINVAL; 
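/*
 * Aside: i915_gem_ring_throttle() above expresses its 20 msec window in
 * scheduler ticks as ticks - (20 * hz / 1000).  Stand-alone sketch of that
 * conversion (the "sketch_" name is hypothetical; with hz = 1000 the window
 * is 20 ticks, with hz = 100 it rounds down to 2):
 */
static unsigned long
sketch_throttle_cutoff(unsigned long now_ticks, int hz_value)
{
        /* Oldest tick count still considered "recent enough". */
        return (now_ticks - (20 * hz_value / 1000));
}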
goto out; } if (obj->pin_filp != NULL && obj->pin_filp != file) { DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n", args->handle); ret = -EINVAL; goto out; } obj->user_pin_count++; obj->pin_filp = file; if (obj->user_pin_count == 1) { ret = i915_gem_object_pin(obj, args->alignment, true); if (ret) goto out; } /* XXX - flush the CPU caches for pinned objects * as the X server doesn't manage domains yet */ i915_gem_object_flush_cpu_write_domain(obj); args->offset = obj->gtt_offset; out: drm_gem_object_unreference(&obj->base); unlock: DRM_UNLOCK(dev); return ret; } int i915_gem_unpin_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_pin *args = data; struct drm_i915_gem_object *obj; int ret; ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { ret = -ENOENT; goto unlock; } if (obj->pin_filp != file) { DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n", args->handle); ret = -EINVAL; goto out; } obj->user_pin_count--; if (obj->user_pin_count == 0) { obj->pin_filp = NULL; i915_gem_object_unpin(obj); } out: drm_gem_object_unreference(&obj->base); unlock: DRM_UNLOCK(dev); return ret; } int i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; int ret; ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) { ret = -ENOENT; goto unlock; } args->busy = obj->active; if (args->busy) { if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); } else { ret = i915_gem_check_olr(obj->ring, obj->last_rendering_seqno); } i915_gem_retire_requests_ring(obj->ring); args->busy = obj->active; } drm_gem_object_unreference(&obj->base); unlock: DRM_UNLOCK(dev); return ret; } int i915_gem_throttle_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { - return i915_gem_ring_throttle(dev, file_priv); } int i915_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_i915_gem_madvise *args = data; struct drm_i915_gem_object *obj; int ret; switch (args->madv) { case I915_MADV_DONTNEED: case I915_MADV_WILLNEED: break; default: return -EINVAL; } ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle)); if (&obj->base == NULL) { ret = -ENOENT; goto unlock; } if (obj->pin_count) { ret = -EINVAL; goto out; } if (obj->madv != I915_MADV_PURGED_INTERNAL) obj->madv = args->madv; /* if the object is no longer attached, discard its backing storage */ if (i915_gem_object_is_purgeable(obj) && obj->gtt_space == NULL) i915_gem_object_truncate(obj); args->retained = obj->madv != I915_MADV_PURGED_INTERNAL; out: drm_gem_object_unreference(&obj->base); unlock: DRM_UNLOCK(dev); return ret; } struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size) { struct drm_i915_private *dev_priv; struct drm_i915_gem_object *obj; dev_priv = dev->dev_private; obj = malloc(sizeof(*obj), DRM_I915_GEM, M_WAITOK | M_ZERO); if (drm_gem_object_init(dev, &obj->base, size) != 0) { free(obj, DRM_I915_GEM); return NULL; } obj->base.write_domain = I915_GEM_DOMAIN_CPU; obj->base.read_domains = I915_GEM_DOMAIN_CPU; if (HAS_LLC(dev)) { /* On some devices, we can have the GPU use the LLC (the CPU * 
cache) for about a 10% performance improvement * compared to uncached. Graphics requests other than * display scanout are coherent with the CPU in * accessing this cache. This means in this mode we * don't need to clflush on the CPU side, and on the * GPU side we only need to flush internal caches to * get data visible to the CPU. * * However, we maintain the display planes as UC, and so * need to rebind when first used as such. */ obj->cache_level = I915_CACHE_LLC; } else obj->cache_level = I915_CACHE_NONE; obj->base.driver_private = NULL; obj->fence_reg = I915_FENCE_REG_NONE; INIT_LIST_HEAD(&obj->mm_list); INIT_LIST_HEAD(&obj->gtt_list); INIT_LIST_HEAD(&obj->ring_list); INIT_LIST_HEAD(&obj->exec_list); INIT_LIST_HEAD(&obj->gpu_write_list); obj->madv = I915_MADV_WILLNEED; /* Avoid an unnecessary call to unbind on the first bind. */ obj->map_and_fenceable = true; i915_gem_info_add_obj(dev_priv, size); return obj; } int i915_gem_init_object(struct drm_gem_object *obj) { printf("i915_gem_init_object called\n"); return 0; } void i915_gem_free_object(struct drm_gem_object *gem_obj) { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; CTR1(KTR_DRM, "object_destroy_tail %p", obj); if (obj->phys_obj) i915_gem_detach_phys_object(dev, obj); obj->pin_count = 0; if (i915_gem_object_unbind(obj) == -ERESTARTSYS) { bool was_interruptible; was_interruptible = dev_priv->mm.interruptible; dev_priv->mm.interruptible = false; if (i915_gem_object_unbind(obj)) printf("i915_gem_free_object: unbind\n"); dev_priv->mm.interruptible = was_interruptible; } drm_gem_free_mmap_offset(&obj->base); drm_gem_object_release(&obj->base); i915_gem_info_remove_obj(dev_priv, obj->base.size); free(obj->bit_17, DRM_I915_GEM); free(obj, DRM_I915_GEM); } int i915_gem_idle(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; int ret; DRM_LOCK(dev); if (dev_priv->mm.suspended) { DRM_UNLOCK(dev); return 0; } ret = i915_gpu_idle(dev); if (ret) { DRM_UNLOCK(dev); return ret; } i915_gem_retire_requests(dev); /* Under UMS, be paranoid and evict. */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) { ret = i915_gem_evict_everything(dev, false); if (ret) { DRM_UNLOCK(dev); return ret; } } i915_gem_reset_fences(dev); /* Hack! Don't let anybody do execbuf while we don't control the chip. * We need to replace this with a semaphore, or something. * And not confound mm.suspended! */ dev_priv->mm.suspended = 1; callout_stop(&dev_priv->hangcheck_timer); i915_kernel_lost_context(dev); i915_gem_cleanup_ringbuffer(dev); DRM_UNLOCK(dev); /* Cancel the retire work handler, which should be idle now. 
*/ taskqueue_cancel_timeout(dev_priv->tq, &dev_priv->mm.retire_task, NULL); return ret; } void i915_gem_init_swizzling(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; if (INTEL_INFO(dev)->gen < 5 || dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE) return; I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) | DISP_TILE_SURFACE_SWIZZLING); if (IS_GEN5(dev)) return; I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL); if (IS_GEN6(dev)) I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); else I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); } int i915_gem_init_hw(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; int ret; i915_gem_init_swizzling(dev); ret = intel_init_render_ring_buffer(dev); if (ret) return ret; if (HAS_BSD(dev)) { ret = intel_init_bsd_ring_buffer(dev); if (ret) goto cleanup_render_ring; } if (HAS_BLT(dev)) { ret = intel_init_blt_ring_buffer(dev); if (ret) goto cleanup_bsd_ring; } dev_priv->next_seqno = 1; /* * XXX: There was some w/a described somewhere suggesting loading * contexts before PPGTT. */ i915_gem_context_init(dev); i915_gem_init_ppgtt(dev); return 0; cleanup_bsd_ring: intel_cleanup_ring_buffer(&dev_priv->rings[VCS]); cleanup_render_ring: intel_cleanup_ring_buffer(&dev_priv->rings[RCS]); return ret; } static bool intel_enable_ppgtt(struct drm_device *dev) { if (i915_enable_ppgtt >= 0) return i915_enable_ppgtt; /* Disable ppgtt on SNB if VT-d is on. */ if (INTEL_INFO(dev)->gen == 6 && intel_iommu_enabled) return false; return true; } int i915_gem_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; unsigned long gtt_size, mappable_size; int ret; gtt_size = dev_priv->mm.gtt.gtt_total_entries << PAGE_SHIFT; mappable_size = dev_priv->mm.gtt.gtt_mappable_entries << PAGE_SHIFT; DRM_LOCK(dev); if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) { /* PPGTT pdes are stolen from global gtt ptes, so shrink the * aperture accordingly when using aliasing ppgtt. */ gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE; i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); ret = i915_gem_init_aliasing_ppgtt(dev); if (ret) { DRM_UNLOCK(dev); return ret; } } else { /* Let GEM Manage all of the aperture. * * However, leave one page at the end still bound to the scratch * page. There are a number of places where the hardware * apparently prefetches past the end of the object, and we've * seen multiple hangs with the GPU head pointer stuck in a * batchbuffer bound at the last page of the aperture. One page * should be enough to keep any prefetching inside of the * aperture. */ i915_gem_init_global_gtt(dev, 0, mappable_size, gtt_size); } ret = i915_gem_init_hw(dev); DRM_UNLOCK(dev); if (ret) { i915_gem_cleanup_aliasing_ppgtt(dev); return ret; } /* Allow hardware batchbuffers unless told otherwise, but not for KMS. 
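/*
 * Aside: i915_gem_init() above sizes the global GTT from the entry counts
 * (each entry maps one page) and, when aliasing PPGTT is enabled, carves the
 * PPGTT page-directory entries out of that range.  Stand-alone sketch of the
 * arithmetic (the "sketch_" name is hypothetical; page_shift is 12 for 4 KB
 * pages):
 */
#include <stdint.h>

static uint64_t
sketch_gtt_size(uint64_t total_entries, uint64_t ppgtt_pd_entries,
    int page_shift)
{
        uint64_t size;

        size = total_entries << page_shift;     /* one page per GTT entry */
        size -= ppgtt_pd_entries << page_shift; /* reserve the PPGTT PDEs */
        return (size);
}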
*/ if (!drm_core_check_feature(dev, DRIVER_MODESET)) dev_priv->dri1.allow_batchbuffer = 1; return 0; } void i915_gem_cleanup_ringbuffer(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; int i; for_each_ring(ring, dev_priv, i) intel_cleanup_ring_buffer(ring); } int i915_gem_entervt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { drm_i915_private_t *dev_priv = dev->dev_private; int ret; if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; if (atomic_load_acq_int(&dev_priv->mm.wedged) != 0) { DRM_ERROR("Reenabling wedged hardware, good luck\n"); atomic_store_rel_int(&dev_priv->mm.wedged, 0); } DRM_LOCK(dev); dev_priv->mm.suspended = 0; ret = i915_gem_init_hw(dev); if (ret != 0) { DRM_UNLOCK(dev); return ret; } KASSERT(list_empty(&dev_priv->mm.active_list), ("active list")); KASSERT(list_empty(&dev_priv->mm.flushing_list), ("flushing list")); KASSERT(list_empty(&dev_priv->mm.inactive_list), ("inactive list")); DRM_UNLOCK(dev); ret = drm_irq_install(dev); if (ret) goto cleanup_ringbuffer; return 0; cleanup_ringbuffer: DRM_LOCK(dev); i915_gem_cleanup_ringbuffer(dev); dev_priv->mm.suspended = 1; DRM_UNLOCK(dev); return ret; } int i915_gem_leavevt_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { if (drm_core_check_feature(dev, DRIVER_MODESET)) return 0; drm_irq_uninstall(dev); return i915_gem_idle(dev); } void i915_gem_lastclose(struct drm_device *dev) { int ret; if (drm_core_check_feature(dev, DRIVER_MODESET)) return; ret = i915_gem_idle(dev); if (ret) DRM_ERROR("failed to idle hardware: %d\n", ret); } static void init_ring_lists(struct intel_ring_buffer *ring) { INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); INIT_LIST_HEAD(&ring->gpu_write_list); } void i915_gem_load(struct drm_device *dev) { int i; drm_i915_private_t *dev_priv = dev->dev_private; INIT_LIST_HEAD(&dev_priv->mm.active_list); INIT_LIST_HEAD(&dev_priv->mm.flushing_list); INIT_LIST_HEAD(&dev_priv->mm.inactive_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); INIT_LIST_HEAD(&dev_priv->mm.gtt_list); for (i = 0; i < I915_NUM_RINGS; i++) init_ring_lists(&dev_priv->rings[i]); for (i = 0; i < I915_MAX_NUM_FENCES; i++) INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); TIMEOUT_TASK_INIT(dev_priv->tq, &dev_priv->mm.retire_task, 0, i915_gem_retire_task_handler, dev_priv); dev_priv->error_completion = 0; /* On GEN3 we really need to make sure the ARB C3 LP bit is set */ if (IS_GEN3(dev)) { I915_WRITE(MI_ARB_STATE, _MASKED_BIT_ENABLE(MI_ARB_C3_LP_WRITE_ENABLE)); } dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; /* Old X drivers will take 0-2 for front, back, depth buffers */ if (!drm_core_check_feature(dev, DRIVER_MODESET)) dev_priv->fence_reg_start = 3; if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) dev_priv->num_fence_regs = 16; else dev_priv->num_fence_regs = 8; /* Initialize fence registers to zero */ i915_gem_reset_fences(dev); i915_gem_detect_bit_6_swizzle(dev); dev_priv->mm.interruptible = true; dev_priv->mm.i915_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, i915_gem_lowmem, dev, EVENTHANDLER_PRI_ANY); } void i915_gem_unload(struct drm_device *dev) { struct drm_i915_private *dev_priv; dev_priv = dev->dev_private; EVENTHANDLER_DEREGISTER(vm_lowmem, dev_priv->mm.i915_lowmem); } +/* + * Create a physically contiguous memory object for this object + * e.g. 
for cursor + overlay regs + */ static int i915_gem_init_phys_object(struct drm_device *dev, int id, int size, int align) { drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_phys_object *phys_obj; int ret; if (dev_priv->mm.phys_objs[id - 1] || !size) return 0; phys_obj = malloc(sizeof(struct drm_i915_gem_phys_object), DRM_I915_GEM, M_WAITOK | M_ZERO); phys_obj->id = id; phys_obj->handle = drm_pci_alloc(dev, size, align, BUS_SPACE_MAXADDR); if (!phys_obj->handle) { ret = -ENOMEM; goto kfree_obj; } pmap_change_attr((vm_offset_t)phys_obj->handle->vaddr, size / PAGE_SIZE, PAT_WRITE_COMBINING); dev_priv->mm.phys_objs[id - 1] = phys_obj; return 0; kfree_obj: free(phys_obj, DRM_I915_GEM); return ret; } static void i915_gem_free_phys_object(struct drm_device *dev, int id) { drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_phys_object *phys_obj; if (!dev_priv->mm.phys_objs[id - 1]) return; phys_obj = dev_priv->mm.phys_objs[id - 1]; if (phys_obj->cur_obj) { i915_gem_detach_phys_object(dev, phys_obj->cur_obj); } drm_pci_free(dev, phys_obj->handle); free(phys_obj, DRM_I915_GEM); dev_priv->mm.phys_objs[id - 1] = NULL; } void i915_gem_free_all_phys_object(struct drm_device *dev) { int i; for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++) i915_gem_free_phys_object(dev, i); } void i915_gem_detach_phys_object(struct drm_device *dev, struct drm_i915_gem_object *obj) { vm_page_t page; struct sf_buf *sf; char *vaddr, *dst; int i, page_count; if (!obj->phys_obj) return; vaddr = obj->phys_obj->handle->vaddr; page_count = obj->base.size / PAGE_SIZE; VM_OBJECT_WLOCK(obj->base.vm_obj); for (i = 0; i < page_count; i++) { page = i915_gem_wire_page(obj->base.vm_obj, i, NULL); if (page == NULL) continue; /* XXX */ VM_OBJECT_WUNLOCK(obj->base.vm_obj); sf = sf_buf_alloc(page, 0); if (sf != NULL) { dst = (char *)sf_buf_kva(sf); memcpy(dst, vaddr + IDX_TO_OFF(i), PAGE_SIZE); sf_buf_free(sf); } drm_clflush_pages(&page, 1); VM_OBJECT_WLOCK(obj->base.vm_obj); vm_page_reference(page); vm_page_lock(page); vm_page_dirty(page); vm_page_unwire(page, PQ_INACTIVE); vm_page_unlock(page); atomic_add_long(&i915_gem_wired_pages_cnt, -1); } VM_OBJECT_WUNLOCK(obj->base.vm_obj); intel_gtt_chipset_flush(); obj->phys_obj->cur_obj = NULL; obj->phys_obj = NULL; } int i915_gem_attach_phys_object(struct drm_device *dev, struct drm_i915_gem_object *obj, int id, int align) { drm_i915_private_t *dev_priv = dev->dev_private; vm_page_t page; struct sf_buf *sf; char *dst, *src; int ret = 0; int page_count; int i; if (id > I915_MAX_PHYS_OBJECT) return -EINVAL; if (obj->phys_obj) { if (obj->phys_obj->id == id) return 0; i915_gem_detach_phys_object(dev, obj); } /* create a new object */ if (!dev_priv->mm.phys_objs[id - 1]) { ret = i915_gem_init_phys_object(dev, id, obj->base.size, align); if (ret) { DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->base.size); return ret; } } /* bind to the object */ obj->phys_obj = dev_priv->mm.phys_objs[id - 1]; obj->phys_obj->cur_obj = obj; page_count = obj->base.size / PAGE_SIZE; VM_OBJECT_WLOCK(obj->base.vm_obj); for (i = 0; i < page_count; i++) { page = i915_gem_wire_page(obj->base.vm_obj, i, NULL); if (page == NULL) { ret = -EIO; break; } VM_OBJECT_WUNLOCK(obj->base.vm_obj); sf = sf_buf_alloc(page, 0); src = (char *)sf_buf_kva(sf); dst = (char *)obj->phys_obj->handle->vaddr + IDX_TO_OFF(i); memcpy(dst, src, PAGE_SIZE); sf_buf_free(sf); VM_OBJECT_WLOCK(obj->base.vm_obj); vm_page_reference(page); vm_page_lock(page); vm_page_unwire(page, PQ_INACTIVE); 
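/*
 * Aside: attaching an object to a phys object above copies each backing page
 * into the contiguous buffer at byte offset IDX_TO_OFF(i), i.e. page index
 * times page size.  A stand-alone model of that copy loop ("sketch_" names
 * and SK_PAGE_SIZE are placeholders; the driver maps each vm_page_t through
 * an sf_buf before copying, which this sketch omits):
 */
#include <stddef.h>
#include <string.h>

#define SK_PAGE_SIZE    4096    /* placeholder for PAGE_SIZE */

static void
sketch_copy_pages(char *dst_contig, char *const *src_pages, int page_count)
{
        int i;

        for (i = 0; i < page_count; i++)
                memcpy(dst_contig + (size_t)i * SK_PAGE_SIZE, src_pages[i],
                    SK_PAGE_SIZE);
}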
vm_page_unlock(page); atomic_add_long(&i915_gem_wired_pages_cnt, -1); } VM_OBJECT_WUNLOCK(obj->base.vm_obj); return ret; } static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file_priv) { void *vaddr = (char *)obj->phys_obj->handle->vaddr + args->offset; char __user *user_data = to_user_ptr(args->data_ptr); if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { unsigned long unwritten; /* The physical object once assigned is fixed for the lifetime * of the obj, so we can safely drop the lock and continue * to access vaddr. */ DRM_UNLOCK(dev); unwritten = copy_from_user(vaddr, user_data, args->size); DRM_LOCK(dev); if (unwritten) return -EFAULT; } i915_gem_chipset_flush(dev); return 0; } void i915_gem_release(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; /* Clean up our request list when the client is going away, so that * later retire_requests won't dereference our soon-to-be-gone * file_priv. */ mtx_lock(&file_priv->mm.lck); while (!list_empty(&file_priv->mm.request_list)) { struct drm_i915_gem_request *request; request = list_first_entry(&file_priv->mm.request_list, struct drm_i915_gem_request, client_list); list_del(&request->client_list); request->file_priv = NULL; } mtx_unlock(&file_priv->mm.lck); } static vm_page_t i915_gem_wire_page(vm_object_t object, vm_pindex_t pindex, bool *fresh) { vm_page_t page; int rv; VM_OBJECT_ASSERT_WLOCKED(object); page = vm_page_grab(object, pindex, VM_ALLOC_NORMAL); if (page->valid != VM_PAGE_BITS_ALL) { if (vm_pager_has_page(object, pindex, NULL, NULL)) { rv = vm_pager_get_pages(object, &page, 1, 0); if (rv != VM_PAGER_OK) { vm_page_lock(page); vm_page_free(page); vm_page_unlock(page); return (NULL); } if (fresh != NULL) *fresh = true; } else { pmap_zero_page(page); page->valid = VM_PAGE_BITS_ALL; page->dirty = 0; if (fresh != NULL) *fresh = false; } } else if (fresh != NULL) { *fresh = false; } vm_page_lock(page); vm_page_wire(page); vm_page_unlock(page); vm_page_xunbusy(page); atomic_add_long(&i915_gem_wired_pages_cnt, 1); return (page); } #undef __user #undef __force #undef __iomem #undef __must_check #undef to_user_ptr #undef offset_in_page #undef page_to_phys Index: head/sys/dev/drm2/i915/i915_gem_context.c =================================================================== --- head/sys/dev/drm2/i915/i915_gem_context.c (revision 287173) +++ head/sys/dev/drm2/i915/i915_gem_context.c (revision 287174) @@ -1,549 +1,549 @@ /* * Copyright © 2011-2012 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * Authors: * Ben Widawsky * */ /* * This file implements HW context support. On gen5+ a HW context consists of an * opaque GPU object which is referenced at times of context saves and restores. * With RC6 enabled, the context is also referenced as the GPU enters and exists * from RC6 (GPU has it's own internal power context, except on gen5). Though * something like a context does exist for the media ring, the code only * supports contexts for the render ring. * * In software, there is a distinction between contexts created by the user, * and the default HW context. The default HW context is used by GPU clients * that do not request setup of their own hardware context. The default * context's state is never restored to help prevent programming errors. This * would happen if a client ran and piggy-backed off another clients GPU state. * The default context only exists to give the GPU some offset to load as the * current to invoke a save of the context we actually care about. In fact, the * code could likely be constructed, albeit in a more complicated fashion, to * never use the default context, though that limits the driver's ability to * swap out, and/or destroy other contexts. * * All other contexts are created as a request by the GPU client. These contexts * store GPU state, and thus allow GPU clients to not re-emit state (and * potentially query certain state) at any time. The kernel driver makes * certain that the appropriate commands are inserted. * * The context life cycle is semi-complicated in that context BOs may live * longer than the context itself because of the way the hardware, and object * tracking works. Below is a very crude representation of the state machine * describing the context life. * refcount pincount active * S0: initial state 0 0 0 * S1: context created 1 0 0 * S2: context is currently running 2 1 X * S3: GPU referenced, but not current 2 0 1 * S4: context is current, but destroyed 1 1 0 * S5: like S3, but destroyed 1 0 1 * * The most common (but not all) transitions: * S0->S1: client creates a context * S1->S2: client submits execbuf with context * S2->S3: other clients submits execbuf with context * S3->S1: context object was retired * S3->S2: clients submits another execbuf * S2->S4: context destroy called with current context * S3->S5->S0: destroy path * S4->S5->S0: destroy path on current context * * There are two confusing terms used above: * The "current context" means the context which is currently running on the * GPU. The GPU has loaded it's state already and has stored away the gtt * offset of the BO. The GPU is not actively referencing the data at this * offset, but it will on the next context switch. The only way to avoid this * is to do a GPU reset. * * An "active context' is one which was previously the "current context" and is * on the active list waiting for the next context switch to occur. Until this * happens, the object must remain at the same gtt offset. It is therefore * possible to destroy a context, but it is still active. * */ #include __FBSDID("$FreeBSD$"); #include #include #include "i915_drv.h" /* This is a HW constraint. The value below is the largest known requirement * I've seen in a spec to date, and that was a workaround for a non-shipping * part. 
It should be safe to decrease this, but it's more future proof as is. */ #define CONTEXT_ALIGN (64<<10) static struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id); static int do_switch(struct i915_hw_context *to); static int get_context_size(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; int ret; u32 reg; switch (INTEL_INFO(dev)->gen) { case 6: reg = I915_READ(CXT_SIZE); ret = GEN6_CXT_TOTAL_SIZE(reg) * 64; break; case 7: reg = I915_READ(GEN7_CXT_SIZE); #ifdef FREEBSD_WIP if (IS_HASWELL(dev)) ret = HSW_CXT_TOTAL_SIZE(reg) * 64; else #endif ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; break; default: panic("i915_gem_context: Unsupported Intel GPU generation %d", INTEL_INFO(dev)->gen); } return ret; } static void do_destroy(struct i915_hw_context *ctx) { #if defined(INVARIANTS) struct drm_device *dev = ctx->obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; #endif if (ctx->file_priv) drm_gem_names_remove(&ctx->file_priv->context_idr, ctx->id); else KASSERT(ctx == dev_priv->rings[RCS].default_context, ("i915_gem_context: ctx != default_context")); drm_gem_object_unreference(&ctx->obj->base); free(ctx, DRM_I915_GEM); } static int create_hw_context(struct drm_device *dev, struct drm_i915_file_private *file_priv, struct i915_hw_context **ret_ctx) { struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_context *ctx; int ret, id; ctx = malloc(sizeof(*ctx), DRM_I915_GEM, M_NOWAIT | M_ZERO); if (ctx == NULL) return (-ENOMEM); ctx->obj = i915_gem_alloc_object(dev, dev_priv->hw_context_size); if (ctx->obj == NULL) { free(ctx, DRM_I915_GEM); DRM_DEBUG_DRIVER("Context object allocated failed\n"); return (-ENOMEM); } if (INTEL_INFO(dev)->gen >= 7) { ret = i915_gem_object_set_cache_level(ctx->obj, I915_CACHE_LLC_MLC); if (ret) goto err_out; } /* The ring associated with the context object is handled by the normal * object tracking code. We give an initial ring value simple to pass an * assertion in the context switch code. */ ctx->ring = &dev_priv->rings[RCS]; /* Default context will never have a file_priv */ if (file_priv == NULL) { *ret_ctx = ctx; return (0); } ctx->file_priv = file_priv; again: id = 0; ret = drm_gem_name_create(&file_priv->context_idr, ctx, &id); if (ret == 0) ctx->id = id; if (ret == -EAGAIN) goto again; else if (ret) goto err_out; *ret_ctx = ctx; return (0); err_out: do_destroy(ctx); return (ret); } static inline bool is_default_context(struct i915_hw_context *ctx) { return (ctx == ctx->ring->default_context); } /** * The default context needs to exist per ring that uses contexts. It stores the * context state of the GPU for applications that don't utilize HW contexts, as * well as an idle case. */ static int create_default_context(struct drm_i915_private *dev_priv) { struct i915_hw_context *ctx; int ret; DRM_LOCK_ASSERT(dev_priv->dev); ret = create_hw_context(dev_priv->dev, NULL, &ctx); if (ret != 0) return (ret); /* We may need to do things with the shrinker which require us to * immediately switch back to the default context. This can cause a * problem as pinning the default context also requires GTT space which * may not be available. To avoid this we always pin the * default context. 
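/*
 * Aside: the life-cycle table in the header comment above boils down to two
 * counters per context: a refcount (creation plus "current" status) and a
 * pincount (held only while the context is current on the GPU).  A
 * stand-alone model of the common S0->S1->S2->S3 transitions ("sketch_"
 * names are hypothetical, illustration only):
 */
struct sketch_ctx {
        int     refcount;
        int     pincount;
        int     active;
};

static void
sketch_ctx_create(struct sketch_ctx *c)
{
        c->refcount = 1;        /* S1: context created */
        c->pincount = 0;
        c->active = 0;
}

static void
sketch_ctx_make_current(struct sketch_ctx *c)
{
        c->refcount++;          /* S2: currently running on the GPU */
        c->pincount++;
}

static void
sketch_ctx_switch_away(struct sketch_ctx *c)
{
        c->pincount--;          /* S3: still GPU-referenced, no longer current */
        c->active = 1;
}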
*/ dev_priv->rings[RCS].default_context = ctx; ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false); if (ret) goto err_destroy; ret = do_switch(ctx); if (ret) goto err_unpin; DRM_DEBUG_DRIVER("Default HW context loaded\n"); return 0; err_unpin: i915_gem_object_unpin(ctx->obj); err_destroy: do_destroy(ctx); return ret; } void i915_gem_context_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; uint32_t ctx_size; if (!HAS_HW_CONTEXTS(dev)) { dev_priv->hw_contexts_disabled = true; return; } /* If called from reset, or thaw... we've been here already */ if (dev_priv->hw_contexts_disabled || dev_priv->rings[RCS].default_context) return; ctx_size = get_context_size(dev); dev_priv->hw_context_size = get_context_size(dev); dev_priv->hw_context_size = roundup(dev_priv->hw_context_size, 4096); if (ctx_size <= 0 || ctx_size > (1<<20)) { dev_priv->hw_contexts_disabled = true; return; } if (create_default_context(dev_priv)) { dev_priv->hw_contexts_disabled = true; return; } DRM_DEBUG_DRIVER("HW context support initialized\n"); } void i915_gem_context_fini(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; if (dev_priv->hw_contexts_disabled) return; /* The only known way to stop the gpu from accessing the hw context is * to reset it. Do this as the very last operation to avoid confusing * other code, leading to spurious errors. */ intel_gpu_reset(dev); i915_gem_object_unpin(dev_priv->rings[RCS].default_context->obj); do_destroy(dev_priv->rings[RCS].default_context); } -static int context_idr_cleanup(uint32_t id, void *p, void *data) +static int context_idr_cleanup(int id, void *p, void *data) { struct i915_hw_context *ctx = p; KASSERT(id != DEFAULT_CONTEXT_ID, ("i915_gem_context: id == DEFAULT_CONTEXT_ID in cleanup")); do_destroy(ctx); return 0; } void i915_gem_context_close(struct drm_device *dev, struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; DRM_LOCK(dev); drm_gem_names_foreach(&file_priv->context_idr, context_idr_cleanup, NULL); drm_gem_names_fini(&file_priv->context_idr); DRM_UNLOCK(dev); } static struct i915_hw_context * i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id) { return (struct i915_hw_context *)drm_gem_find_ptr(&file_priv->context_idr, id); } static inline int mi_set_context(struct intel_ring_buffer *ring, struct i915_hw_context *new_context, u32 hw_flags) { int ret; /* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value * explicitly, so we rely on the value at ring init, stored in * itlb_before_ctx_switch. 
*/ if (IS_GEN6(ring->dev) && ring->itlb_before_ctx_switch) { ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, 0); if (ret) return ret; } ret = intel_ring_begin(ring, 6); if (ret) return ret; if (IS_GEN7(ring->dev)) intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE); else intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_SET_CONTEXT); intel_ring_emit(ring, new_context->obj->gtt_offset | MI_MM_SPACE_GTT | MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN | hw_flags); /* w/a: MI_SET_CONTEXT must always be followed by MI_NOOP */ intel_ring_emit(ring, MI_NOOP); if (IS_GEN7(ring->dev)) intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); else intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); return ret; } static int do_switch(struct i915_hw_context *to) { struct intel_ring_buffer *ring = to->ring; struct drm_i915_gem_object *from_obj = ring->last_context_obj; u32 hw_flags = 0; int ret; KASSERT(!(from_obj != NULL && from_obj->pin_count == 0), ("i915_gem_context: invalid \"from\" context")); if (from_obj == to->obj) return 0; ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false); if (ret) return ret; /* Clear this page out of any CPU caches for coherent swap-in/out. Note * that thanks to write = false in this call and us not setting any gpu * write domains when putting a context object onto the active list * (when switching away from it), this won't block. * XXX: We need a real interface to do this instead of trickery. */ ret = i915_gem_object_set_to_gtt_domain(to->obj, false); if (ret) { i915_gem_object_unpin(to->obj); return ret; } if (!to->obj->has_global_gtt_mapping) i915_gem_gtt_bind_object(to->obj, to->obj->cache_level); if (!to->is_initialized || is_default_context(to)) hw_flags |= MI_RESTORE_INHIBIT; else if (from_obj == to->obj) /* not yet expected */ hw_flags |= MI_FORCE_RESTORE; ret = mi_set_context(ring, to, hw_flags); if (ret) { i915_gem_object_unpin(to->obj); return ret; } /* The backing object for the context is done after switching to the * *next* context. Therefore we cannot retire the previous context until * the next context has already started running. In fact, the below code * is a bit suboptimal because the retiring can occur simply after the * MI_SET_CONTEXT instead of when the next seqno has completed. */ if (from_obj != NULL) { from_obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; i915_gem_object_move_to_active(from_obj, ring, i915_gem_next_request_seqno(ring)); /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the * object dirty. The only exception is that the context must be * correct in case the object gets swapped out. Ideally we'd be * able to defer doing this until we know the object would be * swapped, but there is no way to do that yet. */ from_obj->dirty = 1; KASSERT(from_obj->ring == ring, ("i915_gem_context: from_ring != ring")); i915_gem_object_unpin(from_obj); drm_gem_object_unreference(&from_obj->base); } drm_gem_object_reference(&to->obj->base); ring->last_context_obj = to->obj; to->is_initialized = true; return 0; } /** * i915_switch_context() - perform a GPU context switch. * @ring: ring for which we'll execute the context switch * @file_priv: file_priv associated with the context, may be NULL * @id: context id number * @seqno: sequence number by which the new context will be switched to * @flags: * * The context life cycle is simple. The context refcount is incremented and * decremented by 1 and create and destroy. 
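/*
 * Aside: do_switch() above picks the MI_SET_CONTEXT flags from two
 * conditions: restoring is inhibited for a context that has never run (or
 * for the default context, whose state is deliberately never restored), and
 * a forced restore would only apply when switching a context onto itself.
 * Stand-alone sketch (the SK_* flag values are made up; the hardware MI_*
 * encodings differ):
 */
#include <stdbool.h>
#include <stdint.h>

#define SK_RESTORE_INHIBIT      0x1     /* hypothetical placeholder bits */
#define SK_FORCE_RESTORE        0x2

static uint32_t
sketch_switch_flags(bool initialized, bool is_default, bool same_context)
{
        uint32_t flags = 0;

        if (!initialized || is_default)
                flags |= SK_RESTORE_INHIBIT;
        else if (same_context)
                flags |= SK_FORCE_RESTORE;
        return (flags);
}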
If the context is in use by the GPU, * it will have a refoucnt > 1. This allows us to destroy the context abstract * object while letting the normal object tracking destroy the backing BO. */ int i915_switch_context(struct intel_ring_buffer *ring, struct drm_file *file, int to_id) { struct drm_i915_private *dev_priv = ring->dev->dev_private; struct i915_hw_context *to; if (dev_priv->hw_contexts_disabled) return 0; if (ring != &dev_priv->rings[RCS]) return 0; if (to_id == DEFAULT_CONTEXT_ID) { to = ring->default_context; } else { if (file == NULL) return -EINVAL; to = i915_gem_context_get(file->driver_priv, to_id); if (to == NULL) return -ENOENT; } return do_switch(to); } int i915_gem_context_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_context_create *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_hw_context *ctx; int ret; if (!(dev->driver->driver_features & DRIVER_GEM)) return -ENODEV; if (dev_priv->hw_contexts_disabled) return -ENODEV; ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; ret = create_hw_context(dev, file_priv, &ctx); DRM_UNLOCK(dev); if (ret != 0) return (ret); args->ctx_id = ctx->id; DRM_DEBUG_DRIVER("HW context %d created\n", args->ctx_id); return 0; } int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_context_destroy *args = data; struct drm_i915_file_private *file_priv = file->driver_priv; struct i915_hw_context *ctx; int ret; if (!(dev->driver->driver_features & DRIVER_GEM)) return -ENODEV; ret = i915_mutex_lock_interruptible(dev); if (ret) return ret; ctx = i915_gem_context_get(file_priv, args->ctx_id); if (!ctx) { DRM_UNLOCK(dev); return -ENOENT; } do_destroy(ctx); DRM_UNLOCK(dev); DRM_DEBUG_DRIVER("HW context %d destroyed\n", args->ctx_id); return 0; } Index: head/sys/dev/drm2/i915/i915_gem_execbuffer.c =================================================================== --- head/sys/dev/drm2/i915/i915_gem_execbuffer.c (revision 287173) +++ head/sys/dev/drm2/i915/i915_gem_execbuffer.c (revision 287174) @@ -1,1540 +1,1547 @@ /* * Copyright © 2008,2010 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. 
* * Authors: * Eric Anholt * Chris Wilson * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include struct change_domains { uint32_t invalidate_domains; uint32_t flush_domains; uint32_t flush_rings; uint32_t flips; }; /* * Set the next domain for the specified object. This * may not actually perform the necessary flushing/invaliding though, * as that may want to be batched with other set_domain operations * * This is (we hope) the only really tricky part of gem. The goal * is fairly simple -- track which caches hold bits of the object * and make sure they remain coherent. A few concrete examples may * help to explain how it works. For shorthand, we use the notation * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the * a pair of read and write domain masks. * * Case 1: the batch buffer * * 1. Allocated * 2. Written by CPU * 3. Mapped to GTT * 4. Read by GPU * 5. Unmapped from GTT * 6. Freed * * Let's take these a step at a time * * 1. Allocated * Pages allocated from the kernel may still have * cache contents, so we set them to (CPU, CPU) always. * 2. Written by CPU (using pwrite) * The pwrite function calls set_domain (CPU, CPU) and * this function does nothing (as nothing changes) * 3. Mapped by GTT * This function asserts that the object is not * currently in any GPU-based read or write domains * 4. Read by GPU * i915_gem_execbuffer calls set_domain (COMMAND, 0). * As write_domain is zero, this function adds in the * current read domains (CPU+COMMAND, 0). * flush_domains is set to CPU. * invalidate_domains is set to COMMAND * clflush is run to get data out of the CPU caches * then i915_dev_set_domain calls i915_gem_flush to * emit an MI_FLUSH and drm_agp_chipset_flush * 5. Unmapped from GTT * i915_gem_object_unbind calls set_domain (CPU, CPU) * flush_domains and invalidate_domains end up both zero * so no flushing/invalidating happens * 6. Freed * yay, done * * Case 2: The shared render buffer * * 1. Allocated * 2. Mapped to GTT * 3. Read/written by GPU * 4. set_domain to (CPU,CPU) * 5. Read/written by CPU * 6. Read/written by GPU * * 1. Allocated * Same as last example, (CPU, CPU) * 2. Mapped to GTT * Nothing changes (assertions find that it is not in the GPU) * 3. Read/written by GPU * execbuffer calls set_domain (RENDER, RENDER) * flush_domains gets CPU * invalidate_domains gets GPU * clflush (obj) * MI_FLUSH and drm_agp_chipset_flush * 4. set_domain (CPU, CPU) * flush_domains gets GPU * invalidate_domains gets CPU * wait_rendering (obj) to make sure all drawing is complete. * This will include an MI_FLUSH to get the data from GPU * to memory * clflush (obj) to invalidate the CPU cache * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) * 5. Read/written by CPU * cache lines are loaded and dirtied * 6. Read written by GPU * Same as last GPU access * * Case 3: The constant buffer * * 1. Allocated * 2. Written by CPU * 3. Read by GPU * 4. Updated (written) by CPU again * 5. Read by GPU * * 1. Allocated * (CPU, CPU) * 2. Written by CPU * (CPU, CPU) * 3. Read by GPU * (CPU+RENDER, 0) * flush_domains = CPU * invalidate_domains = RENDER * clflush (obj) * MI_FLUSH * drm_agp_chipset_flush * 4. Updated (written) by CPU again * (CPU, CPU) * flush_domains = 0 (no previous write domain) * invalidate_domains = 0 (no new read domains) * 5. 
Read by GPU * (CPU+RENDER, 0) * flush_domains = CPU * invalidate_domains = RENDER * clflush (obj) * MI_FLUSH * drm_agp_chipset_flush */ static void i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring, struct change_domains *cd) { uint32_t invalidate_domains = 0, flush_domains = 0; /* * If the object isn't moving to a new write domain, * let the object stay in multiple read domains */ if (obj->base.pending_write_domain == 0) obj->base.pending_read_domains |= obj->base.read_domains; /* * Flush the current write domain if * the new read domains don't match. Invalidate * any read domains which differ from the old * write domain */ if (obj->base.write_domain && (((obj->base.write_domain != obj->base.pending_read_domains || obj->ring != ring)) || (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) { flush_domains |= obj->base.write_domain; invalidate_domains |= obj->base.pending_read_domains & ~obj->base.write_domain; } /* * Invalidate any read caches which may have * stale data. That is, any new read domains. */ invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains; if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) i915_gem_clflush_object(obj); if (obj->base.pending_write_domain) cd->flips |= atomic_load_acq_int(&obj->pending_flip); /* The actual obj->write_domain will be updated with * pending_write_domain after we emit the accumulated flush for all * of our domain changes in execbuffers (which clears objects' * write_domains). So if we have a current write domain that we * aren't changing, set pending_write_domain to that. */ if (flush_domains == 0 && obj->base.pending_write_domain == 0) obj->base.pending_write_domain = obj->base.write_domain; cd->invalidate_domains |= invalidate_domains; cd->flush_domains |= flush_domains; if (flush_domains & I915_GEM_GPU_DOMAINS) cd->flush_rings |= intel_ring_flag(obj->ring); if (invalidate_domains & I915_GEM_GPU_DOMAINS) cd->flush_rings |= intel_ring_flag(ring); } struct eb_objects { u_long hashmask; LIST_HEAD(, drm_i915_gem_object) *buckets; }; static struct eb_objects * eb_create(int size) { struct eb_objects *eb; eb = malloc(sizeof(*eb), DRM_I915_GEM, M_WAITOK | M_ZERO); eb->buckets = hashinit(size, DRM_I915_GEM, &eb->hashmask); return (eb); } static void eb_reset(struct eb_objects *eb) { int i; for (i = 0; i <= eb->hashmask; i++) LIST_INIT(&eb->buckets[i]); } static void eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj) { LIST_INSERT_HEAD(&eb->buckets[obj->exec_handle & eb->hashmask], obj, exec_node); } static struct drm_i915_gem_object * eb_get_object(struct eb_objects *eb, unsigned long handle) { struct drm_i915_gem_object *obj; LIST_FOREACH(obj, &eb->buckets[handle & eb->hashmask], exec_node) { if (obj->exec_handle == handle) return (obj); } return (NULL); } static void eb_destroy(struct eb_objects *eb) { free(eb->buckets, DRM_I915_GEM); free(eb, DRM_I915_GEM); } static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) { return (obj->base.write_domain == I915_GEM_DOMAIN_CPU || obj->cache_level != I915_CACHE_NONE); } static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, struct drm_i915_gem_relocation_entry *reloc) { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; struct drm_i915_gem_object *target_i915_obj; uint32_t target_offset; int ret = -EINVAL; /* we've already hold a reference to all valid objects */ target_obj = &eb_get_object(eb, 
reloc->target_handle)->base; if (unlikely(target_obj == NULL)) return -ENOENT; target_i915_obj = to_intel_bo(target_obj); target_offset = target_i915_obj->gtt_offset; #if WATCH_RELOC DRM_INFO("%s: obj %p offset %08x target %d " "read %08x write %08x gtt %08x " "presumed %08x delta %08x\n", __func__, obj, (int) reloc->offset, (int) reloc->target_handle, (int) reloc->read_domains, (int) reloc->write_domain, (int) target_offset, (int) reloc->presumed_offset, reloc->delta); #endif /* The target buffer should have appeared before us in the * exec_object list, so it should have a GTT space bound by now. */ if (unlikely(target_offset == 0)) { DRM_DEBUG("No GTT space found for object %d\n", reloc->target_handle); return ret; } /* Validate that the target is in a valid r/w GPU domain */ if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { DRM_DEBUG("reloc with multiple write domains: " "obj %p target %d offset %d " "read %08x write %08x", obj, reloc->target_handle, (int) reloc->offset, reloc->read_domains, reloc->write_domain); return ret; } if (unlikely((reloc->write_domain | reloc->read_domains) & ~I915_GEM_GPU_DOMAINS)) { DRM_DEBUG("reloc with read/write non-GPU domains: " "obj %p target %d offset %d " "read %08x write %08x", obj, reloc->target_handle, (int) reloc->offset, reloc->read_domains, reloc->write_domain); return ret; } if (unlikely(reloc->write_domain && target_obj->pending_write_domain && reloc->write_domain != target_obj->pending_write_domain)) { DRM_DEBUG("Write domain conflict: " "obj %p target %d offset %d " "new %08x old %08x\n", obj, reloc->target_handle, (int) reloc->offset, reloc->write_domain, target_obj->pending_write_domain); return ret; } target_obj->pending_read_domains |= reloc->read_domains; target_obj->pending_write_domain |= reloc->write_domain; /* If the relocation already has the right value in it, no * more work needs to be done. */ if (target_offset == reloc->presumed_offset) return 0; /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > obj->base.size - 4)) { DRM_DEBUG("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", obj, reloc->target_handle, (int) reloc->offset, (int) obj->base.size); return ret; } if (unlikely(reloc->offset & 3)) { DRM_DEBUG("Relocation not 4-byte aligned: " "obj %p target %d offset %d.\n", obj, reloc->target_handle, (int) reloc->offset); return ret; } /* We can't wait for rendering with pagefaults disabled */ if (obj->active && (curthread->td_pflags & TDP_NOFAULTING) != 0) return (-EFAULT); reloc->delta += target_offset; if (use_cpu_reloc(obj)) { uint32_t page_offset = reloc->offset & PAGE_MASK; char *vaddr; struct sf_buf *sf; ret = i915_gem_object_set_to_cpu_domain(obj, 1); if (ret) return ret; sf = sf_buf_alloc(obj->pages[OFF_TO_IDX(reloc->offset)], SFB_NOWAIT); if (sf == NULL) return (-ENOMEM); vaddr = (void *)sf_buf_kva(sf); *(uint32_t *)(vaddr + page_offset) = reloc->delta; sf_buf_free(sf); } else { uint32_t *reloc_entry; char *reloc_page; ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) return ret; ret = i915_gem_object_put_fence(obj); if (ret) return ret; - /* - * Map the page containing the relocation we're going - * to perform. - */ + /* Map the page containing the relocation we're going to perform. 
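/*
 * Illustrative sketch, not part of this changeset: the bounds and alignment
 * checks that i915_gem_execbuffer_relocate_entry() above applies to
 * reloc->offset before writing the relocated value.  The slot must fit a
 * full dword inside the object and be 4-byte aligned.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
sketch_reloc_offset_ok(uint64_t offset, uint64_t obj_size)
{

	if (obj_size < 4 || offset > obj_size - 4)
		return false;		/* relocation beyond object bounds */
	if (offset & 3)
		return false;		/* not 4-byte aligned */
	return true;
}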
*/ reloc->offset += obj->gtt_offset; reloc_page = pmap_mapdev_attr(dev->agp->base + (reloc->offset & ~PAGE_MASK), PAGE_SIZE, PAT_WRITE_COMBINING); reloc_entry = (uint32_t *)(reloc_page + (reloc->offset & PAGE_MASK)); *(volatile uint32_t *)reloc_entry = reloc->delta; pmap_unmapdev((vm_offset_t)reloc_page, PAGE_SIZE); } /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and * pipe_control writes because the gpu doesn't properly redirect them * through the ppgtt for non_secure batchbuffers. */ if (unlikely(IS_GEN6(dev) && reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && !target_i915_obj->has_global_gtt_mapping)) { i915_gem_gtt_bind_object(target_i915_obj, target_i915_obj->cache_level); } /* and update the user's relocation entry */ reloc->presumed_offset = target_offset; return 0; } static int i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj, - struct eb_objects *eb) + struct eb_objects *eb) { #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry *user_relocs; struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; int remain, ret; user_relocs = (void *)(uintptr_t)entry->relocs_ptr; remain = entry->relocation_count; while (remain) { struct drm_i915_gem_relocation_entry *r = stack_reloc; int count = remain; if (count > DRM_ARRAY_SIZE(stack_reloc)) count = DRM_ARRAY_SIZE(stack_reloc); remain -= count; ret = -copyin_nofault(user_relocs, r, count*sizeof(r[0])); if (ret != 0) return (ret); do { u64 offset = r->presumed_offset; - + ret = i915_gem_execbuffer_relocate_entry(obj, eb, r); if (ret) return ret; if (r->presumed_offset != offset && copyout_nofault(&r->presumed_offset, &user_relocs->presumed_offset, sizeof(r->presumed_offset))) { return -EFAULT; } user_relocs++; r++; } while (--count); } + + return 0; #undef N_RELOC - return (0); } static int i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj, - struct eb_objects *eb, struct drm_i915_gem_relocation_entry *relocs) + struct eb_objects *eb, + struct drm_i915_gem_relocation_entry *relocs) { const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; int i, ret; for (i = 0; i < entry->relocation_count; i++) { ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]); if (ret) return ret; } return 0; } static int i915_gem_execbuffer_relocate(struct drm_device *dev, struct eb_objects *eb, struct list_head *objects) { struct drm_i915_gem_object *obj; int ret, pflags; /* Try to move as many of the relocation targets off the active list * to avoid unnecessary fallbacks to the slow path, as we cannot wait * for the retirement with pagefaults disabled. */ i915_gem_retire_requests(dev); ret = 0; pflags = vm_fault_disable_pagefaults(); /* This is the fast path and we cannot handle a pagefault whilst * holding the device lock lest the user pass in the relocations * contained within a mmaped bo. For in such a case we, the page * fault handler would call i915_gem_fault() and we would try to * acquire the device lock again. Obviously this is bad. 
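/*
 * Illustrative sketch, not part of this changeset: the chunking pattern used
 * by i915_gem_execbuffer_relocate_object() above.  Relocation entries are
 * pulled from userspace through a small on-stack buffer (512 bytes in the
 * driver) so the copies can use the nofault variants while the device lock
 * is held.  copy_chunk() and apply_one() stand in for copyin_nofault() and
 * the per-entry relocation work.
 */
#include <stddef.h>

#define	SKETCH_STACK_BYTES	512
#define	SKETCH_ENTRY_SIZE	32	/* assumed size of one relocation entry */
#define	SKETCH_CHUNK		(SKETCH_STACK_BYTES / SKETCH_ENTRY_SIZE)

static int
sketch_process_relocs(size_t total,
    int (*copy_chunk)(size_t first, size_t count),
    int (*apply_one)(size_t index))
{
	size_t done, count, i;

	for (done = 0; done < total; done += count) {
		count = total - done;
		if (count > SKETCH_CHUNK)
			count = SKETCH_CHUNK;
		if (copy_chunk(done, count) != 0)
			return (-1);	/* -EFAULT in the driver */
		for (i = 0; i < count; i++)
			if (apply_one(done + i) != 0)
				return (-1);
	}
	return (0);
}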
*/ list_for_each_entry(obj, objects, exec_list) { ret = i915_gem_execbuffer_relocate_object(obj, eb); - if (ret != 0) + if (ret) break; } vm_fault_enable_pagefaults(pflags); - return (ret); + + return ret; } #define __EXEC_OBJECT_HAS_FENCE (1<<31) static int need_reloc_mappable(struct drm_i915_gem_object *obj) { struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; return entry->relocation_count && !use_cpu_reloc(obj); } static int pin_and_fence_object(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring) { struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; bool need_fence, need_mappable; int ret; need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(obj); ret = i915_gem_object_pin(obj, entry->alignment, need_mappable); if (ret) return ret; if (has_fenced_gpu_access) { if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { ret = i915_gem_object_get_fence(obj); if (ret) goto err_unpin; if (i915_gem_object_pin_fence(obj)) entry->flags |= __EXEC_OBJECT_HAS_FENCE; obj->pending_fenced_gpu_access = true; } } entry->offset = obj->gtt_offset; return 0; err_unpin: i915_gem_object_unpin(obj); return ret; } static int i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, struct drm_file *file, struct list_head *objects) { drm_i915_private_t *dev_priv; struct drm_i915_gem_object *obj; - int ret, retry; - bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; struct list_head ordered_objects; + bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; + int ret, retry; dev_priv = ring->dev->dev_private; INIT_LIST_HEAD(&ordered_objects); while (!list_empty(objects)) { struct drm_i915_gem_exec_object2 *entry; bool need_fence, need_mappable; obj = list_first_entry(objects, struct drm_i915_gem_object, exec_list); entry = obj->exec_entry; need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(obj); if (need_mappable) list_move(&obj->exec_list, &ordered_objects); else list_move_tail(&obj->exec_list, &ordered_objects); obj->base.pending_read_domains = 0; obj->base.pending_write_domain = 0; } list_splice(&ordered_objects, objects); /* Attempt to pin all of the buffers into the GTT. * This is done in 3 phases: * * 1a. Unbind all objects that do not match the GTT constraints for * the execbuffer (fenceable, mappable, alignment etc). - * 1b. Increment pin count for already bound objects and obtain - * a fence register if required. + * 1b. Increment pin count for already bound objects. * 2. Bind new objects. * 3. Decrement pin count. * - * This avoid unnecessary unbinding of later objects in order to makr + * This avoid unnecessary unbinding of later objects in order to make * room for the earlier objects *unless* we need to defragment. */ retry = 0; do { ret = 0; /* Unbind any ill-fitting objects or pin. 
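/*
 * Illustrative sketch, not part of this changeset: the need_fence /
 * need_mappable predicates computed in pin_and_fence_object() and
 * i915_gem_execbuffer_reserve() above.  Only pre-gen4 hardware takes the
 * fenced path, and an object needs a mappable placement either for fencing
 * or because its relocations will be written through the GTT.
 */
#include <stdbool.h>

struct sketch_exec_flags {
	bool	needs_fence_flag;	/* EXEC_OBJECT_NEEDS_FENCE set */
	bool	tiled;			/* tiling_mode != I915_TILING_NONE */
	bool	has_gtt_relocs;		/* relocation_count && !use_cpu_reloc() */
};

static void
sketch_placement(const struct sketch_exec_flags *f, bool gen_lt_4,
    bool *need_fence, bool *need_mappable)
{

	*need_fence = gen_lt_4 && f->needs_fence_flag && f->tiled;
	*need_mappable = *need_fence || f->has_gtt_relocs;
}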
*/ list_for_each_entry(obj, objects, exec_list) { struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; bool need_fence, need_mappable; if (!obj->gtt_space) continue; need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(obj); if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || (need_mappable && !obj->map_and_fenceable)) ret = i915_gem_object_unbind(obj); else ret = pin_and_fence_object(obj, ring); if (ret) goto err; } /* Bind fresh objects */ list_for_each_entry(obj, objects, exec_list) { if (obj->gtt_space) continue; ret = pin_and_fence_object(obj, ring); if (ret) { int ret_ignore; /* This can potentially raise a harmless * -EINVAL if we failed to bind in the above * call. It cannot raise -EINTR since we know * that the bo is freshly bound and so will * not need to be flushed or waited upon. */ ret_ignore = i915_gem_object_unbind(obj); (void)ret_ignore; if (obj->gtt_space != NULL) printf("%s: gtt_space\n", __func__); break; } } /* Decrement pin count for bound objects */ list_for_each_entry(obj, objects, exec_list) { struct drm_i915_gem_exec_object2 *entry; if (!obj->gtt_space) continue; entry = obj->exec_entry; if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { i915_gem_object_unpin_fence(obj); entry->flags &= ~__EXEC_OBJECT_HAS_FENCE; } i915_gem_object_unpin(obj); /* ... and ensure ppgtt mapping exist if needed. */ if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) { i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt, obj, obj->cache_level); obj->has_aliasing_ppgtt_mapping = 1; } } if (ret != -ENOSPC || retry > 1) return ret; /* First attempt, just clear anything that is purgeable. * Second attempt, clear the entire GTT. */ ret = i915_gem_evict_everything(ring->dev, retry == 0); if (ret) return ret; retry++; } while (1); err: list_for_each_entry_continue_reverse(obj, objects, exec_list) { struct drm_i915_gem_exec_object2 *entry; if (!obj->gtt_space) continue; entry = obj->exec_entry; if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { i915_gem_object_unpin_fence(obj); entry->flags &= ~__EXEC_OBJECT_HAS_FENCE; } i915_gem_object_unpin(obj); } return ret; } static int i915_gem_execbuffer_relocate_slow(struct drm_device *dev, - struct drm_file *file, struct intel_ring_buffer *ring, - struct list_head *objects, struct eb_objects *eb, - struct drm_i915_gem_exec_object2 *exec, int count) + struct drm_file *file, + struct intel_ring_buffer *ring, + struct list_head *objects, + struct eb_objects *eb, + struct drm_i915_gem_exec_object2 *exec, + int count) { struct drm_i915_gem_relocation_entry *reloc; struct drm_i915_gem_object *obj; int *reloc_offset; int i, total, ret; /* We may process another execbuffer during the unlock... 
*/ while (!list_empty(objects)) { obj = list_first_entry(objects, struct drm_i915_gem_object, exec_list); list_del_init(&obj->exec_list); drm_gem_object_unreference(&obj->base); } DRM_UNLOCK(dev); total = 0; for (i = 0; i < count; i++) total += exec[i].relocation_count; reloc_offset = malloc(count * sizeof(*reloc_offset), DRM_I915_GEM, M_WAITOK | M_ZERO); reloc = malloc(total * sizeof(*reloc), DRM_I915_GEM, M_WAITOK | M_ZERO); total = 0; for (i = 0; i < count; i++) { struct drm_i915_gem_relocation_entry *user_relocs; user_relocs = (void *)(uintptr_t)exec[i].relocs_ptr; ret = -copyin(user_relocs, reloc + total, exec[i].relocation_count * sizeof(*reloc)); if (ret != 0) { DRM_LOCK(dev); goto err; } reloc_offset[i] = total; total += exec[i].relocation_count; } ret = i915_mutex_lock_interruptible(dev); if (ret) { DRM_LOCK(dev); goto err; } /* reacquire the objects */ eb_reset(eb); for (i = 0; i < count; i++) { struct drm_i915_gem_object *obj; obj = to_intel_bo(drm_gem_object_lookup(dev, file, exec[i].handle)); if (&obj->base == NULL) { DRM_DEBUG("Invalid object handle %d at index %d\n", exec[i].handle, i); ret = -ENOENT; goto err; } list_add_tail(&obj->exec_list, objects); obj->exec_handle = exec[i].handle; obj->exec_entry = &exec[i]; eb_add_object(eb, obj); } ret = i915_gem_execbuffer_reserve(ring, file, objects); if (ret) goto err; list_for_each_entry(obj, objects, exec_list) { int offset = obj->exec_entry - exec; ret = i915_gem_execbuffer_relocate_object_slow(obj, eb, - reloc + reloc_offset[offset]); + reloc + reloc_offset[offset]); if (ret) goto err; } /* Leave the user relocations as are, this is the painfully slow path, * and we want to avoid the complication of dropping the lock whilst * having buffers reserved in the aperture and so causing spurious * ENOSPC for random operations. */ err: free(reloc, DRM_I915_GEM); free(reloc_offset, DRM_I915_GEM); return ret; } static int i915_gem_execbuffer_flush(struct drm_device *dev, uint32_t invalidate_domains, uint32_t flush_domains, uint32_t flush_rings) { drm_i915_private_t *dev_priv = dev->dev_private; int i, ret; if (flush_domains & I915_GEM_DOMAIN_CPU) intel_gtt_chipset_flush(); if (flush_domains & I915_GEM_DOMAIN_GTT) wmb(); if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) { for (i = 0; i < I915_NUM_RINGS; i++) if (flush_rings & (1 << i)) { ret = i915_gem_flush_ring(&dev_priv->rings[i], invalidate_domains, flush_domains); if (ret) return ret; } } return 0; } static int i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) { u32 plane, flip_mask; int ret; /* Check for any pending flips. As we only maintain a flip queue depth * of 1, we can simply insert a WAIT for the next display flip prior * to executing the batch and avoid stalling the CPU. 
*/ for (plane = 0; flips >> plane; plane++) { if (((flips >> plane) & 1) == 0) continue; if (plane) flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; else flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; ret = intel_ring_begin(ring, 2); if (ret) return ret; intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); } return 0; } static int i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, struct list_head *objects) { struct drm_i915_gem_object *obj; struct change_domains cd; int ret; memset(&cd, 0, sizeof(cd)); list_for_each_entry(obj, objects, exec_list) i915_gem_object_set_to_gpu_domain(obj, ring, &cd); if (cd.invalidate_domains | cd.flush_domains) { #if WATCH_EXEC DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", __func__, cd.invalidate_domains, cd.flush_domains); #endif ret = i915_gem_execbuffer_flush(ring->dev, cd.invalidate_domains, cd.flush_domains, cd.flush_rings); if (ret) return ret; } if (cd.flips) { ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips); if (ret) return ret; } list_for_each_entry(obj, objects, exec_list) { ret = i915_gem_object_sync(obj, ring); if (ret) return ret; } return 0; } static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) { return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0; } static int validate_exec_list(struct drm_i915_gem_exec_object2 *exec, int count, vm_page_t ***map) { vm_page_t *ma; int i, length, page_count; /* XXXKIB various limits checking is missing there */ *map = malloc(count * sizeof(*ma), DRM_I915_GEM, M_WAITOK | M_ZERO); for (i = 0; i < count; i++) { /* First check for malicious input causing overflow */ if (exec[i].relocation_count > INT_MAX / sizeof(struct drm_i915_gem_relocation_entry)) return -EINVAL; length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry); if (length == 0) { (*map)[i] = NULL; continue; } /* * Since both start and end of the relocation region * may be not aligned on the page boundary, be * conservative and request a page slot for each * partial page. Thus +2. 
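/*
 * Illustrative sketch, not part of this changeset: the page-slot estimate
 * described in the "Thus +2" comment above.  A user relocation array of
 * `length` bytes whose start and end need not be page aligned can touch at
 * most howmany(length, PAGE_SIZE) + 2 pages, which is the number of slots
 * validate_exec_list() reserves before wiring the pages.
 */
#include <stddef.h>

#define	SKETCH_PAGE_SIZE	4096u

static size_t
sketch_reloc_page_slots(size_t length)
{

	/* Round up to whole pages, plus one page of slack at either end. */
	return (length + SKETCH_PAGE_SIZE - 1) / SKETCH_PAGE_SIZE + 2;
}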
*/ page_count = howmany(length, PAGE_SIZE) + 2; ma = (*map)[i] = malloc(page_count * sizeof(vm_page_t), DRM_I915_GEM, M_WAITOK | M_ZERO); if (vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, exec[i].relocs_ptr, length, VM_PROT_READ | VM_PROT_WRITE, ma, page_count) == -1) { free(ma, DRM_I915_GEM); (*map)[i] = NULL; return (-EFAULT); } } return 0; } static void i915_gem_execbuffer_move_to_active(struct list_head *objects, struct intel_ring_buffer *ring, u32 seqno) { struct drm_i915_gem_object *obj; uint32_t old_read, old_write; list_for_each_entry(obj, objects, exec_list) { old_read = obj->base.read_domains; old_write = obj->base.write_domain; obj->base.read_domains = obj->base.pending_read_domains; obj->base.write_domain = obj->base.pending_write_domain; obj->fenced_gpu_access = obj->pending_fenced_gpu_access; i915_gem_object_move_to_active(obj, ring, seqno); if (obj->base.write_domain) { obj->dirty = 1; obj->pending_gpu_write = true; list_move_tail(&obj->gpu_write_list, &ring->gpu_write_list); if (obj->pin_count) /* check for potential scanout */ intel_mark_busy(ring->dev, obj); } CTR3(KTR_DRM, "object_change_domain move_to_active %p %x %x", obj, old_read, old_write); } intel_mark_busy(ring->dev, NULL); } int i915_gem_sync_exec_requests; static void i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, struct intel_ring_buffer *ring) { struct drm_i915_gem_request *request; u32 invalidate; /* * Ensure that the commands in the batch buffer are * finished before the interrupt fires. * * The sampler always gets flushed on i965 (sigh). */ invalidate = I915_GEM_DOMAIN_COMMAND; if (INTEL_INFO(dev)->gen >= 4) invalidate |= I915_GEM_DOMAIN_SAMPLER; if (ring->flush(ring, invalidate, 0)) { i915_gem_next_request_seqno(ring); return; } /* Add a breadcrumb for the completion of the batch buffer */ request = malloc(sizeof(*request), DRM_I915_GEM, M_WAITOK | M_ZERO); if (request == NULL || i915_add_request(ring, file, request)) { i915_gem_next_request_seqno(ring); free(request, DRM_I915_GEM); } else if (i915_gem_sync_exec_requests) { i915_wait_request(ring, request->seqno); i915_gem_retire_requests(dev); } } static void i915_gem_fix_mi_batchbuffer_end(struct drm_i915_gem_object *batch_obj, uint32_t batch_start_offset, uint32_t batch_len) { char *mkva; uint64_t po_r, po_w; uint32_t cmd; po_r = batch_obj->base.dev->agp->base + batch_obj->gtt_offset + batch_start_offset + batch_len; if (batch_len > 0) po_r -= 4; mkva = pmap_mapdev_attr(trunc_page(po_r), 2 * PAGE_SIZE, PAT_WRITE_COMBINING); po_r &= PAGE_MASK; cmd = *(uint32_t *)(mkva + po_r); if (cmd != MI_BATCH_BUFFER_END) { /* * batch_len != 0 due to the check at the start of * i915_gem_do_execbuffer */ if (batch_obj->base.size > batch_start_offset + batch_len) { po_w = po_r + 4; /* DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END !\n"); */ } else { po_w = po_r; DRM_DEBUG("batchbuffer does not end by MI_BATCH_BUFFER_END, overwriting last bo cmd !\n"); } *(uint32_t *)(mkva + po_w) = MI_BATCH_BUFFER_END; } pmap_unmapdev((vm_offset_t)mkva, 2 * PAGE_SIZE); } int i915_fix_mi_batchbuffer_end = 0; static int i915_reset_gen7_sol_offsets(struct drm_device *dev, struct intel_ring_buffer *ring) { drm_i915_private_t *dev_priv = dev->dev_private; int ret, i; if (!IS_GEN7(dev) || ring != &dev_priv->rings[RCS]) return 0; ret = intel_ring_begin(ring, 4 * 3); if (ret) return ret; for (i = 0; i < 4; i++) { intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i)); intel_ring_emit(ring, 0); } 
intel_ring_advance(ring); return 0; } static int i915_gem_do_execbuffer(struct drm_device *dev, void *data, struct drm_file *file, struct drm_i915_gem_execbuffer2 *args, struct drm_i915_gem_exec_object2 *exec) { drm_i915_private_t *dev_priv = dev->dev_private; struct list_head objects; struct eb_objects *eb; struct drm_i915_gem_object *batch_obj; struct drm_clip_rect *cliprects = NULL; struct intel_ring_buffer *ring; vm_page_t **relocs_ma; u32 ctx_id = i915_execbuffer2_get_context_id(*args); u32 exec_start, exec_len; u32 seqno; u32 mask; int ret, mode, i; if (!i915_gem_check_execbuffer(args)) { DRM_DEBUG("execbuf with invalid offset/length\n"); return -EINVAL; } if (args->batch_len == 0) return (0); ret = validate_exec_list(exec, args->buffer_count, &relocs_ma); if (ret != 0) goto pre_struct_lock_err; switch (args->flags & I915_EXEC_RING_MASK) { case I915_EXEC_DEFAULT: case I915_EXEC_RENDER: ring = &dev_priv->rings[RCS]; break; case I915_EXEC_BSD: ring = &dev_priv->rings[VCS]; if (ctx_id != 0) { DRM_DEBUG("Ring %s doesn't support contexts\n", ring->name); return -EPERM; } break; case I915_EXEC_BLT: ring = &dev_priv->rings[BCS]; if (ctx_id != 0) { DRM_DEBUG("Ring %s doesn't support contexts\n", ring->name); return -EPERM; } break; default: DRM_DEBUG("execbuf with unknown ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); ret = -EINVAL; goto pre_struct_lock_err; } if (!intel_ring_initialized(ring)) { DRM_DEBUG("execbuf with invalid ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); return -EINVAL; } mode = args->flags & I915_EXEC_CONSTANTS_MASK; mask = I915_EXEC_CONSTANTS_MASK; switch (mode) { case I915_EXEC_CONSTANTS_REL_GENERAL: case I915_EXEC_CONSTANTS_ABSOLUTE: case I915_EXEC_CONSTANTS_REL_SURFACE: if (ring == &dev_priv->rings[RCS] && mode != dev_priv->relative_constants_mode) { if (INTEL_INFO(dev)->gen < 4) { ret = -EINVAL; goto pre_struct_lock_err; } if (INTEL_INFO(dev)->gen > 5 && mode == I915_EXEC_CONSTANTS_REL_SURFACE) { ret = -EINVAL; goto pre_struct_lock_err; } /* The HW changed the meaning on this bit on gen6 */ if (INTEL_INFO(dev)->gen >= 6) mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; } break; default: DRM_DEBUG("execbuf with unknown constants: %d\n", mode); ret = -EINVAL; goto pre_struct_lock_err; } if (args->buffer_count < 1) { DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); ret = -EINVAL; goto pre_struct_lock_err; } if (args->num_cliprects != 0) { if (ring != &dev_priv->rings[RCS]) { - DRM_DEBUG("clip rectangles are only valid with the render ring\n"); + DRM_DEBUG("clip rectangles are only valid with the render ring\n"); ret = -EINVAL; goto pre_struct_lock_err; } if (INTEL_INFO(dev)->gen >= 5) { DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); ret = -EINVAL; goto pre_struct_lock_err; } if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) { DRM_DEBUG("execbuf with %u cliprects\n", args->num_cliprects); ret = -EINVAL; goto pre_struct_lock_err; } cliprects = malloc( sizeof(*cliprects) * args->num_cliprects, DRM_I915_GEM, M_WAITOK | M_ZERO); ret = -copyin((void *)(uintptr_t)args->cliprects_ptr, cliprects, sizeof(*cliprects) * args->num_cliprects); if (ret != 0) goto pre_struct_lock_err; } ret = i915_mutex_lock_interruptible(dev); if (ret) goto pre_struct_lock_err; if (dev_priv->mm.suspended) { DRM_UNLOCK(dev); ret = -EBUSY; goto pre_struct_lock_err; } eb = eb_create(args->buffer_count); if (eb == NULL) { DRM_UNLOCK(dev); ret = -ENOMEM; goto pre_struct_lock_err; } /* Look up object handles */ INIT_LIST_HEAD(&objects); for (i = 0; i < 
args->buffer_count; i++) { struct drm_i915_gem_object *obj; + obj = to_intel_bo(drm_gem_object_lookup(dev, file, exec[i].handle)); if (&obj->base == NULL) { DRM_DEBUG("Invalid object handle %d at index %d\n", exec[i].handle, i); /* prevent error path from reading uninitialized data */ ret = -ENOENT; goto err; } if (!list_empty(&obj->exec_list)) { DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", obj, exec[i].handle, i); ret = -EINVAL; goto err; } list_add_tail(&obj->exec_list, &objects); obj->exec_handle = exec[i].handle; obj->exec_entry = &exec[i]; eb_add_object(eb, obj); } /* take note of the batch buffer before we might reorder the lists */ batch_obj = list_entry(objects.prev, struct drm_i915_gem_object, exec_list); /* Move the objects en-masse into the GTT, evicting if necessary. */ ret = i915_gem_execbuffer_reserve(ring, file, &objects); if (ret) goto err; /* The objects are in their final locations, apply the relocations. */ ret = i915_gem_execbuffer_relocate(dev, eb, &objects); if (ret) { if (ret == -EFAULT) { ret = i915_gem_execbuffer_relocate_slow(dev, file, ring, - &objects, eb, exec, args->buffer_count); + &objects, eb, + exec, + args->buffer_count); DRM_LOCK_ASSERT(dev); } if (ret) goto err; } /* Set the pending read domains for the batch buffer to COMMAND */ if (batch_obj->base.pending_write_domain) { DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); ret = -EINVAL; goto err; } batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; ret = i915_gem_execbuffer_move_to_gpu(ring, &objects); if (ret) goto err; ret = i915_switch_context(ring, file, ctx_id); if (ret) goto err; seqno = i915_gem_next_request_seqno(ring); for (i = 0; i < I915_NUM_RINGS - 1; i++) { if (seqno < ring->sync_seqno[i]) { /* The GPU can not handle its semaphore value wrapping, * so every billion or so execbuffers, we need to stall * the GPU in order to reset the counters. 
*/ ret = i915_gpu_idle(dev); if (ret) goto err; i915_gem_retire_requests(dev); KASSERT(ring->sync_seqno[i] == 0, ("Non-zero sync_seqno")); } } if (ring == &dev_priv->rings[RCS] && mode != dev_priv->relative_constants_mode) { ret = intel_ring_begin(ring, 4); if (ret) goto err; intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit(ring, INSTPM); intel_ring_emit(ring, mask << 16 | mode); intel_ring_advance(ring); dev_priv->relative_constants_mode = mode; } if (args->flags & I915_EXEC_GEN7_SOL_RESET) { ret = i915_reset_gen7_sol_offsets(dev, ring); if (ret) goto err; } exec_start = batch_obj->gtt_offset + args->batch_start_offset; exec_len = args->batch_len; if (i915_fix_mi_batchbuffer_end) { i915_gem_fix_mi_batchbuffer_end(batch_obj, args->batch_start_offset, args->batch_len); } CTR4(KTR_DRM, "ring_dispatch %s %d exec %x %x", ring->name, seqno, exec_start, exec_len); if (cliprects) { for (i = 0; i < args->num_cliprects; i++) { ret = i915_emit_box_p(dev, &cliprects[i], - args->DR1, args->DR4); + args->DR1, args->DR4); if (ret) goto err; - ret = ring->dispatch_execbuffer(ring, exec_start, - exec_len); + ret = ring->dispatch_execbuffer(ring, + exec_start, exec_len); if (ret) goto err; } } else { - ret = ring->dispatch_execbuffer(ring, exec_start, exec_len); + ret = ring->dispatch_execbuffer(ring, + exec_start, exec_len); if (ret) goto err; } i915_gem_execbuffer_move_to_active(&objects, ring, seqno); i915_gem_execbuffer_retire_commands(dev, file, ring); err: eb_destroy(eb); while (!list_empty(&objects)) { struct drm_i915_gem_object *obj; - obj = list_first_entry(&objects, struct drm_i915_gem_object, - exec_list); + obj = list_first_entry(&objects, + struct drm_i915_gem_object, + exec_list); list_del_init(&obj->exec_list); drm_gem_object_unreference(&obj->base); } DRM_UNLOCK(dev); pre_struct_lock_err: for (i = 0; i < args->buffer_count; i++) { if (relocs_ma[i] != NULL) { vm_page_unhold_pages(relocs_ma[i], howmany( exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry), PAGE_SIZE)); free(relocs_ma[i], DRM_I915_GEM); } } free(relocs_ma, DRM_I915_GEM); free(cliprects, DRM_I915_GEM); return ret; } /* * Legacy execbuffer just creates an exec2 list from the original exec object * list array and passes it to the real function. 
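/*
 * Illustrative sketch, not part of this changeset: the "masked bit" register
 * format used by the INSTPM write above (mask << 16 | mode) and by the
 * _MASKED_BIT_ENABLE() writes elsewhere in this diff.  The upper sixteen
 * bits select which of the lower bits the hardware actually updates.
 */
#include <stdint.h>

static uint32_t
sketch_masked_write(uint32_t old_reg, uint32_t mask, uint32_t value)
{
	uint32_t request = (mask << 16) | (value & mask);
	uint32_t select = request >> 16;

	/* What the hardware effectively does with such a request. */
	return ((old_reg & ~select) | (request & select));
}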
*/ int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_execbuffer *args = data; struct drm_i915_gem_execbuffer2 exec2; struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_i915_gem_exec_object2 *exec2_list = NULL; int ret, i; DRM_DEBUG("buffers_ptr %d buffer_count %d len %08x\n", (int) args->buffers_ptr, args->buffer_count, args->batch_len); if (args->buffer_count < 1) { DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); return -EINVAL; } /* Copy in the exec list from userland */ /* XXXKIB user-controlled malloc size */ exec_list = malloc(sizeof(*exec_list) * args->buffer_count, DRM_I915_GEM, M_WAITOK); exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count, DRM_I915_GEM, M_WAITOK); ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec_list, sizeof(*exec_list) * args->buffer_count); if (ret != 0) { DRM_DEBUG("copy %d exec entries failed %d\n", args->buffer_count, ret); free(exec_list, DRM_I915_GEM); free(exec2_list, DRM_I915_GEM); return (ret); } for (i = 0; i < args->buffer_count; i++) { exec2_list[i].handle = exec_list[i].handle; exec2_list[i].relocation_count = exec_list[i].relocation_count; exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; exec2_list[i].alignment = exec_list[i].alignment; exec2_list[i].offset = exec_list[i].offset; if (INTEL_INFO(dev)->gen < 4) exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; else exec2_list[i].flags = 0; } exec2.buffers_ptr = args->buffers_ptr; exec2.buffer_count = args->buffer_count; exec2.batch_start_offset = args->batch_start_offset; exec2.batch_len = args->batch_len; exec2.DR1 = args->DR1; exec2.DR4 = args->DR4; exec2.num_cliprects = args->num_cliprects; exec2.cliprects_ptr = args->cliprects_ptr; exec2.flags = I915_EXEC_RENDER; i915_execbuffer2_set_context_id(exec2, 0); ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ for (i = 0; i < args->buffer_count; i++) exec_list[i].offset = exec2_list[i].offset; /* ... and back out to userspace */ ret = -copyout(exec_list, (void *)(uintptr_t)args->buffers_ptr, sizeof(*exec_list) * args->buffer_count); if (ret != 0) { DRM_DEBUG("failed to copy %d exec entries " "back to user (%d)\n", args->buffer_count, ret); } } free(exec_list, DRM_I915_GEM); free(exec2_list, DRM_I915_GEM); return ret; } int i915_gem_execbuffer2(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_execbuffer2 *args = data; struct drm_i915_gem_exec_object2 *exec2_list = NULL; int ret; DRM_DEBUG("buffers_ptr %jx buffer_count %d len %08x\n", (uintmax_t)args->buffers_ptr, args->buffer_count, args->batch_len); if (args->buffer_count < 1 || args->buffer_count > UINT_MAX / sizeof(*exec2_list)) { DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); return -EINVAL; } /* XXXKIB user-controllable malloc size */ exec2_list = malloc(sizeof(*exec2_list) * args->buffer_count, DRM_I915_GEM, M_WAITOK); ret = -copyin((void *)(uintptr_t)args->buffers_ptr, exec2_list, sizeof(*exec2_list) * args->buffer_count); if (ret != 0) { DRM_DEBUG("copy %d exec entries failed %d\n", args->buffer_count, ret); free(exec2_list, DRM_I915_GEM); - return (ret); + return -EFAULT; } ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); if (!ret) { /* Copy the new buffer offsets back to the user's exec list. 
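/*
 * Illustrative sketch, not part of this changeset: the per-entry widening
 * performed by the legacy i915_gem_execbuffer() above.  Each old exec_object
 * is copied into an exec_object2, and pre-gen4 hardware gets the needs-fence
 * flag set because its relocations may require a fence register.  The
 * structs below are trimmed stand-ins for the real uapi definitions.
 */
#include <stdbool.h>
#include <stdint.h>

struct sketch_exec_object {
	uint32_t	handle;
	uint32_t	relocation_count;
	uint64_t	relocs_ptr;
	uint64_t	alignment;
	uint64_t	offset;
};

struct sketch_exec_object2 {
	uint32_t	handle;
	uint32_t	relocation_count;
	uint64_t	relocs_ptr;
	uint64_t	alignment;
	uint64_t	offset;
	uint64_t	flags;		/* e.g. EXEC_OBJECT_NEEDS_FENCE */
};

static void
sketch_widen_exec_entry(const struct sketch_exec_object *in,
    struct sketch_exec_object2 *out, bool pre_gen4)
{

	out->handle = in->handle;
	out->relocation_count = in->relocation_count;
	out->relocs_ptr = in->relocs_ptr;
	out->alignment = in->alignment;
	out->offset = in->offset;
	out->flags = pre_gen4 ? 1 /* EXEC_OBJECT_NEEDS_FENCE, assumed bit 0 */ : 0;
}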
*/ ret = -copyout(exec2_list, (void *)(uintptr_t)args->buffers_ptr, sizeof(*exec2_list) * args->buffer_count); if (ret) { DRM_DEBUG("failed to copy %d exec entries " "back to user (%d)\n", args->buffer_count, ret); } } free(exec2_list, DRM_I915_GEM); return ret; } Index: head/sys/dev/drm2/i915/i915_gem_gtt.c =================================================================== --- head/sys/dev/drm2/i915/i915_gem_gtt.c (revision 287173) +++ head/sys/dev/drm2/i915/i915_gem_gtt.c (revision 287174) @@ -1,419 +1,406 @@ /* * Copyright © 2010 Daniel Vetter * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include /* PPGTT support for Sandybdrige/Gen6 and later */ -static void -i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt, - unsigned first_entry, unsigned num_entries) +static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt, + unsigned first_entry, + unsigned num_entries) { uint32_t *pt_vaddr; uint32_t scratch_pte; struct sf_buf *sf; unsigned act_pd, first_pte, last_pte, i; act_pd = first_entry / I915_PPGTT_PT_ENTRIES; first_pte = first_entry % I915_PPGTT_PT_ENTRIES; scratch_pte = GEN6_PTE_ADDR_ENCODE(ppgtt->scratch_page_dma_addr); scratch_pte |= GEN6_PTE_VALID | GEN6_PTE_CACHE_LLC; while (num_entries) { last_pte = first_pte + num_entries; if (last_pte > I915_PPGTT_PT_ENTRIES) last_pte = I915_PPGTT_PT_ENTRIES; sched_pin(); sf = sf_buf_alloc(ppgtt->pt_pages[act_pd], SFB_CPUPRIVATE); pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf); for (i = first_pte; i < last_pte; i++) pt_vaddr[i] = scratch_pte; sf_buf_free(sf); sched_unpin(); num_entries -= last_pte - first_pte; first_pte = 0; act_pd++; } } -int -i915_gem_init_aliasing_ppgtt(struct drm_device *dev) +int i915_gem_init_aliasing_ppgtt(struct drm_device *dev) { - struct drm_i915_private *dev_priv; + struct drm_i915_private *dev_priv = dev->dev_private; struct i915_hw_ppgtt *ppgtt; - u_int first_pd_entry_in_global_pt, i; + unsigned first_pd_entry_in_global_pt; + int i; - dev_priv = dev->dev_private; - /* - * ppgtt PDEs reside in the global gtt pagetable, which has 512*1024 + /* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024 * entries. For aliasing ppgtt support we just steal them at the end for - * now. - */ + * now. 
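/*
 * Illustrative sketch, not part of this changeset: the index arithmetic used
 * by i915_ppgtt_clear_range() above (and by i915_ppgtt_insert_pages() that
 * follows).  A linear PTE index is split into a page-directory slot and an
 * offset within that page table, and a range is visited one page table at a
 * time.  1024 entries per table matches a 4 KiB page of 4-byte PTEs
 * (I915_PPGTT_PT_ENTRIES).
 */
#define	SKETCH_PT_ENTRIES	1024u

static void
sketch_walk_ppgtt_range(unsigned first_entry, unsigned num_entries,
    void (*visit)(unsigned pd, unsigned first_pte, unsigned last_pte))
{
	unsigned pd, first_pte, last_pte;

	pd = first_entry / SKETCH_PT_ENTRIES;
	first_pte = first_entry % SKETCH_PT_ENTRIES;
	while (num_entries != 0) {
		last_pte = first_pte + num_entries;
		if (last_pte > SKETCH_PT_ENTRIES)
			last_pte = SKETCH_PT_ENTRIES;
		visit(pd, first_pte, last_pte);	/* [first_pte, last_pte) */
		num_entries -= last_pte - first_pte;
		first_pte = 0;
		pd++;
	}
}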
*/ first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES; ppgtt = malloc(sizeof(*ppgtt), DRM_I915_GEM, M_WAITOK | M_ZERO); ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES; ppgtt->pt_pages = malloc(sizeof(vm_page_t) * ppgtt->num_pd_entries, DRM_I915_GEM, M_WAITOK | M_ZERO); for (i = 0; i < ppgtt->num_pd_entries; i++) { ppgtt->pt_pages[i] = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (ppgtt->pt_pages[i] == NULL) { dev_priv->mm.aliasing_ppgtt = ppgtt; i915_gem_cleanup_aliasing_ppgtt(dev); return (-ENOMEM); } } ppgtt->scratch_page_dma_addr = dev_priv->mm.gtt.scratch_page_dma; i915_ppgtt_clear_range(ppgtt, 0, ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES); ppgtt->pd_offset = (first_pd_entry_in_global_pt) * sizeof(uint32_t); dev_priv->mm.aliasing_ppgtt = ppgtt; return (0); } static void i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt, unsigned first_entry, unsigned num_entries, vm_page_t *pages, uint32_t pte_flags) { uint32_t *pt_vaddr, pte; struct sf_buf *sf; unsigned act_pd, first_pte; unsigned last_pte, i; vm_paddr_t page_addr; act_pd = first_entry / I915_PPGTT_PT_ENTRIES; first_pte = first_entry % I915_PPGTT_PT_ENTRIES; while (num_entries) { last_pte = first_pte + num_entries; if (last_pte > I915_PPGTT_PT_ENTRIES) last_pte = I915_PPGTT_PT_ENTRIES; sched_pin(); sf = sf_buf_alloc(ppgtt->pt_pages[act_pd], SFB_CPUPRIVATE); pt_vaddr = (uint32_t *)(uintptr_t)sf_buf_kva(sf); for (i = first_pte; i < last_pte; i++) { page_addr = VM_PAGE_TO_PHYS(*pages); pte = GEN6_PTE_ADDR_ENCODE(page_addr); pt_vaddr[i] = pte | pte_flags; pages++; } sf_buf_free(sf); sched_unpin(); num_entries -= last_pte - first_pte; first_pte = 0; act_pd++; } } -void -i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, - struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) +void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt, + struct drm_i915_gem_object *obj, + enum i915_cache_level cache_level) { struct drm_device *dev; struct drm_i915_private *dev_priv; uint32_t pte_flags; dev = obj->base.dev; dev_priv = dev->dev_private; pte_flags = GEN6_PTE_VALID; switch (cache_level) { case I915_CACHE_LLC_MLC: pte_flags |= GEN6_PTE_CACHE_LLC_MLC; break; case I915_CACHE_LLC: pte_flags |= GEN6_PTE_CACHE_LLC; break; case I915_CACHE_NONE: pte_flags |= GEN6_PTE_UNCACHED; break; default: panic("cache mode"); } i915_ppgtt_insert_pages(ppgtt, obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT, obj->pages, pte_flags); } void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, struct drm_i915_gem_object *obj) { - i915_ppgtt_clear_range(ppgtt, obj->gtt_space->start >> PAGE_SHIFT, - obj->base.size >> PAGE_SHIFT); + i915_ppgtt_clear_range(ppgtt, + obj->gtt_space->start >> PAGE_SHIFT, + obj->base.size >> PAGE_SHIFT); } void i915_gem_init_ppgtt(struct drm_device *dev) { - drm_i915_private_t *dev_priv; - struct i915_hw_ppgtt *ppgtt; - uint32_t pd_offset, pd_entry; - vm_paddr_t pt_addr; + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t pd_offset; struct intel_ring_buffer *ring; - u_int first_pd_entry_in_global_pt, i; + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + u_int first_pd_entry_in_global_pt; + vm_paddr_t pt_addr; + uint32_t pd_entry; + int i; - dev_priv = dev->dev_private; - ppgtt = dev_priv->mm.aliasing_ppgtt; - if (ppgtt == NULL) + if (!dev_priv->mm.aliasing_ppgtt) return; first_pd_entry_in_global_pt = 512 * 1024 - I915_PPGTT_PD_ENTRIES; for (i = 0; i < ppgtt->num_pd_entries; i++) { pt_addr = 
VM_PAGE_TO_PHYS(ppgtt->pt_pages[i]); pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr); pd_entry |= GEN6_PDE_VALID; intel_gtt_write(first_pd_entry_in_global_pt + i, pd_entry); } intel_gtt_read_pte(first_pd_entry_in_global_pt); pd_offset = ppgtt->pd_offset; pd_offset /= 64; /* in cachelines, */ pd_offset <<= 16; if (INTEL_INFO(dev)->gen == 6) { uint32_t ecochk, gab_ctl, ecobits; ecobits = I915_READ(GAC_ECO_BITS); I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B); gab_ctl = I915_READ(GAB_CTL); I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT); ecochk = I915_READ(GAM_ECOCHK); I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B); I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); } else if (INTEL_INFO(dev)->gen >= 7) { I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B); /* GFX_MODE is per-ring on gen7+ */ } for_each_ring(ring, dev_priv, i) { if (INTEL_INFO(dev)->gen >= 7) I915_WRITE(RING_MODE_GEN7(ring), _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G); I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset); } } +static bool do_idling(struct drm_i915_private *dev_priv) +{ + bool ret = dev_priv->mm.interruptible; + + if (dev_priv->mm.gtt.do_idle_maps) { + dev_priv->mm.interruptible = false; + if (i915_gpu_idle(dev_priv->dev)) { + DRM_ERROR("Couldn't idle GPU\n"); + /* Wait a bit, in hopes it avoids the hang */ + DELAY(10); + } + } + + return ret; +} + +static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) +{ + if (dev_priv->mm.gtt.do_idle_maps) + dev_priv->mm.interruptible = interruptible; +} + void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev) { struct drm_i915_private *dev_priv; struct i915_hw_ppgtt *ppgtt; vm_page_t m; int i; dev_priv = dev->dev_private; ppgtt = dev_priv->mm.aliasing_ppgtt; if (ppgtt == NULL) return; dev_priv->mm.aliasing_ppgtt = NULL; for (i = 0; i < ppgtt->num_pd_entries; i++) { m = ppgtt->pt_pages[i]; if (m != NULL) { vm_page_unwire(m, PQ_INACTIVE); vm_page_free(m); } } free(ppgtt->pt_pages, DRM_I915_GEM); free(ppgtt, DRM_I915_GEM); } static unsigned int cache_level_to_agp_type(struct drm_device *dev, enum i915_cache_level cache_level) { switch (cache_level) { case I915_CACHE_LLC_MLC: if (INTEL_INFO(dev)->gen >= 6) return (AGP_USER_CACHED_MEMORY_LLC_MLC); /* * Older chipsets do not have this extra level of CPU * cacheing, so fallthrough and request the PTE simply * as cached. 
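/*
 * Illustrative sketch, not part of this changeset: the PP_DIR_BASE value
 * computed in i915_gem_init_ppgtt() above.  The byte offset of the page
 * directory within the global GTT is expressed in 64-byte cachelines and
 * then shifted into the upper half of the register value.
 */
#include <stdint.h>

static uint32_t
sketch_pp_dir_base(uint32_t pd_offset_bytes)
{
	uint32_t cachelines = pd_offset_bytes / 64;	/* "in cachelines," */

	return (cachelines << 16);
}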
*/ case I915_CACHE_LLC: return (AGP_USER_CACHED_MEMORY); default: case I915_CACHE_NONE: return (AGP_USER_MEMORY); } } -static bool -do_idling(struct drm_i915_private *dev_priv) +void i915_gem_restore_gtt_mappings(struct drm_device *dev) { - bool ret = dev_priv->mm.interruptible; - - if (dev_priv->mm.gtt.do_idle_maps) { - dev_priv->mm.interruptible = false; - if (i915_gpu_idle(dev_priv->dev)) { - DRM_ERROR("Couldn't idle GPU\n"); - /* Wait a bit, in hopes it avoids the hang */ - DELAY(10); - } - } - - return ret; -} - -static void -undo_idling(struct drm_i915_private *dev_priv, bool interruptible) -{ - - if (dev_priv->mm.gtt.do_idle_maps) - dev_priv->mm.interruptible = interruptible; -} - -void -i915_gem_restore_gtt_mappings(struct drm_device *dev) -{ - struct drm_i915_private *dev_priv; + struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; - dev_priv = dev->dev_private; - /* First fill our portion of the GTT with scratch pages */ intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE, - (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE); + (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) { i915_gem_clflush_object(obj); i915_gem_gtt_bind_object(obj, obj->cache_level); } intel_gtt_chipset_flush(); } -int -i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) +int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) { - return (0); + return 0; } void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { struct drm_device *dev; struct drm_i915_private *dev_priv; unsigned int agp_type; dev = obj->base.dev; dev_priv = dev->dev_private; agp_type = cache_level_to_agp_type(dev, cache_level); intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT, obj->pages, agp_type); obj->has_global_gtt_mapping = 1; } -void -i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) +void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) { intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT); obj->has_global_gtt_mapping = 0; } -void -i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) +void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; bool interruptible; - dev = obj->base.dev; - dev_priv = dev->dev_private; - interruptible = do_idling(dev_priv); undo_idling(dev_priv, interruptible); } -int -i915_gem_init_global_gtt(struct drm_device *dev, unsigned long start, - unsigned long mappable_end, unsigned long end) +int i915_gem_init_global_gtt(struct drm_device *dev, + unsigned long start, + unsigned long mappable_end, + unsigned long end) { drm_i915_private_t *dev_priv; unsigned long mappable; int error; dev_priv = dev->dev_private; mappable = min(end, mappable_end) - start; /* Substract the guard page ... */ drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE); dev_priv->mm.gtt_start = start; dev_priv->mm.gtt_mappable_end = mappable_end; dev_priv->mm.gtt_end = end; dev_priv->mm.gtt_total = end - start; dev_priv->mm.mappable_gtt_total = mappable; /* ... but ensure that we clear the entire range. 
*/ intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE); device_printf(dev->dev, "taking over the fictitious range 0x%lx-0x%lx\n", dev->agp->base + start, dev->agp->base + start + mappable); error = -vm_phys_fictitious_reg_range(dev->agp->base + start, dev->agp->base + start + mappable, VM_MEMATTR_WRITE_COMBINING); return (error); } Index: head/sys/dev/drm2/i915/i915_gem_tiling.c =================================================================== --- head/sys/dev/drm2/i915/i915_gem_tiling.c (revision 287173) +++ head/sys/dev/drm2/i915/i915_gem_tiling.c (revision 287174) @@ -1,531 +1,529 @@ /* * Copyright © 2008 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * Authors: * Eric Anholt * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include /** @file i915_gem_tiling.c * * Support for managing tiling state of buffer objects. * * The idea behind tiling is to increase cache hit rates by rearranging * pixel data so that a group of pixel accesses are in the same cacheline. * Performance improvement from doing this on the back/depth buffer are on * the order of 30%. * * Intel architectures make this somewhat more complicated, though, by * adjustments made to addressing of data when the memory is in interleaved * mode (matched pairs of DIMMS) to improve memory bandwidth. * For interleaved memory, the CPU sends every sequential 64 bytes * to an alternate memory channel so it can get the bandwidth from both. * * The GPU also rearranges its accesses for increased bandwidth to interleaved * memory, and it matches what the CPU does for non-tiled. However, when tiled * it does it a little differently, since one walks addresses not just in the * X direction but also Y. So, along with alternating channels when bit * 6 of the address flips, it also alternates when other bits flip -- Bits 9 * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) * are common to both the 915 and 965-class hardware. * * The CPU also sometimes XORs in higher bits as well, to improve * bandwidth doing strided access like we do so frequently in graphics. This * is called "Channel XOR Randomization" in the MCH documentation. The result * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address * decode. * * All of this bit 6 XORing has an effect on our memory management, * as we need to make sure that the 3d driver can correctly address object * contents. 
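/*
 * Illustrative sketch, not part of this changeset: what a swizzle mode such
 * as I915_BIT_6_SWIZZLE_9_10 (described above) means for software touching
 * tiled pages with the CPU.  Bit 6 of the address is XORed with higher
 * address bits, so a CPU copy has to apply the same XOR to reach the bytes
 * the GPU will use.  This mirrors the usual userspace handling and is not
 * code from this driver.
 */
#include <stdint.h>

static uint32_t
sketch_swizzle_bit6_9_10(uint32_t offset)
{
	uint32_t bit9 = (offset >> 9) & 1;
	uint32_t bit10 = (offset >> 10) & 1;

	return (offset ^ ((bit9 ^ bit10) << 6));
}

static uint32_t
sketch_swizzle_bit6_9(uint32_t offset)
{

	return (offset ^ (((offset >> 9) & 1) << 6));
}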
* * If we don't have interleaved memory, all tiling is safe and no swizzling is * required. * * When bit 17 is XORed in, we simply refuse to tile at all. Bit * 17 is not just a page offset, so as we page an objet out and back in, * individual pages in it will have different bit 17 addresses, resulting in * each 64 bytes being swapped with its neighbor! * * Otherwise, if interleaved, we have to tell the 3d driver what the address * swizzling it needs to do is, since it's writing with the CPU to the pages * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order * to match what the GPU expects. */ /** * Detects bit 6 swizzling of address lookup between IGD access and CPU * access through main memory. */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; if (INTEL_INFO(dev)->gen >= 6) { uint32_t dimm_c0, dimm_c1; dimm_c0 = I915_READ(MAD_DIMM_C0); dimm_c1 = I915_READ(MAD_DIMM_C1); dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK; /* Enable swizzling when the channels are populated with * identically sized dimms. We don't need to check the 3rd * channel because no cpu with gpu attached ships in that * configuration. Also, swizzling only makes sense for 2 * channels anyway. */ if (dimm_c0 == dimm_c1) { swizzle_x = I915_BIT_6_SWIZZLE_9_10; swizzle_y = I915_BIT_6_SWIZZLE_9; } else { swizzle_x = I915_BIT_6_SWIZZLE_NONE; swizzle_y = I915_BIT_6_SWIZZLE_NONE; } } else if (IS_GEN5(dev)) { /* On Ironlake whatever DRAM config, GPU always do * same swizzling setup. */ swizzle_x = I915_BIT_6_SWIZZLE_9_10; swizzle_y = I915_BIT_6_SWIZZLE_9; } else if (IS_GEN2(dev)) { /* As far as we know, the 865 doesn't have these bit 6 * swizzling issues. */ swizzle_x = I915_BIT_6_SWIZZLE_NONE; swizzle_y = I915_BIT_6_SWIZZLE_NONE; } else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) { uint32_t dcc; /* On 9xx chipsets, channel interleave by the CPU is * determined by DCC. For single-channel, neither the CPU * nor the GPU do swizzling. For dual channel interleaved, * the GPU's interleave is bit 9 and 10 for X tiled, and bit * 9 for Y tiled. The CPU's interleave is independent, and * can be based on either bit 11 (haven't seen this yet) or * bit 17 (common). */ dcc = I915_READ(DCC); switch (dcc & DCC_ADDRESSING_MODE_MASK) { case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: swizzle_x = I915_BIT_6_SWIZZLE_NONE; swizzle_y = I915_BIT_6_SWIZZLE_NONE; break; case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: if (dcc & DCC_CHANNEL_XOR_DISABLE) { /* This is the base swizzling by the GPU for * tiled buffers. */ swizzle_x = I915_BIT_6_SWIZZLE_9_10; swizzle_y = I915_BIT_6_SWIZZLE_9; } else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) { /* Bit 11 swizzling by the CPU in addition. */ swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; swizzle_y = I915_BIT_6_SWIZZLE_9_11; } else { /* Bit 17 swizzling by the CPU in addition. */ swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; swizzle_y = I915_BIT_6_SWIZZLE_9_17; } break; } if (dcc == 0xffffffff) { DRM_ERROR("Couldn't read from MCHBAR. 
" "Disabling tiling.\n"); swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; } } else { /* The 965, G33, and newer, have a very flexible memory * configuration. It will enable dual-channel mode * (interleaving) on as much memory as it can, and the GPU * will additionally sometimes enable different bit 6 * swizzling for tiled objects from the CPU. * * Here's what I found on the G965: * slot fill memory size swizzling * 0A 0B 1A 1B 1-ch 2-ch * 512 0 0 0 512 0 O * 512 0 512 0 16 1008 X * 512 0 0 512 16 1008 X * 0 512 0 512 16 1008 X * 1024 1024 1024 0 2048 1024 O * * We could probably detect this based on either the DRB * matching, which was the case for the swizzling required in * the table above, or from the 1-ch value being less than * the minimum size of a rank. */ if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) { swizzle_x = I915_BIT_6_SWIZZLE_NONE; swizzle_y = I915_BIT_6_SWIZZLE_NONE; } else { swizzle_x = I915_BIT_6_SWIZZLE_9_10; swizzle_y = I915_BIT_6_SWIZZLE_9; } } dev_priv->mm.bit_6_swizzle_x = swizzle_x; dev_priv->mm.bit_6_swizzle_y = swizzle_y; } /* Check pitch constriants for all chips & tiling formats */ static bool i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) { int tile_width; /* Linear is always fine */ if (tiling_mode == I915_TILING_NONE) - return (true); + return true; if (IS_GEN2(dev) || (tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))) tile_width = 128; else tile_width = 512; /* check maximum stride & object size */ if (INTEL_INFO(dev)->gen >= 4) { /* i965 stores the end address of the gtt mapping in the fence * reg, so dont bother to check the size */ if (stride / 128 > I965_FENCE_MAX_PITCH_VAL) - return (false); + return false; } else { if (stride > 8192) - return (false); + return false; if (IS_GEN3(dev)) { if (size > I830_FENCE_MAX_SIZE_VAL << 20) - return (false); + return false; } else { if (size > I830_FENCE_MAX_SIZE_VAL << 19) - return (false); + return false; } } /* 965+ just needs multiples of tile width */ if (INTEL_INFO(dev)->gen >= 4) { if (stride & (tile_width - 1)) - return (false); - return (true); + return false; + return true; } /* Pre-965 needs power of two tile widths */ if (stride < tile_width) - return (false); + return false; if (stride & (stride - 1)) - return (false); + return false; - return (true); + return true; } /* Is the current GTT allocation valid for the change in tiling? */ static bool i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) { u32 size; if (tiling_mode == I915_TILING_NONE) - return (true); + return true; if (INTEL_INFO(obj->base.dev)->gen >= 4) - return (true); + return true; if (INTEL_INFO(obj->base.dev)->gen == 3) { if (obj->gtt_offset & ~I915_FENCE_START_MASK) - return (false); + return false; } else { if (obj->gtt_offset & ~I830_FENCE_START_MASK) - return (false); + return false; } /* * Previous chips need to be aligned to the size of the smallest * fence register that can contain the object. */ if (INTEL_INFO(obj->base.dev)->gen == 3) size = 1024*1024; else size = 512*1024; while (size < obj->base.size) size <<= 1; if (obj->gtt_space->size != size) - return (false); + return false; if (obj->gtt_offset & (size - 1)) - return (false); + return false; - return (true); + return true; } /** * Sets the tiling mode of an object, returning the required swizzling of * bit 6 of addresses in the object. 
*/ int i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_set_tiling *args = data; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; - int ret; + int ret = 0; - ret = 0; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) return -ENOENT; if (!i915_tiling_ok(dev, args->stride, obj->base.size, args->tiling_mode)) { drm_gem_object_unreference(&obj->base); return -EINVAL; } if (obj->pin_count) { drm_gem_object_unreference(&obj->base); return -EBUSY; } if (args->tiling_mode == I915_TILING_NONE) { args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } else { if (args->tiling_mode == I915_TILING_X) args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; else args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; /* Hide bit 17 swizzling from the user. This prevents old Mesa * from aborting the application on sw fallbacks to bit 17, * and we use the pread/pwrite bit17 paths to swizzle for it. * If there was a user that was relying on the swizzle * information for drm_intel_bo_map()ed reads/writes this would * break it, but we don't have any of those. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; /* If we can't handle the swizzling, make it untiled. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { args->tiling_mode = I915_TILING_NONE; args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; args->stride = 0; } } DRM_LOCK(dev); if (args->tiling_mode != obj->tiling_mode || args->stride != obj->stride) { /* We need to rebind the object if its current allocation * no longer meets the alignment restrictions for its new * tiling mode. Otherwise we can just leave it alone, but * need to ensure that any fence register is updated before * the next fenced (either through the GTT or by the BLT unit * on older GPUs) access. * * After updating the tiling parameters, we then flag whether * we need to update an associated fence register. Note this * has to also include the unfenced register the GPU uses * whilst executing a fenced command for an untiled object. */ obj->map_and_fenceable = obj->gtt_space == NULL || - (obj->gtt_offset + obj->base.size <= - dev_priv->mm.gtt_mappable_end && - i915_gem_object_fence_ok(obj, args->tiling_mode)); + (obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end && + i915_gem_object_fence_ok(obj, args->tiling_mode)); /* Rebind if we need a change of alignment */ if (!obj->map_and_fenceable) { - uint32_t unfenced_alignment = - i915_gem_get_unfenced_gtt_alignment(dev, - obj->base.size, args->tiling_mode); + u32 unfenced_alignment = + i915_gem_get_unfenced_gtt_alignment(dev, + obj->base.size, + args->tiling_mode); if (obj->gtt_offset & (unfenced_alignment - 1)) ret = i915_gem_object_unbind(obj); } if (ret == 0) { obj->fence_dirty = obj->fenced_gpu_access || obj->fence_reg != I915_FENCE_REG_NONE; - obj->tiling_mode = args->tiling_mode; obj->stride = args->stride; /* Force the fence to be reacquired for GTT access */ i915_gem_release_mmap(obj); } } /* we have to maintain this existing ABI... */ args->stride = obj->stride; args->tiling_mode = obj->tiling_mode; drm_gem_object_unreference(&obj->base); DRM_UNLOCK(dev); - return (ret); + return ret; } /** * Returns the current tiling mode and required bit 6 swizzling for the object. 
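From userspace, the ioctl above is usually reached through libdrm. A sketch assuming the stock i915 UAPI definitions (struct drm_i915_gem_set_tiling, DRM_IOCTL_I915_GEM_SET_TILING) and libdrm's drmIoctl(); header paths can differ by platform, the fd and GEM handle come from elsewhere, and error handling is trimmed:

/*
 * Userspace usage sketch, not driver code.  example_set_x_tiling() is a
 * hypothetical helper name.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int
example_set_x_tiling(int fd, uint32_t handle, uint32_t stride)
{
	struct drm_i915_gem_set_tiling st;

	memset(&st, 0, sizeof(st));
	st.handle = handle;
	st.tiling_mode = I915_TILING_X;
	st.stride = stride;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &st) != 0)
		return (-1);

	/*
	 * The kernel may downgrade the request (e.g. to I915_TILING_NONE when
	 * the swizzle mode is unknown) and reports the swizzling the CPU must
	 * apply, with the bit-17 variants already hidden as described above,
	 * so the caller must re-read these fields rather than assume the
	 * request was honoured.
	 */
	return (st.tiling_mode == I915_TILING_X ? (int)st.swizzle_mode : -1);
}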
*/ int i915_gem_get_tiling(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_get_tiling *args = data; drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle)); if (&obj->base == NULL) return -ENOENT; DRM_LOCK(dev); args->tiling_mode = obj->tiling_mode; switch (obj->tiling_mode) { case I915_TILING_X: args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; break; case I915_TILING_Y: args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; break; case I915_TILING_NONE: args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; break; default: DRM_ERROR("unknown tiling mode\n"); } /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9; if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; drm_gem_object_unreference(&obj->base); DRM_UNLOCK(dev); return 0; } /** * Swap every 64 bytes of this page around, to account for it having a new * bit 17 of its physical address and therefore being interpreted differently * by the GPU. */ static void i915_gem_swizzle_page(vm_page_t m) { char temp[64]; char *vaddr; struct sf_buf *sf; int i; /* XXXKIB sleep */ sf = sf_buf_alloc(m, SFB_DEFAULT); vaddr = (char *)sf_buf_kva(sf); for (i = 0; i < PAGE_SIZE; i += 128) { memcpy(temp, &vaddr[i], 64); memcpy(&vaddr[i], &vaddr[i + 64], 64); memcpy(&vaddr[i + 64], temp, 64); } sf_buf_free(sf); } void i915_gem_object_do_bit_17_swizzle_page(struct drm_i915_gem_object *obj, vm_page_t m) { char new_bit_17; if (obj->bit_17 == NULL) return; new_bit_17 = VM_PAGE_TO_PHYS(m) >> 17; if ((new_bit_17 & 0x1) != (test_bit(m->pindex, obj->bit_17) != 0)) { i915_gem_swizzle_page(m); vm_page_dirty(m); } } void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) { int page_count = obj->base.size >> PAGE_SHIFT; int i; if (obj->bit_17 == NULL) return; for (i = 0; i < page_count; i++) { char new_bit_17 = VM_PAGE_TO_PHYS(obj->pages[i]) >> 17; if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { i915_gem_swizzle_page(obj->pages[i]); vm_page_dirty(obj->pages[i]); } } } void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj) { int page_count = obj->base.size >> PAGE_SHIFT; int i; if (obj->bit_17 == NULL) { obj->bit_17 = malloc(BITS_TO_LONGS(page_count) * sizeof(long), DRM_I915_GEM, M_WAITOK); } /* XXXKIB: review locking, atomics might be not needed there */ for (i = 0; i < page_count; i++) { if (VM_PAGE_TO_PHYS(obj->pages[i]) & (1 << 17)) set_bit(i, obj->bit_17); else clear_bit(i, obj->bit_17); } }
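As the comments above explain, a change in bit 17 of a page's physical address makes the channel XOR flip bit 6 of every byte address within that page, which is why i915_gem_swizzle_page() exchanges 64-byte neighbours whenever the saved bit-17 state no longer matches. A tiny standalone demonstration of that equivalence (illustrative only; nothing below is driver code, and EX_PAGE_SIZE stands in for PAGE_SIZE):

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define	EX_PAGE_SIZE	4096

int
main(void)
{
	static unsigned char page[EX_PAGE_SIZE], swapped[EX_PAGE_SIZE],
	    xored[EX_PAGE_SIZE];
	unsigned char tmp[64];
	int i;

	for (i = 0; i < EX_PAGE_SIZE; i++)
		page[i] = (unsigned char)i;

	/* The driver's approach: swap 64-byte neighbours in 128-byte strides. */
	memcpy(swapped, page, sizeof(page));
	for (i = 0; i < EX_PAGE_SIZE; i += 128) {
		memcpy(tmp, &swapped[i], 64);
		memcpy(&swapped[i], &swapped[i + 64], 64);
		memcpy(&swapped[i + 64], tmp, 64);
	}

	/* The equivalent view: XOR bit 6 into every byte offset. */
	for (i = 0; i < EX_PAGE_SIZE; i++)
		xored[i ^ 64] = page[i];

	assert(memcmp(swapped, xored, EX_PAGE_SIZE) == 0);
	printf("64-byte neighbour swap == bit-6 XOR of the offset\n");
	return (0);
}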