-
-
Save tiagovignatti/2ce341604444b3ac0a21 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit f13a04be0723afbc8188fdc8d4cc0776590e94b3 | |
Author: Daniel Thompson <[email protected]> | |
Date: Fri Jun 19 14:52:28 2015 +0100 | |
drm: prime: Honour O_RDWR during prime-handle-to-fd | |
Currently DRM_IOCTL_PRIME_HANDLE_TO_FD rejects all flags except | |
(DRM|O)_CLOEXEC making it difficult (maybe impossible) for userspace | |
to mmap() the resulting dma-buf even when this is supported by the | |
DRM driver. | |
It is trivial to relax the restriction and permit read/write access. | |
This is safe because the flags are seldom touched by drm; mostly they | |
are passed verbatim to dma_buf calls. | |
CrOS, kernel v3.14 changes: removed the first hunk of the original patch, which | |
only touched documentation that diverges from newer kernels. | |
Change-Id: Ide8e6b548e3c8b57b18e693b5c4a9d2d800526ff | |
Signed-off-by: Daniel Thompson <[email protected]> | |
Signed-off-by: Tiago Vignatti <[email protected]> | |
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c | |
index cd608bc..aca5374 100644 | |
--- a/drivers/gpu/drm/drm_prime.c | |
+++ b/drivers/gpu/drm/drm_prime.c | |
@@ -579,14 +579,11 @@ int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data, | |
return -ENOSYS; | |
/* check flags are valid */ | |
- if (args->flags & ~DRM_CLOEXEC) | |
+ if (args->flags & ~(DRM_CLOEXEC | DRM_RDWR)) | |
return -EINVAL; | |
- /* we only want to pass DRM_CLOEXEC which is == O_CLOEXEC */ | |
- flags = args->flags & DRM_CLOEXEC; | |
- | |
return dev->driver->prime_handle_to_fd(dev, file_priv, | |
- args->handle, flags, &args->fd); | |
+ args->handle, args->flags, &args->fd); | |
} | |
int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data, | |
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h | |
index 3f1fc13..32191ee 100644 | |
--- a/include/uapi/drm/drm.h | |
+++ b/include/uapi/drm/drm.h | |
@@ -643,6 +643,7 @@ struct drm_set_client_cap { | |
__u64 value; | |
}; | |
+#define DRM_RDWR O_RDWR | |
#define DRM_CLOEXEC O_CLOEXEC | |
struct drm_prime_handle { | |
__u32 handle; | |
commit 0e98ed9e7077dc6fcee33e4b039316b7aa7c7767 | |
Author: Tiago Vignatti <[email protected]> | |
Date: Mon Jul 27 18:57:09 2015 -0300 | |
drm/i915: Use CPU mapping for userspace dma-buf mmap() | |
This patch changes the behavior of dma-buf mmap to use a CPU mapping instead of | |
going through the GTT fence. This works fine on devices with LLC ("Core" mostly) | |
because coherency does not need to be controlled there. | |
Caching mode is therefore changed to WB, avoiding the performance penalty of | |
writing through the write-combined buffer, which leads to the best performance for | |
CPU accesses. Besides, WB caching mode acquires a linear view of the memory, | |
so user-space has to deal with it accordingly. | |
vgem: | |
chronos@localhost /tmp/chrome/out_gbm/Release $ ./content_perftests | |
--ozone-platform=gbm --ozone-use-surfaceless --no-sandbox | |
--gtest_filter=GpuMemoryBufferPerfTests* | grep RES | |
*RESULT gpu_memory_buffer_time_ozone_native: map & unmap = 5.432 us/task | |
*RESULT gpu_memory_buffer_time_shared_memory: map & unmap = .385 us/task | |
*RESULT gpu_memory_buffer_time_ozone_native: read = 460.648 us/task | |
*RESULT gpu_memory_buffer_time_shared_memory: read = 1.371 us/task | |
*RESULT gpu_memory_buffer_time_ozone_native: write = 19.43 us/task | |
*RESULT gpu_memory_buffer_time_shared_memory: write = 1.824 us/task | |
*RESULT gpu_memory_buffer_time_ozone_native: read & write = 455.453 us/task | |
*RESULT gpu_memory_buffer_time_shared_memory: read & write = 2.648 us/task | |
vgem cpu mmap (aka "after"): | |
chronos@localhost /tmp/chrome/out_gbm/Release $ ./content_perftests | |
--ozone-platform=gbm --ozone-use-surfaceless --no-sandbox | |
--gtest_filter=GpuMemoryBufferPerfTests* | grep RES | |
*RESULT gpu_memory_buffer_time_ozone_native: map & unmap = 4.765 us/task | |
*RESULT gpu_memory_buffer_time_shared_memory: map & unmap = .365 us/task | |
*RESULT gpu_memory_buffer_time_ozone_native: read = 18.209 us/task | |
*RESULT gpu_memory_buffer_time_shared_memory: read = 3.542 us/task | |
*RESULT gpu_memory_buffer_time_ozone_native: write = 19.246 us/task | |
*RESULT gpu_memory_buffer_time_shared_memory: write = 3.788 us/task | |
*RESULT gpu_memory_buffer_time_ozone_native: read & write = 23.506 us/task | |
*RESULT gpu_memory_buffer_time_shared_memory: read & write = 7.141 us/task | |
Note that in this patch we're removing Atom support because it requires better | |
coherency handling. For that we'd need to expose dma-buf invalidate/flushing | |
ioctls and we're investigating it (WIP at the moment). | |
BUG=chromium:487189 | |
TEST=content_perftests (as shown above) and, on amd64-generic_freon, | |
vgem_fb_test (which needs to be changed to not use tiling when creating gbm | |
BOs) | |
Change-Id: Ie33dc5d63ad12820b2f11caab2d662a8585ea49c | |
Signed-off-by: Tiago Vignatti <[email protected]> | |
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c | |
index 5d0f65d..f6b9a80 100644 | |
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c | |
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c | |
@@ -199,19 +199,24 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct * | |
{ | |
struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); | |
struct drm_device *dev = obj->base.dev; | |
+ int ret; | |
if (obj->base.size < vma->vm_end - vma->vm_start) | |
return -EINVAL; | |
- vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP; | |
- vma->vm_ops = dev->driver->gem_vm_ops; | |
- vma->vm_private_data = &obj->base; | |
- vma->vm_page_prot = | |
- pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); | |
+ /* On non-LLC machines we'd need to be careful cause CPU and GPU don't | |
+ * share the CPU's L3 cache and coherency may hurt when CPU mapping. */ | |
+ if (!HAS_LLC(dev)) | |
+ return -EINVAL; | |
+ | |
+ if (!obj->base.filp) | |
+ return -EINVAL; | |
- vma->vm_ops->open(vma); | |
+ ret = obj->base.filp->f_op->mmap(obj->base.filp, vma); | |
+ fput(vma->vm_file); | |
+ vma->vm_file = get_file(obj->base.filp); | |
- return 0; | |
+ return ret; | |
} | |
static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment