From: Eric A. <er...@an...> - 2009-02-04 06:34:00
|
The basic problem was:

  mmap_sem (do_mmap()) -> struct_mutex (drm_gem_mmap())
  struct_mutex (i915_gem_execbuffer()) -> mmap_sem (copy_from/to_user())

We have plenty of places where we want to hold device state the same
(struct_mutex) while we move a non-trivial amount of data
(copy_from/to_user()), such as i915_gem_pwrite().  Solve this by moving
the one thing that needed struct_mutex with mmap_sem held to using a lock
to cover just those data structures (offset hash and offset manager).

Signed-off-by: Eric Anholt <er...@an...>
---
 drivers/gpu/drm/drm_gem.c       |    8 ++++----
 drivers/gpu/drm/i915/i915_gem.c |    5 +++++
 include/drm/drmP.h              |    1 +
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 6915fb8..7fe91b6 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -97,6 +97,7 @@ drm_gem_init(struct drm_device *dev)
 	dev->mm_private = mm;

+	mutex_init(&mm->offset_mutex);
 	if (drm_ht_create(&mm->offset_hash, 19)) {
 		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
 		return -ENOMEM;
@@ -485,10 +486,9 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	unsigned long prot;
 	int ret = 0;

-	mutex_lock(&dev->struct_mutex);
-
+	mutex_lock(&mm->offset_mutex);
 	if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
-		mutex_unlock(&dev->struct_mutex);
+		mutex_unlock(&mm->offset_mutex);
 		return drm_mmap(filp, vma);
 	}
@@ -525,7 +525,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	drm_vm_open_locked(vma);

 out_unlock:
-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&mm->offset_mutex);

 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6a9e3a8..8754054 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -646,6 +646,7 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj)
 	map->size = obj->size;
 	map->handle = obj;

+	mutex_lock(&mm->offset_mutex);
 	/* Get a DRM GEM mmap offset allocated... */
 	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
 						    obj->size / PAGE_SIZE, 0, 0);
@@ -671,12 +672,14 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj)
 	/* By now we should be all set, any drm_mmap request on the offset
 	 * below will get to our mmap & fault handler */
 	obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
+	mutex_unlock(&mm->offset_mutex);

 	return 0;

 out_free_mm:
 	drm_mm_put_block(list->file_offset_node);
 out_free_list:
+	mutex_unlock(&mm->offset_mutex);
 	drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER);

 	return ret;
@@ -2896,6 +2899,7 @@ void i915_gem_free_object(struct drm_gem_object *obj)
 	i915_gem_object_unbind(obj);

+	mutex_lock(&mm->offset_mutex);
 	list = &obj->map_list;
 	drm_ht_remove_item(&mm->offset_hash, &list->hash);

@@ -2903,6 +2907,7 @@
 		drm_mm_put_block(list->file_offset_node);
 		list->file_offset_node = NULL;
 	}
+	mutex_unlock(&mm->offset_mutex);

 	map = list->map;
 	if (map) {
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 8190b9b..daadf06 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -570,6 +570,7 @@ struct drm_ati_pcigart_info {
 struct drm_gem_mm {
 	struct drm_mm offset_manager;	/**< Offset mgmt for buffer objects */
 	struct drm_open_hash offset_hash; /**< User token hash table for maps */
+	struct mutex offset_mutex;	/**< covers offset_manager and offset_hash */
 };

 /**
--
1.5.6.5
|
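Spelled out, the inversion this patch breaks looks like the following (an
illustrative interleaving, not code from the patch):

	Thread 1 (mmap path)                Thread 2 (execbuffer ioctl)
	--------------------                ---------------------------
	down_write(&mm->mmap_sem)           mutex_lock(&dev->struct_mutex)
	drm_gem_mmap():                     copy_from_user() faults:
	  mutex_lock(&dev->struct_mutex)      down_read(&mm->mmap_sem)
	  [waits on thread 2]                 [waits on thread 1]
	                                    => AB-BA deadlock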
From: Eric A. <er...@an...> - 2009-02-18 00:59:52
|
The basic problem was mmap_sem (do_mmap()) -> struct_mutex (drm_gem_mmap(), i915_gem_fault()) struct_mutex (i915_gem_execbuffer()) -> mmap_sem (copy_from/to_user()) We have plenty of places where we want to hold device state the same (struct_mutex) while we move a non-trivial amount of data (copy_from/to_user()), such as i915_gem_pwrite(). Solve this by moving the easy things that needed struct_mutex with mmap_sem held to using a lock to cover just those data structures (offset hash and offset manager), and do trylock and reschedule in fault. Signed-off-by: Eric Anholt <er...@an...> --- drivers/gpu/drm/drm_gem.c | 8 ++++---- drivers/gpu/drm/i915/i915_gem.c | 15 ++++++++++++++- include/drm/drmP.h | 1 + 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 88d3368..13a0184 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -97,6 +97,7 @@ drm_gem_init(struct drm_device *dev) dev->mm_private = mm; + mutex_init(&mm->offset_mutex); if (drm_ht_create(&mm->offset_hash, 19)) { drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM); return -ENOMEM; @@ -508,10 +509,9 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) unsigned long prot; int ret = 0; - mutex_lock(&dev->struct_mutex); - + mutex_lock(&mm->offset_mutex); if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) { - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&mm->offset_mutex); return drm_mmap(filp, vma); } @@ -556,7 +556,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) drm_vm_open_locked(vma); out_unlock: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&mm->offset_mutex); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ac534c9..da9a2cb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -573,8 +573,16 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT; + /* Get the struct mutex before accessing GEM data structures, but + * keep the struct_mutex -> mmap_sem lock ordering so that we don't + * need to mangle pwrite/pread to allow mmap_sem -> struct_mutex. + */ + if (!mutex_trylock(&dev->struct_mutex)) { + need_resched(); + return VM_FAULT_NOPAGE; + } + /* Now bind it into the GTT if needed */ - mutex_lock(&dev->struct_mutex); if (!obj_priv->gtt_space) { ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); if (ret) { @@ -646,6 +654,7 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj) map->size = obj->size; map->handle = obj; + mutex_lock(&mm->offset_mutex); /* Get a DRM GEM mmap offset allocated... 
*/ list->file_offset_node = drm_mm_search_free(&mm->offset_manager, obj->size / PAGE_SIZE, 0, 0); @@ -671,12 +680,14 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj) /* By now we should be all set, any drm_mmap request on the offset * below will get to our mmap & fault handler */ obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT; + mutex_unlock(&mm->offset_mutex); return 0; out_free_mm: drm_mm_put_block(list->file_offset_node); out_free_list: + mutex_unlock(&mm->offset_mutex); drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER); return ret; @@ -690,6 +701,7 @@ i915_gem_free_mmap_offset(struct drm_gem_object *obj) struct drm_gem_mm *mm = dev->mm_private; struct drm_map_list *list; + mutex_lock(&mm->offset_mutex); list = &obj->map_list; drm_ht_remove_item(&mm->offset_hash, &list->hash); @@ -704,6 +716,7 @@ i915_gem_free_mmap_offset(struct drm_gem_object *obj) } obj_priv->mmap_offset = 0; + mutex_unlock(&mm->offset_mutex); } /** diff --git a/include/drm/drmP.h b/include/drm/drmP.h index e5f4ae9..04f765b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -570,6 +570,7 @@ struct drm_ati_pcigart_info { struct drm_gem_mm { struct drm_mm offset_manager; /**< Offset mgmt for buffer objects */ struct drm_open_hash offset_hash; /**< User token hash table for maps */ + struct mutex offset_mutex; /**< covers offset_manager and offset_hash */ }; /** -- 1.5.6.5 |
From: Wang C. <wan...@cn...> - 2009-02-18 08:02:32
|
Eric Anholt said the following on 2009-2-18 8:59:
> The basic problem was
> mmap_sem (do_mmap()) -> struct_mutex (drm_gem_mmap(), i915_gem_fault())
> struct_mutex (i915_gem_execbuffer()) -> mmap_sem (copy_from/to_user())
>
> We have plenty of places where we want to hold device state the same
> (struct_mutex) while we move a non-trivial amount of data
> (copy_from/to_user()), such as i915_gem_pwrite().  Solve this by moving the
> easy things that needed struct_mutex with mmap_sem held to using a lock to
> cover just those data structures (offset hash and offset manager), and do
> trylock and reschedule in fault.

Eric,
I tested the patch, but the following bug still doesn't disappear:
http://bugzilla.kernel.org/show_bug.cgi?id=12419
|
From: <kr...@bi...> - 2009-02-18 16:02:06
|
From: Kristian Høgsberg <kr...@re...> A number of GEM operations (and legacy drm ones) want to copy data to or from userspace while holding the struct_mutex lock. However, the fault handler calls us with the mmap_sem held and thus enforces the opposite locking order. This patch downs the mmap_sem up front for those operations that access userspace data under the struct_mutex lock to ensure the locking order is consistent. Signed-off-by: Kristian Høgsberg <kr...@re...> --- Here's a different and simpler attempt to fix the locking order problem. We can just down_read() the mmap_sem pre-emptively up-front, and the locking order is respected. It's simpler than the mutex_trylock() game, avoids introducing a new mutex. cheers, Kristian drivers/gpu/drm/i915/i915_dma.c | 6 +++++- drivers/gpu/drm/i915/i915_gem.c | 20 +++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 81f1cff..d8b58d9 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -642,9 +642,11 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, sizeof(struct drm_clip_rect))) return -EFAULT; + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_dispatch_batchbuffer(dev, batch); mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); if (sarea_priv) sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); @@ -674,14 +676,16 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, return -EFAULT; } + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_dispatch_cmdbuffer(dev, cmdbuf); mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); + if (ret) { DRM_ERROR("i915_dispatch_cmdbuffer failed\n"); return ret; } - if (sarea_priv) sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d9cd42f..3dd8b6e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -171,6 +171,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, @@ -196,6 +197,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); return 0; } @@ -264,7 +266,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, if (!access_ok(VERIFY_READ, user_data, remain)) return -EFAULT; - + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_gem_object_pin(obj, 0); if (ret) { @@ -315,6 +317,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, fail: i915_gem_object_unpin(obj); mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); return ret; } @@ -328,6 +331,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, loff_t offset; ssize_t written; + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_gem_object_set_to_cpu_domain(obj, 1); @@ -350,6 +354,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, } mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); return 0; } @@ -2473,22 +2478,21 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); i915_verify_inactive(dev, __FILE__, 
__LINE__); if (dev_priv->mm.wedged) { DRM_ERROR("Execbuf while wedged\n"); - mutex_unlock(&dev->struct_mutex); ret = -EIO; - goto pre_mutex_err; + goto mutex_err; } if (dev_priv->mm.suspended) { DRM_ERROR("Execbuf while VT-switched.\n"); - mutex_unlock(&dev->struct_mutex); ret = -EBUSY; - goto pre_mutex_err; + goto mutex_err; } /* Look up object handles */ @@ -2641,8 +2645,6 @@ err: for (i = 0; i < args->buffer_count; i++) drm_gem_object_unreference(object_list[i]); - mutex_unlock(&dev->struct_mutex); - if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ ret = copy_to_user((struct drm_i915_relocation_entry __user *) @@ -2655,6 +2657,10 @@ err: args->buffer_count, ret); } +mutex_err: + mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); + pre_mutex_err: drm_free(object_list, sizeof(*object_list) * args->buffer_count, DRM_MEM_DRIVER); -- 1.6.1.3 |
From: Eric A. <er...@an...> - 2009-02-18 17:36:29
|
On Wed, 2009-02-18 at 11:02 -0500, kr...@bi... wrote: > From: Kristian Høgsberg <kr...@re...> > > A number of GEM operations (and legacy drm ones) want to copy data to > or from userspace while holding the struct_mutex lock. However, the > fault handler calls us with the mmap_sem held and thus enforces the > opposite locking order. This patch downs the mmap_sem up front for > those operations that access userspace data under the struct_mutex > lock to ensure the locking order is consistent. > > Signed-off-by: Kristian Høgsberg <kr...@re...> Have you tested this against actually faulting? My understanding was that you can't recurse on mmap_sem. > --- > > Here's a different and simpler attempt to fix the locking order > problem. We can just down_read() the mmap_sem pre-emptively up-front, > and the locking order is respected. It's simpler than the > mutex_trylock() game, avoids introducing a new mutex. > > cheers, > Kristian > > drivers/gpu/drm/i915/i915_dma.c | 6 +++++- > drivers/gpu/drm/i915/i915_gem.c | 20 +++++++++++++------- > 2 files changed, 18 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 81f1cff..d8b58d9 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -642,9 +642,11 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, > sizeof(struct drm_clip_rect))) > return -EFAULT; > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > ret = i915_dispatch_batchbuffer(dev, batch); > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > > if (sarea_priv) > sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); > @@ -674,14 +676,16 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, > return -EFAULT; > } > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > ret = i915_dispatch_cmdbuffer(dev, cmdbuf); > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > + > if (ret) { > DRM_ERROR("i915_dispatch_cmdbuffer failed\n"); > return ret; > } > - > if (sarea_priv) > sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); > return 0; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d9cd42f..3dd8b6e 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -171,6 +171,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, > return -EINVAL; > } > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > > ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, > @@ -196,6 +197,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, > > drm_gem_object_unreference(obj); > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > > return 0; > } > @@ -264,7 +266,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, > if (!access_ok(VERIFY_READ, user_data, remain)) > return -EFAULT; > > - > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > ret = i915_gem_object_pin(obj, 0); > if (ret) { > @@ -315,6 +317,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, > fail: > i915_gem_object_unpin(obj); > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > > return ret; > } > @@ -328,6 +331,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, > loff_t offset; > ssize_t written; > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > > ret = i915_gem_object_set_to_cpu_domain(obj, 1); > @@ 
-350,6 +354,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, > } > > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > > return 0; > } > @@ -2473,22 +2478,21 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, > goto pre_mutex_err; > } > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > > i915_verify_inactive(dev, __FILE__, __LINE__); > > if (dev_priv->mm.wedged) { > DRM_ERROR("Execbuf while wedged\n"); > - mutex_unlock(&dev->struct_mutex); > ret = -EIO; > - goto pre_mutex_err; > + goto mutex_err; > } > > if (dev_priv->mm.suspended) { > DRM_ERROR("Execbuf while VT-switched.\n"); > - mutex_unlock(&dev->struct_mutex); > ret = -EBUSY; > - goto pre_mutex_err; > + goto mutex_err; > } > > /* Look up object handles */ > @@ -2641,8 +2645,6 @@ err: > for (i = 0; i < args->buffer_count; i++) > drm_gem_object_unreference(object_list[i]); > > - mutex_unlock(&dev->struct_mutex); > - > if (!ret) { > /* Copy the new buffer offsets back to the user's exec list. */ > ret = copy_to_user((struct drm_i915_relocation_entry __user *) > @@ -2655,6 +2657,10 @@ err: > args->buffer_count, ret); > } > > +mutex_err: > + mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > + > pre_mutex_err: > drm_free(object_list, sizeof(*object_list) * args->buffer_count, > DRM_MEM_DRIVER); -- Eric Anholt er...@an... eri...@in... |
From: Kristian H. <kr...@bi...> - 2009-02-18 17:58:05
|
On Wed, Feb 18, 2009 at 12:36 PM, Eric Anholt <er...@an...> wrote:
> On Wed, 2009-02-18 at 11:02 -0500, kr...@bi... wrote:
>> From: Kristian Høgsberg <kr...@re...>
>>
>> A number of GEM operations (and legacy drm ones) want to copy data to
>> or from userspace while holding the struct_mutex lock.  However, the
>> fault handler calls us with the mmap_sem held and thus enforces the
>> opposite locking order.  This patch downs the mmap_sem up front for
>> those operations that access userspace data under the struct_mutex
>> lock to ensure the locking order is consistent.
>>
>> Signed-off-by: Kristian Høgsberg <kr...@re...>
>
> Have you tested this against actually faulting?  My understanding was
> that you can't recurse on mmap_sem.

I tested it and it worked, but didn't add code to detect contention so
I can't say for sure I hit that case.  mmap_sem is a read/write
semaphore, so while we can't recurse, we can get away with taking two
reader locks.

cheers,
Kristian
|
From: Thomas H. <th...@sh...> - 2009-02-18 23:30:06
|
Kristian Høgsberg wrote:
> On Wed, Feb 18, 2009 at 12:36 PM, Eric Anholt <er...@an...> wrote:
>> On Wed, 2009-02-18 at 11:02 -0500, kr...@bi... wrote:
>>> From: Kristian Høgsberg <kr...@re...>
>>>
>>> A number of GEM operations (and legacy drm ones) want to copy data to
>>> or from userspace while holding the struct_mutex lock.  However, the
>>> fault handler calls us with the mmap_sem held and thus enforces the
>>> opposite locking order.  This patch downs the mmap_sem up front for
>>> those operations that access userspace data under the struct_mutex
>>> lock to ensure the locking order is consistent.
>>>
>>> Signed-off-by: Kristian Høgsberg <kr...@re...>
>>
>> Have you tested this against actually faulting?  My understanding was
>> that you can't recurse on mmap_sem.
>
> I tested it and it worked, but didn't add code to detect contention so
> I can't say for sure I hit that case.  mmap_sem is a read/write
> semaphore, so while we can't recurse, we can get away with taking two
> reader locks.
>
> cheers,
> Kristian

Kristian,

This seems a bit odd to me.  The extra lock taking does not prevent any
deadlocks, so is this done only to silence a warning message?

A reversed locking order between a mutex and an rwsem can never deadlock
if the rwsem is only taken in read mode.  If the mutex is taken
simultaneously with the rwsem in _write_ mode, then that locking order
must be preserved across the code, even when the rwsem is taken in read
mode.

/Thomas
|
From: Wang C. <wan...@cn...> - 2009-02-19 04:47:58
|
kr...@bi... said the following on 2009-2-19 0:02:
> From: Kristian Høgsberg <kr...@re...>
>
> A number of GEM operations (and legacy drm ones) want to copy data to
> or from userspace while holding the struct_mutex lock.  However, the
> fault handler calls us with the mmap_sem held and thus enforces the
> opposite locking order.  This patch downs the mmap_sem up front for
> those operations that access userspace data under the struct_mutex
> lock to ensure the locking order is consistent.
>
> Signed-off-by: Kristian Høgsberg <kr...@re...>

Now I get the following dmesg:

=============================================
[ INFO: possible recursive locking detected ]
2.6.29-rc5-default #163
---------------------------------------------
X/3923 is trying to acquire lock:
 (&mm->mmap_sem){----}, at: [<c0168e13>] might_fault+0x42/0x7e

but task is already holding lock:
 (&mm->mmap_sem){----}, at: [<eec2ef5b>] i915_cmdbuffer+0xfa/0x461 [i915]

other info that might help us debug this:
2 locks held by X/3923:
 #0:  (&mm->mmap_sem){----}, at: [<eec2ef5b>] i915_cmdbuffer+0xfa/0x461 [i915]
 #1:  (&dev->struct_mutex){--..}, at: [<eec2ef68>] i915_cmdbuffer+0x107/0x461 [i915]

stack backtrace:
Pid: 3923, comm: X Not tainted 2.6.29-rc5-default #163
Call Trace:
 [<c01374ef>] validate_chain+0x4bf/0xbb5
 [<c0138254>] __lock_acquire+0x66f/0x6f9
 [<c0138339>] lock_acquire+0x5b/0x77
 [<c0168e13>] ? might_fault+0x42/0x7e
 [<c0168e30>] might_fault+0x5f/0x7e
 [<c0168e13>] ? might_fault+0x42/0x7e
 [<eec2ec72>] i915_emit_box+0x1d/0x20c [i915]
 [<eec2efd9>] i915_cmdbuffer+0x178/0x461 [i915]
 [<c01ee3f4>] ? copy_from_user+0x31/0x54
 [<eebd085b>] drm_ioctl+0x1a6/0x21b [drm]
 [<eec2ee61>] ? i915_cmdbuffer+0x0/0x461 [i915]
 [<eebd06b5>] ? drm_ioctl+0x0/0x21b [drm]
 [<c0182a85>] vfs_ioctl+0x3d/0x50
 [<c0182f85>] do_vfs_ioctl+0x41b/0x483
 [<c02e8544>] ? do_page_fault+0x2f7/0x5a0
 [<c018302d>] sys_ioctl+0x40/0x5a
 [<c0102cdd>] sysenter_do_call+0x12/0x31
|
From: <kr...@bi...> - 2009-02-18 16:38:27
|
From: Kristian Høgsberg <kr...@re...> A number of GEM operations (and legacy drm ones) want to copy data to or from userspace while holding the struct_mutex lock. However, the fault handler calls us with the mmap_sem held and thus enforces the opposite locking order. This patch downs the mmap_sem up front for those operations that access userspace data under the struct_mutex lock to ensure the locking order is consistent. Signed-off-by: Kristian Høgsberg <kr...@re...> --- Here's a different and simpler attempt to fix the locking order problem. We can just down_read() the mmap_sem pre-emptively up-front, and the locking order is respected. It's simpler than the mutex_trylock() game, avoids introducing a new mutex. (forgot to add lkml, resending) cheers, Kristian drivers/gpu/drm/i915/i915_dma.c | 6 +++++- drivers/gpu/drm/i915/i915_gem.c | 20 +++++++++++++------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 81f1cff..d8b58d9 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -642,9 +642,11 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, sizeof(struct drm_clip_rect))) return -EFAULT; + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_dispatch_batchbuffer(dev, batch); mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); if (sarea_priv) sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); @@ -674,14 +676,16 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, return -EFAULT; } + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_dispatch_cmdbuffer(dev, cmdbuf); mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); + if (ret) { DRM_ERROR("i915_dispatch_cmdbuffer failed\n"); return ret; } - if (sarea_priv) sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); return 0; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d9cd42f..3dd8b6e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -171,6 +171,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return -EINVAL; } + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, @@ -196,6 +197,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); return 0; } @@ -264,7 +266,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, if (!access_ok(VERIFY_READ, user_data, remain)) return -EFAULT; - + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_gem_object_pin(obj, 0); if (ret) { @@ -315,6 +317,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, fail: i915_gem_object_unpin(obj); mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); return ret; } @@ -328,6 +331,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, loff_t offset; ssize_t written; + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); ret = i915_gem_object_set_to_cpu_domain(obj, 1); @@ -350,6 +354,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, } mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); return 0; } @@ -2473,22 +2478,21 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, goto pre_mutex_err; } + down_read(¤t->mm->mmap_sem); mutex_lock(&dev->struct_mutex); 
i915_verify_inactive(dev, __FILE__, __LINE__); if (dev_priv->mm.wedged) { DRM_ERROR("Execbuf while wedged\n"); - mutex_unlock(&dev->struct_mutex); ret = -EIO; - goto pre_mutex_err; + goto mutex_err; } if (dev_priv->mm.suspended) { DRM_ERROR("Execbuf while VT-switched.\n"); - mutex_unlock(&dev->struct_mutex); ret = -EBUSY; - goto pre_mutex_err; + goto mutex_err; } /* Look up object handles */ @@ -2641,8 +2645,6 @@ err: for (i = 0; i < args->buffer_count; i++) drm_gem_object_unreference(object_list[i]); - mutex_unlock(&dev->struct_mutex); - if (!ret) { /* Copy the new buffer offsets back to the user's exec list. */ ret = copy_to_user((struct drm_i915_relocation_entry __user *) @@ -2655,6 +2657,10 @@ err: args->buffer_count, ret); } +mutex_err: + mutex_unlock(&dev->struct_mutex); + up_read(¤t->mm->mmap_sem); + pre_mutex_err: drm_free(object_list, sizeof(*object_list) * args->buffer_count, DRM_MEM_DRIVER); -- 1.6.1.3 |
From: Kristian H. <kr...@bi...> - 2009-02-18 23:25:46
|
On Wed, Feb 18, 2009 at 5:42 PM, Thomas Hellström <th...@sh...> wrote:
> Kristian Høgsberg wrote:
>> On Wed, Feb 18, 2009 at 12:36 PM, Eric Anholt <er...@an...> wrote:
>>> On Wed, 2009-02-18 at 11:02 -0500, kr...@bi... wrote:
>>>> From: Kristian Høgsberg <kr...@re...>
>>>>
>>>> A number of GEM operations (and legacy drm ones) want to copy data to
>>>> or from userspace while holding the struct_mutex lock.  However, the
>>>> fault handler calls us with the mmap_sem held and thus enforces the
>>>> opposite locking order.  This patch downs the mmap_sem up front for
>>>> those operations that access userspace data under the struct_mutex
>>>> lock to ensure the locking order is consistent.
>>>>
>>>> Signed-off-by: Kristian Høgsberg <kr...@re...>
>>>
>>> Have you tested this against actually faulting?  My understanding was
>>> that you can't recurse on mmap_sem.
>>
>> I tested it and it worked, but didn't add code to detect contention so
>> I can't say for sure I hit that case.  mmap_sem is a read/write
>> semaphore, so while we can't recurse, we can get away with taking two
>> reader locks.
>
> Kristian,
> This seems a bit odd to me.  The extra lock taking does not prevent any
> deadlocks, so is this done only to silence a warning message?  A reversed
> locking order between a mutex and an rwsem can never deadlock if the rwsem
> is only taken in read mode.  If the mutex is taken simultaneously with the
> rwsem in _write_ mode, then that locking order must be preserved across
> the code, even when the rwsem is taken in read mode.

If all we ever did was read, we wouldn't need the mmap_sem at all...
The point is that somebody could be holding the semaphore in write mode,
in which case taking the mmap_sem in read mode blocks.  So if we're
called with the mmap_sem held in write mode and we're going to take the
struct_mutex, but gem_execbuffer preempts, takes the struct_mutex and
then tries to call copy_from_user(), we deadlock.  Taking the mmap_sem
in read mode early, as in my patch, prevents this.

In this case, I guess the fault handler is always only called with
mmap_sem held in read mode, and then yes, there's no deadlock and the
warning is harmless.  That doesn't mean that it's a good idea -
enforcing a consistent lock order is good practice that keeps the code
clean and makes it easy to verify locking without knowing every part
of the system in detail.  And should the fault callback mmap_sem
assumptions change in the future, gem will certainly be more robust if
we keep the locking simple.

cheers,
Kristian
|
From: Thomas H. <th...@sh...> - 2009-02-19 07:34:40
|
Kristian Høgsberg wrote:
>
> In this case, I guess the fault handler is always only called with
> mmap_sem held in read mode, and then yes, there's no deadlock and the
> warning is harmless.  That doesn't mean that it's a good idea -
> enforcing a consistent lock order is good practice that keeps the code
> clean and makes it easy to verify locking without knowing every part
> of the system in detail.  And should the fault callback mmap_sem
> assumptions change in the future, gem will certainly be more robust if
> we keep the locking simple.

Yes, but in this case you'll have

  read_lock(mmap_sem)
    mutex_lock(struct_mutex)
      read_lock(mmap_sem)

which both reverses the locking order between the last two locks and does
recursive locking.  This won't cause deadlocks but will still warn.  So
one bad thing is traded against another.

/Thomas
|
From: Peter Z. <pe...@in...> - 2009-02-19 10:00:44
|
On Wed, 2009-02-18 at 11:38 -0500, kr...@bi... wrote: > From: Kristian Høgsberg <kr...@re...> > > A number of GEM operations (and legacy drm ones) want to copy data to > or from userspace while holding the struct_mutex lock. However, the > fault handler calls us with the mmap_sem held and thus enforces the > opposite locking order. This patch downs the mmap_sem up front for > those operations that access userspace data under the struct_mutex > lock to ensure the locking order is consistent. > > Signed-off-by: Kristian Høgsberg <kr...@re...> > --- > > Here's a different and simpler attempt to fix the locking order > problem. We can just down_read() the mmap_sem pre-emptively up-front, > and the locking order is respected. It's simpler than the > mutex_trylock() game, avoids introducing a new mutex. > Hell no! for one, mmap_sem is not a recursive lock, so a pagefault will utterly fail with this in place. Secondly, holding mmap_sem for no good reason just sucks. > drivers/gpu/drm/i915/i915_dma.c | 6 +++++- > drivers/gpu/drm/i915/i915_gem.c | 20 +++++++++++++------- > 2 files changed, 18 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 81f1cff..d8b58d9 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -642,9 +642,11 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, > sizeof(struct drm_clip_rect))) > return -EFAULT; > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > ret = i915_dispatch_batchbuffer(dev, batch); > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > > if (sarea_priv) > sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); > @@ -674,14 +676,16 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, > return -EFAULT; > } > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > ret = i915_dispatch_cmdbuffer(dev, cmdbuf); > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > + > if (ret) { > DRM_ERROR("i915_dispatch_cmdbuffer failed\n"); > return ret; > } > - > if (sarea_priv) > sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); > return 0; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d9cd42f..3dd8b6e 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -171,6 +171,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, > return -EINVAL; > } > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > > ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset, > @@ -196,6 +197,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, > > drm_gem_object_unreference(obj); > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > > return 0; > } > @@ -264,7 +266,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, > if (!access_ok(VERIFY_READ, user_data, remain)) > return -EFAULT; > > - > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > ret = i915_gem_object_pin(obj, 0); > if (ret) { > @@ -315,6 +317,7 @@ i915_gem_gtt_pwrite(struct drm_device *dev, struct drm_gem_object *obj, > fail: > i915_gem_object_unpin(obj); > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > > return ret; > } > @@ -328,6 +331,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, > loff_t offset; > ssize_t written; > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > > ret = 
i915_gem_object_set_to_cpu_domain(obj, 1); > @@ -350,6 +354,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, struct drm_gem_object *obj, > } > > mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > > return 0; > } > @@ -2473,22 +2478,21 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, > goto pre_mutex_err; > } > > + down_read(¤t->mm->mmap_sem); > mutex_lock(&dev->struct_mutex); > > i915_verify_inactive(dev, __FILE__, __LINE__); > > if (dev_priv->mm.wedged) { > DRM_ERROR("Execbuf while wedged\n"); > - mutex_unlock(&dev->struct_mutex); > ret = -EIO; > - goto pre_mutex_err; > + goto mutex_err; > } > > if (dev_priv->mm.suspended) { > DRM_ERROR("Execbuf while VT-switched.\n"); > - mutex_unlock(&dev->struct_mutex); > ret = -EBUSY; > - goto pre_mutex_err; > + goto mutex_err; > } > > /* Look up object handles */ > @@ -2641,8 +2645,6 @@ err: > for (i = 0; i < args->buffer_count; i++) > drm_gem_object_unreference(object_list[i]); > > - mutex_unlock(&dev->struct_mutex); > - > if (!ret) { > /* Copy the new buffer offsets back to the user's exec list. */ > ret = copy_to_user((struct drm_i915_relocation_entry __user *) > @@ -2655,6 +2657,10 @@ err: > args->buffer_count, ret); > } > > +mutex_err: > + mutex_unlock(&dev->struct_mutex); > + up_read(¤t->mm->mmap_sem); > + > pre_mutex_err: > drm_free(object_list, sizeof(*object_list) * args->buffer_count, > DRM_MEM_DRIVER); |
From: Peter Z. <pe...@in...> - 2009-02-19 10:57:11
|
On Thu, 2009-02-19 at 10:19 +0100, Peter Zijlstra wrote:
> On Wed, 2009-02-18 at 11:38 -0500, kr...@bi... wrote:
> > From: Kristian Høgsberg <kr...@re...>
> >
> > A number of GEM operations (and legacy drm ones) want to copy data to
> > or from userspace while holding the struct_mutex lock.  However, the
> > fault handler calls us with the mmap_sem held and thus enforces the
> > opposite locking order.  This patch downs the mmap_sem up front for
> > those operations that access userspace data under the struct_mutex
> > lock to ensure the locking order is consistent.
> >
> > Signed-off-by: Kristian Høgsberg <kr...@re...>
> > ---
> >
> > Here's a different and simpler attempt to fix the locking order
> > problem.  We can just down_read() the mmap_sem pre-emptively up-front,
> > and the locking order is respected.  It's simpler than the
> > mutex_trylock() game, avoids introducing a new mutex.

OK let me try that again -- my initial response was a tad curt :/

While I appreciate your efforts in fixing GEM (I too have an interest in
seeing it done), I cannot support your patch.

Firstly, you're using mmap_sem well outside its problem domain, this is
bad form.  Furthermore, holding it for extended durations for no good
reason affects all other users.

Secondly, mmap_sem is not a recursive lock (very few kernel locks are,
and we generally frown upon recursive locking schemes), this means that
the fault handler still cannot function properly.
|
From: Nick P. <np...@su...> - 2009-02-19 13:35:07
|
On Thu, Feb 19, 2009 at 10:19:05AM +0100, Peter Zijlstra wrote:
> On Wed, 2009-02-18 at 11:38 -0500, kr...@bi... wrote:
> > From: Kristian Høgsberg <kr...@re...>
> >
> > A number of GEM operations (and legacy drm ones) want to copy data to
> > or from userspace while holding the struct_mutex lock.  However, the
> > fault handler calls us with the mmap_sem held and thus enforces the
> > opposite locking order.  This patch downs the mmap_sem up front for
> > those operations that access userspace data under the struct_mutex
> > lock to ensure the locking order is consistent.
> >
> > Signed-off-by: Kristian Høgsberg <kr...@re...>
> > ---
> >
> > Here's a different and simpler attempt to fix the locking order
> > problem.  We can just down_read() the mmap_sem pre-emptively up-front,
> > and the locking order is respected.  It's simpler than the
> > mutex_trylock() game, avoids introducing a new mutex.

The "simple" way to fix this is to just allocate a temporary buffer
to copy a snapshot of the data going to/from userspace.  Then do the
real usercopy to/from that buffer outside the locks.

You don't have any performance-critical bulk copies (i.e. that will
blow the L1 cache), do you?
|
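A minimal sketch of the bounce-buffer pattern Nick describes (the helper
and its names are invented for illustration, not i915 code; a real version
would chunk large writes rather than kmalloc the whole size at once):

	static int pwrite_bounce(struct drm_device *dev, void *dst,
				 const void __user *user_data, size_t size)
	{
		void *tmp;
		int ret = 0;

		tmp = kmalloc(size, GFP_KERNEL);
		if (tmp == NULL)
			return -ENOMEM;

		/* The usercopy may fault and take mmap_sem: do it with
		 * no driver locks held. */
		if (copy_from_user(tmp, user_data, size)) {
			ret = -EFAULT;
			goto out;
		}

		/* struct_mutex now never nests inside mmap_sem. */
		mutex_lock(&dev->struct_mutex);
		memcpy(dst, tmp, size);
		mutex_unlock(&dev->struct_mutex);
	out:
		kfree(tmp);
		return ret;
	}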
From: Eric A. <er...@an...> - 2009-02-21 02:33:20
|
On Thu, 2009-02-19 at 13:57 +0100, Nick Piggin wrote:
> On Thu, Feb 19, 2009 at 10:19:05AM +0100, Peter Zijlstra wrote:
> > On Wed, 2009-02-18 at 11:38 -0500, kr...@bi... wrote:
> > > From: Kristian Høgsberg <kr...@re...>
> > >
> > > A number of GEM operations (and legacy drm ones) want to copy data to
> > > or from userspace while holding the struct_mutex lock.  However, the
> > > fault handler calls us with the mmap_sem held and thus enforces the
> > > opposite locking order.  This patch downs the mmap_sem up front for
> > > those operations that access userspace data under the struct_mutex
> > > lock to ensure the locking order is consistent.
> > >
> > > Signed-off-by: Kristian Høgsberg <kr...@re...>
> > > ---
> > >
> > > Here's a different and simpler attempt to fix the locking order
> > > problem.  We can just down_read() the mmap_sem pre-emptively up-front,
> > > and the locking order is respected.  It's simpler than the
> > > mutex_trylock() game, avoids introducing a new mutex.
>
> The "simple" way to fix this is to just allocate a temporary buffer
> to copy a snapshot of the data going to/from userspace.  Then do the
> real usercopy to/from that buffer outside the locks.
>
> You don't have any performance-critical bulk copies (i.e. that will
> blow the L1 cache), do you?

16kb is the most common size (batchbuffers).  32k is popular on 915
(vertex), and varying between 0-128k on 965 (vertex).  The pwrite path
generally represents 10-30% of CPU consumption in CPU-bound apps.

--
Eric Anholt
er...@an...                         eri...@in...
|
From: Kristian H. <kr...@re...> - 2009-02-19 14:49:50
|
On Thu, 2009-02-19 at 11:33 +0100, Peter Zijlstra wrote:
> On Thu, 2009-02-19 at 10:19 +0100, Peter Zijlstra wrote:
> > On Wed, 2009-02-18 at 11:38 -0500, kr...@bi... wrote:
> > > From: Kristian Høgsberg <kr...@re...>
> > >
> > > A number of GEM operations (and legacy drm ones) want to copy data to
> > > or from userspace while holding the struct_mutex lock.  However, the
> > > fault handler calls us with the mmap_sem held and thus enforces the
> > > opposite locking order.  This patch downs the mmap_sem up front for
> > > those operations that access userspace data under the struct_mutex
> > > lock to ensure the locking order is consistent.
> > >
> > > Signed-off-by: Kristian Høgsberg <kr...@re...>
> > > ---
> > >
> > > Here's a different and simpler attempt to fix the locking order
> > > problem.  We can just down_read() the mmap_sem pre-emptively up-front,
> > > and the locking order is respected.  It's simpler than the
> > > mutex_trylock() game, avoids introducing a new mutex.
>
> OK let me try that again -- my initial response was a tad curt :/

No that's fair, I was aware that the patch was probably borderline and
got the feedback I was looking for ;)

> While I appreciate your efforts in fixing GEM (I too have an interest in
> seeing it done), I cannot support your patch.
>
> Firstly, you're using mmap_sem well outside its problem domain, this is
> bad form.  Furthermore, holding it for extended durations for no good
> reason affects all other users.

Yup, agree.

> Secondly, mmap_sem is not a recursive lock (very few kernel locks are,
> and we generally frown upon recursive locking schemes), this means that
> the fault handler still cannot function properly.

I understand, but we take it twice only as a read lock, so that should
work, right?  We prevent the deadlock the lockdep validator warned about
and, as far as I can see, the patch doesn't introduce a new one.  But
other than that I agree with the frowning on recursive locking; it's too
often used to paper over badly thought-out locking.

cheers,
Kristian
|
From: Nick P. <np...@su...> - 2009-02-19 15:17:15
|
On Thu, Feb 19, 2009 at 09:49:40AM -0500, Kristian Høgsberg wrote:
> > Secondly, mmap_sem is not a recursive lock (very few kernel locks are,
> > and we generally frown upon recursive locking schemes), this means that
> > the fault handler still cannot function properly.
>
> I understand, but we take it twice only as a read lock, so that should
> work, right?  We prevent the deadlock the lockdep validator warned about
> and as far as I can see, the patch doesn't introduce a new one.  But
> other than that I agree with the frowning on recursive locking, it's too
> often used to paper over badly thought out locking.

It doesn't work.  rwsems are fair (otherwise there are terrible starvation
properties), so if another process does an interleaved down_write, then
the 2nd down_read will block until the down_write is serviced.
|
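The failure Nick describes can be drawn as an interleaving (illustrative
only; "fair" here means the rwsem queues new readers behind any waiting
writer to avoid writer starvation):

	Task A                            Task B
	------                            ------
	down_read(&mm->mmap_sem)      /* granted */
	                                  down_write(&mm->mmap_sem)
	                                  /* queued behind A's read lock */
	down_read(&mm->mmap_sem)      /* queued behind B's writer */

	A waits for B, B waits for A's first read lock => deadlock,
	even though both of A's acquisitions are read locks.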
From: Kristian H. <kr...@re...> - 2009-02-19 15:21:25
|
On Thu, 2009-02-19 at 16:17 +0100, Nick Piggin wrote:
> On Thu, Feb 19, 2009 at 09:49:40AM -0500, Kristian Høgsberg wrote:
> > > Secondly, mmap_sem is not a recursive lock (very few kernel locks are,
> > > and we generally frown upon recursive locking schemes), this means that
> > > the fault handler still cannot function properly.
> >
> > I understand, but we take it twice only as a read lock, so that should
> > work, right?  We prevent the deadlock the lockdep validator warned about
> > and as far as I can see, the patch doesn't introduce a new one.  But
> > other than that I agree with the frowning on recursive locking, it's too
> > often used to paper over badly thought out locking.
>
> It doesn't work.  rwsems are fair (otherwise there are terrible starvation
> properties), so if another process does an interleaved down_write, then
> the 2nd down_read will block until the down_write is serviced.

Ooh, right, yes of course, ouch.

thanks,
Kristian
|
From: Peter Z. <pe...@in...> - 2009-02-18 15:57:11
|
On Tue, 2009-02-17 at 16:59 -0800, Eric Anholt wrote:
> The basic problem was
> mmap_sem (do_mmap()) -> struct_mutex (drm_gem_mmap(), i915_gem_fault())
> struct_mutex (i915_gem_execbuffer()) -> mmap_sem (copy_from/to_user())

That's not the only problem, there's also:

  dup_mmap()
    down_write(mmap_sem)
    vm_ops->open() -> drm_vm_open()
      mutex_lock(struct_mutex);

> We have plenty of places where we want to hold device state the same
> (struct_mutex) while we move a non-trivial amount of data
> (copy_from/to_user()), such as i915_gem_pwrite().  Solve this by moving the
> easy things that needed struct_mutex with mmap_sem held to using a lock to
> cover just those data structures (offset hash and offset manager), and do
> trylock and reschedule in fault.

So we establish,

  mmap_sem
    offset_mutex

  i915_gem_mmap_gtt_ioctl()
    mutex_lock(struct_mutex)
    i915_gem_create_mmap_offset()
      mutex_lock(offset_mutex)

However we still have

  struct_mutex
    mmap_sem

in basically every copy_*_user() case.

But you cannot seem to switch ->fault() to use offset_mutex, which would
work out the inversion because you then have:

  struct_mutex
    mmap_sem
      offset_mutex

So why bother with the offset_mutex?  Instead you make your ->fault()
fail randomly.

I'm not sure what Wang Chen sees after this patch, but it should not be
the exact same splat; still, it would not at all surprise me if there's
plenty left.

The locking looks very fragile and I don't think this patch is helping
anything, sorry.

> Signed-off-by: Eric Anholt <er...@an...>
> ---
>  drivers/gpu/drm/drm_gem.c       |    8 ++++----
>  drivers/gpu/drm/i915/i915_gem.c |   15 ++++++++++++++-
>  include/drm/drmP.h              |    1 +
>  3 files changed, 19 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
> index 88d3368..13a0184 100644
> --- a/drivers/gpu/drm/drm_gem.c
> +++ b/drivers/gpu/drm/drm_gem.c
> @@ -97,6 +97,7 @@ drm_gem_init(struct drm_device *dev)
> 	dev->mm_private = mm;
>
> +	mutex_init(&mm->offset_mutex);
> 	if (drm_ht_create(&mm->offset_hash, 19)) {
> 		drm_free(mm, sizeof(struct drm_gem_mm), DRM_MEM_MM);
> 		return -ENOMEM;
> @@ -508,10 +509,9 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
> 	unsigned long prot;
> 	int ret = 0;
>
> -	mutex_lock(&dev->struct_mutex);
> -
> +	mutex_lock(&mm->offset_mutex);
> 	if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
> -		mutex_unlock(&dev->struct_mutex);
> +		mutex_unlock(&mm->offset_mutex);
> 		return drm_mmap(filp, vma);
> 	}
>
> @@ -556,7 +556,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
> 	drm_vm_open_locked(vma);
>
>  out_unlock:
> -	mutex_unlock(&dev->struct_mutex);
> +	mutex_unlock(&mm->offset_mutex);
>
> 	return ret;
>  }
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index ac534c9..da9a2cb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -573,8 +573,16 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> 	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
> 		PAGE_SHIFT;
>
> +	/* Get the struct mutex before accessing GEM data structures, but
> +	 * keep the struct_mutex -> mmap_sem lock ordering so that we don't
> +	 * need to mangle pwrite/pread to allow mmap_sem -> struct_mutex.
> +	 */
> +	if (!mutex_trylock(&dev->struct_mutex)) {
> +		need_resched();
> +		return VM_FAULT_NOPAGE;
> +	}

So we just fail the fault if someone happens to hold the struct_mutex?
Seems rather fragile, could be another thread doing an ioctl.
> /* Now bind it into the GTT if needed */ > - mutex_lock(&dev->struct_mutex); > if (!obj_priv->gtt_space) { > ret = i915_gem_object_bind_to_gtt(obj, obj_priv->gtt_alignment); > if (ret) { > @@ -646,6 +654,7 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj) > map->size = obj->size; > map->handle = obj; > > + mutex_lock(&mm->offset_mutex); > /* Get a DRM GEM mmap offset allocated... */ > list->file_offset_node = drm_mm_search_free(&mm->offset_manager, > obj->size / PAGE_SIZE, 0, 0); > @@ -671,12 +680,14 @@ i915_gem_create_mmap_offset(struct drm_gem_object *obj) > /* By now we should be all set, any drm_mmap request on the offset > * below will get to our mmap & fault handler */ > obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT; > + mutex_unlock(&mm->offset_mutex); > > return 0; > > out_free_mm: > drm_mm_put_block(list->file_offset_node); > out_free_list: > + mutex_unlock(&mm->offset_mutex); > drm_free(list->map, sizeof(struct drm_map_list), DRM_MEM_DRIVER); > > return ret; > @@ -690,6 +701,7 @@ i915_gem_free_mmap_offset(struct drm_gem_object *obj) > struct drm_gem_mm *mm = dev->mm_private; > struct drm_map_list *list; > > + mutex_lock(&mm->offset_mutex); > list = &obj->map_list; > drm_ht_remove_item(&mm->offset_hash, &list->hash); > > @@ -704,6 +716,7 @@ i915_gem_free_mmap_offset(struct drm_gem_object *obj) > } > > obj_priv->mmap_offset = 0; > + mutex_unlock(&mm->offset_mutex); > } > > /** > diff --git a/include/drm/drmP.h b/include/drm/drmP.h > index e5f4ae9..04f765b 100644 > --- a/include/drm/drmP.h > +++ b/include/drm/drmP.h > @@ -570,6 +570,7 @@ struct drm_ati_pcigart_info { > struct drm_gem_mm { > struct drm_mm offset_manager; /**< Offset mgmt for buffer objects */ > struct drm_open_hash offset_hash; /**< User token hash table for maps */ > + struct mutex offset_mutex; /**< covers offset_manager and offset_hash */ > }; > > /** |
From: Thomas H. <th...@sh...> - 2009-02-19 23:18:53
|
Peter Zijlstra wrote:
> On Tue, 2009-02-17 at 16:59 -0800, Eric Anholt wrote:
>> The basic problem was
>> mmap_sem (do_mmap()) -> struct_mutex (drm_gem_mmap(), i915_gem_fault())
>> struct_mutex (i915_gem_execbuffer()) -> mmap_sem (copy_from/to_user())
>
> That's not the only problem, there's also:
>
>   dup_mmap()
>     down_write(mmap_sem)
>     vm_ops->open() -> drm_vm_open()
>       mutex_lock(struct_mutex);
>
>> We have plenty of places where we want to hold device state the same
>> (struct_mutex) while we move a non-trivial amount of data
>> (copy_from/to_user()), such as i915_gem_pwrite().  Solve this by moving the
>> easy things that needed struct_mutex with mmap_sem held to using a lock to
>> cover just those data structures (offset hash and offset manager), and do
>> trylock and reschedule in fault.
>
> So we establish,
>
>   mmap_sem
>     offset_mutex
>
>   i915_gem_mmap_gtt_ioctl()
>     mutex_lock(struct_mutex)
>     i915_gem_create_mmap_offset()
>       mutex_lock(offset_mutex)
>
> However we still have
>
>   struct_mutex
>     mmap_sem
>
> in basically every copy_*_user() case.
>
> But you cannot seem to switch ->fault() to use offset_mutex, which would
> work out the inversion because you then have:
>
>   struct_mutex
>     mmap_sem
>       offset_mutex
>
> So why bother with the offset_mutex?  Instead you make your ->fault()
> fail randomly.
>
> I'm not sure what Wang Chen sees after this patch, but it should not be
> the exact same splat; still, it would not at all surprise me if there's
> plenty left.
>
> The locking looks very fragile and I don't think this patch is helping
> anything, sorry.

It looks to me like the driver's preferred locking order is

  object_mutex (which happens to be the device-global struct_mutex)
    mmap_sem
      offset_mutex

So if one could avoid using the struct_mutex for object bookkeeping (a
separate lock), then vm_open() and vm_close() would adhere to that locking
order as well, simply by not taking the struct_mutex at all.

So only fault() remains, in which that locking order is reversed.
Personally I think the trylock->reschedule->retry method with proper
commenting is a good solution.  It will be the _only_ place where locking
order is reversed and it is done in a deadlock-safe manner.  Note that
fault() doesn't really fail, but requests a retry from user-space with
rescheduling to give the process holding the struct_mutex time to
release it.

/Thomas
|
From: Eric A. <er...@an...> - 2009-02-20 02:42:19
|
On Thu, 2009-02-19 at 23:26 +0100, Peter Zijlstra wrote:
> On Thu, 2009-02-19 at 22:02 +0100, Thomas Hellstrom wrote:
> > It looks to me like the driver's preferred locking order is
> >
> >   object_mutex (which happens to be the device-global struct_mutex)
> >     mmap_sem
> >       offset_mutex
> >
> > So if one could avoid using the struct_mutex for object bookkeeping (a
> > separate lock), then vm_open() and vm_close() would adhere to that
> > locking order as well, simply by not taking the struct_mutex at all.
> >
> > So only fault() remains, in which that locking order is reversed.
> > Personally I think the trylock->reschedule->retry method with proper
> > commenting is a good solution.  It will be the _only_ place where
> > locking order is reversed and it is done in a deadlock-safe manner.
> > Note that fault() doesn't really fail, but requests a retry from
> > user-space with rescheduling to give the process holding the
> > struct_mutex time to release it.
>
> It doesn't do the reschedule -- need_resched() will check if the current
> task was marked to be scheduled away, furthermore yield based locking
> sucks chunks.
>
> What's so very difficult about pulling the copy_*_user() out from under
> the locks?

That we're expecting the data movement to occur while holding device
state in place.  For example, we write data through the GTT most of the
time, so we:

	lock struct_mutex
	pin the object to the GTT
	flushing caches as needed
	copy_from_user
	unpin object
	unlock struct_mutex

If I'm to pull the copy_from_user out, that means I have to:

	alloc temporary storage
	for each block of temp storage size:
		copy_from_user
		lock struct_mutex
		pin the object to the GTT
		flush caches as needed
		memcpy
		unpin object
		unlock struct_mutex

At this point of introducing our third copy of the user's data in our
hottest path, we should probably ditch the pwrite path entirely and go
to user mapping of the objects for performance.  Requiring user mapping
(which has significant overhead) cuts the likelihood of moving from
user-space object caching to kernel object caching in the future, which
has the potential of saving steaming piles of memory.

--
Eric Anholt
er...@an...                         eri...@in...
|
From: Peter Z. <pe...@in...> - 2009-02-19 23:52:19
|
On Thu, 2009-02-19 at 22:02 +0100, Thomas Hellstrom wrote:
> It looks to me like the driver's preferred locking order is
>
>   object_mutex (which happens to be the device-global struct_mutex)
>     mmap_sem
>       offset_mutex
>
> So if one could avoid using the struct_mutex for object bookkeeping (a
> separate lock), then vm_open() and vm_close() would adhere to that
> locking order as well, simply by not taking the struct_mutex at all.
>
> So only fault() remains, in which that locking order is reversed.
> Personally I think the trylock->reschedule->retry method with proper
> commenting is a good solution.  It will be the _only_ place where locking
> order is reversed and it is done in a deadlock-safe manner.  Note that
> fault() doesn't really fail, but requests a retry from user-space with
> rescheduling to give the process holding the struct_mutex time to
> release it.

It doesn't do the reschedule -- need_resched() will check if the current
task was marked to be scheduled away; furthermore, yield-based locking
sucks chunks.

What's so very difficult about pulling the copy_*_user() out from under
the locks?
|
From: Peter Z. <pe...@in...> - 2009-02-20 07:37:05
|
On Thu, 2009-02-19 at 18:04 -0800, Eric Anholt wrote:
> On Thu, 2009-02-19 at 23:26 +0100, Peter Zijlstra wrote:
> > On Thu, 2009-02-19 at 22:02 +0100, Thomas Hellstrom wrote:
> > > It looks to me like the driver's preferred locking order is
> > >
> > >   object_mutex (which happens to be the device-global struct_mutex)
> > >     mmap_sem
> > >       offset_mutex
> > >
> > > So if one could avoid using the struct_mutex for object bookkeeping (a
> > > separate lock), then vm_open() and vm_close() would adhere to that
> > > locking order as well, simply by not taking the struct_mutex at all.
> > >
> > > So only fault() remains, in which that locking order is reversed.
> > > Personally I think the trylock->reschedule->retry method with proper
> > > commenting is a good solution.  It will be the _only_ place where
> > > locking order is reversed and it is done in a deadlock-safe manner.
> > > Note that fault() doesn't really fail, but requests a retry from
> > > user-space with rescheduling to give the process holding the
> > > struct_mutex time to release it.
> >
> > It doesn't do the reschedule -- need_resched() will check if the current
> > task was marked to be scheduled away, furthermore yield based locking
> > sucks chunks.

Imagine what would happen if your faulting task was the highest RT prio
task in the system -- you'd end up with a live-lock.

> > What's so very difficult about pulling the copy_*_user() out from under
> > the locks?
>
> That we're expecting the data movement to occur while holding device
> state in place.  For example, we write data through the GTT most of the
> time, so we:
>
> 	lock struct_mutex
> 	pin the object to the GTT
> 	flushing caches as needed
> 	copy_from_user
> 	unpin object
> 	unlock struct_mutex

So you cannot drop the lock once you've pinned the dst object?

> If I'm to pull the copy_from_user out, that means I have to:
>
> 	alloc temporary storage
> 	for each block of temp storage size:
> 		copy_from_user
> 		lock struct_mutex
> 		pin the object to the GTT
> 		flush caches as needed
> 		memcpy
> 		unpin object
> 		unlock struct_mutex
>
> At this point of introducing our third copy of the user's data in our
> hottest path, we should probably ditch the pwrite path entirely and go
> to user mapping of the objects for performance.  Requiring user mapping
> (which has significant overhead) cuts the likelihood of moving from
> user-space object caching to kernel object caching in the future, which
> has the potential of saving steaming piles of memory.

Or you could get_user_pages() to fault the user pages and pin them, and
then do pagefault_disable() and use copy_from_user_inatomic or such, and
release the pages again.
|
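A rough sketch of the shape Peter is suggesting (illustrative only: the
variable names and surrounding context are invented, error handling is
abbreviated, and get_user_pages() is shown with its 2.6.29-era signature):

	/* Pin the user's pages first, with no driver lock held. */
	down_read(&current->mm->mmap_sem);
	ret = get_user_pages(current, current->mm,
			     (unsigned long)user_data & PAGE_MASK,
			     num_pages, 0 /* read access only */, 0,
			     pages, NULL);
	up_read(&current->mm->mmap_sem);
	if (ret < num_pages)
		goto err_release;	/* unpin what we got and bail */

	mutex_lock(&dev->struct_mutex);
	pagefault_disable();	/* the copy must not sleep or fault now */
	unwritten = __copy_from_user_inatomic(dst, user_data, size);
	pagefault_enable();
	mutex_unlock(&dev->struct_mutex);

	for (i = 0; i < num_pages; i++)
		page_cache_release(pages[i]);	/* unpin */

Note that even with the pages pinned, the inatomic copy can still return
nonzero (e.g. if the PTE isn't present), so a slow path that copies
outside the lock is still needed as a fallback.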
From: Thomas H. <th...@sh...> - 2009-02-20 08:31:59
|
Peter Zijlstra wrote:
> On Thu, 2009-02-19 at 22:02 +0100, Thomas Hellstrom wrote:
>> It looks to me like the driver's preferred locking order is
>>
>>   object_mutex (which happens to be the device-global struct_mutex)
>>     mmap_sem
>>       offset_mutex
>>
>> So if one could avoid using the struct_mutex for object bookkeeping (a
>> separate lock), then vm_open() and vm_close() would adhere to that
>> locking order as well, simply by not taking the struct_mutex at all.
>>
>> So only fault() remains, in which that locking order is reversed.
>> Personally I think the trylock->reschedule->retry method with proper
>> commenting is a good solution.  It will be the _only_ place where locking
>> order is reversed and it is done in a deadlock-safe manner.  Note that
>> fault() doesn't really fail, but requests a retry from user-space with
>> rescheduling to give the process holding the struct_mutex time to
>> release it.
>
> It doesn't do the reschedule -- need_resched() will check if the current
> task was marked to be scheduled away,

Yes, my mistake.  set_tsk_need_resched() would be the proper call.  If I'm
correctly informed, that would kick in the scheduler _after_ the mmap_sem
is released, just before returning to user-space.

> furthermore yield based locking
> sucks chunks.

Yes, but AFAICT in this situation it is the only way to reverse locking
order in a deadlock-safe manner.  If there is a lot of contention it will
eat cpu.  Unfortunately, since the struct_mutex is such a wide lock, there
will probably be contention in some situations.

BTW isn't this quite common in distributed resource management, when you
can't ensure that all requestors will request resources in the same order?
Try to grab all resources you need for an operation.  If you fail to get
one, release the resources you already have, sleep waiting for the failing
one to be available and then retry.  In this case we fail because we _may_
have a deadlock.  Since we cannot release the mmap_sem and wait, we do the
second-best thing and tell the kernel to reschedule when the mmap_sem is
released.

> What's so very difficult about pulling the copy_*_user() out from under
> the locks?

Given Eric's comment this is a GEM performance-critical path, so even from
a CPU-usage perspective, the trylock solution may be preferred.  From a
make-the-code-easy-to-understand perspective, I agree pulling out
copy_*_user() is a better solution.  It might even be that the trylock
solution doesn't kill the warnings from the lock dependency tracker.

/Thomas
|
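For reference, the trylock-and-retry shape being debated looks roughly
like this (a sketch based on Eric's v2 hunk with Thomas's
set_tsk_need_resched() correction folded in; not the code as merged,
and the binding step is elided):

	int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
	{
		struct drm_gem_object *obj = vma->vm_private_data;
		struct drm_device *dev = obj->dev;

		/* mmap_sem is already held here; taking struct_mutex
		 * outright would invert the struct_mutex -> mmap_sem order
		 * used by pread/pwrite, so only try it and back off. */
		if (!mutex_trylock(&dev->struct_mutex)) {
			set_tsk_need_resched(current);	/* resched on return */
			return VM_FAULT_NOPAGE;		/* CPU retries access */
		}

		/* ... bind the object into the GTT and insert the PFN ... */

		mutex_unlock(&dev->struct_mutex);
		return VM_FAULT_NOPAGE;
	}

As Peter points out below, this can still live-lock when the faulting
task is the highest-priority RT task and keeps the struct_mutex holder
from ever running.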
From: Peter Z. <pe...@in...> - 2009-02-20 08:47:39
|
On Fri, 2009-02-20 at 09:31 +0100, Thomas Hellstrom wrote:
> Peter Zijlstra wrote:
> > On Thu, 2009-02-19 at 22:02 +0100, Thomas Hellstrom wrote:
> >> It looks to me like the driver's preferred locking order is
> >>
> >>   object_mutex (which happens to be the device-global struct_mutex)
> >>     mmap_sem
> >>       offset_mutex
> >>
> >> So if one could avoid using the struct_mutex for object bookkeeping (a
> >> separate lock), then vm_open() and vm_close() would adhere to that
> >> locking order as well, simply by not taking the struct_mutex at all.
> >>
> >> So only fault() remains, in which that locking order is reversed.
> >> Personally I think the trylock->reschedule->retry method with proper
> >> commenting is a good solution.  It will be the _only_ place where
> >> locking order is reversed and it is done in a deadlock-safe manner.
> >> Note that fault() doesn't really fail, but requests a retry from
> >> user-space with rescheduling to give the process holding the
> >> struct_mutex time to release it.
> >
> > It doesn't do the reschedule -- need_resched() will check if the current
> > task was marked to be scheduled away,
>
> Yes, my mistake.  set_tsk_need_resched() would be the proper call.  If I'm
> correctly informed, that would kick in the scheduler _after_ the mmap_sem
> is released, just before returning to user-space.

Yes, but it would still live-lock in the RT example given in the other
email.

> > furthermore yield based locking
> > sucks chunks.
>
> Yes, but AFAICT in this situation it is the only way to reverse locking
> order in a deadlock-safe manner.  If there is a lot of contention it will
> eat cpu.  Unfortunately, since the struct_mutex is such a wide lock, there
> will probably be contention in some situations.

I'd be surprised if this were the only solution.  Maybe it's the easiest,
but not one I'll support.

> BTW isn't this quite common in distributed resource management, when you
> can't ensure that all requestors will request resources in the same order?
> Try to grab all resources you need for an operation.  If you fail to get
> one, release the resources you already have, sleep waiting for the failing
> one to be available and then retry.

Not if you're building deterministic systems.  Such constructs are highly
non-deterministic.  Furthermore, this isn't really a distributed system,
is it?
|