|
From: Laurent D. <ld...@li...> - 2015-03-26 17:38:09
|
CRIU recreates the process memory layout by remapping the checkpointee
memory areas on top of the current process (criu). This includes remapping
the vDSO to the place it occupied at checkpoint time.

However, some architectures, like powerpc, keep a reference to the vDSO
base address to build the signal return stack frame by calling the vDSO
sigreturn service. So once the vDSO has been moved, this reference is no
longer valid and the signal frames built later are not usable.

This patch series introduces a new mm hook, 'arch_remap', which is called
once mremap is done, while the mm lock is still held. The next patch adds
vDSO remap and unmap tracking to the powerpc architecture.

Changes in v4:
--------------
- Reworked the powerpc part of the patch to handle partial unmap and
  remap of the vDSO.

Changes in v3:
--------------
- Fixed a grammatical error in a comment of the second patch. Thanks
  again, Ingo.

Changes in v2:
--------------
- Following Ingo Molnar's advice, the call to arch_remap is now enabled
  through the __HAVE_ARCH_REMAP macro. This considerably reduces the
  first patch.

Laurent Dufour (2):
  mm: Introducing arch_remap hook
  powerpc/mm: Tracking vDSO remap

 arch/powerpc/include/asm/mmu_context.h | 32 +++++++++++++++++++++++++++-
 arch/powerpc/kernel/vdso.c             | 39 ++++++++++++++++++++++++++++++++++
 mm/mremap.c                            | 11 ++++++++--
 3 files changed, 79 insertions(+), 3 deletions(-)

--
1.9.1
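For illustration, the vDSO move that breaks signal delivery boils down to
a few lines of userspace code. The sketch below is hypothetical and not
part of this series: VDSO_SIZE is an assumed placeholder, and a real tool
such as CRIU parses /proc/self/maps for the exact range instead.

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/auxv.h>
#include <sys/mman.h>

#define VDSO_SIZE	(2 * 4096UL)	/* assumed: code page + data page */

int main(void)
{
	unsigned long old = getauxval(AT_SYSINFO_EHDR);
	void *target, *moved;

	if (!old)
		return 1;

	/* Reserve a destination, then force the move with MREMAP_FIXED. */
	target = mmap(NULL, VDSO_SIZE, PROT_NONE,
		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (target == MAP_FAILED)
		return 1;
	moved = mremap((void *)old, VDSO_SIZE, VDSO_SIZE,
		       MREMAP_MAYMOVE | MREMAP_FIXED, target);
	if (moved == MAP_FAILED) {
		perror("mremap");
		return 1;
	}

	/* Without arch_remap, powerpc keeps its per-mm vdso_base pointing
	 * at the old address and later builds unusable signal frames. */
	printf("vDSO moved from %#lx to %p\n", old, moved);
	return 0;
}

On an unpatched powerpc kernel, the next signal delivered to this process
would use a sigreturn trampoline derived from the stale vdso_base.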
|
From: Laurent D. <ld...@li...> - 2015-03-26 17:38:08
|
Some architectures would like to be notified when a memory area is moved
through the mremap system call.

This patch introduces a new arch_remap mm hook which is placed in the
path of mremap, and is called before the old area is unmapped (and before
the arch_unmap hook is called).

The architectures which need this hook should define __HAVE_ARCH_REMAP
in their asm/mmu_context.h and provide the arch_remap service with the
following prototype:

void arch_remap(struct mm_struct *mm,
		unsigned long old_start, unsigned long old_end,
		unsigned long new_start, unsigned long new_end);
Signed-off-by: Laurent Dufour <ld...@li...>
---
mm/mremap.c | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index 57dadc025c64..bafc234db45c 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -25,6 +25,7 @@
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
 
 #include "internal.h"
 
@@ -286,8 +287,14 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		old_len = new_len;
 		old_addr = new_addr;
 		new_addr = -ENOMEM;
-	} else if (vma->vm_file && vma->vm_file->f_op->mremap)
-		vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+	} else {
+		if (vma->vm_file && vma->vm_file->f_op->mremap)
+			vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+#ifdef __HAVE_ARCH_REMAP
+		arch_remap(mm, old_addr, old_addr+old_len,
+			   new_addr, new_addr+new_len);
+#endif
+	}
 
 	/* Conceal VM_ACCOUNT so old reservation is not undone */
 	if (vm_flags & VM_ACCOUNT) {
--
1.9.1
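For reference, an architecture opting in would add something along these
lines to its asm/mmu_context.h. This is an illustrative sketch only, not
part of the series; 'context.vdso_base' stands in for whatever per-mm
address the architecture caches, and the second patch shows the real
powerpc wiring:

#define __HAVE_ARCH_REMAP
static inline void arch_remap(struct mm_struct *mm,
			      unsigned long old_start, unsigned long old_end,
			      unsigned long new_start, unsigned long new_end)
{
	/* Hypothetical fix-up: if the cached area was moved as a whole,
	 * follow it to its new location. */
	if (mm->context.vdso_base == old_start)
		mm->context.vdso_base = new_start;
}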
|
|
From: Laurent D. <ld...@li...> - 2015-03-26 17:38:10
|
Some processes (CRIU) move the vDSO area using the mremap system call.
As a consequence, the kernel reference to the vDSO base address is no
longer valid, and the signal return frame built once the vDSO has been
moved does not point to the new sigreturn address.

This patch handles vDSO remapping and unmapping (an unmap is treated as
a remap to an empty range).

Partially moving or unmapping the vDSO invalidates it from the kernel's
point of view.
Signed-off-by: Laurent Dufour <ld...@li...>
---
arch/powerpc/include/asm/mmu_context.h | 32 +++++++++++++++++++++++++++-
arch/powerpc/kernel/vdso.c | 39 ++++++++++++++++++++++++++++++++++
2 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 73382eba02dc..67734ce8be67 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -8,7 +8,6 @@
 #include <linux/spinlock.h>
 #include <asm/mmu.h>
 #include <asm/cputable.h>
-#include <asm-generic/mm_hooks.h>
 #include <asm/cputhreads.h>
 
 /*
@@ -109,5 +108,36 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
 #endif
 }
 
+static inline void arch_dup_mmap(struct mm_struct *oldmm,
+				 struct mm_struct *mm)
+{
+}
+
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+extern void arch_vdso_remap(struct mm_struct *mm,
+			    unsigned long old_start, unsigned long old_end,
+			    unsigned long new_start, unsigned long new_end);
+static inline void arch_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
+			      unsigned long start, unsigned long end)
+{
+	arch_vdso_remap(mm, start, end, 0, 0);
+}
+
+static inline void arch_bprm_mm_init(struct mm_struct *mm,
+				     struct vm_area_struct *vma)
+{
+}
+
+#define __HAVE_ARCH_REMAP
+static inline void arch_remap(struct mm_struct *mm,
+			      unsigned long old_start, unsigned long old_end,
+			      unsigned long new_start, unsigned long new_end)
+{
+	arch_vdso_remap(mm, old_start, old_end, new_start, new_end);
+}
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 305eb0d9b768..a11b5d8f36d6 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -283,6 +283,45 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	return rc;
 }
 
+void arch_vdso_remap(struct mm_struct *mm,
+		     unsigned long old_start, unsigned long old_end,
+		     unsigned long new_start, unsigned long new_end)
+{
+	unsigned long vdso_end, vdso_start;
+
+	if (!mm->context.vdso_base)
+		return;
+	vdso_start = mm->context.vdso_base;
+
+#ifdef CONFIG_PPC64
+	/* Calling is_32bit_task() implies that we are dealing with the
+	 * current process memory. If there is a call path where mm is not
+	 * owned by the current task, then we'll need to store the
+	 * vDSO size in the mm->context.
+	 */
+	BUG_ON(current->mm != mm);
+	if (is_32bit_task())
+		vdso_end = vdso_start + (vdso32_pages << PAGE_SHIFT);
+	else
+		vdso_end = vdso_start + (vdso64_pages << PAGE_SHIFT);
+#else
+	vdso_end = vdso_start + (vdso32_pages << PAGE_SHIFT);
+#endif
+	vdso_end += (1<<PAGE_SHIFT); /* data page */
+
+	/* Check if the vDSO is in the range of the remapped area */
+	if ((vdso_start <= old_start && old_start < vdso_end) ||
+	    (vdso_start < old_end && old_end <= vdso_end) ||
+	    (old_start <= vdso_start && vdso_start < old_end)) {
+		/* Update vdso_base if the vDSO is entirely moved. */
+		if (old_start == vdso_start && old_end == vdso_end &&
+		    (old_end - old_start) == (new_end - new_start))
+			mm->context.vdso_base = new_start;
+		else
+			mm->context.vdso_base = 0;
+	}
+}
+
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
 	if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
--
1.9.1
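The range check in arch_vdso_remap() is a standard interval-intersection
test. The standalone sketch below (plain C, hypothetical addresses, with
the vDSO spanning [0x1000, 0x3000)) spells out what it classifies:

#include <assert.h>
#include <stdbool.h>

/* Same three-clause overlap test as arch_vdso_remap() above. */
static bool overlaps(unsigned long vdso_start, unsigned long vdso_end,
		     unsigned long old_start, unsigned long old_end)
{
	return (vdso_start <= old_start && old_start < vdso_end) ||
	       (vdso_start < old_end && old_end <= vdso_end) ||
	       (old_start <= vdso_start && vdso_start < old_end);
}

int main(void)
{
	unsigned long vs = 0x1000, ve = 0x3000;

	assert(overlaps(vs, ve, 0x1000, 0x3000));  /* whole vDSO moved */
	assert(overlaps(vs, ve, 0x2000, 0x3000));  /* partial: base cleared */
	assert(overlaps(vs, ve, 0x0000, 0x5000));  /* vDSO inside the range */
	assert(!overlaps(vs, ve, 0x3000, 0x4000)); /* adjacent: untouched */
	return 0;
}

Only the first case, a move of exactly the whole vDSO to an equally sized
area, keeps vdso_base valid; any partial overlap clears it, matching the
commit message above.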
|
|
From: Ingo M. <mi...@ke...> - 2015-03-26 18:56:03
|
* Laurent Dufour <ld...@li...> wrote:
> +{
> +	unsigned long vdso_end, vdso_start;
> +
> +	if (!mm->context.vdso_base)
> +		return;
> +	vdso_start = mm->context.vdso_base;
> +
> +#ifdef CONFIG_PPC64
> +	/* Calling is_32bit_task() implies that we are dealing with the
> +	 * current process memory. If there is a call path where mm is not
> +	 * owned by the current task, then we'll need to store the
> +	 * vDSO size in the mm->context.
> +	 */
> +	BUG_ON(current->mm != mm);
> +	if (is_32bit_task())
> +		vdso_end = vdso_start + (vdso32_pages << PAGE_SHIFT);
> +	else
> +		vdso_end = vdso_start + (vdso64_pages << PAGE_SHIFT);
> +#else
> +	vdso_end = vdso_start + (vdso32_pages << PAGE_SHIFT);
> +#endif
> +	vdso_end += (1<<PAGE_SHIFT); /* data page */
> +
> +	/* Check if the vDSO is in the range of the remapped area */
> +	if ((vdso_start <= old_start && old_start < vdso_end) ||
> +	    (vdso_start < old_end && old_end <= vdso_end) ||
> +	    (old_start <= vdso_start && vdso_start < old_end)) {
> +		/* Update vdso_base if the vDSO is entirely moved. */
> +		if (old_start == vdso_start && old_end == vdso_end &&
> +		    (old_end - old_start) == (new_end - new_start))
> +			mm->context.vdso_base = new_start;
> +		else
> +			mm->context.vdso_base = 0;
> +	}
> +}
Oh my, that really looks awfully complex, as you predicted, and right
in every mremap() call.
I'm fine with your original, imperfect, KISS approach. Sorry about
this detour ...
Reviewed-by: Ingo Molnar <mi...@ke...>
Thanks,
Ingo
|
|
From: Laurent D. <ld...@li...> - 2015-03-27 11:02:31
|
On 26/03/2015 19:55, Ingo Molnar wrote:
>
> * Laurent Dufour <ld...@li...> wrote:
>
>> [... arch_vdso_remap() hunk quoted above ...]
>
> Oh my, that really looks awfully complex, as you predicted, and right
> in every mremap() call.
I do agree, that's awfully complex ;)
> I'm fine with your original, imperfect, KISS approach. Sorry about
> this detour ...
>
> Reviewed-by: Ingo Molnar <mi...@ke...>
No problem, so let's stick with the v3 version of the patch.
Thanks for the Reviewed-by statement which, I guess, applies to v3 too.
Should I resend the v3?
Thanks,
Laurent.
|