Currently there are a couple of performance issues with the
__do_page_fault() implementation, which we've been hitting up
against on SMP.
The first issue is the page table lock. In practice this is read-side
only, and there's no requirement to have the lock held across the page
table walk. Presently, if we build for < 4 CPUs, this leads to the
page_table_lock being needlessly contended, which is easily visible in
heavily threaded applications.
The second issue is the TLB flush. This ends up being painfully slow due
to the P2 bounce. The flush also seems to be a sanity measure that has
only been kept around because it's what we've always done, and it can
simply be done without. Killing this off doesn't have any impact on SMP,
and as Stuart's assembly fast-path doesn't seem to care about it either,
I don't imagine it's a big show-stopper for UP.
It would be good if people with legacy SH-4 devices could test their
workloads with these optimizations in place. I'd like to get this pushed
out and then move the rest of it to an assembly fast-path derived from
Stuart's patch.
At least none of the SH-X, SH-X2, and SH-X3 parts see any regressions
from this, and I'm assuming ST40 doesn't either.
---
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index f33cedb..60d74f7 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -258,9 +258,6 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
pmd_t *pmd;
pte_t *pte;
pte_t entry;
- struct mm_struct *mm = current->mm;
- spinlock_t *ptl = NULL;
- int ret = 1;
#ifdef CONFIG_SH_KGDB
if (kgdb_nofault && kgdb_bus_err_hook)
@@ -274,12 +271,11 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
*/
if (address >= P3SEG && address < P3_ADDR_MAX) {
pgd = pgd_offset_k(address);
- mm = NULL;
} else {
- if (unlikely(address >= TASK_SIZE || !mm))
+ if (unlikely(address >= TASK_SIZE || !current->mm))
return 1;
- pgd = pgd_offset(mm, address);
+ pgd = pgd_offset(current->mm, address);
}
pud = pud_offset(pgd, address);
@@ -289,34 +285,19 @@ asmlinkage int __kprobes __do_page_fault(struct pt_regs *regs,
if (pmd_none_or_clear_bad(pmd))
return 1;
- if (mm)
- pte = pte_offset_map_lock(mm, pmd, address, &ptl);
- else
- pte = pte_offset_kernel(pmd, address);
-
+ pte = pte_offset_kernel(pmd, address);
entry = *pte;
if (unlikely(pte_none(entry) || pte_not_present(entry)))
- goto unlock;
+ return 1;
if (unlikely(writeaccess && !pte_write(entry)))
- goto unlock;
+ return 1;
if (writeaccess)
entry = pte_mkdirty(entry);
entry = pte_mkyoung(entry);
-#ifdef CONFIG_CPU_SH4
- /*
- * ITLB is not affected by "ldtlb" instruction.
- * So, we need to flush the entry by ourselves.
- */
- local_flush_tlb_one(get_asid(), address & PAGE_MASK);
-#endif
-
set_pte(pte, entry);
update_mmu_cache(NULL, address, entry);
- ret = 0;
-unlock:
- if (mm)
- pte_unmap_unlock(pte, ptl);
- return ret;
+
+ return 0;
}
|