From: Paul M. <pau...@re...> - 2007-02-07 12:23:34
This introduces a first attempt at overhauling the user page copy/clear
interface for the aliasing VIPT cases (only SH-4 and SH-4A right now;
SH7705 should be able to use it directly, though, when in 32k cache
mode).

Previously we used a reserved 1MB chunk of the address space starting
at P3 (bumping up the vmalloc base as a result), then used an array of
mutexes, one per page colour, to guarantee exclusion on the page being
operated on while manually poking at the PTEs. This also required
special copy/clear page ops that did the cache flushing based on the
user page colour directly. Additionally, the mutex exclusion was
bogus, given that these paths are invariably called under a spinlock.

This takes a slightly different approach: we use fixmap addresses for
the various colours, with each colour's slot at the same fixed virtual
address in every address space. This permits us to use the normal page
copy/clear ops directly and vastly simplifies things in the process.
And we can finally turn on mutex debugging without the p3map_mutex
blowing up. The core helper of the new scheme is sketched just below,
ahead of the patch itself.

With the light testing I've given this so far it seems to hold up, but
I'd be interested to see if this breaks on systems with different
cache models (I've only tested on SH7722 and SH7780, which are both
4-way). I'll queue this for 2.6.21 unless someone manages to break it.
Against current git, but should apply to 2.6.20 too.
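In outline, the new kmap_coherent() helper at the heart of the patch
looks like this (lifted from the pg-sh4.c changes below; the
explanatory comments are added here as annotation and are not in the
patch itself):

static inline void *kmap_coherent(struct page *page, unsigned long addr)
{
	enum fixed_addresses idx;
	unsigned long vaddr, flags;
	pte_t pte;

	/* Pin the task: the fixmap slot must stay ours until kunmap. */
	inc_preempt_count();

	/* Select the fixmap slot with the same cache colour as addr. */
	idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT;
	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
	pte = mk_pte(page, PAGE_KERNEL);

	/* Evict any stale translation, then preload the new mapping. */
	local_irq_save(flags);
	local_flush_tlb_one(get_asid(), vaddr);
	local_irq_restore(flags);
	update_mmu_cache(NULL, vaddr, pte);

	return (void *)vaddr;
}

kunmap_coherent() then just drops the preempt count; no PTE teardown
is needed, since the next user of the slot flushes the TLB entry
before reusing it.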
--

 arch/sh/kernel/sh_ksyms.c |    3 -
 arch/sh/mm/cache-sh4.c    |   18 -------
 arch/sh/mm/clear_page.S   |   47 -------------------
 arch/sh/mm/copy_page.S    |   64 --------------------------
 arch/sh/mm/pg-sh4.c       |  110 +++++++++++++++++++++-------------------
 include/asm-sh/fixmap.h   |    8 ++-
 include/asm-sh/page.h     |   13 +++--
 include/asm-sh/pgtable.h  |    6 --
 8 files changed, 71 insertions(+), 198 deletions(-)

diff --git a/arch/sh/kernel/sh_ksyms.c b/arch/sh/kernel/sh_ksyms.c
index fe1b276..a70bf70 100644
--- a/arch/sh/kernel/sh_ksyms.c
+++ b/arch/sh/kernel/sh_ksyms.c
@@ -100,8 +100,7 @@ EXPORT_SYMBOL(flush_dcache_page);
 EXPORT_SYMBOL(__flush_purge_region);
 #endif
 
-#if defined(CONFIG_MMU) && (defined(CONFIG_CPU_SH4) || \
-	defined(CONFIG_SH7705_CACHE_32KB))
+#if defined(CONFIG_MMU) && defined(CONFIG_SH7705_CACHE_32KB)
 EXPORT_SYMBOL(clear_user_page);
 #endif
 
diff --git a/arch/sh/mm/cache-sh4.c b/arch/sh/mm/cache-sh4.c
index e0cd4b7..9414945 100644
--- a/arch/sh/mm/cache-sh4.c
+++ b/arch/sh/mm/cache-sh4.c
@@ -2,7 +2,7 @@
  * arch/sh/mm/cache-sh4.c
  *
  * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
- * Copyright (C) 2001 - 2006  Paul Mundt
+ * Copyright (C) 2001 - 2007  Paul Mundt
  * Copyright (C) 2003  Richard Curnow
  *
  * This file is subject to the terms and conditions of the GNU General Public
@@ -11,8 +11,8 @@
  */
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <linux/pagemap.h>
 #include <linux/io.h>
-#include <linux/mutex.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 
@@ -77,16 +77,8 @@ static void __init emit_cache_params(void)
 /*
  * SH-4 has virtually indexed and physically tagged cache.
  */
-
-/* Worst case assumed to be 64k cache, direct-mapped i.e. 4 synonym bits. */
-#define MAX_P3_MUTEXES 16
-
-struct mutex p3map_mutex[MAX_P3_MUTEXES];
-
 void __init p3_cache_init(void)
 {
-	int i;
-
 	compute_alias(&current_cpu_data.icache);
 	compute_alias(&current_cpu_data.dcache);
 
@@ -106,12 +98,6 @@ void __init p3_cache_init(void)
 	}
 
 	emit_cache_params();
-
-	if (ioremap_page_range(P3SEG, P3SEG + (PAGE_SIZE * 4), 0, PAGE_KERNEL))
-		panic("%s failed.", __FUNCTION__);
-
-	for (i = 0; i < current_cpu_data.dcache.n_aliases; i++)
-		mutex_init(&p3map_mutex[i]);
 }
 
 /*
diff --git a/arch/sh/mm/clear_page.S b/arch/sh/mm/clear_page.S
index 8a70613..3ef3c82 100644
--- a/arch/sh/mm/clear_page.S
+++ b/arch/sh/mm/clear_page.S
@@ -1,5 +1,5 @@
 /*
- * __clear_user_page, __clear_user, clear_page implementation of SuperH
+ * __clear_user, clear_page implementation of SuperH
  *
  * Copyright (C) 2001  Kaz Kojima
  * Copyright (C) 2001, 2002  Niibe Yutaka
@@ -150,48 +150,3 @@ ENTRY(__clear_user)
 	.long	8b, .Lbad_clear_user
 	.long	9b, .Lbad_clear_user
 	.previous
-
-#if defined(CONFIG_CPU_SH4)
-/*
- * __clear_user_page
- * @to: P3 address (with same color)
- * @orig_to: P1 address
- *
- * void __clear_user_page(void *to, void *orig_to)
- */
-
-/*
- * r0 --- scratch
- * r4 --- to
- * r5 --- orig_to
- * r6 --- to + PAGE_SIZE
- */
-ENTRY(__clear_user_page)
-	mov.l	.Lpsz,r0
-	mov	r4,r6
-	add	r0,r6
-	mov	#0,r0
-	!
-1:	ocbi	@r5
-	add	#32,r5
-	movca.l	r0,@r4
-	mov	r4,r1
-	add	#32,r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	mov.l	r0,@-r4
-	add	#28,r4
-	cmp/eq	r6,r4
-	bf/s	1b
-	 ocbwb	@r1
-	!
-	rts
-	 nop
-.Lpsz:	.long	PAGE_SIZE
-
-#endif
-
diff --git a/arch/sh/mm/copy_page.S b/arch/sh/mm/copy_page.S
index 397c94c..e43c788 100644
--- a/arch/sh/mm/copy_page.S
+++ b/arch/sh/mm/copy_page.S
@@ -1,5 +1,5 @@
 /*
- * copy_page, __copy_user_page, __copy_user implementation of SuperH
+ * copy_page, __copy_user implementation of SuperH
  *
  * Copyright (C) 2001  Niibe Yutaka & Kaz Kojima
  * Copyright (C) 2002  Toshinobu Sugioka
@@ -67,68 +67,6 @@ ENTRY(copy_page_slow)
 	mov.l	@r15+,r8
 	rts
 	 nop
-
-#if defined(CONFIG_CPU_SH4)
-/*
- * __copy_user_page
- * @to: P1 address (with same color)
- * @from: P1 address
- * @orig_to: P1 address
- *
- * void __copy_user_page(void *to, void *from, void *orig_to)
- */
-
-/*
- * r0, r1, r2, r3, r4, r5, r6, r7 --- scratch
- * r8 --- from + PAGE_SIZE
- * r9 --- orig_to
- * r10 --- to
- * r11 --- from
- */
-ENTRY(__copy_user_page)
-	mov.l	r8,@-r15
-	mov.l	r9,@-r15
-	mov.l	r10,@-r15
-	mov.l	r11,@-r15
-	mov	r4,r10
-	mov	r5,r11
-	mov	r6,r9
-	mov	r5,r8
-	mov.l	.Lpsz,r0
-	add	r0,r8
-	!
-1:	ocbi	@r9
-	add	#32,r9
-	mov.l	@r11+,r0
-	mov.l	@r11+,r1
-	mov.l	@r11+,r2
-	mov.l	@r11+,r3
-	mov.l	@r11+,r4
-	mov.l	@r11+,r5
-	mov.l	@r11+,r6
-	mov.l	@r11+,r7
-	movca.l	r0,@r10
-	mov	r10,r0
-	add	#32,r10
-	mov.l	r7,@-r10
-	mov.l	r6,@-r10
-	mov.l	r5,@-r10
-	mov.l	r4,@-r10
-	mov.l	r3,@-r10
-	mov.l	r2,@-r10
-	mov.l	r1,@-r10
-	ocbwb	@r0
-	cmp/eq	r11,r8
-	bf/s	1b
-	 add	#28,r10
-	!
-	mov.l	@r15+,r11
-	mov.l	@r15+,r10
-	mov.l	@r15+,r9
-	mov.l	@r15+,r8
-	rts
-	 nop
-#endif
 .Lpsz:	.long	PAGE_SIZE
 /*
  * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n);
diff --git a/arch/sh/mm/pg-sh4.c b/arch/sh/mm/pg-sh4.c
index 969efec..b627cd5 100644
--- a/arch/sh/mm/pg-sh4.c
+++ b/arch/sh/mm/pg-sh4.c
@@ -2,83 +2,75 @@
  * arch/sh/mm/pg-sh4.c
  *
  * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
- * Copyright (C) 2002 - 2005  Paul Mundt
+ * Copyright (C) 2002 - 2007  Paul Mundt
  *
  * Released under the terms of the GNU GPL v2.0.
  */
 #include <linux/mm.h>
-#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
+#include <asm/fixmap.h>
 
-extern struct mutex p3map_mutex[];
+static inline void *kmap_coherent(struct page *page, unsigned long addr)
+{
+	enum fixed_addresses idx;
+	unsigned long vaddr, flags;
+	pte_t pte;
 
-#define CACHE_ALIAS (current_cpu_data.dcache.alias_mask)
+	inc_preempt_count();
 
-/*
- * clear_user_page
- * @to: P1 address
- * @address: U0 address to be mapped
- * @page: page (virt_to_page(to))
- */
-void clear_user_page(void *to, unsigned long address, struct page *page)
+	idx = (addr & current_cpu_data.dcache.alias_mask) >> PAGE_SHIFT;
+	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
+	pte = mk_pte(page, PAGE_KERNEL);
+
+	local_irq_save(flags);
+	local_flush_tlb_one(get_asid(), vaddr);
+	local_irq_restore(flags);
+
+	update_mmu_cache(NULL, vaddr, pte);
+
+	return (void *)vaddr;
+}
+
+static inline void kunmap_coherent(struct page *page)
+{
+	dec_preempt_count();
+	preempt_check_resched();
+}
+
+void copy_user_highpage(struct page *to, struct page *from,
+			unsigned long vaddr, struct vm_area_struct *vma)
 {
-	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0)
-		clear_page(to);
-	else {
-		unsigned long phys_addr = PHYSADDR(to);
-		unsigned long p3_addr = P3SEG + (address & CACHE_ALIAS);
-		pgd_t *pgd = pgd_offset_k(p3_addr);
-		pud_t *pud = pud_offset(pgd, p3_addr);
-		pmd_t *pmd = pmd_offset(pud, p3_addr);
-		pte_t *pte = pte_offset_kernel(pmd, p3_addr);
-		pte_t entry;
-		unsigned long flags;
+	void *vfrom, *vto;
+
+	vto = kmap_atomic(to, KM_USER1);
+	vfrom = kmap_coherent(from, vaddr);
+	copy_page(vto, vfrom);
+	kunmap_coherent(from);
+
+	if ((vma->vm_flags & VM_EXEC) &&
+	    ((vaddr ^ (unsigned long)vto) & current_cpu_data.dcache.alias_mask))
+		__flush_wback_region(vto, PAGE_SIZE);
 
-		entry = pfn_pte(phys_addr >> PAGE_SHIFT, PAGE_KERNEL);
-		mutex_lock(&p3map_mutex[(address & CACHE_ALIAS)>>12]);
-		set_pte(pte, entry);
-		local_irq_save(flags);
-		flush_tlb_one(get_asid(), p3_addr);
-		local_irq_restore(flags);
-		update_mmu_cache(NULL, p3_addr, entry);
-		__clear_user_page((void *)p3_addr, to);
-		pte_clear(&init_mm, p3_addr, pte);
-		mutex_unlock(&p3map_mutex[(address & CACHE_ALIAS)>>12]);
-	}
+	kunmap_atomic(vto, KM_USER1);
+	smp_wmb();
 }
+EXPORT_SYMBOL(copy_user_highpage);
 
 /*
- * copy_user_page
+ * clear_user_page
  * @to: P1 address
- * @from: P1 address
  * @address: U0 address to be mapped
  * @page: page (virt_to_page(to))
  */
-void copy_user_page(void *to, void *from, unsigned long address,
-		    struct page *page)
+void clear_user_page(void *to, unsigned long address, struct page *page)
 {
-	if (((address ^ (unsigned long)to) & CACHE_ALIAS) == 0)
-		copy_page(to, from);
-	else {
-		unsigned long phys_addr = PHYSADDR(to);
-		unsigned long p3_addr = P3SEG + (address & CACHE_ALIAS);
-		pgd_t *pgd = pgd_offset_k(p3_addr);
-		pud_t *pud = pud_offset(pgd, p3_addr);
-		pmd_t *pmd = pmd_offset(pud, p3_addr);
-		pte_t *pte = pte_offset_kernel(pmd, p3_addr);
-		pte_t entry;
-		unsigned long flags;
+	clear_page(to);
 
-		entry = pfn_pte(phys_addr >> PAGE_SHIFT, PAGE_KERNEL);
-		mutex_lock(&p3map_mutex[(address & CACHE_ALIAS)>>12]);
-		set_pte(pte, entry);
-		local_irq_save(flags);
-		flush_tlb_one(get_asid(), p3_addr);
-		local_irq_restore(flags);
-		update_mmu_cache(NULL, p3_addr, entry);
-		__copy_user_page((void *)p3_addr, from, to);
-		pte_clear(&init_mm, p3_addr, pte);
-		mutex_unlock(&p3map_mutex[(address & CACHE_ALIAS)>>12]);
-	}
+	if (((address ^ (unsigned long)to) &
+	     current_cpu_data.dcache.alias_mask))
+		__flush_wback_region(to, PAGE_SIZE);
 }
+EXPORT_SYMBOL(clear_user_page);
diff --git a/include/asm-sh/cacheflush.h b/include/asm-sh/cacheflush.h
diff --git a/include/asm-sh/fixmap.h b/include/asm-sh/fixmap.h
index 458e9fa..8a56617 100644
--- a/include/asm-sh/fixmap.h
+++ b/include/asm-sh/fixmap.h
@@ -46,6 +46,9 @@
  * fix-mapped?
  */
 enum fixed_addresses {
+#define FIX_N_COLOURS 16
+	FIX_CMAP_BEGIN,
+	FIX_CMAP_END = FIX_CMAP_BEGIN + FIX_N_COLOURS,
 #ifdef CONFIG_HIGHMEM
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
@@ -53,8 +56,8 @@ enum fixed_addresses {
 	__end_of_fixed_addresses
 };
 
-extern void __set_fixmap (enum fixed_addresses idx,
-			  unsigned long phys, pgprot_t flags);
+extern void __set_fixmap(enum fixed_addresses idx,
+			 unsigned long phys, pgprot_t flags);
 
 #define set_fixmap(idx, phys) \
 		__set_fixmap(idx, phys, PAGE_KERNEL)
@@ -106,5 +109,4 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr)
 	BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
 	return __virt_to_fix(vaddr);
 }
-
 #endif
diff --git a/include/asm-sh/page.h b/include/asm-sh/page.h
index ac4b467..bb5bbec 100644
--- a/include/asm-sh/page.h
+++ b/include/asm-sh/page.h
@@ -71,10 +71,15 @@ extern void copy_page_nommu(void *to, void *from);
 #if defined(CONFIG_MMU) && (defined(CONFIG_CPU_SH4) || \
 	defined(CONFIG_SH7705_CACHE_32KB))
 struct page;
-extern void clear_user_page(void *to, unsigned long address, struct page *pg);
-extern void copy_user_page(void *to, void *from, unsigned long address, struct page *pg);
-extern void __clear_user_page(void *to, void *orig_to);
-extern void __copy_user_page(void *to, void *from, void *orig_to);
+struct vm_area_struct;
+void clear_user_page(void *to, unsigned long address, struct page *pg);
+void copy_user_page(void *to, void *from, unsigned long address,
+		    struct page *pg);
+#ifdef CONFIG_CPU_SH4
+void copy_user_highpage(struct page *to, struct page *from,
+			unsigned long vaddr, struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_USER_HIGHPAGE
+#endif
 #elif defined(CONFIG_CPU_SH2) || defined(CONFIG_CPU_SH3) || !defined(CONFIG_MMU)
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
diff --git a/include/asm-sh/pgtable.h b/include/asm-sh/pgtable.h
index 3721a44..4d6c3b6 100644
--- a/include/asm-sh/pgtable.h
+++ b/include/asm-sh/pgtable.h
@@ -55,11 +55,7 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
 #define PTE_PHYS_MASK		(0x20000000 - PAGE_SIZE)
 
-/*
- * First 1MB map is used by fixed purpose.
- * Currently only 4-entry (16kB) is used (see arch/sh/mm/cache.c)
- */
-#define VMALLOC_START	(P3SEG+0x00100000)
+#define VMALLOC_START	P3SEG
 #define VMALLOC_END	(FIXADDR_START-2*PAGE_SIZE)
 
 /*
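As an aside, the colour arithmetic is easy to sanity-check in
userspace. A minimal sketch (my own illustration, not part of the
patch; it assumes 4k pages, and the 16-slot FIX_N_COLOURS ceiling
matches the worst case the old code commented on, a 64k direct-mapped
cache):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

int main(void)
{
	/*
	 * Colours = bytes indexed per cache way / page size. The 64k
	 * direct-mapped worst case gives 64k / 4k = 16, matching
	 * FIX_N_COLOURS.
	 */
	unsigned long way_size = 16 * 1024;	/* e.g. a 16k direct-mapped dcache */
	unsigned long n_colours = way_size / PAGE_SIZE;

	/* alias_mask covers the colour bits above the page offset. */
	unsigned long alias_mask = (way_size - 1) & ~(PAGE_SIZE - 1);

	/* The same slot selection kmap_coherent() performs. */
	unsigned long uaddr = 0x4000b000;	/* arbitrary U0 address */
	unsigned long idx = (uaddr & alias_mask) >> PAGE_SHIFT;

	printf("colours=%lu mask=%#lx idx=%lu\n", n_colours, alias_mask, idx);
	return 0;
}

For the 16k direct-mapped case this prints colours=4 mask=0x3000
idx=3, i.e. the user address lands in the fourth colour slot.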