From: James S. <jsi...@us...> - 2001-11-07 17:29:06
Update of /cvsroot/linux-mips/linux/arch/mips/mm
In directory usw-pr-cvs1:/tmp/cvs-serv345

Modified Files:
	Makefile c-sb1.c pg-sb1.c
Added Files:
	tlb-sb1.c
Log Message:
Start of Sibyte SB1 chainsawing.

--- NEW FILE: tlb-sb1.c ---
/*
 * Copyright (C) 1996 David S. Miller (dm...@en...)
 * Copyright (C) 1997, 2001 Ralf Baechle (ra...@gn...)
 * Copyright (C) 2000, 2001 Broadcom Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */

/*
 * sb1250.c: MMU and cache operations for the SB1250
 */

#include <asm/mmu_context.h>
#include <asm/bootinfo.h>
#include <asm/cpu.h>

/* These are probed at ld_mmu time */
static unsigned int icache_size;
static unsigned int dcache_size;

static unsigned int icache_line_size;
static unsigned int dcache_line_size;

static unsigned int icache_index_mask;

static unsigned int icache_assoc;
static unsigned int dcache_assoc;

static unsigned int icache_sets;
static unsigned int dcache_sets;

static unsigned int tlb_entries;

/* Dump the current entry* and pagemask registers */
static inline void dump_cur_tlb_regs(void)
{
	unsigned int entryhihi, entryhilo, entrylo0hi, entrylo0lo, entrylo1hi;
	unsigned int entrylo1lo, pagemask;

	__asm__ __volatile__ (
		".set push              \n"
		".set noreorder         \n"
		".set mips64            \n"
		".set noat              \n"
		"     tlbr              \n"
		"     dmfc0 $1, $10     \n"
		"     dsrl32 %0, $1, 0  \n"
		"     sra %1, $1, 0     \n"
		"     dmfc0 $1, $2      \n"
		"     dsrl32 %2, $1, 0  \n"
		"     sra %3, $1, 0     \n"
		"     dmfc0 $1, $3      \n"
		"     dsrl32 %4, $1, 0  \n"
		"     sra %5, $1, 0     \n"
		"     mfc0 %6, $5       \n"
		".set pop               \n"
		: "=r" (entryhihi), "=r" (entryhilo),
		  "=r" (entrylo0hi), "=r" (entrylo0lo),
		  "=r" (entrylo1hi), "=r" (entrylo1lo),
		  "=r" (pagemask));

	printk("%08X%08X %08X%08X %08X%08X %08X",
	       entryhihi, entryhilo,
	       entrylo0hi, entrylo0lo,
	       entrylo1hi, entrylo1lo,
	       pagemask);
}

void sb1_dump_tlb(void)
{
	int entry;

	printk("Current TLB registers state:\n"
	       " EntryHi EntryLo0 EntryLo1 PageMask Index\n"
	       "--------------------------------------------------------------------\n");
	dump_cur_tlb_regs();
	printk(" %08X\n", read_32bit_cp0_register(CP0_INDEX));
	printk("\n\nFull TLB Dump:"
	       "Idx EntryHi EntryLo0 EntryLo1 PageMask\n"
	       "--------------------------------------------------------------\n");
	for (entry = 0; entry < tlb_entries; entry++) {
		set_index(entry);
		printk("\n%02i ", entry);
		__asm__ __volatile__ (
			".set push    \n"
			".set mips64  \n"
			"     tlbr    \n"
			".set pop     \n");
		dump_cur_tlb_regs();
	}
	printk("\n");
}

void local_flush_tlb_all(void)
{
	unsigned long flags;
	unsigned long old_ctx;
	int entry;

	__save_and_cli(flags);
	/* Save old context and create impossible VPN2 value */
	old_ctx = (get_entryhi() & 0xff);
	set_entrylo0(0);
	set_entrylo1(0);
	for (entry = 0; entry < tlb_entries; entry++) {
		set_entryhi(KSEG0 + (PAGE_SIZE << 1) * entry);
		set_index(entry);
		tlb_write_indexed();
	}
	set_entryhi(old_ctx);
	__restore_flags(flags);
}

/*
 * Use a bogus region of memory (starting at 0) to sanitize the TLB's.
* Use increments of the maximum page size (16MB), and check for duplicate * entries before doing a given write. Then, when we're safe from collisions * with the firmware, go back and give all the entries invalid addresses with * the normal flush routine. */ void sb1_sanitize_tlb(void) { int entry; long addr = 0; long inc = 1<<24; /* 16MB */ /* Save old context and create impossible VPN2 value */ set_entrylo0(0); set_entrylo1(0); for (entry = 0; entry < tlb_entries; entry++) { do { addr += inc; set_entryhi(addr); tlb_probe(); } while ((int)(get_index()) >= 0); set_index(entry); tlb_write_indexed(); } /* Now that we know we're safe from collisions, we can safely flush the TLB with the "normal" routine. */ flush_tlb_all(); } void local_flush_tlb_range(struct mm_struct *mm, unsigned long start, unsigned long end) { unsigned long flags; int cpu; __save_and_cli(flags); cpu = smp_processor_id(); if(CPU_CONTEXT(cpu, mm) != 0) { int size; size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; size = (size + 1) >> 1; if(size <= (mips_cpu.tlbsize/2)) { int oldpid = (get_entryhi() & 0xff); int newpid = (CPU_CONTEXT(cpu, mm) & 0xff); start &= (PAGE_MASK << 1); end += ((PAGE_SIZE << 1) - 1); end &= (PAGE_MASK << 1); while(start < end) { int idx; set_entryhi(start | newpid); start += (PAGE_SIZE << 1); tlb_probe(); idx = get_index(); set_entrylo0(0); set_entrylo1(0); set_entryhi(KSEG0 + (idx << (PAGE_SHIFT+1))); if(idx < 0) continue; tlb_write_indexed(); } set_entryhi(oldpid); } else { get_new_mmu_context(mm); if (mm == current->active_mm) set_entryhi(CPU_CONTEXT(cpu, mm) & 0xff); } } __restore_flags(flags); } void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { unsigned long flags; __save_and_cli(flags); #ifdef CONFIG_SMP /* * This variable is eliminated from CPU_CONTEXT() if SMP isn't defined, so * conditional it to get rid of silly "unused variable" compiler complaints */ int cpu = smp_processor_id(); #endif if (CPU_CONTEXT(cpu, vma->vm_mm) != 0) { int oldpid, newpid, idx; #ifdef DEBUG_TLB printk("[tlbpage<%d,%08lx>]", CPU_CONTEXT(cpu, vma->vm_mm), page); #endif newpid = (CPU_CONTEXT(cpu, vma->vm_mm) & 0xff); page &= (PAGE_MASK << 1); oldpid = (get_entryhi() & 0xff); set_entryhi (page | newpid); tlb_probe(); idx = get_index(); set_entrylo0(0); set_entrylo1(0); if(idx < 0) goto finish; /* Make sure all entries differ. */ set_entryhi(KSEG0+(idx<<(PAGE_SHIFT+1))); tlb_write_indexed(); finish: set_entryhi(oldpid); } __restore_flags(flags); } /* All entries common to a mm share an asid. To effectively flush these entries, we just bump the asid. */ void local_flush_tlb_mm(struct mm_struct *mm) { unsigned long flags; int cpu; __save_and_cli(flags); cpu = smp_processor_id(); if (CPU_CONTEXT(cpu, mm) != 0) { get_new_mmu_context(mm); if (mm == current->active_mm) { set_entryhi(CPU_CONTEXT(cpu, mm) & 0xff); } } __restore_flags(flags); } /* Stolen from mips32 routines */ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte) { unsigned long flags; pgd_t *pgdp; pmd_t *pmdp; pte_t *ptep; int idx, pid; /* * Handle debugger faulting in for debugee. 
*/ if (current->active_mm != vma->vm_mm) return; __save_and_cli(flags); pid = get_entryhi() & 0xff; #ifdef DEBUG_TLB if((pid != (CPU_CONTEXT(cpu, vma->vm_mm) & 0xff)) || (CPU_CONTEXT(cpu, vma->vm_mm) == 0)) { printk("update_mmu_cache: Wheee, bogus tlbpid mmpid=%d tlbpid=%d\n", (int) (CPU_CONTEXT(cpu, vma->vm_mm) & 0xff), pid); } #endif address &= (PAGE_MASK << 1); set_entryhi(address | (pid)); pgdp = pgd_offset(vma->vm_mm, address); tlb_probe(); pmdp = pmd_offset(pgdp, address); idx = get_index(); ptep = pte_offset(pmdp, address); set_entrylo0(pte_val(*ptep++) >> 6); set_entrylo1(pte_val(*ptep) >> 6); set_entryhi(address | (pid)); if(idx < 0) { tlb_write_random(); } else { tlb_write_indexed(); } set_entryhi(pid); __restore_flags(flags); } /* * This is called from loadmmu.c. We have to set up all the * memory management function pointers, as well as initialize * the caches and tlbs */ void sb1_tlb_init(void) { /* * We don't know what state the firmware left the TLB's in, so this is * the ultra-conservative way to flush the TLB's and avoid machine * check exceptions due to duplicate TLB entries */ sb1_sanitize_tlb(); /* Turn on caching in kseg0 */ change_cp0_config(CONF_CM_CMASK, CONF_CM_CACHABLE_COW); } Index: Makefile =================================================================== RCS file: /cvsroot/linux-mips/linux/arch/mips/mm/Makefile,v retrieving revision 1.8 retrieving revision 1.9 diff -u -d -r1.8 -r1.9 --- Makefile 2001/10/25 16:43:24 1.8 +++ Makefile 2001/11/07 17:29:03 1.9 @@ -31,6 +31,7 @@ obj-$(CONFIG_CPU_R10000) += pg-andes.o c-andes.o tlb-r4k.o tlbex-r4k.o obj-$(CONFIG_CPU_MIPS32) += pg-mips32.o c-mips32.o tlb-r4k.o tlbex-r4k.o obj-$(CONFIG_CPU_MIPS64) += pg-mips32.o c-mips32.o tlb-r4k.o tlbex-r4k.o +obj-$(CONFIG_CPU_SB1) += pg-sb1.o c-sb1.o tlb-sb1.o obj-$(CONFIG_SGI_IP22) += umap.o obj-$(CONFIG_BAGET_MIPS) += umap.o Index: c-sb1.c =================================================================== RCS file: /cvsroot/linux-mips/linux/arch/mips/mm/c-sb1.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- c-sb1.c 2001/10/23 17:20:14 1.1 +++ c-sb1.c 2001/11/07 17:29:03 1.2 @@ -18,13 +18,10 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* - * In this entire file, I'm not sure what the role of the L2 on the sb1250 - * is. Since it is coherent to the system, we should never need to flush - * it...right?...right??? -JDC - */ - +#include <linux/init.h> #include <asm/mmu_context.h> +#include <asm/bootinfo.h> +#include <asm/cpu.h> /* These are probed at ld_mmu time */ static unsigned int icache_size; @@ -33,6 +30,8 @@ static unsigned int icache_line_size; static unsigned int dcache_line_size; +static unsigned int icache_index_mask; + static unsigned int icache_assoc; static unsigned int dcache_assoc; @@ -40,24 +39,24 @@ static unsigned int dcache_sets; static unsigned int tlb_entries; -void local_flush_tlb_all(void) +/* Define this to be insanely conservative (e.g. 
flush everything, lots) */ +#undef SB1_TLB_CONSERVATIVE +#undef SB1_CACHE_CONSERVATIVE +void pgd_init(unsigned long page) { - unsigned long flags; - unsigned long old_ctx; - int entry; + unsigned long *p = (unsigned long *) page; + int i; - __save_and_cli(flags); - /* Save old context and create impossible VPN2 value */ - old_ctx = (get_entryhi() & 0xff); - set_entrylo0(0); - set_entrylo1(0); - for (entry = 0; entry < tlb_entries; entry++) { - set_entryhi(KSEG0 + (PAGE_SIZE << 1) * entry); - set_index(entry); - tlb_write_indexed(); + for (i = 0; i < USER_PTRS_PER_PGD; i+=8) { + p[i + 0] = (unsigned long) invalid_pte_table; + p[i + 1] = (unsigned long) invalid_pte_table; + p[i + 2] = (unsigned long) invalid_pte_table; + p[i + 3] = (unsigned long) invalid_pte_table; + p[i + 4] = (unsigned long) invalid_pte_table; + p[i + 5] = (unsigned long) invalid_pte_table; + p[i + 6] = (unsigned long) invalid_pte_table; + p[i + 7] = (unsigned long) invalid_pte_table; } - set_entryhi(old_ctx); - __restore_flags(flags); } /* @@ -73,8 +72,9 @@ * to flush it */ -static void sb1_flush_cache_all(void) +static void _sb1_flush_cache_all(void) { + /* * Haven't worried too much about speed here; given that we're flushing * the icache, the time to invalidate is dwarfed by the time it's going @@ -84,71 +84,293 @@ * $2 - set count */ if (icache_sets) { + if (dcache_sets) { + __asm__ __volatile__ ( + ".set push \n" + ".set noreorder \n" + ".set noat \n" + ".set mips4 \n" + " move $1, %2 \n" /* Start at index 0 */ + "1: cache 0x1, 0($1) \n" /* WB/Invalidate this index */ + " addiu %1, %1, -1 \n" /* Decrement loop count */ + " bnez %1, 1b \n" /* loop test */ + " addu $1, $1, %0 \n" /* Next address */ + ".set pop \n" + ::"r" (dcache_line_size), + "r" (dcache_sets * dcache_assoc), + "r" (KSEG0) + :"$1"); + __asm__ __volatile__ ( + ".set push \n" + ".set noreorder \n" + ".set mips2 \n" + "sync \n" +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS /* Bug 1384 */ + "sync \n" +#endif + ".set pop \n"); + } __asm__ __volatile__ ( ".set push \n" ".set noreorder \n" ".set noat \n" ".set mips4 \n" " move $1, %2 \n" /* Start at index 0 */ - "1: cache 0, 0($1) \n" /* Invalidate this index */ + "1: cache 0, 0($1) \n" /* Invalidate this index */ " addiu %1, %1, -1 \n" /* Decrement loop count */ " bnez %1, 1b \n" /* loop test */ - " addu $1, $1, %0 \n" /* Next address JDCXXX - Should be short piped */ + " addu $1, $1, %0 \n" /* Next address */ ".set pop \n" ::"r" (icache_line_size), "r" (icache_sets * icache_assoc), - "r" (KSEG0)); + "r" (KSEG0) + :"$1"); } - if (dcache_sets) { +} + +#ifdef CONFIG_SMP +static void sb1_flush_cache_all_ipi(void *ignored) +{ + _sb1_flush_cache_all(); +} +#endif + +static void sb1_flush_cache_all(void) +{ + smp_call_function(sb1_flush_cache_all_ipi, 0, 1, 1); + _sb1_flush_cache_all(); +} + +/* + * When flushing a range in the icache, we have to first writeback + * the dcache for the same range, so new ifetches will see any + * data that was dirty in the dcache. Also, if the flush is very + * large, just flush the whole cache rather than spinning in here + * forever. Fills from the (always coherent) L2 come in relatively + * quickly. + * + * Also, at the moment we just hit-writeback the dcache instead + * of writeback-invalidating it. 
Not doing the invalidates + * doesn't cost us anything, since we're coherent + * +*/ + +static void _sb1_flush_icache_range(unsigned long start, unsigned long end) +{ + if (icache_sets) { + if (dcache_sets) { +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS + unsigned long flags; + local_irq_save(flags); +#endif + __asm__ __volatile__ ( + ".set push \n" + ".set noreorder \n" + ".set noat \n" + ".set mips4 \n" + " move $1, %0 \n" + "1: \n" +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS + " lw $0, 0($1) \n" /* Bug 1370, 1368 */ + " cache 0x15, 0($1) \n" /* Hit-WB-inval this address */ +#else + " cache 0x19, 0($1) \n" /* Hit-WB this address */ +#endif + " bne $1, %1, 1b \n" /* loop test */ + " addu $1, $1, %2 \n" /* next line */ + ".set pop \n" + ::"r" (start), + "r" (end), + "r" (dcache_line_size) + :"$1"); + __asm__ __volatile__ ( + ".set push \n" + ".set noreorder \n" + ".set mips2 \n" + "sync \n" +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS /* Bug 1384 */ + "sync \n" +#endif + ".set pop \n"); +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS + local_irq_restore(flags); +#endif + } + /* XXXKW Guess what: these Kseg0 addressese aren't + enough to let us figure out what may be in the + cache under mapped Useg tags. The situation is + even worse, because bit 12 belongs to both the page + number AND the cache index, which means the Kseg0 + page number may have a different cache index than + the Useg address. For these two reasons, we have + to flush the entire thing. Since the Dcache is + physically tagged, we *can* use hit operations, */ +#if 0 + start &= icache_index_mask; + end &= icache_index_mask; +#else + start = 0; + end = icache_index_mask; +#endif __asm__ __volatile__ ( ".set push \n" ".set noreorder \n" ".set noat \n" ".set mips4 \n" - " move $1, %2 \n" /* Start at index 0 */ - "1: cache 0x1, 0($1) \n" /* WB/Invalidate this index */ - " addiu %1, %1, -1 \n" /* Decrement loop count */ - " bnez %1, 1b \n" /* loop test */ - " addu $1, $1, %0 \n" /* Next address JDCXXX - Should be short piped */ + " move $1, %0 \n" + "1: cache 0, (0<<13)($1) \n" /* Index-inval this address */ + " cache 0, (1<<13)($1) \n" /* Index-inval this address */ + " cache 0, (2<<13)($1) \n" /* Index-inval this address */ + " cache 0, (3<<13)($1) \n" /* Index-inval this address */ + " bne $1, %1, 1b \n" /* loop test */ + " addu $1, $1, %2 \n" /* next line */ ".set pop \n" - : - : "r" (dcache_line_size), - "r" (dcache_sets * dcache_assoc), "r" (KSEG0)); + ::"r" (start), + "r" (end), + "r" (icache_line_size) + :"$1"); } } -/* - * When flushing a range in the icache, we have to first writeback - * the dcache for the same range, so new ifetches will see any - * data that was dirty in the dcache - */ +/* XXXKW how should I pass these instead? */ +unsigned long flush_range_start; +unsigned long flush_range_end; + +#if defined(CONFIG_SMP) && !defined(SB1_CACHE_CONSERVATIVE) + +static void sb1_flush_icache_range_ipi(void *ignored) +{ + _sb1_flush_icache_range(flush_range_start, flush_range_end); +} +#endif static void sb1_flush_icache_range(unsigned long start, unsigned long end) { - /* JDCXXX - Implement me! 
*/ +#ifdef SB1_CACHE_CONSERVATIVE sb1_flush_cache_all(); +#else + if (start == end) { + return; + } + start &= ~((long)(dcache_line_size - 1)); + end = (end - 1) & ~((long)(dcache_line_size - 1)); + + if ((end-start) >= (16*1024*1024)) { + sb1_flush_cache_all(); + } else { + _sb1_flush_icache_range(start, end); + flush_range_start = start; + flush_range_end = end; + smp_call_function(sb1_flush_icache_range_ipi, 0, 1, 1); + } +#endif } -static void sb1_flush_cache_mm(struct mm_struct *mm) +/* + * If there's no context yet, or the page isn't executable, no icache flush + * is needed + */ +static void sb1_flush_icache_page(struct vm_area_struct *vma, struct page *page) { - /* Don't need to do this, as the dcache is physically tagged */ + unsigned long addr; + + if ((vma->vm_mm->context == 0) || !(vma->vm_flags & VM_EXEC)) { + return; + } + + addr = (unsigned long)page_address(page); + /* XXXKW addr is a Kseg0 address, whereas hidden higher up the + call stack, we may really need to flush a Useg address. + Our Icache is virtually tagged, which means we have to be + super conservative. See comments in + _sb1_flush_icache_rage. */ + sb1_flush_icache_range(addr, addr + PAGE_SIZE); } -static void sb1_flush_cache_range(struct mm_struct *mm, - unsigned long start, - unsigned long end) +static inline void protected_flush_icache_line(unsigned long addr) { - /* Don't need to do this, as the dcache is physically tagged */ + __asm__ __volatile__( + " .set push \n" + " .set noreorder \n" + " .set mips4 \n" + "1: cache 0x10, (%0) \n" + "2: .set pop \n" + " .section __ex_table,\"a\"\n" + " .word 1b, 2b \n" + " .previous" + : + : "r" (addr)); } +static inline void protected_writeback_dcache_line(unsigned long addr) +{ +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS + /* Have to be sure the TLB entry exists for the cache op, + so we have to be sure that nothing happens in between the + lw and the cache op + */ + unsigned long flags; + local_irq_save(flags); +#endif + __asm__ __volatile__( + " .set push \n" + " .set noreorder \n" + " .set mips4 \n" + "1: \n" +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS + " lw $0, (%0) \n" + " cache 0x15, 0(%0) \n" /* Hit-WB-inval this address */ +#else + " cache 0x19, 0(%0) \n" /* Hit-WB this address */ +#endif + /* XXX: should be able to do this after both dcache cache + ops, but there's no guarantee that this will be inlined, + and the pass1 restriction checker can't detect syncs + following cache ops except in the following basic block. + */ + " sync \n" +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS /* Bug 1384 */ + " sync \n" +#endif + "2: .set pop \n" + " .section __ex_table,\"a\"\n" + " .word 1b, 2b \n" + " .previous" + : + : "r" (addr)); +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS + local_irq_restore(flags); +#endif +} -static void sb1_flush_cache_sigtramp(unsigned long page) +/* + * XXX - Still need to really understand this. This is mostly just + * derived from the r10k and r4k implementations, and seems to work + * but things that "seem to work" when I don't understand *why* they + * "seem to work" disturb me greatly...JDC + */ +static void sb1_flush_cache_sigtramp(unsigned long addr) { - /* JDCXXX - Implement me! 
*/ - sb1_flush_cache_all(); + unsigned long daddr, iaddr; + + daddr = addr & ~(dcache_line_size - 1); + protected_writeback_dcache_line(daddr); + protected_writeback_dcache_line(daddr + dcache_line_size); + iaddr = addr & ~(icache_line_size - 1); + protected_flush_icache_line(iaddr); + protected_flush_icache_line(iaddr + icache_line_size); } +/* + * Anything that just flushes dcache state can be ignored, as we're always + * coherent in dcache space. This is just a dummy function that all the + * nop'ed routines point to + */ +static void sb1_nop(void) +{ +} + /* * This only needs to make sure stores done up to this * point are visible to other agents outside the CPU. Given @@ -170,31 +392,6 @@ :::"memory"); } - -/* Cribbed from the r2300 code */ -static void sb1_flush_cache_page(struct vm_area_struct *vma, - unsigned long page) -{ - sb1_flush_cache_all(); -#if 0 - struct mm_struct *mm = vma->vm_mm; - unsigned long physpage; - - /* No icache flush needed without context; */ - if (mm->context == 0) - return; - - /* No icache flush needed if the page isn't executable */ - if (!(vma->vm_flags & VM_EXEC)) - return; - - physpage = (unsigned long) page_address(page); - if (physpage) - sb1_flush_icache_range(physpage, physpage + PAGE_SIZE); -#endif -} - - /* * Cache set values (from the mips64 spec) * 0 - 64 @@ -206,13 +403,13 @@ * 6 - 4096 * 7 - Reserved */ + static unsigned int decode_cache_sets(unsigned int config_field) { if (config_field == 7) { /* JDCXXX - Find a graceful way to abort. */ return 0; } - return (1<<(config_field + 6)); } @@ -227,6 +424,7 @@ * 6 - 128 bytes * 7 - Reserved */ + static unsigned int decode_cache_line_size(unsigned int config_field) { if (config_field == 0) { @@ -248,12 +446,10 @@ * 12:10 Dcache line size * 9:7 Dcache Associativity */ - -static void probe_cache_sizes(void) +static __init void probe_cache_sizes(void) { u32 config1; - __asm__ __volatile__( ".set push \n" ".set mips64 \n" @@ -264,42 +460,40 @@ dcache_line_size = decode_cache_line_size((config1 >> 10) & 0x7); icache_sets = decode_cache_sets((config1 >> 22) & 0x7); dcache_sets = decode_cache_sets((config1 >> 13) & 0x7); - icache_assoc = ((config1 >> 16) & 0x7) + 1; + icache_assoc = ((config1 >> 16) & 0x7) + 1; dcache_assoc = ((config1 >> 7) & 0x7) + 1; icache_size = icache_line_size * icache_sets * icache_assoc; dcache_size = dcache_line_size * dcache_sets * dcache_assoc; + icache_index_mask = (icache_sets - 1) * icache_line_size; tlb_entries = ((config1 >> 25) & 0x3f) + 1; } - -/* This is called from loadmmu.c. We have to set up all the - memory management function pointers, as well as initialize - the caches and tlbs */ +/* + * This is called from loadmmu.c. 
We have to set up all the + * memory management function pointers, as well as initialize + * the caches and tlbs + */ void ld_mmu_sb1(void) { probe_cache_sizes(); _clear_page = sb1_clear_page; _copy_page = sb1_copy_page; - - _flush_cache_all = sb1_flush_cache_all; - _flush_cache_mm = sb1_flush_cache_mm; - _flush_cache_range = sb1_flush_cache_range; - _flush_cache_page = sb1_flush_cache_page; - _flush_cache_sigtramp = sb1_flush_cache_sigtramp; + _flush_cache_all = sb1_flush_cache_all; + ___flush_cache_all = sb1_flush_cache_all; + _flush_cache_mm = (void (*)(struct mm_struct *))sb1_nop; + _flush_cache_range = (void (*)(struct mm_struct *, unsigned long, unsigned long))sb1_nop; _flush_page_to_ram = sb1_flush_page_to_ram; - _flush_icache_page = sb1_flush_cache_page; + _flush_icache_page = sb1_flush_icache_page; + _flush_cache_sigtramp = sb1_flush_cache_sigtramp; _flush_icache_range = sb1_flush_icache_range; - - /* - * JDCXXX I'm not sure whether these are necessary: is this the right - * place to initialize the tlb? If it is, why is it done - * at this level instead of as common code in loadmmu()? - */ - flush_cache_all(); + /* None of these are needed for the sb1 */ + _flush_cache_page = (void (*)(struct vm_area_struct *, unsigned long))sb1_nop; - /* Turn on caching in kseg0 */ - set_cp0_config(CONF_CM_CMASK, 0); + /* JDCXXX I'm not sure whether these are necessary: is this the right + place to initialize the tlb? If it is, why is it done + at this level instead of as common code in loadmmu()? */ + flush_cache_all(); } Index: pg-sb1.c =================================================================== RCS file: /cvsroot/linux-mips/linux/arch/mips/mm/pg-sb1.c,v retrieving revision 1.1 retrieving revision 1.2 diff -u -d -r1.1 -r1.2 --- pg-sb1.c 2001/10/23 17:20:14 1.1 +++ pg-sb1.c 2001/11/07 17:29:03 1.2 @@ -1,10 +1,37 @@ -#include <linux/sched.h> -#include <linux/mm.h> +/* + * Copyright (C) 1996 David S. Miller (dm...@en...) + * Copyright (C) 1997, 2001 Ralf Baechle (ra...@gn...) + * Copyright (C) 2000 Sibyte + * + * Written by Justin Carlson (ca...@si...) + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ +#include <linux/config.h> -#include <asm/cacheops.h> +#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS +#define SB1_PREF_LOAD_STREAMED_HINT "0" +#define SB1_PREF_STORE_STREAMED_HINT "1" +#else +#define SB1_PREF_LOAD_STREAMED_HINT "4" +#define SB1_PREF_STORE_STREAMED_HINT "5" +#endif /* These are the functions hooked by the memory management function pointers */ -void sb1_clear_page(void *page) +static void sb1_clear_page(void *page) { /* JDCXXX - This should be bottlenecked by the write buffer, but these things tend to be mildly unpredictable...should check this on the @@ -19,30 +46,31 @@ ".set noat \n" ".set mips4 \n" " addiu $1, %0, %2 \n" /* Calculate the end of the page to clear */ - " pref 5, 0(%0) \n" /* Prefetch the first 4 lines */ - " pref 5, 32(%0) \n" - " pref 5, 64(%0) \n" - " pref 5, 96(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%0) \n" /* Prefetch the first 4 lines */ + " pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%0) \n" "1: sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */ " sd $0, 8(%0) \n" " sd $0, 16(%0) \n" " sd $0, 24(%0) \n" - " pref 5,128(%0) \n" /* Prefetch 4 lines ahead */ + " pref " SB1_PREF_STORE_STREAMED_HINT ",128(%0) \n" /* Prefetch 4 lines ahead */ " bne $1, %0, 1b \n" " addiu %0, %0, 32 \n" /* Next cacheline (This instruction better be short piped!) */ ".set pop \n" - : "=r" (page) - : "0" (page), "I" (PAGE_SIZE-32) - : "memory"); + :"=r" (page) + :"0" (page), + "I" (PAGE_SIZE-32) + :"$1","memory"); } -void sb1_copy_page(void *to, void *from) +static void sb1_copy_page(void *to, void *from) { /* This should be optimized in assembly...can't use ld/sd, though, * because the top 32 bits could be nuked if we took an interrupt - * during the routine. And this is not a good place to be cli()'ing + * during the routine. 
And this is not a good place to be cli()'ing */ /* The pref's used here are using "streaming" hints, which cause the @@ -50,7 +78,7 @@ * ends up copying a lot more data than is commonly used, so this seems * to make sense in terms of reducing cache pollution, but I've no real * performance data to back this up - */ + */ __asm__ __volatile__( ".set push \n" @@ -58,12 +86,12 @@ ".set noat \n" ".set mips4 \n" " addiu $1, %0, %4 \n" /* Calculate the end of the page to copy */ - " pref 4, 0(%0) \n" /* Prefetch the first 3 lines to be read and copied */ - " pref 5, 0(%1) \n" - " pref 4, 32(%0) \n" - " pref 5, 32(%1) \n" - " pref 4, 64(%0) \n" - " pref 5, 64(%1) \n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", 0(%0) \n" /* Prefetch the first 3 lines */ + " pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%1) \n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", 32(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%1) \n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", 64(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%1) \n" "1: lw $2, 0(%0) \n" /* Block copy a cacheline */ " lw $3, 4(%0) \n" " lw $4, 8(%0) \n" @@ -72,8 +100,8 @@ " lw $7, 20(%0) \n" " lw $8, 24(%0) \n" " lw $9, 28(%0) \n" - " pref 4, 96(%0) \n" /* Prefetch ahead */ - " pref 5, 96(%1) \n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", 96(%0) \n" /* Prefetch ahead */ + " pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%1) \n" " sw $2, 0(%1) \n" " sw $3, 4(%1) \n" " sw $4, 8(%1) \n" @@ -88,9 +116,13 @@ " bne $1, %0, 1b \n" " addiu %0, %0, 32 \n" /* Next cacheline */ ".set pop \n" - : "=r" (to), "=r" (from) - : "0" (from), "1" (to), "I" (PAGE_SIZE-32) - : "$2","$3","$4","$5","$6","$7","$8","$9","memory"); + :"=r" (to), + "=r" (from) + : + "0" (from), + "1" (to), + "I" (PAGE_SIZE-32) + :"$1","$2","$3","$4","$5","$6","$7","$8","$9","memory"); /* unsigned long *src = from; unsigned long *dest = to; @@ -99,21 +131,4 @@ *dest++ = *src++; } */ -} - -void pgd_init(unsigned long page) -{ - unsigned long *p = (unsigned long *) page; - int i; - - for (i = 0; i < USER_PTRS_PER_PGD; i+=8) { - p[i + 0] = (unsigned long) invalid_pte_table; - p[i + 1] = (unsigned long) invalid_pte_table; - p[i + 2] = (unsigned long) invalid_pte_table; - p[i + 3] = (unsigned long) invalid_pte_table; - p[i + 4] = (unsigned long) invalid_pte_table; - p[i + 5] = (unsigned long) invalid_pte_table; - p[i + 6] = (unsigned long) invalid_pte_table; - p[i + 7] = (unsigned long) invalid_pte_table; - } } |