[xtensa-cvscommit] linux/arch/xtensa/kernel handlers.S,1.7,1.8 traps.c,1.4,1.5
Brought to you by:
zankel
|
From: <joe...@us...> - 2002-11-28 00:40:53
|
Update of /cvsroot/xtensa/linux/arch/xtensa/kernel
In directory sc8-pr-cvs1:/tmp/cvs-serv11618/arch/xtensa/kernel
Modified Files:
handlers.S traps.c
Log Message:
Add unaligned exception handling. None of this code is on by default, and we'll leave it off until we sync up with hardware that supports this exception. See the XTFIXME comment in the core.h files to enable this code.
Index: handlers.S
===================================================================
RCS file: /cvsroot/xtensa/linux/arch/xtensa/kernel/handlers.S,v
retrieving revision 1.7
retrieving revision 1.8
diff -C2 -d -r1.7 -r1.8
*** handlers.S 11 Nov 2002 22:25:25 -0000 1.7
--- handlers.S 28 Nov 2002 00:40:49 -0000 1.8
***************
*** 1193,1197 ****
.type _excStub,@function
_excStub:
! l32i a7, a1, PT_EXCCAUSE // vector number (also infrequently-used 2nd parm passed to handler, below)
// Determine which C handler to call, and place in a4.
--- 1193,1197 ----
.type _excStub,@function
_excStub:
! l32i a7, a1, PT_EXCCAUSE // vector number (also infrequently-used 2ndparm passed to handler, below)
// Determine which C handler to call, and place in a4.
***************
*** 1886,1889 ****
--- 1886,2589 ----
l32i a0, a0, RSAVE_A0 // restore a0 from memory
rfe
+
+
+ #if XCHAL_UNALIGNED_LOAD_EXCEPTION
+
+ /* First-level exit handler for unaligned exceptions.
+ *
+ * This handler works only for kernel exceptions. Unaligned user
+ * access should get a seg fault.
+ *
+ * Entry conditions: a0 is undefined
+ * depc contains original a0 value
+ */
+
+
+ .macro src_b r, w0, w1
+ #ifdef __XTENSA_EB__
+ src \r, \w0, \w1
+ #else
+ src \r, \w1, \w0
+ #endif
+ .endm
+
+ .macro ssa8 r
+ #ifdef __XTENSA_EB__
+ ssa8b \r
+ #else
+ ssa8l \r
+ #endif
+ .endm
+
+
+ .data
+ .align 4
+
+ /* Define some offsets for robustness and readability.
+ *
+ * WARNING: If you add or subtract to this list, you'll likely impact
+ * code at labels unaligned_copyreg and unaligned_movereg below. */
+
+ #define UNALIGNED_A0 0
+ #define UNALIGNED_A2 4
+ #define UNALIGNED_A3 8
+ #define UNALIGNED_A4 12
+ #define UNALIGNED_A5 16
+ #define UNALIGNED_A6 20
+ #define UNALIGNED_A7 24
+ #define UNALIGNED_SAR 28
+ #define UNALIGNED_TOTAL 32
+
+ unaligned_space:
+ .space UNALIGNED_TOTAL, 0
+ unaligned_scratch:
+ .space 2*4, 0
+
+ .text
+
+ .align 4
+ .global handle_unaligned_kernel
+ handle_unaligned_kernel:
+
+ /* Save some working registers. We also save the original a0
+ * to memory so we don't lose it when a double exception
+ * happens (and overwrites DEPC). A double exception can
+ * reasonably happen in kernel mode with vmalloc memory. */
+
+ movi a0, unaligned_space
+ s32i a2, a0, UNALIGNED_A2 // save a2
+ rsr a2, EPC_1 // load PC of load/store
+ s32i a3, a0, UNALIGNED_A3 // save a3
+ rsr a3, DEPC // load original a0
+ s32i a4, a0, UNALIGNED_A4 // save a4
+ rsr a4, SAR // load original SAR
+ s32i a5, a0, UNALIGNED_A5 // save a5
+ s32i a6, a0, UNALIGNED_A6 // save a6
+ s32i a7, a0, UNALIGNED_A7 // save a7
+ s32i a3, a0, UNALIGNED_A0 // save a0
+ s32i a4, a0, UNALIGNED_SAR // save SAR
+
+ #ifdef DEBUG_UNALIGNMENT_TEST
+ movi a3, unaexc_count
+ l32i a4, a3, 0
+ addi a4, a4, 1
+ s32i a4, a3, 0
+ #endif
+
+ /* Now, identify one of the following load/store instructions.
+
+ xxxx xxxx = imm8 field
+ yyyy = imm4 field
+ ssss = s field
+ tttt = t field
+
+ 23 0
+ -----------------------------
+ L32I xxxx xxxx 0010 ssss tttt 0010
+ L32I.N yyyy ssss tttt 1000
+ L32SI xxxx xxxx 0011 ssss tttt 0010
+ L16UI xxxx xxxx 0001 ssss tttt 0010
+ L16SI xxxx xxxx 1001 ssss tttt 0010
+
+ S32I xxxx xxxx 0110 ssss tttt 0010
+ S32I.N yyyy ssss tttt 1001
+ S16I xxxx xxxx 0101 ssss tttt 0010
+ S32RI xxxx xxxx 1111 ssss tttt 0010
+ -----------------------------
+ ^ ^ ^
+ sub-opcode (NIBBLE_R) -+ | |
+ t field (NIBBLE_T) -----------+ |
+ major opcode (NIBBLE_OP0) --------------+
+
+ * The maximum instruction size is 24 bits. Our strategy
+ * below is to load the complete instruction into a register
+ * so we can interrogate various fields. However, formats and
+ * orientation within the register will differ based on
+ * endianness. For big endian processors, we pack the
+ * instruction words in the most-significant end of the
+ * register. For little endian, we pack the instruction at
+ * the least-significant end. We preserve the original
+ * instruction format per endian ordering (see the ISA chapter
+ * entitled "Instruction Formats and Opcodes").
+
+ * Because endian-different Xtensa processors have different
+ * instruction formats, we define the following constants into
+ * the register to access certain fields of the instruction.
+
+ */
+
+ #if XCHAL_HAVE_BE
+ #define NIBBLE_OP0 28
+ #else
+ #define NIBBLE_OP0 0
+ #endif
+
+ #if XCHAL_HAVE_BE
+ #define NIBBLE_T 24
+ #else
+ #define NIBBLE_T 4
+ #endif
+
+ #if XCHAL_HAVE_BE
+ #define NIBBLE_R 16
+ #else
+ #define NIBBLE_R 12
+ #endif
+
+ /* Big and little endian 16-bit values are located in
+ * different halves of a register. HWORD_START helps to
+ * abstract the notion of extracting a 16-bit value from a
+ * register. */
+
+ #if XCHAL_HAVE_BE
+ #define HWORD_START 16
+ #else
+ #define HWORD_START 0
+ #endif
+
+ /* If the exception address is already aligned on a word
+ * boundary, the exception is spurious, and we just return.
+ * Note that we want to test and return before advancing over
+ * the load or store instruction so the processor can
+ * re-execute the instruction at EPC_1 when the spurious
+ * exception occurred. Also, this handler does not consider
+ * the case of a load/store of a 16-bit value on a halfword
+ * boundary, which would also be spurious. For this 16-bit
+ * value case, this handler ends up loading/storing the value,
+ * and returns. */
+
+ rsr a3, EXCVADDR // read unaligned memory address
+ extui a3, a3, 0, 2 // branch to return if exc is spurious
+ beqz a3, unaligned_return // (if addr is already aligned)
+
+ /* We align the instruction address (EPC_1, not EXCVADDR),
+ * load two words, and shift the bytes such that the next
+ * three instruction bytes are in the most/least-significant
+ * end of a4 for big/little endianness, respectively. Also,
+ * remember narrow instructions are only two bytes.
+ *
+ * The only possible danger of a double exception on the
+ * following l32i instructions is kernel code in vmalloc
+ * memory. The processor was just executing at the EPC_1
+ * address, and indeed, already fetched the instruction. That
+ * guarantees a TLB mapping, which hasn't been replaced by
+ * this unaligned exception handler that uses only static TLB
+ * mappings. However, high-level interrupt handlers might
+ * modify TLB entries, so for the generic case, we register a
+ * TABLE_FIXUP handler here, too. */
+
+ movi a6, find_handler
+ rsr a7, EXCSAVE_1
+ s32i a6, a7, TABLE_FIXUP
+
+ movi a3, ~3
+ and a3, a3, a2 // mask lower bits
+ l32i a4, a3, 0 // load 2 words
+ l32i a5, a3, 4
+ ssa8 a2
+ src_b a4, a4, a5 // a4 has insn bytes
+ movi a6, 0
+ s32i a6, a7, TABLE_FIXUP
+
+ /* We first check for load instructions and branch to handle
+ * them when found. */
+
+ extui a5, a4, NIBBLE_OP0, 4 // load insn.op0 nibble
+
+ #if XCHAL_HAVE_DENSITY
+ _beqi a5, 8, unaligned_loadn // L32I.N
+ #endif
+ _bnei a5, 2, 1f // branch if no more loads
+ extui a6, a4, NIBBLE_R, 4 // load insn.r nibble
+ _beqi a6, 2, unaligned_load // L32I
+ #if XCHAL_HAVE_SPECULATION
+ _beqi a6, 3, unaligned_load // L32SI
+ #endif
+ _beqi a6, 1, unaligned_load // L16UI
+ movi a7, 9
+ _beq a6, a7, unaligned_load // L16SI
+ 1:
+ j check_for_stores
+
+ /* Control comes here when we have a load instruction
+ * accessing an unaligned address. At this point,
+
+ a2 = EPC_1
+ a3 = EPC_1 & ~3
+ a4 = packed instruction nibbles
+ a5, a6, and a7 are temp registers
+
+ * We carefully use only 32-bit load instructions so that this
+ * handler will also work with IRAM (which requires 32-bit
+ * accesses). */
+
+ unaligned_loadn:
+
+ addi a7, a2, 2 // advance over 16-bit l32i.n insn
+ wsr a7, EPC_1 // ...
+ j 3f
+
+ unaligned_load:
+
+ addi a7, a2, 3 // advance over 24-bit load insn
+ wsr a7, EPC_1 // ...
+ 3:
+
+ /* Before accessing memory, we must first register a
+ * TABLE_FIXUP handler. Should this data reside in vmalloc
+ * memory without an existing TLB mapping, a double exception
+ * could occur. */
+
+ movi a5, find_handler
+ rsr a6, EXCSAVE_1
+ rsr a2, EXCVADDR // load unaligned memory address
+ movi a3, ~3 // align memory address
+ and a3, a3, a2 // ...
+ s32i a5, a6, TABLE_FIXUP
+
+ l32i a5, a3, 0 // load 2 words
+ l32i a6, a3, 4
+ ssa8 a2
+ src_b a5, a5, a6 // a5 has word to return
+
+ /* We're done accessing memory, so clear the TABLE_FIXUP
+ * handler. */
+
+ rsr a6, EXCSAVE_1
+ movi a7, 0
+ s32i a7, a6, TABLE_FIXUP
+
+ /* a5 now has 32 bits from the unaligned memory address.
+ * Probe the instruction once again to see if it should be
+ * signed/unsigned 16-bit value instead, and make it so. The
+ * algorithm here is as follows:
+ *
+ * 1. If it's the narrow load, branch ahead.
+ * 2. Otherwise, check the R nibble for L16UI or L16SI.
+ *
+ * Omitting step one is wrong, since we may just be checking
+ * the yyyy field of an L32I.N instruction.
+ */
+
+ extui a6, a4, NIBBLE_OP0, 4 // get insn.op0 nibble
+ _beqi a6, 8, 5f // branch if L32I.N
+ extui a6, a4, NIBBLE_R, 4 // get insn.r nibble
+ _beqi a6, 1, 4f // branch if L16UI
+ movi a7, 9
+ _bne a6, a7, 5f // branch if not L16SI
+
+ extui a5, a5, HWORD_START, 16 // 16-bit, still unsigned
+ movi a6, 16
+ ssr a6 // ssr and ssl are equal when a6=16
+ sll a5, a5 // put the sign bit in the ms-bit
+ sra a5, a5 // copy the sign bit in 31..16
+ j 5f // a5 is now 16-bit, signed
+ 4:
+ extui a5, a5, HWORD_START, 16 // 16-bit, unsigned
+
+ /* a5 now contains the word of interest. Figure out the
+ * destination register and put the value there before
+ * returning. */
+
+ 5: extui a6, a4, NIBBLE_T, 4 // get dst register number
+ movi a7, unaligned_copyreg
+ addx8 a7, a6, a7 // index into jump table
+ jx a7 // jump according to dst reg
+
+ /* The following code is dependent on the working registers we
+ * save at the start of this exception handler. Should
+ * someone add or subtract to that list, the following code
+ * will likely break. */
+
+ #if (UNALIGNED_TOTAL != 32)
+ #error Changes to saved-register list impact the following code
+ #endif
+
+ .align 8
+ unaligned_copyreg:
+ s32i a5, a0, UNALIGNED_A0 // dst=a0
+ _j 1f
+ .align 8
+ mov a1, a5 // dst=a1, fishy??
+ _j 1f
+ .align 8
+ s32i a5, a0, UNALIGNED_A2 // dst=a2
+ _j 1f
+ .align 8
+ s32i a5, a0, UNALIGNED_A3 // dst=a3
+ _j 1f
+ .align 8
+ s32i a5, a0, UNALIGNED_A4 // dst=a4
+ _j 1f
+ .align 8
+ s32i a5, a0, UNALIGNED_A5 // dst=a5
+ _j 1f
+ .align 8
+ s32i a5, a0, UNALIGNED_A6 // dst=a6
+ _j 1f
+ .align 8
+ s32i a5, a0, UNALIGNED_A7 // dst=a7
+ _j 1f
+ .align 8
+ mov a8, a5 // dst=a8
+ _j 1f
+ .align 8
+ mov a9, a5 // dst=a9
+ _j 1f
+ .align 8
+ mov a10,a5 // dst=a10
+ _j 1f
+ .align 8
+ mov a11,a5 // dst=a11
+ _j 1f
+ .align 8
+ mov a12,a5 // dst=a12
+ _j 1f
+ .align 8
+ mov a13,a5 // dst=a13
+ _j 1f
+ .align 8
+ mov a14,a5 // dst=a14
+ _j 1f
+ .align 8
+ mov a15,a5 // dst=a15
+ _j 1f
+
+ /* The dst register (or corresponding memory location)
+ * contains the desired word from the unaligned memory
+ * address, so we can now restore the working registers, and
+ * return. */
+
+ .align 4
+ 1:
+ unaligned_return:
+ l32i a6, a0, UNALIGNED_SAR
+ l32i a2, a0, UNALIGNED_A2
+ l32i a3, a0, UNALIGNED_A3
+ wsr a6, SAR
+ l32i a4, a0, UNALIGNED_A4
+ l32i a5, a0, UNALIGNED_A5
+ l32i a6, a0, UNALIGNED_A6
+ l32i a7, a0, UNALIGNED_A7
+ l32i a0, a0, UNALIGNED_A0
+ rfe
+
+
+ /* After not finding any load instructions, control branches
+ * to here to check for stores. At this point
+
+ a2 = EPC_1
+ a3 = EPC_1 & ~3
+ a4 = packed instruction bytes
+ a5 = insn.op0 nibble
+ a6, a7 = scratch registers
+ */
+
+ check_for_stores:
+ #if XCHAL_HAVE_DENSITY
+ movi a7, 9
+ _beq a5, a7, unaligned_storen // S32I.N
+ #endif
+ _bnei a5, 2, 3f // branch if no more stores
+ extui a6, a4, NIBBLE_R, 4 // load insn.r nibble
+ _beqi a6, 6, unaligned_store // S32I
+ _beqi a6, 5, unaligned_store // S16I
+ #if 0 /* not implemented in hardware yet */
+ movi a7, 15
+ _beq a6, a7, unaligned_store // S32RI
+ #endif
+
+ /* We didn't find a valid load or store instruction. The
+ * exception is spurious, so just return. Note that we
+ * haven't yet advanced EPC_1 to skip the current instruction,
+ * so the processor will re-execute it. */
+
+ j unaligned_return
+
+ /* Control comes here when we have a store instruction
+ * accessing an unaligned address. Advance EPC over the store
+ * instruction. */
+
+ unaligned_storen:
+ addi a7, a2, 2 // advance over 16-bit s32i.n insn
+ wsr a7, EPC_1 // ...
+ j 3f
+
+ unaligned_store:
+ addi a7, a2, 3 // advance over 24-bit store insn
+ wsr a7, EPC_1 // ...
+ 3:
+
+ /* First, we must identify the register in which the data
+ * resides, and copy it to a5. */
+
+ extui a5, a4, NIBBLE_T, 4 // src reg number
+ movi a7, unaligned_movereg
+ addx8 a7, a5, a7 // index into jump table
+ jx a7 // jump according to src reg
+
+ /* The following code is dependent on the working registers we
+ * save at the start of this exception handler. Should
+ * someone add or subtract to that list, the following code
+ * will likely break. */
+
+ #if (UNALIGNED_TOTAL != 32)
+ #error Changes to saved-register list impact the following code
+ #endif
+
+ .align 8
+ unaligned_movereg:
+ l32i a5, a0, UNALIGNED_A0 // src=a0
+ _j 1f
+ .align 8
+ mov a1, a5 // src=a1, fishy??
+ _j 1f
+ .align 8
+ l32i a5, a0, UNALIGNED_A2 // src=a2
+ _j 1f
+ .align 8
+ l32i a5, a0, UNALIGNED_A3 // src=a3
+ _j 1f
+ .align 8
+ l32i a5, a0, UNALIGNED_A4 // src=a4
+ _j 1f
+ .align 8
+ l32i a5, a0, UNALIGNED_A5 // src=a5
+ _j 1f
+ .align 8
+ l32i a5, a0, UNALIGNED_A6 // src=a6
+ _j 1f
+ .align 8
+ l32i a5, a0, UNALIGNED_A7 // src=a7
+ _j 1f
+ .align 8
+ mov a5, a8 // src=a8
+ _j 1f
+ .align 8
+ mov a5, a9 // src=a9
+ _j 1f
+ .align 8
+ mov a5, a10 // src=a10
+ _j 1f
+ .align 8
+ mov a5, a11 // src=a11
+ _j 1f
+ .align 8
+ mov a5, a12 // src=a12
+ _j 1f
+ .align 8
+ mov a5, a13 // src=a13
+ _j 1f
+ .align 8
+ mov a5, a14 // src=a14
+ _j 1f
+ .align 8
+ mov a5, a15 // src=a15
+ _j 1f
+
+ /* a5 now contains the data to save. Now, store 32-bits or
+ * 16-bits? To local IRAM or not? Several cases to check...
+ *
+ * If the store address is not in local IRAM, we simply store
+ * 8 bits at a time, until the data is written. Local IRAM
+ * requires 32-bit stores, so that case is more complex.
+ * Furthermore, Xtensa processors may have multiple IRAMs at
+ * noncontiguous addresses (in theory, at least), so we use
+ * the compile-time HAL to determine (1) how many exist, (2)
+ * where each is located, and (3) the size of each. Note,
+ * however, that the T1050 release was limited to a single
+ * IRAM, and the following release might support two IRAMs.
+ *
+ * The purpose of the following iramchk macro is to provide
+ * the infrastructure to support an arbitrary number of IRAMs
+ * of various sizes at noncontiguous addresses. As of this
+ * writing, the code supports only two local IRAMs (this
+ * should last a long while). Adding support for additional
+ * IRAMs is trivial with the iramchk macro. Just add new
+ * conditional blocks similar to the (XCHAL_NUM_INSTRAM >= 2)
+ * block below.
+ *
+ * The iramchk macro has 4 arguments (all are registers):
+ *
+ * iram_base = base virtual address of the IRAM
+ * iram_size = size (in bytes) of the IRAM
+ * first_byte = first memory address of the store
+ * last_byte = last memory address of the store
+ *
+ * The iram_{base,size} arguments should come straight from
+ * the compile-time HAL. The first and last byte addresses of
+ * the store help determine exactly whether the store resides
+ * within, even partially, the IRAM. A store may be 2 or 4
+ * bytes, so the last_byte parameter is not fixed relative to
+ * the first_byte.
+ *
+ * Macro iramchk modifies only the iram_size register.
+ *
+ * If iramchk finds a store residing at all in IRAM, it will
+ * branch to label iram_store. Otherwise, it falls through.
+ */
+
+ .macro iramchk iram_base, iram_size, first_byte, last_byte
+ bltu \last_byte, \iram_base, 0f // check next
+ add \iram_size, \iram_size, \iram_base
+ bltu \first_byte, \iram_size, iram_store
+ 0:
+ .endm
+
+ .align 4
+ 1:
+ rsr a2, EXCVADDR // load unaligned memory address
+ movi a3, ~3 // align memory address
+ and a3, a3, a2 // ...
+
+ #if XCHAL_NUM_INSTRAM
+
+ /* For our IRAM comparisons, we can assume the store is 4
+ * bytes. 16-bit stores to IRAM result in a Load Store Error
+ * exception. */
+
+ movi a3, 3 // stores to IRAM are 4 bytes
+ add a3, a2, a3 // a3 <-- addr of last byte of store
+ movi a6, XCHAL_INSTRAM0_VADDR
+ movi a7, XCHAL_INSTRAM0_SIZE
+ iramchk a6, a7, a2, a3
+
+ #if (XCHAL_NUM_INSTRAM >= 2)
+ movi a6, XCHAL_INSTRAM1_VADDR
+ movi a7, XCHAL_INSTRAM1_SIZE
+ iramchk a6, a7, a2, a3
+ #endif
+
+ #if (XCHAL_NUM_INSTRAM >= 3)
+ #error More than two local IRAMs not handled!
+ /* To handle more IRAMs, you really just need to add more conditionals
+ * like the one above. */
+ #endif
+
+ /* Control falls through to here when we're done checking
+ * whether the store falls within an IRAM. In this case, we
+ * have a non-IRAM store, so just jump to that code. */
+
+ j noniram_store
+
+ /* The iramchk macro jumps to here when we have a store to a
+ * local IRAM. Worst case is that two IRAM words need
+ * modification. The strategy here is to copy the two words
+ * from IRAM to some temp space where we know we can use 8-bit
+ * stores, then use common code below to insert the store data
+ * appropriately, then copy the data back into IRAM. Recall
+ * that
+ *
+ * a2 = store address
+ * a4 = packed instruction bytes
+ * a5 = data to store
+ * a3, a6, a7 = working registers
+ */
+
+ iram_store:
+
+ /* Register a fixup handler since the IRAM may not be mapped.
+ * However, it usually is mapped through a wired way. */
+
+ movi a3, find_handler
+ rsr a6, EXCSAVE_1
+ s32i a3, a6, TABLE_FIXUP
+
+ movi a3, ~3
+ and a3, a3, a2
+ movi a2, unaligned_scratch
+ l32i a6, a3, 0 // load first word
+ l32i a7, a3, 4 // load second word
+ s32i a6, a2, 0 // save first word
+ s32i a7, a2, 4 // save second word
+
+ /* Mimic the odd IRAM address in the unaligned_scratch space
+ * for the common code below. Also, set a7 as a flag to the
+ * common code so it knows it must recopy the data back to
+ * IRAM. */
+
+ rsr a3, EXCVADDR
+ extui a3, a3, 0, 2 // get 2 lsbits
+ or a2, a2, a3 // copy over 2 lsbits to scratch addr
+ movi a7, 1 // set IRAM flag
+ j 1f
+
+ #endif /* XCHAL_NUM_INSTRAM */
+
+ noniram_store:
+ movi a7, 0 // clear IRAM flag
+
+ /* These stores might not be mapped in the tlb, so register a
+ * TABLE_FIXUP handler. */
+
+ movi a3, find_handler
+ rsr a6, EXCSAVE_1
+ s32i a3, a6, TABLE_FIXUP
+
+ /* Test the instruction again to see whether we should store 2
+ * or 4 bytes. We use this algorithm to compute whether we
+ * have a 16-bit store:
+ *
+ * 1. If it's the narrow store, branch ahead.
+ * 2. Otherwise, check the R nibble for S16I.
+ *
+ * Omitting step one is wrong, since we may just be checking
+ * the yyyy field of an S32I.N instruction.
+ */
+
+ 1: extui a6, a4, NIBBLE_OP0, 4 // get insn.op0 nibble
+ addi a6, a6, -1 // no spare regs, check for 9
+ _beqi a6, 9-1, 3f // branch if S32I.N
+ extui a6, a4, NIBBLE_R, 4
+ _beqi a6, 5, 2f // branch if S16I
+ 3:
+
+ #if XCHAL_HAVE_BE
+ s8i a5, a2, 3 // Store each byte in big-endian order.
+ srli a5, a5, 8 // Note the entry point at 2: for S16I.
+ s8i a5, a2, 2
+ srli a5, a5, 8
+ 2: s8i a5, a2, 1
+ srli a5, a5, 8
+ s8i a5, a2, 0
+ #else
+ s8i a5, a2, 0 // Store each byte in little-endian
+ addi a2, a2, 1 // order. Note the entry point at 2:
+ srli a5, a5, 8 // for S16I. The store32 case is
+ // slightly more complex than needed
+ s8i a5, a2, 0 // to accommodate and simplify the S16I
+ addi a2, a2, 1 // case in a general way.
+ srli a5, a5, 8
+
+ 2: s8i a5, a2, 0
+ addi a2, a2, 1
+ srli a5, a5, 8
+
+ s8i a5, a2, 0
+ #endif
+
+ /* Now check the IRAM flag. If set, we came from the
+ * iram_store code, and we must copy the data back to IRAM.
+ * Otherwise, the data is already stored, and we just branch
+ * to clear TABLE_FIXUP, and return. */
+
+ beqz a7, 3f
+
+ rsr a2, EXCVADDR
+ movi a3, unaligned_scratch
+ movi a6, ~3
+ and a2, a2, a6
+ l32i a6, a3, 0
+ l32i a7, a3, 4
+ s32i a6, a2, 0
+ s32i a7, a2, 4
+ 3:
+ rsr a7, EXCSAVE_1
+ movi a6, 0
+ s32i a6, a7, TABLE_FIXUP
+
+ j unaligned_return
+
+ #endif /* XCHAL_UNALIGNED_LOAD_EXCEPTION */
Index: traps.c
===================================================================
RCS file: /cvsroot/xtensa/linux/arch/xtensa/kernel/traps.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** traps.c 11 Nov 2002 22:25:25 -0000 1.4
--- traps.c 28 Nov 2002 00:40:49 -0000 1.5
***************
*** 51,54 ****
--- 51,55 ----
extern asmlinkage void handle_double_2nd_level_miss (void);
extern asmlinkage void handle_2nd_level_miss (void);
+ extern asmlinkage void handle_unaligned_kernel (void);
extern asmlinkage void do_page_fault (void);
#if (XCHAL_CP_MASK & 1)
***************
*** 83,87 ****
} _exceptions_t;
! _exceptions_t exc_translations [] = {
{ XCHAL_EXCCAUSE_ILLEGAL_INSTRUCTION, "Illegal Instruction" },
--- 84,88 ----
} _exceptions_t;
! static _exceptions_t exc_translations [] = {
{ XCHAL_EXCCAUSE_ILLEGAL_INSTRUCTION, "Illegal Instruction" },
***************
*** 256,263 ****
--- 257,312 ----
+ #if XCHAL_UNALIGNED_LOAD_EXCEPTION
+ static void
+ handle_unaligned_user (struct pt_regs *regs)
+ {
+ struct task_struct *tsk = current;
+ siginfo_t info;
+
+ if (user_mode(regs)) { /* Sanity check */
+
+ tsk->thread.bad_vaddr = regs->excvaddr;
+ tsk->thread.error_code = -3; /* unaligned access, load or store */
+ #if 1
+ printk("handle_unaligned_user(): sending SIGSEGV to %s (pid=%d)"
+ " for illegal access\n"
+ "(load or store) to %08x (pc == %08x, ra == %08x)\n",
+ tsk->comm,
+ tsk->pid,
+ regs->excvaddr,
+ regs->pc,
+ /* XTFIXME: Shouldn't the 0xC0000000 value come from the CHAL? */
+ (regs->aregs[0] & 0x3FFFFFFF) | (regs->pc & 0xC0000000) );
+ #endif
+ info.si_signo = SIGSEGV;
+ info.si_errno = 0;
+ info.si_code = SEGV_ACCERR;
+ info.si_addr = (void *) regs->excvaddr;
+ force_sig_info(SIGSEGV, &info, tsk);
+ }
+ else {
+
+ /* Control should not get here, because we register a
+ * first-level handler in traps.c to catch all
+ * unaligned kernel accesses. This code is here,
+ * however, to assist in debugging. Just "unregister"
+ * the kernel handler, and you'll get this debugging
+ * information. */
+
+ printk ("handle_unaligned_user(): tsk happened to be %s\n"
+ " access addr=0x%.8x, pc=%08x, ra=%08x\n",
+ tsk->comm, regs->excvaddr, regs->pc,
+ (regs->aregs[0] & 0x3FFFFFFF) | (regs->pc & 0xC0000000) );
+ panic ("Giving up on unaligned kernel access\n");
+ }
+ }
+ #endif
+
static void
do_debug(struct pt_regs *regs)
{
+ #if 0
int i;
+ #endif
#if 0
***************
*** 365,368 ****
--- 414,422 ----
set_c_except_handler(i, do_unhandled);
}
+
+ #if XCHAL_UNALIGNED_LOAD_EXCEPTION
+ set_except_vector(EXC_CODE_KERNEL + XCHAL_EXCCAUSE_LOAD_STORE_ALIGNMENT, handle_unaligned_kernel);
+ set_c_except_handler (XCHAL_EXCCAUSE_LOAD_STORE_ALIGNMENT, handle_unaligned_user);
+ #endif
/* Now change the one for system calls. */
|