From: SUGIOKA T. <su...@it...> - 2002-04-15 11:40:40
This is new __copy_user implementation that improves performance in some case (for example large file transfer by FTP/HTTP). I will commit to current and 2.4 branch if there is no objection. 2002-04-16 SUGIOKA Toshinobu <su...@it...> * arch/sh/mm/copy_page.S (__copy_user): New function. * include/asm-sh/uaccess.h (__copy_user): Remove macro, declared as external function. Index: arch/sh/mm/copy_page.S =================================================================== RCS file: /cvsroot/linuxsh/linux/arch/sh/mm/copy_page.S,v retrieving revision 1.1.1.1.2.2 diff -u -r1.1.1.1.2.2 copy_page.S --- arch/sh/mm/copy_page.S 3 Apr 2002 02:33:16 -0000 1.1.1.1.2.2 +++ arch/sh/mm/copy_page.S 15 Apr 2002 11:19:44 -0000 @@ -129,3 +129,268 @@ nop #endif .L4096: .word 4096 +/* + * __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n); + * Return the number of bytes NOT copied + */ +#define EX(...) \ + 9999: __VA_ARGS__ ; \ + .section __ex_table, "a"; \ + .long 9999b, 6000f ; \ + .previous +ENTRY(__copy_user) + tst r6,r6 ! Check explicitly for zero + bf 1f + rts + mov #0,r0 ! normal return +1: + mov.l r10,@-r15 + mov.l r9,@-r15 + mov.l r8,@-r15 + mov r4,r3 + add r6,r3 ! last destination address + mov #12,r0 ! Check if small number of bytes + cmp/gt r0,r6 + bt 2f + bra L_cleanup_loop + nop +2: + neg r5,r0 ! Calculate bytes needed to align source + add #4,r0 + and #3,r0 + tst r0,r0 + bt L_jump + mov r0,r1 + +L_loop1: + ! Copy bytes to align source +EX( mov.b @r5+,r0 ) + dt r1 +EX( mov.b r0,@r4 ) + add #-1,r6 + bf/s L_loop1 + add #1,r4 + +L_jump: + mov r6,r2 ! Calculate number of longwords to copy + shlr2 r2 + tst r2,r2 + bt L_cleanup + + mov r4,r0 ! Jump to appropriate routine + and #3,r0 + mov r0,r1 + shll2 r1 + mova L_jump_tbl,r0 + mov.l @(r0,r1),r1 + jmp @r1 + nop + + .align 2 +L_jump_tbl: + .long L_dest00 + .long L_dest01 + .long L_dest10 + .long L_dest11 + +! 
Destination = 00 + +L_dest00: + mov r2,r7 + shlr2 r7 + shlr r7 + tst r7,r7 + mov #7,r0 + bt/s 1f + and r0,r2 + .align 2 +2: +EX( mov.l @r5+,r0 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r9 ) +EX( mov.l @r5+,r10 ) +EX( mov.l r0,@r4 ) +EX( mov.l r8,@(4,r4) ) +EX( mov.l r9,@(8,r4) ) +EX( mov.l r10,@(12,r4) ) +EX( mov.l @r5+,r0 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r9 ) +EX( mov.l @r5+,r10 ) + dt r7 +EX( mov.l r0,@(16,r4) ) +EX( mov.l r8,@(20,r4) ) +EX( mov.l r9,@(24,r4) ) +EX( mov.l r10,@(28,r4) ) + bf/s 2b + add #32,r4 + tst r2,r2 + bt L_cleanup +1: + mov.l @r5+,r0 + dt r2 + mov.l r0,@r4 + bf/s 1b + add #4,r4 + + bra L_cleanup + nop + +! Destination = 10 + +L_dest10: + mov r2,r7 + shlr2 r7 + shlr r7 + tst r7,r7 + mov #7,r0 + bt/s 1f + and r0,r2 +2: + dt r7 +#ifdef __LITTLE_ENDIAN__ +EX( mov.l @r5+,r0 ) +EX( mov.l @r5+,r1 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r9 ) +EX( mov.l @r5+,r10 ) +EX( mov.w r0,@r4 ) + add #2,r4 + xtrct r1,r0 + xtrct r8,r1 + xtrct r9,r8 + xtrct r10,r9 + +EX( mov.l r0,@r4 ) +EX( mov.l r1,@(4,r4) ) +EX( mov.l r8,@(8,r4) ) +EX( mov.l r9,@(12,r4) ) + +EX( mov.l @r5+,r1 ) +EX( mov.l @r5+,r8 ) +EX( mov.l @r5+,r0 ) + xtrct r1,r10 + xtrct r8,r1 + xtrct r0,r8 + shlr16 r0 +EX( mov.l r10,@(16,r4) ) +EX( mov.l r1,@(20,r4) ) +EX( mov.l r8,@(24,r4) ) +EX( mov.w r0,@(28,r4) ) + bf/s 2b + add #30,r4 +#else +EX( mov.l @(28,r5),r0 ) +EX( mov.l @(24,r5),r8 ) +EX( mov.l @(20,r5),r9 ) +EX( mov.l @(16,r5),r10 ) +EX( mov.w r0,@(30,r4) ) + add #-2,r4 + xtrct r8,r0 + xtrct r9,r8 + xtrct r10,r9 +EX( mov.l r0,@(28,r4) ) +EX( mov.l r8,@(24,r4) ) +EX( mov.l r9,@(20,r4) ) + +EX( mov.l @(12,r5),r0 ) +EX( mov.l @(8,r5),r8 ) + xtrct r0,r10 +EX( mov.l @(4,r5),r9 ) + mov.l r10,@(16,r4) +EX( mov.l @r5,r10 ) + xtrct r8,r0 + xtrct r9,r8 + xtrct r10,r9 +EX( mov.l r0,@(12,r4) ) +EX( mov.l r8,@(8,r4) ) + swap.w r10,r0 +EX( mov.l r9,@(4,r4) ) +EX( mov.w r0,@(2,r4) ) + + add #32,r5 + bf/s 2b + add #34,r4 +#endif + tst r2,r2 + bt L_cleanup + +1: ! 
Read longword, write two words per iteration +EX( mov.l @r5+,r0 ) + dt r2 +#ifdef __LITTLE_ENDIAN__ +EX( mov.w r0,@r4 ) + shlr16 r0 +EX( mov.w r0,@(2,r4) ) +#else +EX( mov.w r0,@(2,r4) ) + shlr16 r0 +EX( mov.w r0,@r4 ) +#endif + bf/s 1b + add #4,r4 + + bra L_cleanup + nop + +! Destination = 01 or 11 + +L_dest01: +L_dest11: + ! Read longword, write byte, word, byte per iteration +EX( mov.l @r5+,r0 ) + dt r2 +#ifdef __LITTLE_ENDIAN__ +EX( mov.b r0,@r4 ) + shlr8 r0 + add #1,r4 +EX( mov.w r0,@r4 ) + shlr16 r0 +EX( mov.b r0,@(2,r4) ) + bf/s L_dest01 + add #3,r4 +#else +EX( mov.b r0,@(3,r4) ) + shlr8 r0 + swap.w r0,r7 +EX( mov.b r7,@r4 ) + add #1,r4 +EX( mov.w r0,@r4 ) + bf/s L_dest01 + add #3,r4 +#endif + +! Cleanup last few bytes +L_cleanup: + mov r6,r0 + and #3,r0 + tst r0,r0 + bt L_exit + mov r0,r6 + +L_cleanup_loop: +EX( mov.b @r5+,r0 ) + dt r6 +EX( mov.b r0,@r4 ) + bf/s L_cleanup_loop + add #1,r4 + +L_exit: + mov #0,r0 ! normal return +5000: + +# Exception handler: +.section .fixup, "ax" +6000: + mov.l 8000f,r1 + mov r3,r0 + jmp @r1 + sub r4,r0 + .align 2 +8000: .long 5000b + +.previous + mov.l @r15+,r8 + mov.l @r15+,r9 + rts + mov.l @r15+,r10 Index: include/asm-sh/uaccess.h =================================================================== RCS file: /cvsroot/linuxsh/linux/include/asm-sh/uaccess.h,v retrieving revision 1.1.1.1.2.2 diff -u -r1.1.1.1.2.2 uaccess.h --- include/asm-sh/uaccess.h 29 Mar 2002 00:01:08 -0000 1.1.1.1.2.2 +++ include/asm-sh/uaccess.h 15 Apr 2002 11:19:44 -0000 @@ -244,42 +244,7 @@ /* Generic arbitrary sized copy. */ /* Return the number of bytes NOT copied */ -/* XXX: should be such that: 4byte and the rest. 
*/ -static __inline__ __kernel_size_t -__copy_user(void *__to, const void *__from, __kernel_size_t __n) -{ - unsigned long __dummy, _f, _t; - __kernel_size_t res; - - if ((res = __n)) - __asm__ __volatile__( - "9:\n\t" - "mov.b @%2+, %1\n\t" - "dt %0\n" - "1:\n\t" - "mov.b %1, @%3\n\t" - "bf/s 9b\n\t" - " add #1, %3\n" - "2:\n" - ".section .fixup,\"ax\"\n" - "3:\n\t" - "mov.l 5f, %1\n\t" - "jmp @%1\n\t" - " add #1, %0\n\t" - ".balign 4\n" - "5: .long 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .balign 4\n" - " .long 9b,2b\n" - " .long 1b,3b\n" - ".previous" - : "=r" (res), "=&z" (__dummy), "=r" (_f), "=r" (_t) - : "2" (__from), "3" (__to), "0" (res) - : "memory", "t"); - - return res; -} +extern __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n); #define copy_to_user(to,from,n) ({ \ void *__copy_to = (void *) (to); \ ---- SUGIOKA Toshinobu |