| /* Copyright (C) 2005, 2007 Free Software Foundation, Inc. |
| Contributed by Sunnorth |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published |
| by the Free Software Foundation; either version 3, or (at your |
| option) any later version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
| License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* Register aliases used by the routines in this file. |
| ra is the register the routines return through ("br ra"). |
| a0 and a1 carry the two operands; a2 and a3 are defined but are not |
| referenced by any instruction visible in this file. */ |
| #define ra r3 |
| #define a0 r4 |
| #define a1 r5 |
| #define a2 r6 |
| #define a3 r7 |
| /* NOTE(review): v0 appears only in the header comments; the code |
| visible here leaves its results in r4, not r23 -- confirm intent. */ |
| #define v0 r23 |
| |
| /* Scratch registers: written freely by the routines below and never |
| saved or restored by them. */ |
| #define t0 r8 |
| #define t1 r9 |
| #define t2 r10 |
| #define t3 r11 |
| #define t4 r22 |
| |
| #ifndef __pic__ |
| #if !defined(L_mulsi3) && !defined(L_divsi3) |
| .text |
| .global _flush_cache |
| /* _flush_cache |
| In: r4 = start address of the range. |
| r5 = size; it is shifted right by 4 to obtain the number of |
| 16-byte steps (presumably a byte count -- confirm with callers). |
| Clobbers: r8, r9, r10 and sr0. |
| The score3 build returns immediately. Otherwise the range is walked |
| twice in 16-byte steps, first with cache op 0xe and then with cache |
| op 0x2. Between the two passes, bits 3 and 2 of cr4 select two extra |
| cache ops (0xc and 0x4) that are issued with address register r10 = 0. |
| r10 is loaded before either bit is tested so that the second op never |
| runs with a stale r10 when only the LIM bit is set. */ |
| #ifdef __score3__ |
| _flush_cache: |
| br r3 |
| #else |
| _flush_cache: |
| srli r9, r5, 4 # r9 = number of 16-byte steps |
| mv r8, r4 # r8 = running address |
| mtsr r9, sr0 # presumably the counter that bcnz counts down |
| 1: |
| cache 0xe, [r8, 0] # write back and invalidate dcache |
| addi r8, 16 |
| bcnz 1b |
| mfcr r8, cr4 |
| ldi r10, 0 # address operand for both cache ops below |
| bittst! r8, 0x3 # if LDM is enabled, write back LDM |
| beq! 6f |
| cache 0xc, [r10, 0] |
| 6: |
| bittst! r8, 0x2 # if LIM is enabled, refill it |
| beq! 7f |
| cache 0x4, [r10, 0] |
| 7: |
| #nop! |
| #nop! |
| #nop! |
| #nop! |
| #nop! |
| mv r8, r4 # restart from the beginning of the range |
| mtsr r9, sr0 # reload the step count for the second pass |
| 2: |
| cache 0x2, [r8, 0] # invalidate and unlock icache |
| #nop! |
| #nop! |
| #nop! |
| #nop! |
| #nop! |
| addi r8, 16 |
| bcnz 2b |
| br r3 |
| #endif |
| #endif |
| |
| /* FUNCTION |
| (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1); |
| The product is left in r4. |
| REGISTERS: |
| uses t0 and t1 (t1 accumulates the product) |
| modifies a0 |
| a1 -> becomes 0 |
| NOTE: |
| a plain shift-and-add loop seems to give the best performance here. */ |
| |
| #ifdef L_mulsi3 |
| .text |
| .global __mulsi3 |
| .global __umulsi3 |
| /* 32x32 -> 32 multiplication by shift-and-add. |
| In: a0 = multiplicand, a1 = multiplier. |
| Out: r4 = low 32 bits of the product. |
| Clobbers: t0, t1, a0, a1 (a1 is zero on exit). |
| The signed and unsigned entry points share one body. */ |
| .ent __mulsi3 |
| __umulsi3: |
| __mulsi3: |
| li t1, 0 # running sum |
| 1: |
| andri.c t0, a1, 1 # test the lowest multiplier bit |
| srli a1, a1, 1 # consume that bit |
| beq 2f # bit was clear: nothing to add |
| add t1, t1, a0 # sum += current multiplicand |
| 2: |
| slli a0, a0, 1 # multiplicand doubles for the next bit |
| cmpi.c a1, 0 |
| bne 1b # loop while multiplier bits remain |
| mv r4, t1 # hand the product back in r4 |
| br ra |
| .end __mulsi3 |
| #endif /* L_mulsi3 */ |
| |
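| /* FUNCTION |
| UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1)); |
| INT32 (v0) = __divsi3 (INT32 (a0), INT32 (a1)); |
| UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1)); |
| INT32 (v0) = __modsi3 (INT32 (a0), INT32 (a1)); |
| DESCRIPTION |
| performs 32-bit division/modulo; the result is left in r4. |
| REGISTERS |
| used t0 bit-index |
| t4 quotient |
| t2 sign of the result (signed entry points) |
| t3 saved return address (all but __udivsi3) |
| modified a0 becomes the remainder |
| a1 receives a copy of the remainder on exit */ |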
| #ifdef L_divsi3 |
| .text |
| .global __udivsi3 |
| .global __umodsi3 |
| .global __divsi3 |
| .global __modsi3 |
| |
| /* unsigned division |
| In: a0 = dividend, a1 = divisor. |
| Out: r4 = quotient, a0 = a1 = remainder. |
| Clobbers: t0, t4. |
| Shift-and-subtract: the divisor and a one-bit mask (t0) are shifted |
| left together until the divisor passes the dividend or its top bit |
| becomes set; both are then shifted back down one step at a time, |
| subtracting the divisor and setting the mask bit in the quotient |
| wherever the divisor still fits. bcc is used as "a0 < a1, unsigned". |
| A zero divisor skips both loops: r4 = 0 and a1 = a0. */ |
| .ent __udivsi3 |
| __udivsi3: |
| li t4, 0 # quotient accumulator |
| cmpi.c a1, 0 |
| beq __uds_exit # divisor is zero: return 0, remainder = a0 |
| li t0, 1 # quotient bit mask |
| blt __uds_ok # still the cmpi.c flags: top bit already set, skip normalizing |
| __uds_normalize: |
| cmp.c a0, a1 |
| bcc __uds_ok # divisor now exceeds dividend: stop shifting |
| slli a1, a1, 1 |
| slli t0, t0, 1 |
| cmpi.c a1, 0 |
| bge __uds_normalize # keep going until the top bit of the divisor is set |
| __uds_ok: |
| __uds_loop2: |
| cmp.c a0, a1 |
| bcc __uds_loop3 # divisor does not fit at this position |
| sub a0, a0, a1 |
| or t4, t4, t0 # record this quotient bit |
| __uds_loop3: |
| srli t0, t0, 1 |
| srli a1, a1, 1 |
| cmpi.c t0, 0 |
| bne __uds_loop2 # done once the mask has been shifted out |
| __uds_exit: |
| mv a1, a0 # remainder is also handed back in a1 |
| mv r4, t4 # quotient is returned in r4 |
| br ra |
| .end __udivsi3 |
| |
| /* unsigned modulus |
| In: a0 = dividend, a1 = divisor. |
| Out: r4 = remainder. |
| Calls __udivsi3 and returns the remainder that it leaves in a1. |
| t3 keeps the return address across the call. */ |
| .ent __umodsi3 |
| __umodsi3: |
| mv t3, ra # the call below replaces ra |
| jl __udivsi3 |
| mv r4, a1 # remainder, not quotient, is the result |
| br t3 |
| .end __umodsi3 |
| |
| /* abs and div |
| Internal helper (no .global): replaces a negative a0 and a negative a1 |
| by their negations, then jumps into __udivsi3. __udivsi3 returns |
| through ra, i.e. straight to whoever called __orgsi3. */ |
| .ent __orgsi3 |
| __orgsi3: |
| cmpi.c a0, 0 |
| bge __orgsi3_a0p # dividend already non-negative |
| neg a0, a0 |
| __orgsi3_a0p: |
| cmpi.c a1, 0 |
| bge __udivsi3 # divisor already non-negative: divide right away |
| neg a1, a1 |
| b __udivsi3 # goto udivsi3 |
| .end __orgsi3 |
| |
| /* signed division |
| In: a0 = dividend, a1 = divisor. |
| Out: r4 = quotient. |
| t2 = a0 ^ a1 is negative exactly when the operand signs differ; in |
| that case the unsigned quotient from __orgsi3 is negated. |
| __divsi3_adjust is also entered from __modsi3 with its own t2, t3 |
| and r4. */ |
| .ent __divsi3 |
| __divsi3: |
| mv t3, ra # the call below replaces ra |
| xor t2, a0, a1 # sign of the quotient |
| jl __orgsi3 |
| __divsi3_adjust: |
| cmpi.c t2, 0 |
| bge __divsi3_exit # result stays non-negative |
| neg r4, r4 |
| __divsi3_exit: |
| br t3 |
| .end __divsi3 |
| |
| /* signed modulus |
| In: a0 = dividend, a1 = divisor. |
| Out: r4 = remainder, negated when the dividend was negative. |
| t2 holds the original dividend so that the shared sign fix-up at |
| __divsi3_adjust keys off the sign of the dividend; that code also |
| performs the return through t3. */ |
| .ent __modsi3 |
| __modsi3: |
| mv t3, ra # the call below replaces ra |
| mv t2, a0 # sign of the remainder follows the dividend |
| jl __orgsi3 |
| mv r4, a1 # take the remainder left in a1 by __udivsi3 |
| b __divsi3_adjust |
| .end __modsi3 |
| |
| #endif /* L_divsi3 */ |
| #else /* -fPIC */ |
| #if !defined(L_mulsi3) && !defined(L_divsi3) |
| .set pic |
| .text |
| .global _flush_cache |
| /* _flush_cache (PIC build) |
| In: r4 = start address of the range. |
| r5 = size; it is shifted right by 4 to obtain the number of |
| 16-byte steps (presumably a byte count -- confirm with callers). |
| Clobbers: r8, r9, r10 and sr0. |
| The score3 build returns immediately. Otherwise the range is walked |
| twice in 16-byte steps, first with cache op 0xe and then with cache |
| op 0x2. Between the two passes, bits 3 and 2 of cr4 select two extra |
| cache ops (0xc and 0x4) that are issued with address register r10 = 0. |
| r10 is loaded before either bit is tested so that the second op never |
| runs with a stale r10 when only the LIM bit is set. */ |
| #ifdef __score3__ |
| _flush_cache: |
| br r3 |
| #else |
| _flush_cache: |
| addi r0, -8 # pic used |
| .cpload r29 # pic used |
| srli r9, r5, 4 # r9 = number of 16-byte steps |
| mv r8, r4 # r8 = running address |
| mtsr r9, sr0 # presumably the counter that bcnz counts down |
| 1: |
| cache 0xe, [r8, 0] # write back and invalidate dcache |
| addi r8, 16 |
| bcnz 1b |
| mfcr r8, cr4 |
| ldi r10, 0 # address operand for both cache ops below |
| bittst! r8, 0x3 # if LDM is enabled, write back LDM |
| beq! 6f |
| cache 0xc, [r10, 0] |
| 6: |
| bittst! r8, 0x2 # if LIM is enabled, refill it |
| beq! 7f |
| cache 0x4, [r10, 0] |
| 7: |
| #nop! |
| #nop! |
| #nop! |
| #nop! |
| #nop! |
| mv r8, r4 # restart from the beginning of the range |
| mtsr r9, sr0 # reload the step count for the second pass |
| 2: |
| cache 0x2, [r8, 0] # invalidate and unlock icache |
| #nop! |
| #nop! |
| #nop! |
| #nop! |
| #nop! |
| addi r8, 16 |
| bcnz 2b |
| .cprestore r0, 12 # pic used |
| addi r0, 8 # pic used |
| br r3 |
| #endif |
| #endif |
| |
| /* FUNCTION |
| (U) INT32 v0 = __mulsi3 ((U) INT32 a0, (U) INT32 a1); |
| The product is left in r4. |
| REGISTERS: |
| uses t0 and t1 (t1 accumulates the product) |
| modifies a0 |
| a1 -> becomes 0 |
| NOTE: |
| a plain shift-and-add loop seems to give the best performance here. */ |
| |
| #ifdef L_mulsi3 |
| .set pic |
| .text |
| .global __mulsi3 |
| .global __umulsi3 |
| /* 32x32 -> 32 multiplication by shift-and-add (PIC build). |
| In: a0 = multiplicand, a1 = multiplier. |
| Out: r4 = low 32 bits of the product. |
| Clobbers: t0, t1, a0, a1 (a1 is zero on exit). |
| The signed and unsigned entry points share one body. */ |
| .ent __mulsi3 |
| __umulsi3: |
| __mulsi3: |
| addi r0, -8 # pic used |
| .cpload r29 # pic used |
| li t1, 0 # running sum |
| 1: |
| andri.c t0, a1, 1 # test the lowest multiplier bit |
| srli a1, a1, 1 # consume that bit |
| beq 2f # bit was clear: nothing to add |
| add t1, t1, a0 # sum += current multiplicand |
| 2: |
| slli a0, a0, 1 # multiplicand doubles for the next bit |
| cmpi.c a1, 0 |
| bne 1b # loop while multiplier bits remain |
| mv r4, t1 # hand the product back in r4 |
| .cprestore r0, 12 # pic used |
| addi r0, 8 # pic used |
| br ra |
| .end __mulsi3 |
| #endif /* L_mulsi3 */ |
| |
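| /* FUNCTION |
| UINT32 (v0) = __udivsi3 (UINT32 (a0), UINT32 (a1)); |
| INT32 (v0) = __divsi3 (INT32 (a0), INT32 (a1)); |
| UINT32 (v0) = __umodsi3 (UINT32 (a0), UINT32 (a1)); |
| INT32 (v0) = __modsi3 (INT32 (a0), INT32 (a1)); |
| DESCRIPTION |
| performs 32-bit division/modulo; the result is left in r4. |
| REGISTERS |
| used t0 bit-index |
| t1 cleared by __umodsi3 |
| t4 quotient |
| t2 sign of the result (signed entry points) |
| t3 saved return address (all but __udivsi3) |
| modified a0 becomes the remainder |
| a1 receives a copy of the remainder on exit */ |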
| #ifdef L_divsi3 |
| .set pic |
| .text |
| .global __udivsi3 |
| .global __umodsi3 |
| .global __divsi3 |
| .global __modsi3 |
| |
| /* unsigned division (PIC build) |
| In: a0 = dividend, a1 = divisor. |
| Out: r4 = quotient, a0 = a1 = remainder. |
| Clobbers: t0, t4. |
| Shift-and-subtract: the divisor and a one-bit mask (t0) are shifted |
| left together until the divisor passes the dividend or its top bit |
| becomes set; both are then shifted back down one step at a time, |
| subtracting the divisor and setting the mask bit in the quotient |
| wherever the divisor still fits. bcc is used as "a0 < a1, unsigned". |
| A zero divisor skips both loops: r4 = 0 and a1 = a0. |
| The lines marked "pic used" adjust r0 by -8 on entry and +8 on exit. */ |
| .ent __udivsi3 |
| __udivsi3: |
| addi r0, -8 # pic used |
| .cpload r29 # pic used |
| li t4, 0 # quotient accumulator |
| cmpi.c a1, 0 |
| beq __uds_exit # divisor is zero: return 0, remainder = a0 |
| li t0, 1 # quotient bit mask |
| blt __uds_ok # still the cmpi.c flags: top bit already set, skip normalizing |
| __uds_normalize: |
| cmp.c a0, a1 |
| bcc __uds_ok # divisor now exceeds dividend: stop shifting |
| slli a1, a1, 1 |
| slli t0, t0, 1 |
| cmpi.c a1, 0 |
| bge __uds_normalize # keep going until the top bit of the divisor is set |
| __uds_ok: |
| __uds_loop2: |
| cmp.c a0, a1 |
| bcc __uds_loop3 # divisor does not fit at this position |
| sub a0, a0, a1 |
| or t4, t4, t0 # record this quotient bit |
| __uds_loop3: |
| srli t0, t0, 1 |
| srli a1, a1, 1 |
| cmpi.c t0, 0 |
| bne __uds_loop2 # done once the mask has been shifted out |
| __uds_exit: |
| mv a1, a0 # remainder is also handed back in a1 |
| mv r4, t4 # quotient is returned in r4 |
| .cprestore r0, 12 # pic used |
| addi r0, 8 # pic used |
| br ra |
| .end __udivsi3 |
| |
| /* unsigned modulus (PIC build) |
| In: a0 = dividend, a1 = divisor. |
| Out: r4 = remainder. |
| Calls __udivsi3 through r29 and returns the remainder that it leaves |
| in a1. t3 keeps the return address across the call. */ |
| .ent __umodsi3 |
| __umodsi3: |
| addi r0, -8 # pic used |
| .cpload r29 # pic used |
| li t1, 0 # NOTE(review): t1 is not read by __udivsi3 as visible here -- looks redundant |
| mv t3, ra # the call below replaces ra |
| la r29, __udivsi3 |
| brl r29 # indirect call to __udivsi3 |
| mv r4, a1 # remainder, not quotient, is the result |
| .cprestore r0, 12 # pic used |
| addi r0, 8 # pic used |
| br t3 |
| .end __umodsi3 |
| |
| /* abs and div (PIC build) |
| Internal helper (no .global): replaces a negative a0 and a negative a1 |
| by their negations, then jumps into __udivsi3. __udivsi3 returns |
| through ra, i.e. straight to whoever called __orgsi3. This helper |
| has no "pic used" prologue of its own; __udivsi3 runs its own. */ |
| .ent __orgsi3 |
| __orgsi3: |
| cmpi.c a0, 0 |
| bge __orgsi3_a0p # dividend already non-negative |
| neg a0, a0 |
| __orgsi3_a0p: |
| cmpi.c a1, 0 |
| bge __udivsi3 # divisor already non-negative: divide right away |
| neg a1, a1 |
| b __udivsi3 # goto udivsi3 |
| .end __orgsi3 |
| |
| /* signed division (PIC build) |
| In: a0 = dividend, a1 = divisor. |
| Out: r4 = quotient. |
| t2 = a0 ^ a1 is negative exactly when the operand signs differ; in |
| that case the unsigned quotient from __orgsi3 is negated. |
| __divsi3_adjust is also entered from __modsi3, so the epilogue at |
| __divsi3_exit undoes the r0 adjustment for both entry points. */ |
| .ent __divsi3 |
| __divsi3: |
| addi r0, -8 # pic used |
| .cpload r29 # pic used |
| mv t3, ra # the call below replaces ra |
| xor t2, a0, a1 # sign of the quotient |
| la r29, __orgsi3 |
| brl r29 # indirect call to __orgsi3 |
| __divsi3_adjust: |
| cmpi.c t2, 0 |
| bge __divsi3_exit # result stays non-negative |
| neg r4, r4 |
| __divsi3_exit: |
| .cprestore r0, 12 # pic used |
| addi r0, 8 # pic used |
| br t3 |
| .end __divsi3 |
| |
| /* signed modulus (PIC build) |
| In: a0 = dividend, a1 = divisor. |
| Out: r4 = remainder, negated when the dividend was negative. |
| t2 holds the original dividend so that the shared sign fix-up at |
| __divsi3_adjust keys off the sign of the dividend. The r0 adjustment |
| made here is undone at __divsi3_exit, which also returns through t3. */ |
| .ent __modsi3 |
| __modsi3: |
| addi r0, -8 # pic used |
| .cpload r29 # pic used |
| mv t3, ra # the call below replaces ra |
| mv t2, a0 # sign of the remainder follows the dividend |
| la r29, __orgsi3 |
| brl r29 # indirect call to __orgsi3 |
| mv r4, a1 # take the remainder left in a1 by __udivsi3 |
| b __divsi3_adjust |
| .end __modsi3 |
| |
| #endif /*L_divsi3 */ |
| #endif |