| @ libgcc routines for ARM cpu. |
| @ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) |
| |
| /* Copyright (C) 1995-2023 Free Software Foundation, Inc. |
| |
| This file is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by the |
| Free Software Foundation; either version 3, or (at your option) any |
| later version. |
| |
| This file is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* Everything in this file should now use unified syntax. */ |
| |
| .syntax unified |
| |
| /* An executable stack is *not* required for these functions. */ |
| #if defined(__ELF__) && defined(__linux__) |
| .section .note.GNU-stack,"",%progbits |
| .previous |
| #endif /* __ELF__ and __linux__ */ |
| |
| #ifdef __ARM_EABI__ |
| /* Some attributes that are common to all routines in this file. */ |
| /* Tag_ABI_align_needed: This code does not require 8-byte |
| alignment from the caller. */ |
| /* .eabi_attribute 24, 0 -- default setting. */ |
| /* Tag_ABI_align_preserved: This code preserves 8-byte |
| alignment in any callee. */ |
| .eabi_attribute 25, 1 |
| #endif /* __ARM_EABI__ */ |
| /* ------------------------------------------------------------------------ */ |
| |
| /* We need to know what prefix to add to function names. */ |
| |
| #ifndef __USER_LABEL_PREFIX__ |
| #error __USER_LABEL_PREFIX__ not defined |
| #endif |
| |
| /* ANSI concatenation macros. */ |
| |
| #define CONCAT1(a, b) CONCAT2(a, b) |
| #define CONCAT2(a, b) a ## b |
| |
| /* Use the right prefix for global labels. */ |
| |
| #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x) |
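| /* For example, SYM (__udivsi3) expands to plain __udivsi3 when the |
| prefix is empty, or to ___udivsi3 on targets where |
| __USER_LABEL_PREFIX__ is `_'. */ |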
| |
| #ifdef __ELF__ |
| #ifdef __thumb__ |
| #define __PLT__ /* Not supported in Thumb assembler (for now). */ |
| #elif defined __vxworks && !defined __PIC__ |
| #define __PLT__ /* Not supported by the kernel loader. */ |
| #else |
| #define __PLT__ (PLT) |
| #endif |
| #define TYPE(x) .type SYM(x),function |
| #define SIZE(x) .size SYM(x), . - SYM(x) |
| #define LSYM(x) .x |
| #else |
| #define __PLT__ |
| #define TYPE(x) |
| #define SIZE(x) |
| #define LSYM(x) x |
| #endif |
| |
| /* Function end macros. Variants for interworking. */ |
| |
| /* There are times when we might prefer Thumb1 code even if ARM code is |
| permitted, for example, the code might be smaller, or there might be |
| interworking problems with switching to ARM state if interworking is |
| disabled. */ |
| #if (defined(__thumb__) \ |
| && !defined(__thumb2__) \ |
| && (!defined(__THUMB_INTERWORK__) \ |
| || defined (__OPTIMIZE_SIZE__) \ |
| || !__ARM_ARCH_ISA_ARM)) |
| # define __prefer_thumb__ |
| #endif |
| |
| #if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1 |
| #define NOT_ISA_TARGET_32BIT 1 |
| #endif |
| |
| /* How to return from a function call depends on the architecture variant. */ |
| |
| #if (__ARM_ARCH > 4) || defined(__ARM_ARCH_4T__) |
| |
| # define RET bx lr |
| # define RETc(x) bx##x lr |
| |
| /* Special precautions for interworking on armv4t. */ |
| # if (__ARM_ARCH == 4) |
| |
| /* Always use bx, not ldr pc. */ |
| # if (defined(__thumb__) || defined(__THUMB_INTERWORK__)) |
| # define __INTERWORKING__ |
| # endif /* __THUMB__ || __THUMB_INTERWORK__ */ |
| |
| /* Include thumb stub before arm mode code. */ |
| # if defined(__thumb__) && !defined(__THUMB_INTERWORK__) |
| # define __INTERWORKING_STUBS__ |
| # endif /* __thumb__ && !__THUMB_INTERWORK__ */ |
| |
| #endif /* __ARM_ARCH == 4 */ |
| |
| #else |
| |
| # define RET mov pc, lr |
| # define RETc(x) mov##x pc, lr |
| |
| #endif |
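| /* Thus RETc(eq) expands to `bxeq lr' on architectures that provide BX, |
| and to `moveq pc, lr' on earlier cores that lack it. */ |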
| |
| .macro cfi_pop advance, reg, cfa_offset |
| #ifdef __ELF__ |
| .pushsection .debug_frame |
| .byte 0x4 /* DW_CFA_advance_loc4 */ |
| .4byte \advance |
| .byte (0xc0 | \reg) /* DW_CFA_restore */ |
| .byte 0xe /* DW_CFA_def_cfa_offset */ |
| .uleb128 \cfa_offset |
| .popsection |
| #endif |
| .endm |
| .macro cfi_push advance, reg, offset, cfa_offset |
| #ifdef __ELF__ |
| .pushsection .debug_frame |
| .byte 0x4 /* DW_CFA_advance_loc4 */ |
| .4byte \advance |
| .byte (0x80 | \reg) /* DW_CFA_offset */ |
| .uleb128 (\offset / -4) |
| .byte 0xe /* DW_CFA_def_cfa_offset */ |
| .uleb128 \cfa_offset |
| .popsection |
| #endif |
| .endm |
| .macro cfi_start start_label, end_label |
| #ifdef __ELF__ |
| .pushsection .debug_frame |
| LSYM(Lstart_frame): |
| .4byte LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE |
| LSYM(Lstart_cie): |
| .4byte 0xffffffff @ CIE Identifier Tag |
| .byte 0x1 @ CIE Version |
| .ascii "\0" @ CIE Augmentation |
| .uleb128 0x1 @ CIE Code Alignment Factor |
| .sleb128 -4 @ CIE Data Alignment Factor |
| .byte 0xe @ CIE RA Column |
| .byte 0xc @ DW_CFA_def_cfa |
| .uleb128 0xd |
| .uleb128 0x0 |
| |
| .align 2 |
| LSYM(Lend_cie): |
| .4byte LSYM(Lend_fde)-LSYM(Lstart_fde) @ FDE Length |
| LSYM(Lstart_fde): |
| .4byte LSYM(Lstart_frame) @ FDE CIE offset |
| .4byte \start_label @ FDE initial location |
| .4byte \end_label-\start_label @ FDE address range |
| .popsection |
| #endif |
| .endm |
| .macro cfi_end end_label |
| #ifdef __ELF__ |
| .pushsection .debug_frame |
| .align 2 |
| LSYM(Lend_fde): |
| .popsection |
| \end_label: |
| #endif |
| .endm |
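| /* These hand-written .debug_frame entries let a debugger unwind through |
| the labelled pushes and pops below (the `97:' and `98:' local labels); |
| each advance expression measures the distance of such a label from the |
| start of its function. */ |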
| |
| /* Don't pass dirn, it's there just to get token pasting right. */ |
| |
| .macro RETLDM regs=, cond=, unwind=, dirn=ia |
| #if defined (__INTERWORKING__) |
| .ifc "\regs","" |
| ldr\cond lr, [sp], #8 |
| .else |
| # if defined(__thumb2__) |
| pop\cond {\regs, lr} |
| # else |
| ldm\cond\dirn sp!, {\regs, lr} |
| # endif |
| .endif |
| .ifnc "\unwind", "" |
| /* Mark LR as restored. */ |
| 97: cfi_pop 97b - \unwind, 0xe, 0x0 |
| .endif |
| bx\cond lr |
| #else |
| /* Caller is responsible for providing IT instruction. */ |
| .ifc "\regs","" |
| ldr\cond pc, [sp], #8 |
| .else |
| # if defined(__thumb2__) |
| pop\cond {\regs, pc} |
| # else |
| ldm\cond\dirn sp!, {\regs, pc} |
| # endif |
| .endif |
| #endif |
| .endm |
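| /* Typical uses: plain `RETLDM' reloads only the return address, while |
| e.g. `RETLDM r4' (as in __clzdi2 below) restores r4 as well before |
| returning. */ |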
| |
| /* The Unified assembly syntax allows the same code to be assembled for both |
| ARM and Thumb-2. However this is only supported by recent gas, so define |
| a set of macros to allow ARM code on older assemblers. */ |
| #if defined(__thumb2__) |
| .macro do_it cond, suffix="" |
| it\suffix \cond |
| .endm |
| .macro shift1 op, arg0, arg1, arg2 |
| \op \arg0, \arg1, \arg2 |
| .endm |
| #define do_push push |
| #define do_pop pop |
| /* Perform an arithmetic operation with a variable shift operand. This |
| requires two instructions and a scratch register on Thumb-2. */ |
| .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp |
| \shiftop \tmp, \src2, \shiftreg |
| \name \dest, \src1, \tmp |
| .endm |
| #else |
| .macro do_it cond, suffix="" |
| .endm |
| .macro shift1 op, arg0, arg1, arg2 |
| mov \arg0, \arg1, \op \arg2 |
| .endm |
| #define do_push stmfd sp!, |
| #define do_pop ldmfd sp!, |
| .macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp |
| \name \dest, \src1, \src2, \shiftop \shiftreg |
| .endm |
| #endif |
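| /* As an illustration, `shiftop orr r0, r1, r2, lsl, r3, ip' becomes |
| `orr r0, r1, r2, lsl r3' in ARM state, but is split into |
| `lsl ip, r2, r3' followed by `orr r0, r1, ip' for Thumb-2. */ |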
| |
| #define COND(op1, op2, cond) op1 ## op2 ## cond |
| |
| #ifdef __ARM_EABI__ |
| .macro ARM_LDIV0 name signed |
| cmp r0, #0 |
| .ifc \signed, unsigned |
| movne r0, #0xffffffff |
| .else |
| movgt r0, #0x7fffffff |
| movlt r0, #0x80000000 |
| .endif |
| b SYM (__aeabi_idiv0) __PLT__ |
| .endm |
| #else |
| .macro ARM_LDIV0 name signed |
| str lr, [sp, #-8]! |
| 98: cfi_push 98b - __\name, 0xe, -0x8, 0x8 |
| bl SYM (__div0) __PLT__ |
| mov r0, #0 @ About as wrong as it could be. |
| RETLDM unwind=98b |
| .endm |
| #endif |
| |
| |
| #ifdef __ARM_EABI__ |
| .macro THUMB_LDIV0 name signed |
| #ifdef NOT_ISA_TARGET_32BIT |
| |
| push {r0, lr} |
| movs r0, #0 |
| bl SYM(__aeabi_idiv0) |
| @ We know we are not on armv4t, so pop pc is safe. |
| pop {r1, pc} |
| |
| #elif defined(__thumb2__) |
| .syntax unified |
| .ifc \signed, unsigned |
| cbz r0, 1f |
| mov r0, #0xffffffff |
| 1: |
| .else |
| cmp r0, #0 |
| do_it gt |
| movgt r0, #0x7fffffff |
| do_it lt |
| movlt r0, #0x80000000 |
| .endif |
| b.w SYM(__aeabi_idiv0) __PLT__ |
| #else |
| .align 2 |
| bx pc |
| nop |
| .arm |
| cmp r0, #0 |
| .ifc \signed, unsigned |
| movne r0, #0xffffffff |
| .else |
| movgt r0, #0x7fffffff |
| movlt r0, #0x80000000 |
| .endif |
| b SYM(__aeabi_idiv0) __PLT__ |
| .thumb |
| #endif |
| .endm |
| #else |
| .macro THUMB_LDIV0 name signed |
| push { r1, lr } |
| 98: cfi_push 98b - __\name, 0xe, -0x4, 0x8 |
| bl SYM (__div0) |
| movs r0, #0 @ About as wrong as it could be. |
| #if defined (__INTERWORKING__) |
| pop { r1, r2 } |
| bx r2 |
| #else |
| pop { r1, pc } |
| #endif |
| .endm |
| #endif |
| |
| .macro FUNC_END name |
| SIZE (__\name) |
| .endm |
| |
| .macro DIV_FUNC_END name signed |
| cfi_start __\name, LSYM(Lend_div0) |
| LSYM(Ldiv0): |
| #ifdef __thumb__ |
| THUMB_LDIV0 \name \signed |
| #else |
| ARM_LDIV0 \name \signed |
| #endif |
| cfi_end LSYM(Lend_div0) |
| FUNC_END \name |
| .endm |
| |
| .macro THUMB_FUNC_START name |
| .globl SYM (\name) |
| TYPE (\name) |
| .thumb_func |
| SYM (\name): |
| .endm |
| |
| /* Function start macros. Variants for ARM and Thumb. */ |
| |
| #ifdef __thumb__ |
| #define THUMB_FUNC .thumb_func |
| #define THUMB_CODE .force_thumb |
| #define THUMB_SYNTAX |
| #else |
| #define THUMB_FUNC |
| #define THUMB_CODE |
| #define THUMB_SYNTAX |
| #endif |
| |
| .macro FUNC_START name |
| .text |
| .globl SYM (__\name) |
| TYPE (__\name) |
| .align 0 |
| THUMB_CODE |
| THUMB_FUNC |
| THUMB_SYNTAX |
| SYM (__\name): |
| .endm |
| |
| .macro ARM_SYM_START name |
| TYPE (\name) |
| .align 0 |
| SYM (\name): |
| .endm |
| |
| .macro SYM_END name |
| SIZE (\name) |
| .endm |
| |
| /* Special function that will always be coded in ARM assembly, even if |
| in Thumb-only compilation. */ |
| |
| #if defined(__thumb2__) |
| |
| /* For Thumb-2 we build everything in thumb mode. */ |
| .macro ARM_FUNC_START name |
| FUNC_START \name |
| .syntax unified |
| .endm |
| #define EQUIV .thumb_set |
| .macro ARM_CALL name |
| bl __\name |
| .endm |
| |
| #elif defined(__INTERWORKING_STUBS__) |
| |
| .macro ARM_FUNC_START name |
| FUNC_START \name |
| bx pc |
| nop |
| .arm |
| /* A hook to tell gdb that we've switched to ARM mode. Also used to call |
| directly from other local arm routines. */ |
| _L__\name: |
| .endm |
| #define EQUIV .thumb_set |
| /* Branch directly to a function declared with ARM_FUNC_START. |
| Must be called in arm mode. */ |
| .macro ARM_CALL name |
| bl _L__\name |
| .endm |
| |
| #else /* !(__INTERWORKING_STUBS__ || __thumb2__) */ |
| |
| #ifdef NOT_ISA_TARGET_32BIT |
| #define EQUIV .thumb_set |
| #else |
| .macro ARM_FUNC_START name |
| .text |
| .globl SYM (__\name) |
| TYPE (__\name) |
| .align 0 |
| .arm |
| SYM (__\name): |
| .endm |
| #define EQUIV .set |
| .macro ARM_CALL name |
| bl __\name |
| .endm |
| #endif |
| |
| #endif |
| |
| .macro FUNC_ALIAS new old |
| .globl SYM (__\new) |
| #if defined (__thumb__) |
| .thumb_set SYM (__\new), SYM (__\old) |
| #else |
| .set SYM (__\new), SYM (__\old) |
| #endif |
| .endm |
| |
| #ifndef NOT_ISA_TARGET_32BIT |
| .macro ARM_FUNC_ALIAS new old |
| .globl SYM (__\new) |
| EQUIV SYM (__\new), SYM (__\old) |
| #if defined(__INTERWORKING_STUBS__) |
| .set SYM (_L__\new), SYM (_L__\old) |
| #endif |
| .endm |
| #endif |
| |
| #ifdef __ARMEB__ |
| #define xxh r0 |
| #define xxl r1 |
| #define yyh r2 |
| #define yyl r3 |
| #else |
| #define xxh r1 |
| #define xxl r0 |
| #define yyh r3 |
| #define yyl r2 |
| #endif |
| |
| #ifdef __ARM_EABI__ |
| .macro WEAK name |
| .weak SYM (__\name) |
| .endm |
| #endif |
| |
| #ifdef __thumb__ |
| /* Register aliases. */ |
| |
| work .req r4 @ XXXX is this safe ? |
| dividend .req r0 |
| divisor .req r1 |
| overdone .req r2 |
| result .req r2 |
| curbit .req r3 |
| #endif |
| #if 0 |
| ip .req r12 |
| sp .req r13 |
| lr .req r14 |
| pc .req r15 |
| #endif |
| |
| /* ------------------------------------------------------------------------ */ |
| /* Bodies of the division and modulo routines. */ |
| /* ------------------------------------------------------------------------ */ |
| |
| .macro ARM_DIV_BODY dividend, divisor, result, curbit |
| |
| #if defined (__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__) |
| |
| #if defined (__thumb2__) |
| clz \curbit, \dividend |
| clz \result, \divisor |
| sub \curbit, \result, \curbit |
| rsb \curbit, \curbit, #31 |
| adr \result, 1f |
| add \curbit, \result, \curbit, lsl #4 |
| mov \result, #0 |
| mov pc, \curbit |
| .p2align 3 |
| 1: |
| .set shift, 32 |
| .rept 32 |
| .set shift, shift - 1 |
| cmp.w \dividend, \divisor, lsl #shift |
| nop.n |
| adc.w \result, \result, \result |
| it cs |
| subcs.w \dividend, \dividend, \divisor, lsl #shift |
| .endr |
| #else |
| clz \curbit, \dividend |
| clz \result, \divisor |
| sub \curbit, \result, \curbit |
| rsbs \curbit, \curbit, #31 |
| addne \curbit, \curbit, \curbit, lsl #1 |
| mov \result, #0 |
| addne pc, pc, \curbit, lsl #2 |
| nop |
| .set shift, 32 |
| .rept 32 |
| .set shift, shift - 1 |
| cmp \dividend, \divisor, lsl #shift |
| adc \result, \result, \result |
| subcs \dividend, \dividend, \divisor, lsl #shift |
| .endr |
| #endif |
| |
| #else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */ |
| #if defined (__ARM_FEATURE_CLZ) |
| |
| clz \curbit, \divisor |
| clz \result, \dividend |
| sub \result, \curbit, \result |
| mov \curbit, #1 |
| mov \divisor, \divisor, lsl \result |
| mov \curbit, \curbit, lsl \result |
| mov \result, #0 |
| |
| #else /* !defined (__ARM_FEATURE_CLZ) */ |
| |
| @ Initially shift the divisor left 3 bits if possible, |
| @ set curbit accordingly. This allows for curbit to be located |
| @ at the left end of each 4-bit nibble in the division loop |
| @ to save one loop in most cases. |
| tst \divisor, #0xe0000000 |
| moveq \divisor, \divisor, lsl #3 |
| moveq \curbit, #8 |
| movne \curbit, #1 |
| |
| @ Unless the divisor is very big, shift it up in multiples of |
| @ four bits, since this is the amount of unwinding in the main |
| @ division loop. Continue shifting until the divisor is |
| @ larger than the dividend. |
| 1: cmp \divisor, #0x10000000 |
| cmplo \divisor, \dividend |
| movlo \divisor, \divisor, lsl #4 |
| movlo \curbit, \curbit, lsl #4 |
| blo 1b |
| |
| @ For very big divisors, we must shift it a bit at a time, or |
| @ we will be in danger of overflowing. |
| 1: cmp \divisor, #0x80000000 |
| cmplo \divisor, \dividend |
| movlo \divisor, \divisor, lsl #1 |
| movlo \curbit, \curbit, lsl #1 |
| blo 1b |
| |
| mov \result, #0 |
| |
| #endif /* !defined (__ARM_FEATURE_CLZ) */ |
| |
| @ Division loop |
| 1: cmp \dividend, \divisor |
| do_it hs, t |
| subhs \dividend, \dividend, \divisor |
| orrhs \result, \result, \curbit |
| cmp \dividend, \divisor, lsr #1 |
| do_it hs, t |
| subhs \dividend, \dividend, \divisor, lsr #1 |
| orrhs \result, \result, \curbit, lsr #1 |
| cmp \dividend, \divisor, lsr #2 |
| do_it hs, t |
| subhs \dividend, \dividend, \divisor, lsr #2 |
| orrhs \result, \result, \curbit, lsr #2 |
| cmp \dividend, \divisor, lsr #3 |
| do_it hs, t |
| subhs \dividend, \dividend, \divisor, lsr #3 |
| orrhs \result, \result, \curbit, lsr #3 |
| cmp \dividend, #0 @ Early termination? |
| do_it ne, t |
| movsne \curbit, \curbit, lsr #4 @ No, any more bits to do? |
| movne \divisor, \divisor, lsr #4 |
| bne 1b |
| |
| #endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */ |
| |
| .endm |
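| |
| /* A rough C model of the shift-and-subtract division above (an |
| illustrative sketch only; the assembly aligns the divisor in steps of |
| four bits and unrolls the main loop four ways, with early termination |
| once the dividend is exhausted): |
| |
| unsigned udiv (unsigned dividend, unsigned divisor) |
| { |
| unsigned curbit = 1, result = 0; |
| while (divisor < dividend && !(divisor & 0x80000000)) |
| { |
| divisor <<= 1; |
| curbit <<= 1; |
| } |
| while (curbit) |
| { |
| if (dividend >= divisor) |
| { |
| dividend -= divisor; |
| result += curbit; |
| } |
| divisor >>= 1; |
| curbit >>= 1; |
| } |
| return result; |
| } */ |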
| /* ------------------------------------------------------------------------ */ |
| .macro ARM_DIV2_ORDER divisor, order |
| |
| #if defined (__ARM_FEATURE_CLZ) |
| |
| clz \order, \divisor |
| rsb \order, \order, #31 |
| |
| #else |
| |
| cmp \divisor, #(1 << 16) |
| movhs \divisor, \divisor, lsr #16 |
| movhs \order, #16 |
| movlo \order, #0 |
| |
| cmp \divisor, #(1 << 8) |
| movhs \divisor, \divisor, lsr #8 |
| addhs \order, \order, #8 |
| |
| cmp \divisor, #(1 << 4) |
| movhs \divisor, \divisor, lsr #4 |
| addhs \order, \order, #4 |
| |
| cmp \divisor, #(1 << 2) |
| addhi \order, \order, #3 |
| addls \order, \order, \divisor, lsr #1 |
| |
| #endif |
| |
| .endm |
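| |
| /* ARM_DIV2_ORDER computes the log2 of a power-of-two divisor: for |
| instance, a divisor of 8 yields an order of 3, and the quotient is |
| then obtained with a single right shift of the dividend by 3. */ |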
| /* ------------------------------------------------------------------------ */ |
| .macro ARM_MOD_BODY dividend, divisor, order, spare |
| |
| #if defined(__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__) |
| |
| clz \order, \divisor |
| clz \spare, \dividend |
| sub \order, \order, \spare |
| rsbs \order, \order, #31 |
| addne pc, pc, \order, lsl #3 |
| nop |
| .set shift, 32 |
| .rept 32 |
| .set shift, shift - 1 |
| cmp \dividend, \divisor, lsl #shift |
| subcs \dividend, \dividend, \divisor, lsl #shift |
| .endr |
| |
| #else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */ |
| #if defined (__ARM_FEATURE_CLZ) |
| |
| clz \order, \divisor |
| clz \spare, \dividend |
| sub \order, \order, \spare |
| mov \divisor, \divisor, lsl \order |
| |
| #else /* !defined (__ARM_FEATURE_CLZ) */ |
| |
| mov \order, #0 |
| |
| @ Unless the divisor is very big, shift it up in multiples of |
| @ four bits, since this is the amount of unwinding in the main |
| @ division loop. Continue shifting until the divisor is |
| @ larger than the dividend. |
| 1: cmp \divisor, #0x10000000 |
| cmplo \divisor, \dividend |
| movlo \divisor, \divisor, lsl #4 |
| addlo \order, \order, #4 |
| blo 1b |
| |
| @ For very big divisors, we must shift it a bit at a time, or |
| @ we will be in danger of overflowing. |
| 1: cmp \divisor, #0x80000000 |
| cmplo \divisor, \dividend |
| movlo \divisor, \divisor, lsl #1 |
| addlo \order, \order, #1 |
| blo 1b |
| |
| #endif /* !defined (__ARM_FEATURE_CLZ) */ |
| |
| @ Perform all needed subtractions to keep only the remainder. |
| @ Do comparisons in batches of 4 first. |
| subs \order, \order, #3 @ yes, 3 is intended here |
| blt 2f |
| |
| 1: cmp \dividend, \divisor |
| subhs \dividend, \dividend, \divisor |
| cmp \dividend, \divisor, lsr #1 |
| subhs \dividend, \dividend, \divisor, lsr #1 |
| cmp \dividend, \divisor, lsr #2 |
| subhs \dividend, \dividend, \divisor, lsr #2 |
| cmp \dividend, \divisor, lsr #3 |
| subhs \dividend, \dividend, \divisor, lsr #3 |
| cmp \dividend, #1 |
| mov \divisor, \divisor, lsr #4 |
| subsge \order, \order, #4 |
| bge 1b |
| |
| tst \order, #3 |
| teqne \dividend, #0 |
| beq 5f |
| |
| @ Either 1, 2 or 3 compare/subtract steps are left. |
| 2: cmn \order, #2 |
| blt 4f |
| beq 3f |
| cmp \dividend, \divisor |
| subhs \dividend, \dividend, \divisor |
| mov \divisor, \divisor, lsr #1 |
| 3: cmp \dividend, \divisor |
| subhs \dividend, \dividend, \divisor |
| mov \divisor, \divisor, lsr #1 |
| 4: cmp \dividend, \divisor |
| subhs \dividend, \dividend, \divisor |
| 5: |
| |
| #endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */ |
| |
| .endm |
| /* ------------------------------------------------------------------------ */ |
| .macro THUMB_DIV_MOD_BODY modulo |
| @ Load the constant 0x10000000 into our work register. |
| movs work, #1 |
| lsls work, #28 |
| LSYM(Loop1): |
| @ Unless the divisor is very big, shift it up in multiples of |
| @ four bits, since this is the amount of unwinding in the main |
| @ division loop. Continue shifting until the divisor is |
| @ larger than the dividend. |
| cmp divisor, work |
| bhs LSYM(Lbignum) |
| cmp divisor, dividend |
| bhs LSYM(Lbignum) |
| lsls divisor, #4 |
| lsls curbit, #4 |
| b LSYM(Loop1) |
| LSYM(Lbignum): |
| @ Set work to 0x80000000 |
| lsls work, #3 |
| LSYM(Loop2): |
| @ For very big divisors, we must shift it a bit at a time, or |
| @ we will be in danger of overflowing. |
| cmp divisor, work |
| bhs LSYM(Loop3) |
| cmp divisor, dividend |
| bhs LSYM(Loop3) |
| lsls divisor, #1 |
| lsls curbit, #1 |
| b LSYM(Loop2) |
| LSYM(Loop3): |
| @ Test for possible subtractions ... |
| .if \modulo |
| @ ... On the final pass, this may subtract too much from the dividend, |
| @ so keep track of which subtractions are done so that we can fix |
| @ them up afterwards. |
| movs overdone, #0 |
| cmp dividend, divisor |
| blo LSYM(Lover1) |
| subs dividend, dividend, divisor |
| LSYM(Lover1): |
| lsrs work, divisor, #1 |
| cmp dividend, work |
| blo LSYM(Lover2) |
| subs dividend, dividend, work |
| mov ip, curbit |
| movs work, #1 |
| rors curbit, work |
| orrs overdone, curbit |
| mov curbit, ip |
| LSYM(Lover2): |
| lsrs work, divisor, #2 |
| cmp dividend, work |
| blo LSYM(Lover3) |
| subs dividend, dividend, work |
| mov ip, curbit |
| movs work, #2 |
| rors curbit, work |
| orrs overdone, curbit |
| mov curbit, ip |
| LSYM(Lover3): |
| lsrs work, divisor, #3 |
| cmp dividend, work |
| blo LSYM(Lover4) |
| subs dividend, dividend, work |
| mov ip, curbit |
| movs work, #3 |
| rors curbit, work |
| orrs overdone, curbit |
| mov curbit, ip |
| LSYM(Lover4): |
| mov ip, curbit |
| .else |
| @ ... and note which bits are done in the result. On the final pass, |
| @ this may subtract too much from the dividend, but the result will be ok, |
| @ since the "bit" will have been shifted out at the bottom. |
| cmp dividend, divisor |
| blo LSYM(Lover1) |
| subs dividend, dividend, divisor |
| orrs result, result, curbit |
| LSYM(Lover1): |
| lsrs work, divisor, #1 |
| cmp dividend, work |
| blo LSYM(Lover2) |
| subs dividend, dividend, work |
| lsrs work, curbit, #1 |
| orrs result, work |
| LSYM(Lover2): |
| lsrs work, divisor, #2 |
| cmp dividend, work |
| blo LSYM(Lover3) |
| subs dividend, dividend, work |
| lsrs work, curbit, #2 |
| orrs result, work |
| LSYM(Lover3): |
| lsrs work, divisor, #3 |
| cmp dividend, work |
| blo LSYM(Lover4) |
| subs dividend, dividend, work |
| lsrs work, curbit, #3 |
| orrs result, work |
| LSYM(Lover4): |
| .endif |
| |
| cmp dividend, #0 @ Early termination? |
| beq LSYM(Lover5) |
| lsrs curbit, #4 @ No, any more bits to do? |
| beq LSYM(Lover5) |
| lsrs divisor, #4 |
| b LSYM(Loop3) |
| LSYM(Lover5): |
| .if \modulo |
| @ Any subtractions that we should not have done will be recorded in |
| @ the top three bits of "overdone". Exactly which ones were not |
| @ needed is governed by the position of the bit, stored in ip. |
| movs work, #0xe |
| lsls work, #28 |
| ands overdone, work |
| beq LSYM(Lgot_result) |
| |
| @ If we terminated early, because dividend became zero, then the |
| @ bit in ip will not be in the bottom nibble, and we should not |
| @ perform the additions below. We must test for this though |
| @ (rather than relying upon the TSTs to prevent the additions) since |
| @ the bit in ip could be in the top two bits which might then match |
| @ with one of the smaller RORs. |
| mov curbit, ip |
| movs work, #0x7 |
| tst curbit, work |
| beq LSYM(Lgot_result) |
| |
| mov curbit, ip |
| movs work, #3 |
| rors curbit, work |
| tst overdone, curbit |
| beq LSYM(Lover6) |
| lsrs work, divisor, #3 |
| adds dividend, work |
| LSYM(Lover6): |
| mov curbit, ip |
| movs work, #2 |
| rors curbit, work |
| tst overdone, curbit |
| beq LSYM(Lover7) |
| lsrs work, divisor, #2 |
| adds dividend, work |
| LSYM(Lover7): |
| mov curbit, ip |
| movs work, #1 |
| rors curbit, work |
| tst overdone, curbit |
| beq LSYM(Lgot_result) |
| lsrs work, divisor, #1 |
| adds dividend, work |
| .endif |
| LSYM(Lgot_result): |
| .endm |
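| /* In other words: on the final pass the unrolled steps above may |
| subtract divisor>>1, divisor>>2 or divisor>>3 even though the |
| corresponding quotient bits lie below bit 0 of curbit. Each such step |
| is recorded in "overdone", and when its bit really fell off the bottom |
| the amount subtracted is added back, reconstructing the remainder. */ |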
| |
| /* The following functions are provided when performance is preferred |
| over code size. */ |
| #if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__) |
| |
| /* Branch to div(n): jump to the given label if (dividend >> n) is |
| lower than the divisor. */ |
| .macro BranchToDiv n, label |
| lsrs curbit, dividend, \n |
| cmp curbit, divisor |
| blo \label |
| .endm |
| |
| /* Body of div(n). Compare the dividend shifted right by n bits with |
| the divisor; if it is not lower, subtract the divisor shifted left |
| by n bits from the dividend, and shift the resulting carry into the |
| quotient. */ |
| .macro DoDiv n |
| lsrs curbit, dividend, \n |
| cmp curbit, divisor |
| bcc 1f |
| lsls curbit, divisor, \n |
| subs dividend, dividend, curbit |
| |
| 1: adcs result, result |
| .endm |
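| /* So `DoDiv #3' tests whether (dividend >> 3) >= divisor, subtracts |
| (divisor << 3) from the dividend when it is, and shifts the resulting |
| carry into the quotient held in `result'. */ |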
| |
| /* The body of division with a positive divisor. Unless the divisor is very |
| big, shift it up in multiples of four bits, since this is the amount of |
| unwinding in the main division loop. Continue shifting until the divisor |
| is larger than the dividend. */ |
| .macro THUMB1_Div_Positive |
| movs result, #0 |
| BranchToDiv #1, LSYM(Lthumb1_div1) |
| BranchToDiv #4, LSYM(Lthumb1_div4) |
| BranchToDiv #8, LSYM(Lthumb1_div8) |
| BranchToDiv #12, LSYM(Lthumb1_div12) |
| BranchToDiv #16, LSYM(Lthumb1_div16) |
| LSYM(Lthumb1_div_large_positive): |
| movs result, #0xff |
| lsls divisor, divisor, #8 |
| rev result, result |
| lsrs curbit, dividend, #16 |
| cmp curbit, divisor |
| blo 1f |
| asrs result, #8 |
| lsls divisor, divisor, #8 |
| beq LSYM(Ldivbyzero_waypoint) |
| |
| 1: lsrs curbit, dividend, #12 |
| cmp curbit, divisor |
| blo LSYM(Lthumb1_div12) |
| b LSYM(Lthumb1_div16) |
| LSYM(Lthumb1_div_loop): |
| lsrs divisor, divisor, #8 |
| LSYM(Lthumb1_div16): |
| DoDiv #15 |
| DoDiv #14 |
| DoDiv #13 |
| DoDiv #12 |
| LSYM(Lthumb1_div12): |
| DoDiv #11 |
| DoDiv #10 |
| DoDiv #9 |
| DoDiv #8 |
| bcs LSYM(Lthumb1_div_loop) |
| LSYM(Lthumb1_div8): |
| DoDiv #7 |
| DoDiv #6 |
| DoDiv #5 |
| LSYM(Lthumb1_div5): |
| DoDiv #4 |
| LSYM(Lthumb1_div4): |
| DoDiv #3 |
| LSYM(Lthumb1_div3): |
| DoDiv #2 |
| LSYM(Lthumb1_div2): |
| DoDiv #1 |
| LSYM(Lthumb1_div1): |
| subs divisor, dividend, divisor |
| bcs 1f |
| cpy divisor, dividend |
| |
| 1: adcs result, result |
| cpy dividend, result |
| RET |
| |
| LSYM(Ldivbyzero_waypoint): |
| b LSYM(Ldiv0) |
| .endm |
| |
| /* The body of division with a negative divisor. Similar to |
| THUMB1_Div_Positive except that the shift steps are in multiples |
| of six bits. */ |
| .macro THUMB1_Div_Negative |
| lsrs result, divisor, #31 |
| beq 1f |
| negs divisor, divisor |
| |
| 1: asrs curbit, dividend, #32 |
| bcc 2f |
| negs dividend, dividend |
| |
| 2: eors curbit, result |
| movs result, #0 |
| cpy ip, curbit |
| BranchToDiv #4, LSYM(Lthumb1_div_negative4) |
| BranchToDiv #8, LSYM(Lthumb1_div_negative8) |
| LSYM(Lthumb1_div_large): |
| movs result, #0xfc |
| lsls divisor, divisor, #6 |
| rev result, result |
| lsrs curbit, dividend, #8 |
| cmp curbit, divisor |
| blo LSYM(Lthumb1_div_negative8) |
| |
| lsls divisor, divisor, #6 |
| asrs result, result, #6 |
| cmp curbit, divisor |
| blo LSYM(Lthumb1_div_negative8) |
| |
| lsls divisor, divisor, #6 |
| asrs result, result, #6 |
| cmp curbit, divisor |
| blo LSYM(Lthumb1_div_negative8) |
| |
| lsls divisor, divisor, #6 |
| beq LSYM(Ldivbyzero_negative) |
| asrs result, result, #6 |
| b LSYM(Lthumb1_div_negative8) |
| LSYM(Lthumb1_div_negative_loop): |
| lsrs divisor, divisor, #6 |
| LSYM(Lthumb1_div_negative8): |
| DoDiv #7 |
| DoDiv #6 |
| DoDiv #5 |
| DoDiv #4 |
| LSYM(Lthumb1_div_negative4): |
| DoDiv #3 |
| DoDiv #2 |
| bcs LSYM(Lthumb1_div_negative_loop) |
| DoDiv #1 |
| subs divisor, dividend, divisor |
| bcs 1f |
| cpy divisor, dividend |
| |
| 1: cpy curbit, ip |
| adcs result, result |
| asrs curbit, curbit, #1 |
| cpy dividend, result |
| bcc 2f |
| negs dividend, dividend |
| cmp curbit, #0 |
| |
| 2: bpl 3f |
| negs divisor, divisor |
| |
| 3: RET |
| |
| LSYM(Ldivbyzero_negative): |
| cpy curbit, ip |
| asrs curbit, curbit, #1 |
| bcc LSYM(Ldiv0) |
| negs dividend, dividend |
| .endm |
| #endif /* ARM Thumb version. */ |
| |
| /* ------------------------------------------------------------------------ */ |
| /* Start of the Real Functions */ |
| /* ------------------------------------------------------------------------ */ |
| #ifdef L_udivsi3 |
| |
| #if defined(__prefer_thumb__) |
| |
| FUNC_START udivsi3 |
| FUNC_ALIAS aeabi_uidiv udivsi3 |
| #if defined(__OPTIMIZE_SIZE__) |
| |
| cmp divisor, #0 |
| beq LSYM(Ldiv0) |
| LSYM(udivsi3_skip_div0_test): |
| movs curbit, #1 |
| movs result, #0 |
| |
| push { work } |
| cmp dividend, divisor |
| blo LSYM(Lgot_result) |
| |
| THUMB_DIV_MOD_BODY 0 |
| |
| movs r0, result |
| pop { work } |
| RET |
| |
| /* Implementation of aeabi_uidiv for ARMv6-M. This version is used |
| when an efficient implementation is needed rather than minimum size. */ |
| #else |
| LSYM(udivsi3_skip_div0_test): |
| THUMB1_Div_Positive |
| |
| #endif /* __OPTIMIZE_SIZE__ */ |
| |
| #elif defined(__ARM_ARCH_EXT_IDIV__) |
| |
| ARM_FUNC_START udivsi3 |
| ARM_FUNC_ALIAS aeabi_uidiv udivsi3 |
| |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| |
| udiv r0, r0, r1 |
| RET |
| |
| #else /* ARM/Thumb-2 version. */ |
| |
| ARM_FUNC_START udivsi3 |
| ARM_FUNC_ALIAS aeabi_uidiv udivsi3 |
| |
| /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily |
| check for division-by-zero a second time. */ |
| LSYM(udivsi3_skip_div0_test): |
| subs r2, r1, #1 |
| do_it eq |
| RETc(eq) |
| bcc LSYM(Ldiv0) |
| cmp r0, r1 |
| bls 11f |
| tst r1, r2 |
| beq 12f |
| |
| ARM_DIV_BODY r0, r1, r2, r3 |
| |
| mov r0, r2 |
| RET |
| |
| 11: do_it eq, e |
| moveq r0, #1 |
| movne r0, #0 |
| RET |
| |
| 12: ARM_DIV2_ORDER r1, r2 |
| |
| mov r0, r0, lsr r2 |
| RET |
| |
| #endif /* ARM version */ |
| |
| DIV_FUNC_END udivsi3 unsigned |
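| |
| /* Per the run-time ABI, __aeabi_uidivmod returns the quotient in r0 |
| and the remainder in r1. */ |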
| |
| #if defined(__prefer_thumb__) |
| FUNC_START aeabi_uidivmod |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| # if defined(__OPTIMIZE_SIZE__) |
| push {r0, r1, lr} |
| bl LSYM(udivsi3_skip_div0_test) |
| pop {r1, r2, r3} |
| muls r2, r0 |
| subs r1, r1, r2 |
| bx r3 |
| # else |
| /* Both the quotient and remainder are calculated simultaneously |
| in THUMB1_Div_Positive. There is no need to calculate the |
| remainder again here. */ |
| b LSYM(udivsi3_skip_div0_test) |
| RET |
| # endif /* __OPTIMIZE_SIZE__ */ |
| |
| #elif defined(__ARM_ARCH_EXT_IDIV__) |
| ARM_FUNC_START aeabi_uidivmod |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| mov r2, r0 |
| udiv r0, r0, r1 |
| mls r1, r0, r1, r2 |
| RET |
| #else |
| ARM_FUNC_START aeabi_uidivmod |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| stmfd sp!, { r0, r1, lr } |
| bl LSYM(udivsi3_skip_div0_test) |
| ldmfd sp!, { r1, r2, lr } |
| mul r3, r2, r0 |
| sub r1, r1, r3 |
| RET |
| #endif |
| FUNC_END aeabi_uidivmod |
| |
| #endif /* L_udivsi3 */ |
| /* ------------------------------------------------------------------------ */ |
| #ifdef L_umodsi3 |
| |
| #if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1 |
| |
| ARM_FUNC_START umodsi3 |
| |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| udiv r2, r0, r1 |
| mls r0, r1, r2, r0 |
| RET |
| |
| #elif defined(__thumb__) |
| |
| FUNC_START umodsi3 |
| |
| cmp divisor, #0 |
| beq LSYM(Ldiv0) |
| movs curbit, #1 |
| cmp dividend, divisor |
| bhs LSYM(Lover10) |
| RET |
| |
| LSYM(Lover10): |
| push { work } |
| |
| THUMB_DIV_MOD_BODY 1 |
| |
| pop { work } |
| RET |
| |
| #else /* ARM version. */ |
| |
| FUNC_START umodsi3 |
| |
| subs r2, r1, #1 @ compare divisor with 1 |
| bcc LSYM(Ldiv0) |
| cmpne r0, r1 @ compare dividend with divisor |
| moveq r0, #0 |
| tsthi r1, r2 @ see if divisor is power of 2 |
| andeq r0, r0, r2 |
| RETc(ls) |
| |
| ARM_MOD_BODY r0, r1, r2, r3 |
| |
| RET |
| |
| #endif /* ARM version. */ |
| |
| DIV_FUNC_END umodsi3 unsigned |
| |
| #endif /* L_umodsi3 */ |
| /* ------------------------------------------------------------------------ */ |
| #ifdef L_divsi3 |
| |
| #if defined(__prefer_thumb__) |
| |
| FUNC_START divsi3 |
| FUNC_ALIAS aeabi_idiv divsi3 |
| #if defined(__OPTIMIZE_SIZE__) |
| |
| cmp divisor, #0 |
| beq LSYM(Ldiv0) |
| LSYM(divsi3_skip_div0_test): |
| push { work } |
| movs work, dividend |
| eors work, divisor @ Save the sign of the result. |
| mov ip, work |
| movs curbit, #1 |
| movs result, #0 |
| cmp divisor, #0 |
| bpl LSYM(Lover10) |
| negs divisor, divisor @ Loops below use unsigned. |
| LSYM(Lover10): |
| cmp dividend, #0 |
| bpl LSYM(Lover11) |
| negs dividend, dividend |
| LSYM(Lover11): |
| cmp dividend, divisor |
| blo LSYM(Lgot_result) |
| |
| THUMB_DIV_MOD_BODY 0 |
| |
| movs r0, result |
| mov work, ip |
| cmp work, #0 |
| bpl LSYM(Lover12) |
| negs r0, r0 |
| LSYM(Lover12): |
| pop { work } |
| RET |
| |
| /* Implementation of aeabi_idiv for ARMv6-M. This version is used |
| when an efficient implementation is needed rather than minimum size. */ |
| #else |
| LSYM(divsi3_skip_div0_test): |
| cpy curbit, dividend |
| orrs curbit, divisor |
| bmi LSYM(Lthumb1_div_negative) |
| |
| LSYM(Lthumb1_div_positive): |
| THUMB1_Div_Positive |
| |
| LSYM(Lthumb1_div_negative): |
| THUMB1_Div_Negative |
| |
| #endif /* __OPTIMIZE_SIZE__ */ |
| |
| #elif defined(__ARM_ARCH_EXT_IDIV__) |
| |
| ARM_FUNC_START divsi3 |
| ARM_FUNC_ALIAS aeabi_idiv divsi3 |
| |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| sdiv r0, r0, r1 |
| RET |
| |
| #else /* ARM/Thumb-2 version. */ |
| |
| ARM_FUNC_START divsi3 |
| ARM_FUNC_ALIAS aeabi_idiv divsi3 |
| |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| LSYM(divsi3_skip_div0_test): |
| eor ip, r0, r1 @ save the sign of the result. |
| do_it mi |
| rsbmi r1, r1, #0 @ loops below use unsigned. |
| subs r2, r1, #1 @ division by 1 or -1 ? |
| beq 10f |
| movs r3, r0 |
| do_it mi |
| rsbmi r3, r0, #0 @ positive dividend value |
| cmp r3, r1 |
| bls 11f |
| tst r1, r2 @ divisor is power of 2 ? |
| beq 12f |
| |
| ARM_DIV_BODY r3, r1, r0, r2 |
| |
| cmp ip, #0 |
| do_it mi |
| rsbmi r0, r0, #0 |
| RET |
| |
| 10: teq ip, r0 @ same sign ? |
| do_it mi |
| rsbmi r0, r0, #0 |
| RET |
| |
| 11: do_it lo |
| movlo r0, #0 |
| do_it eq,t |
| moveq r0, ip, asr #31 |
| orreq r0, r0, #1 |
| RET |
| |
| 12: ARM_DIV2_ORDER r1, r2 |
| |
| cmp ip, #0 |
| mov r0, r3, lsr r2 |
| do_it mi |
| rsbmi r0, r0, #0 |
| RET |
| |
| #endif /* ARM version */ |
| |
| DIV_FUNC_END divsi3 signed |
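| |
| /* Likewise, __aeabi_idivmod returns the quotient in r0 and the |
| remainder in r1, as the run-time ABI requires. */ |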
| |
| #if defined(__prefer_thumb__) |
| FUNC_START aeabi_idivmod |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| # if defined(__OPTIMIZE_SIZE__) |
| push {r0, r1, lr} |
| bl LSYM(divsi3_skip_div0_test) |
| pop {r1, r2, r3} |
| muls r2, r0 |
| subs r1, r1, r2 |
| bx r3 |
| # else |
| /* Both the quotient and remainder are calculated simultaneously |
| in THUMB1_Div_Positive and THUMB1_Div_Negative. There is no |
| need to calculate the remainder again here. */ |
| b LSYM(divsi3_skip_div0_test) |
| RET |
| # endif /* __OPTIMIZE_SIZE__ */ |
| |
| #elif defined(__ARM_ARCH_EXT_IDIV__) |
| ARM_FUNC_START aeabi_idivmod |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| mov r2, r0 |
| sdiv r0, r0, r1 |
| mls r1, r0, r1, r2 |
| RET |
| #else |
| ARM_FUNC_START aeabi_idivmod |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| stmfd sp!, { r0, r1, lr } |
| bl LSYM(divsi3_skip_div0_test) |
| ldmfd sp!, { r1, r2, lr } |
| mul r3, r2, r0 |
| sub r1, r1, r3 |
| RET |
| #endif |
| FUNC_END aeabi_idivmod |
| |
| #endif /* L_divsi3 */ |
| /* ------------------------------------------------------------------------ */ |
| #ifdef L_modsi3 |
| |
| #if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1 |
| |
| ARM_FUNC_START modsi3 |
| |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| |
| sdiv r2, r0, r1 |
| mls r0, r1, r2, r0 |
| RET |
| |
| #elif defined(__thumb__) |
| |
| FUNC_START modsi3 |
| |
| movs curbit, #1 |
| cmp divisor, #0 |
| beq LSYM(Ldiv0) |
| bpl LSYM(Lover10) |
| negs divisor, divisor @ Loops below use unsigned. |
| LSYM(Lover10): |
| push { work } |
| @ Need to save the sign of the dividend; unfortunately, we need the |
| @ work register later on. Must do this after saving the original |
| @ value of the work register, because we will pop this value off first. |
| push { dividend } |
| cmp dividend, #0 |
| bpl LSYM(Lover11) |
| negs dividend, dividend |
| LSYM(Lover11): |
| cmp dividend, divisor |
| blo LSYM(Lgot_result) |
| |
| THUMB_DIV_MOD_BODY 1 |
| |
| pop { work } |
| cmp work, #0 |
| bpl LSYM(Lover12) |
| negs dividend, dividend |
| LSYM(Lover12): |
| pop { work } |
| RET |
| |
| #else /* ARM version. */ |
| |
| FUNC_START modsi3 |
| |
| cmp r1, #0 |
| beq LSYM(Ldiv0) |
| rsbmi r1, r1, #0 @ loops below use unsigned. |
| movs ip, r0 @ preserve sign of dividend |
| rsbmi r0, r0, #0 @ if negative make positive |
| subs r2, r1, #1 @ compare divisor with 1 |
| cmpne r0, r1 @ compare dividend with divisor |
| moveq r0, #0 |
| tsthi r1, r2 @ see if divisor is power of 2 |
| andeq r0, r0, r2 |
| bls 10f |
| |
| ARM_MOD_BODY r0, r1, r2, r3 |
| |
| 10: cmp ip, #0 |
| rsbmi r0, r0, #0 |
| RET |
| |
| #endif /* ARM version */ |
| |
| DIV_FUNC_END modsi3 signed |
| |
| #endif /* L_modsi3 */ |
| /* ------------------------------------------------------------------------ */ |
| #ifdef L_dvmd_tls |
| |
| #ifdef __ARM_EABI__ |
| WEAK aeabi_idiv0 |
| WEAK aeabi_ldiv0 |
| FUNC_START aeabi_idiv0 |
| FUNC_START aeabi_ldiv0 |
| RET |
| FUNC_END aeabi_ldiv0 |
| FUNC_END aeabi_idiv0 |
| #else |
| FUNC_START div0 |
| RET |
| FUNC_END div0 |
| #endif |
| |
| #endif /* L_dvmd_tls */ |
| /* ------------------------------------------------------------------------ */ |
| #ifdef L_dvmd_lnx |
| @ GNU/Linux division-by-zero handler. Used in place of L_dvmd_tls. |
| |
| /* Constant taken from <asm/signal.h>. */ |
| #define SIGFPE 8 |
| |
| #ifdef __ARM_EABI__ |
| cfi_start __aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0) |
| WEAK aeabi_idiv0 |
| WEAK aeabi_ldiv0 |
| ARM_FUNC_START aeabi_idiv0 |
| ARM_FUNC_START aeabi_ldiv0 |
| do_push {r1, lr} |
| 98: cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8 |
| #else |
| cfi_start __div0, LSYM(Lend_div0) |
| ARM_FUNC_START div0 |
| do_push {r1, lr} |
| 98: cfi_push 98b - __div0, 0xe, -0x4, 0x8 |
| #endif |
| |
| mov r0, #SIGFPE |
| bl SYM(raise) __PLT__ |
| RETLDM r1 unwind=98b |
| |
| #ifdef __ARM_EABI__ |
| cfi_end LSYM(Lend_aeabi_ldiv0) |
| FUNC_END aeabi_ldiv0 |
| FUNC_END aeabi_idiv0 |
| #else |
| cfi_end LSYM(Lend_div0) |
| FUNC_END div0 |
| #endif |
| |
| #endif /* L_dvmd_lnx */ |
| #ifdef L_clear_cache |
| #if defined __ARM_EABI__ && defined __linux__ |
| @ EABI GNU/Linux call to cacheflush syscall. |
| ARM_FUNC_START clear_cache |
| do_push {r7} |
| #if __ARM_ARCH >= 7 || defined(__ARM_ARCH_6T2__) |
| movw r7, #2 |
| movt r7, #0xf |
| #else |
| mov r7, #0xf0000 |
| add r7, r7, #2 |
| #endif |
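| @ r7 now holds 0x0f0002, the ARM-private cacheflush syscall number |
| @ (__ARM_NR_BASE + 2); r0 and r1 carry the start and end addresses, |
| @ and r2, the flags argument, must be zero. |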
| mov r2, #0 |
| svc 0 |
| do_pop {r7} |
| RET |
| FUNC_END clear_cache |
| #else |
| #error "This is only for ARM EABI GNU/Linux" |
| #endif |
| #endif /* L_clear_cache */ |
| |
| #ifdef L_speculation_barrier |
| FUNC_START speculation_barrier |
| #if __ARM_ARCH >= 7 |
| isb |
| dsb sy |
| #elif defined __ARM_EABI__ && defined __linux__ |
| /* We don't have a speculation barrier directly for this |
| platform/architecture variant. But we can use a kernel |
| clear_cache service routine which will emit such instructions |
| if run on a later version of the architecture. We don't |
| really want to flush the cache, but we must give it a valid |
| address, so just clear pc..pc+1. */ |
| #if defined __thumb__ && !defined __thumb2__ |
| push {r7} |
| movs r7, #0xf |
| lsls r7, #16 |
| adds r7, #2 |
| adr r0, . + 4 |
| adds r1, r0, #1 |
| movs r2, #0 |
| svc 0 |
| pop {r7} |
| #else |
| do_push {r7} |
| #ifdef __ARM_ARCH_6T2__ |
| movw r7, #2 |
| movt r7, #0xf |
| #else |
| mov r7, #0xf0000 |
| add r7, r7, #2 |
| #endif |
| add r0, pc, #0 /* ADR. */ |
| add r1, r0, #1 |
| mov r2, #0 |
| svc 0 |
| do_pop {r7} |
| #endif /* Thumb1 only */ |
| #else |
| #warning "No speculation barrier defined for this platform" |
| #endif |
| RET |
| FUNC_END speculation_barrier |
| #endif |
| /* ------------------------------------------------------------------------ */ |
| /* Dword shift operations. */ |
| /* All the following Dword shift variants rely on the fact that |
| shft xxx, Reg |
| is in fact done as |
| shft xxx, (Reg & 255) |
| so for Reg value in (32...63) and (-31...-1) we will get zero (in the |
| case of logical shifts) or the sign (for asr). */ |
| |
| #ifdef __ARMEB__ |
| #define al r1 |
| #define ah r0 |
| #else |
| #define al r0 |
| #define ah r1 |
| #endif |
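| /* For example, in the ARM version of __aeabi_llsr below the low word |
| ends up as (al >> r2) + (ah << (32 - r2)) when r2 < 32, or as |
| ah >> (r2 - 32) when r2 >= 32, while the high word ah >> r2 becomes |
| zero for r2 >= 32 exactly as described above. */ |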
| |
| /* Prevent __aeabi double-word shifts from being produced on SymbianOS. */ |
| #ifndef __symbian__ |
| |
| #ifdef L_lshrdi3 |
| |
| FUNC_START lshrdi3 |
| FUNC_ALIAS aeabi_llsr lshrdi3 |
| |
| #ifdef __thumb__ |
| lsrs al, r2 |
| movs r3, ah |
| lsrs ah, r2 |
| mov ip, r3 |
| subs r2, #32 |
| lsrs r3, r2 |
| orrs al, r3 |
| negs r2, r2 |
| mov r3, ip |
| lsls r3, r2 |
| orrs al, r3 |
| RET |
| #else |
| subs r3, r2, #32 |
| rsb ip, r2, #32 |
| movmi al, al, lsr r2 |
| movpl al, ah, lsr r3 |
| orrmi al, al, ah, lsl ip |
| mov ah, ah, lsr r2 |
| RET |
| #endif |
| FUNC_END aeabi_llsr |
| FUNC_END lshrdi3 |
| |
| #endif |
| |
| #ifdef L_ashrdi3 |
| |
| FUNC_START ashrdi3 |
| FUNC_ALIAS aeabi_lasr ashrdi3 |
| |
| #ifdef __thumb__ |
| lsrs al, r2 |
| movs r3, ah |
| asrs ah, r2 |
| subs r2, #32 |
| @ If r2 is negative at this point the following step would OR |
| @ the sign bit into all of AL. That's not what we want... |
| bmi 1f |
| mov ip, r3 |
| asrs r3, r2 |
| orrs al, r3 |
| mov r3, ip |
| 1: |
| negs r2, r2 |
| lsls r3, r2 |
| orrs al, r3 |
| RET |
| #else |
| subs r3, r2, #32 |
| rsb ip, r2, #32 |
| movmi al, al, lsr r2 |
| movpl al, ah, asr r3 |
| orrmi al, al, ah, lsl ip |
| mov ah, ah, asr r2 |
| RET |
| #endif |
| |
| FUNC_END aeabi_lasr |
| FUNC_END ashrdi3 |
| |
| #endif |
| |
| #ifdef L_ashldi3 |
| |
| FUNC_START ashldi3 |
| FUNC_ALIAS aeabi_llsl ashldi3 |
| |
| #ifdef __thumb__ |
| lsls ah, r2 |
| movs r3, al |
| lsls al, r2 |
| mov ip, r3 |
| subs r2, #32 |
| lsls r3, r2 |
| orrs ah, r3 |
| negs r2, r2 |
| mov r3, ip |
| lsrs r3, r2 |
| orrs ah, r3 |
| RET |
| #else |
| subs r3, r2, #32 |
| rsb ip, r2, #32 |
| movmi ah, ah, lsl r2 |
| movpl ah, al, lsl r3 |
| orrmi ah, ah, al, lsr ip |
| mov al, al, lsl r2 |
| RET |
| #endif |
| FUNC_END aeabi_llsl |
| FUNC_END ashldi3 |
| |
| #endif |
| |
| #endif /* __symbian__ */ |
| |
| #ifdef L_clzsi2 |
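| /* Count leading zeros without a CLZ instruction: binary-search the top |
| set bit in 16/8/4-bit steps, then finish with the nibble table below. |
| Roughly: |
| |
| r1 = 28; |
| if (r0 >= 0x10000) { r0 >>= 16; r1 -= 16; } |
| if (r0 >= 0x100) { r0 >>= 8; r1 -= 8; } |
| if (r0 >= 0x10) { r0 >>= 4; r1 -= 4; } |
| return table[r0] + r1; |
| |
| Note table[0] = 4, so a zero input gives 28 + 4 = 32. */ |
| |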
| #ifdef NOT_ISA_TARGET_32BIT |
| FUNC_START clzsi2 |
| movs r1, #28 |
| movs r3, #1 |
| lsls r3, r3, #16 |
| cmp r0, r3 /* 0x10000 */ |
| bcc 2f |
| lsrs r0, r0, #16 |
| subs r1, r1, #16 |
| 2: lsrs r3, r3, #8 |
| cmp r0, r3 /* #0x100 */ |
| bcc 2f |
| lsrs r0, r0, #8 |
| subs r1, r1, #8 |
| 2: lsrs r3, r3, #4 |
| cmp r0, r3 /* #0x10 */ |
| bcc 2f |
| lsrs r0, r0, #4 |
| subs r1, r1, #4 |
| 2: adr r2, 1f |
| ldrb r0, [r2, r0] |
| adds r0, r0, r1 |
| bx lr |
| .align 2 |
| 1: |
| .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 |
| FUNC_END clzsi2 |
| #else |
| ARM_FUNC_START clzsi2 |
| # if defined (__ARM_FEATURE_CLZ) |
| clz r0, r0 |
| RET |
| # else |
| mov r1, #28 |
| cmp r0, #0x10000 |
| do_it cs, t |
| movcs r0, r0, lsr #16 |
| subcs r1, r1, #16 |
| cmp r0, #0x100 |
| do_it cs, t |
| movcs r0, r0, lsr #8 |
| subcs r1, r1, #8 |
| cmp r0, #0x10 |
| do_it cs, t |
| movcs r0, r0, lsr #4 |
| subcs r1, r1, #4 |
| adr r2, 1f |
| ldrb r0, [r2, r0] |
| add r0, r0, r1 |
| RET |
| .align 2 |
| 1: |
| .byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 |
| # endif /* !defined (__ARM_FEATURE_CLZ) */ |
| FUNC_END clzsi2 |
| #endif |
| #endif /* L_clzsi2 */ |
| |
| #ifdef L_clzdi2 |
| #if !defined (__ARM_FEATURE_CLZ) |
| |
| # ifdef NOT_ISA_TARGET_32BIT |
| FUNC_START clzdi2 |
| push {r4, lr} |
| cmp xxh, #0 |
| bne 1f |
| # ifdef __ARMEB__ |
| movs r0, xxl |
| bl __clzsi2 |
| adds r0, r0, #32 |
| b 2f |
| 1: |
| bl __clzsi2 |
| # else |
| bl __clzsi2 |
| adds r0, r0, #32 |
| b 2f |
| 1: |
| movs r0, xxh |
| bl __clzsi2 |
| # endif |
| 2: |
| pop {r4, pc} |
| # else /* NOT_ISA_TARGET_32BIT */ |
| ARM_FUNC_START clzdi2 |
| do_push {r4, lr} |
| cmp xxh, #0 |
| bne 1f |
| # ifdef __ARMEB__ |
| mov r0, xxl |
| bl __clzsi2 |
| add r0, r0, #32 |
| b 2f |
| 1: |
| bl __clzsi2 |
| # else |
| bl __clzsi2 |
| add r0, r0, #32 |
| b 2f |
| 1: |
| mov r0, xxh |
| bl __clzsi2 |
| # endif |
| 2: |
| RETLDM r4 |
| FUNC_END clzdi2 |
| # endif /* NOT_ISA_TARGET_32BIT */ |
| |
| #else /* defined (__ARM_FEATURE_CLZ) */ |
| |
| ARM_FUNC_START clzdi2 |
| cmp xxh, #0 |
| do_it eq, et |
| clzeq r0, xxl |
| clzne r0, xxh |
| addeq r0, r0, #32 |
| RET |
| FUNC_END clzdi2 |
| |
| #endif |
| #endif /* L_clzdi2 */ |
| |
| #ifdef L_ctzsi2 |
| #ifdef NOT_ISA_TARGET_32BIT |
| FUNC_START ctzsi2 |
| negs r1, r0 |
| ands r0, r0, r1 |
| movs r1, #28 |
| movs r3, #1 |
| lsls r3, r3, #16 |
| cmp r0, r3 /* 0x10000 */ |
| bcc 2f |
| lsrs r0, r0, #16 |
| subs r1, r1, #16 |
| 2: lsrs r3, r3, #8 |
| cmp r0, r3 /* #0x100 */ |
| bcc 2f |
| lsrs r0, r0, #8 |
| subs r1, r1, #8 |
| 2: lsrs r3, r3, #4 |
| cmp r0, r3 /* #0x10 */ |
| bcc 2f |
| lsrs r0, r0, #4 |
| subs r1, r1, #4 |
| 2: adr r2, 1f |
| ldrb r0, [r2, r0] |
| subs r0, r0, r1 |
| bx lr |
| .align 2 |
| 1: |
| .byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31 |
| FUNC_END ctzsi2 |
| #else |
| ARM_FUNC_START ctzsi2 |
| rsb r1, r0, #0 |
| and r0, r0, r1 |
| # if defined (__ARM_FEATURE_CLZ) |
| clz r0, r0 |
| rsb r0, r0, #31 |
| RET |
| # else |
| mov r1, #28 |
| cmp r0, #0x10000 |
| do_it cs, t |
| movcs r0, r0, lsr #16 |
| subcs r1, r1, #16 |
| cmp r0, #0x100 |
| do_it cs, t |
| movcs r0, r0, lsr #8 |
| subcs r1, r1, #8 |
| cmp r0, #0x10 |
| do_it cs, t |
| movcs r0, r0, lsr #4 |
| subcs r1, r1, #4 |
| adr r2, 1f |
| ldrb r0, [r2, r0] |
| sub r0, r0, r1 |
| RET |
| .align 2 |
| 1: |
| .byte 27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31 |
| # endif /* !defined (__ARM_FEATURE_CLZ) */ |
| FUNC_END ctzsi2 |
| #endif |
| #endif /* L_ctzsi2 */ |
| |
| /* ------------------------------------------------------------------------ */ |
| /* These next two sections are here despite the fact that they contain Thumb |
| assembler because their presence allows interworked code to be linked even |
| when this version of the GCC library is in use. */ |
| |
| /* Do not build the interworking functions when the target architecture does |
| not support Thumb instructions. (This can be a multilib option). */ |
| #if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\ |
| || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \ |
| || __ARM_ARCH >= 6 |
| |
| #if defined L_call_via_rX |
| |
| /* These labels & instructions are used by the Arm/Thumb interworking code. |
| The address of the function to be called is loaded into a register and then |
| one of these labels is called via a BL instruction. This puts the |
| return address into the link register with the bottom bit set, and the |
| code here switches to the correct mode before executing the function. */ |
| |
| .text |
| .align 0 |
| .force_thumb |
| |
| .macro call_via register |
| THUMB_FUNC_START _call_via_\register |
| |
| bx \register |
| nop |
| |
| SIZE (_call_via_\register) |
| .endm |
| |
| call_via r0 |
| call_via r1 |
| call_via r2 |
| call_via r3 |
| call_via r4 |
| call_via r5 |
| call_via r6 |
| call_via r7 |
| call_via r8 |
| call_via r9 |
| call_via sl |
| call_via fp |
| call_via ip |
| call_via sp |
| call_via lr |
| |
| #endif /* L_call_via_rX */ |
| |
| /* Don't bother with the old interworking routines for Thumb-2. */ |
| /* ??? Maybe only omit these on "m" variants. */ |
| #if !defined(__thumb2__) && __ARM_ARCH_ISA_ARM |
| |
| #if defined L_interwork_call_via_rX |
| |
| /* These labels & instructions are used by the Arm/Thumb interworking code, |
| when the target address is in an unknown instruction set. The address |
| of function to be called is loaded into a register and then one of these |
| labels is called via a BL instruction. This puts the return address |
| into the link register with the bottom bit set, and the code here |
| switches to the correct mode before executing the function. Unfortunately |
| the target code cannot be relied upon to return via a BX instruction, so |
| instead we have to store the return address on the stack and allow the |
| called function to return here instead. Upon return we recover the real |
| return address and use a BX to get back to Thumb mode. |
| |
| There are three variations of this code. The first, |
| _interwork_call_via_rN(), will push the return address onto the |
| stack and pop it in _arm_return(). It should only be used if all |
| arguments are passed in registers. |
| |
| The second, _interwork_r7_call_via_rN(), instead stores the return |
| address at [r7, #-4]. It is the caller's responsibility to ensure |
| that this address is valid and contains no useful data. |
| |
| The third, _interwork_r11_call_via_rN(), works in the same way but |
| uses r11 instead of r7. It is useful if the caller does not really |
| need a frame pointer. */ |
| |
| .text |
| .align 0 |
| |
| .code 32 |
| .globl _arm_return |
| LSYM(Lstart_arm_return): |
| cfi_start LSYM(Lstart_arm_return) LSYM(Lend_arm_return) |
| cfi_push 0, 0xe, -0x8, 0x8 |
| nop @ This nop is for the benefit of debuggers, so that |
| @ backtraces will use the correct unwind information. |
| _arm_return: |
| RETLDM unwind=LSYM(Lstart_arm_return) |
| cfi_end LSYM(Lend_arm_return) |
| |
| .globl _arm_return_r7 |
| _arm_return_r7: |
| ldr lr, [r7, #-4] |
| bx lr |
| |
| .globl _arm_return_r11 |
| _arm_return_r11: |
| ldr lr, [r11, #-4] |
| bx lr |
| |
| .macro interwork_with_frame frame, register, name, return |
| .code 16 |
| |
| THUMB_FUNC_START \name |
| |
| bx pc |
| nop |
| |
| .code 32 |
| tst \register, #1 |
| streq lr, [\frame, #-4] |
| adreq lr, _arm_return_\frame |
| bx \register |
| |
| SIZE (\name) |
| .endm |
| |
| .macro interwork register |
| .code 16 |
| |
| THUMB_FUNC_START _interwork_call_via_\register |
| |
| bx pc |
| nop |
| |
| .code 32 |
| .globl LSYM(Lchange_\register) |
| LSYM(Lchange_\register): |
| tst \register, #1 |
| streq lr, [sp, #-8]! |
| adreq lr, _arm_return |
| bx \register |
| |
| SIZE (_interwork_call_via_\register) |
| |
| interwork_with_frame r7,\register,_interwork_r7_call_via_\register |
| interwork_with_frame r11,\register,_interwork_r11_call_via_\register |
| .endm |
| |
| interwork r0 |
| interwork r1 |
| interwork r2 |
| interwork r3 |
| interwork r4 |
| interwork r5 |
| interwork r6 |
| interwork r7 |
| interwork r8 |
| interwork r9 |
| interwork sl |
| interwork fp |
| interwork ip |
| interwork sp |
| |
| /* The LR case has to be handled a little differently... */ |
| .code 16 |
| |
| THUMB_FUNC_START _interwork_call_via_lr |
| |
| bx pc |
| nop |
| |
| .code 32 |
| .globl .Lchange_lr |
| .Lchange_lr: |
| tst lr, #1 |
| stmeqdb r13!, {lr, pc} |
| mov ip, lr |
| adreq lr, _arm_return |
| bx ip |
| |
| SIZE (_interwork_call_via_lr) |
| |
| #endif /* L_interwork_call_via_rX */ |
| #endif /* !__thumb2__ */ |
| |
| /* Functions to support compact PIC switch tables in thumb1 state. |
| All these routines take an index into the table in r0. The |
| table is at LR & ~1 (but this must be rounded up in the case |
| of 32-bit entries). They are only permitted to clobber r12 |
| and r14, and r0 must be preserved on exit. */ |
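| |
| /* For instance, the compiler places the offset table immediately after |
| the BL that calls one of these routines, so on entry LR points at the |
| table (with bit 0 set). The byte and halfword variants fetch entry |
| r0, double it (entries count halfwords) and add it to LR before |
| returning through LR; the word variant instead adds the word entry to |
| the word-aligned table address. */ |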
| #ifdef L_thumb1_case_sqi |
| |
| .text |
| .align 0 |
| .force_thumb |
| .syntax unified |
| THUMB_FUNC_START __gnu_thumb1_case_sqi |
| push {r1} |
| mov r1, lr |
| lsrs r1, r1, #1 |
| lsls r1, r1, #1 |
| ldrsb r1, [r1, r0] |
| lsls r1, r1, #1 |
| add lr, lr, r1 |
| pop {r1} |
| bx lr |
| SIZE (__gnu_thumb1_case_sqi) |
| #endif |
| |
| #ifdef L_thumb1_case_uqi |
| |
| .text |
| .align 0 |
| .force_thumb |
| .syntax unified |
| THUMB_FUNC_START __gnu_thumb1_case_uqi |
| push {r1} |
| mov r1, lr |
| lsrs r1, r1, #1 |
| lsls r1, r1, #1 |
| ldrb r1, [r1, r0] |
| lsls r1, r1, #1 |
| add lr, lr, r1 |
| pop {r1} |
| bx lr |
| SIZE (__gnu_thumb1_case_uqi) |
| #endif |
| |
| #ifdef L_thumb1_case_shi |
| |
| .text |
| .align 0 |
| .force_thumb |
| .syntax unified |
| THUMB_FUNC_START __gnu_thumb1_case_shi |
| push {r0, r1} |
| mov r1, lr |
| lsrs r1, r1, #1 |
| lsls r0, r0, #1 |
| lsls r1, r1, #1 |
| ldrsh r1, [r1, r0] |
| lsls r1, r1, #1 |
| add lr, lr, r1 |
| pop {r0, r1} |
| bx lr |
| SIZE (__gnu_thumb1_case_shi) |
| #endif |
| |
| #ifdef L_thumb1_case_uhi |
| |
| .text |
| .align 0 |
| .force_thumb |
| .syntax unified |
| THUMB_FUNC_START __gnu_thumb1_case_uhi |
| push {r0, r1} |
| mov r1, lr |
| lsrs r1, r1, #1 |
| lsls r0, r0, #1 |
| lsls r1, r1, #1 |
| ldrh r1, [r1, r0] |
| lsls r1, r1, #1 |
| add lr, lr, r1 |
| pop {r0, r1} |
| bx lr |
| SIZE (__gnu_thumb1_case_uhi) |
| #endif |
| |
| #ifdef L_thumb1_case_si |
| |
| .text |
| .align 0 |
| .force_thumb |
| .syntax unified |
| THUMB_FUNC_START __gnu_thumb1_case_si |
| push {r0, r1} |
| mov r1, lr |
| adds.n r1, r1, #2 /* Align to word. */ |
| lsrs r1, r1, #2 |
| lsls r0, r0, #2 |
| lsls r1, r1, #2 |
| ldr r0, [r1, r0] |
| adds r0, r0, r1 |
| mov lr, r0 |
| pop {r0, r1} |
| mov pc, lr /* We know we were called from thumb code. */ |
| SIZE (__gnu_thumb1_case_si) |
| #endif |
| |
| #endif /* Arch supports thumb. */ |
| |
| .macro CFI_START_FUNCTION |
| .cfi_startproc |
| .cfi_remember_state |
| .endm |
| |
| .macro CFI_END_FUNCTION |
| .cfi_restore_state |
| .cfi_endproc |
| .endm |
| |
| #ifndef __symbian__ |
| /* The condition here must match the one in gcc/config/arm/elf.h and |
| libgcc/config/arm/t-elf. */ |
| #ifndef NOT_ISA_TARGET_32BIT |
| #include "ieee754-df.S" |
| #include "ieee754-sf.S" |
| #include "bpabi.S" |
| #else /* NOT_ISA_TARGET_32BIT */ |
| #include "bpabi-v6m.S" |
| #endif /* NOT_ISA_TARGET_32BIT */ |
| #endif /* !__symbian__ */ |