| /* Copyright (C) 2008-2021 Free Software Foundation, Inc. |
| Contributor: Joern Rennecke <joern.rennecke@embecosm.com> |
| on behalf of Synopsys Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "../arc-ieee-754.h" |
| |
| #if 0 /* DEBUG */ /* Disabled self-check: __muldf3 becomes a wrapper that runs __muldf3_c and __muldf3_asm on the same inputs and compares the results. */ |
| .global __muldf3 |
| .balign 4 |
| __muldf3: |
| push_s blink /* save the return address across the two calls */ |
| push_s r2 /* save the incoming argument registers (r1 is pushed in the delay slot below) */ |
| push_s r3 |
| push_s r0 |
| bl.d __muldf3_c /* first call: __muldf3_c, which is not defined in this file */ |
| push_s r1 /* delay slot; the stack now holds r1,r0,r3,r2,blink at sp+0..sp+16 */ |
| ld_s r2,[sp,12] /* reload the saved r2 for the second call */ |
| ld_s r3,[sp,8] /* reload the saved r3 for the second call */ |
| st_s r0,[sp,12] /* park the r0 returned by __muldf3_c in the slot that held r2 */ |
| st_s r1,[sp,8] /* park the r1 returned by __muldf3_c in the slot that held r3 */ |
| pop_s r1 /* restore the saved r1 */ |
| bl.d __muldf3_asm /* second call: the assembly routine below, renamed by the #define at the end of this block */ |
| pop_s r0 /* delay slot: restore the saved r0 */ |
| pop_s r3 /* r3 = r1 as returned by __muldf3_c */ |
| pop_s r2 /* r2 = r0 as returned by __muldf3_c */ |
| pop_s blink |
| cmp r0,r2 /* compare the two results, one word at a time */ |
| cmp.eq r1,r3 |
| jeq_s [blink] /* both words equal: return the assembly result */ |
| and r12,DBL0H,DBL1H |
| bic.f 0,0x7ff80000,r12 /* Z when every bit of 0x7ff80000 is set in both DBL0H and DBL1H */ ; both NaN -> OK |
| jeq_s [blink] |
| b abort /* the two implementations disagree */ |
| #define __muldf3 __muldf3_asm |
| #endif /* DEBUG */ |
| |
| __muldf3_support: /* This label makes debugger output saner. */ |
| .balign 4 |
| FUNC(__muldf3) |
| .Ldenorm_2: /* Reached from .Ldenorm_dbl1 when DBL1H (sign already cleared) is 0: normalize the denormal operand from DBL1L alone, then fall into __muldf3. */ |
| breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output |
| norm.f r12,DBL1L /* delay slot: r12 = normalization count of DBL1L; the .mi/.pl conditions below depend on DBL1L's bit 31 */ |
| mov.mi r12,21 /* bit 31 of DBL1L set: the left shift needed is 21 */ |
| add.pl r12,r12,22 /* otherwise the left shift needed is norm + 22; presumably this puts the leading one at bit 20 of DBL1H - confirm against the NORM definition */ |
| neg r11,r12 /* r11 = -shift, used as the complementary right-shift / rotate count (assumes counts are taken modulo 32) */ |
| asl_s r12,r12,20 /* move the shift count to the exponent-field position */ |
| lsr.f DBL1H,DBL1L,r11 /* bits of the low word that cross into the high word; Z if there are none */ |
| ror DBL1L,DBL1L,r11 /* rotate the low word left by the shift count */ |
| sub_s DBL0H,DBL0H,r12 /* compensate: lower the other operand's exponent by the shift count */ |
| mov.eq DBL1H,DBL1L /* nothing crossed over (Z from lsr.f): the rotated word is the entire high part */ |
| sub_l DBL1L,DBL1L,DBL1H /* drop from the low word the bits that now live in DBL1H */ |
| /* Fall through. */ |
| .global __muldf3 |
| .balign 4 |
| __muldf3: /* Entry point: multiply DBL0H:DBL0L by DBL1H:DBL1L (names come from arc-ieee-754.h) and return the product in DBL0H:DBL0L. */ |
| mulu64 DBL0L,DBL1L /* start low0 * low1; the 64-bit product is read back from mhi:mlo */ |
| ld.as r9,[pcl,0x68] /* r9 = 0x7ff00000 exponent mask; the offset is hard-coded, so the code size between here and .L7ff00000 must not change */ ; ((.L7ff00000-.+2)/4)] |
| bmsk r6,DBL0H,19 /* r6 = low 20 bits of DBL0H (upper fraction bits of operand 0) */ |
| bset r6,r6,20 /* add the implicit leading one */ |
| and r11,DBL0H,r9 /* r11 = exponent field of operand 0 */ |
| breq.d r11,0,.Ldenorm_dbl0 /* exponent field 0: operand 0 is zero or denormal */ |
| and r12,DBL1H,r9 /* delay slot: r12 = exponent field of operand 1 */ |
| breq.d r12,0,.Ldenorm_dbl1 /* exponent field 0: operand 1 is zero or denormal */ |
| mov r8,mlo /* delay slot: r8 = lowest word of low0 * low1 */ |
| mov r4,mhi /* r4 = upper word of low0 * low1 */ |
| mulu64 r6,DBL1L /* high0 * low1 */ |
| breq.d r11,r9,.Linf_nan /* exponent field all ones: operand 0 is inf or NaN */ |
| bmsk r10,DBL1H,19 /* delay slot: r10 = low 20 bits of DBL1H */ |
| breq.d r12,r9,.Linf_nan /* exponent field all ones: operand 1 is inf or NaN */ |
| bset r10,r10,20 /* delay slot: add the implicit leading one */ |
| add.f r4,r4,mlo /* accumulate high0 * low1 into r5:r4 */ |
| adc r5,mhi,0 |
| mulu64 r10,DBL0L /* high1 * low0 */ |
| add_s r12,r12,r11 ; add exponents |
| add.f r4,r4,mlo /* accumulate high1 * low0 into r5:r4 */ |
| adc r5,r5,mhi |
| mulu64 r6,r10 /* high0 * high1 */ |
| tst r8,r8 /* were any bits set in the lowest product word? (r8 is reused right below, the flags are kept) */ |
| bclr r8,r9,30 ; 0x3ff00000 |
| bset.ne r4,r4,0 ; put least significant word into sticky bit |
| bclr r6,r9,20 ; 0x7fe00000 |
| add.f r5,r5,mlo /* accumulate high0 * high1 into r7:r5 */ |
| adc r7,mhi,0 ; fraction product in r7:r5:r4 |
| lsr.f r10,r7,9 /* r10 = r7 >> 9; Z when the product has no bit at or above bit 9 of r7 */ |
| rsub.eq r8,r8,r9 ; 0x40000000 |
| sub r12,r12,r8 ; subtract bias + implicit 1 |
| brhs.d r12,r6,.Linf_denorm /* unsigned r12 >= 0x7fe00000: exponent out of range (overflow, or a negative value = underflow) */ |
| rsub r10,r10,12 /* delay slot: r10 = 12 - r10, the left shift that aligns the fraction */ |
| .Lshift_frac: /* In: fraction r7:r5:r4, left-shift count r10, exponent field r12. Also entered from .Linf_denorm / .Lshift32_frac with r12 = 0. */ |
| neg r8,r10 /* r8 = -r10: the complementary right-shift count */ |
| asl r6,r4,r10 /* r6 = bits of r4 that fall below the result lsb */ |
| lsr DBL0L,r4,r8 /* bits of r4 that stay in the low result word */ |
| add.f 0,r6,r6 /* C = round bit (msb of r6); Z = no bits set below it */ |
| btst.eq DBL0L,0 /* exact tie: examine the result lsb */ |
| cmp.eq r4,r4 /* tie with an even lsb: r4 - r4 clears C, so the adc below does not round up */ ; round to nearest / round to even |
| asl r4,r5,r10 |
| lsr r5,r5,r8 |
| adc.f DBL0L,DBL0L,r4 /* low result word plus the rounding carry; C = carry into the high word */ |
| xor.f 0,DBL0H,DBL1H /* N = sign of the product; the adc below still relies on C from adc.f above */ |
| asl r7,r7,r10 |
| add_s r12,r12,r5 |
| adc DBL0H,r12,r7 /* exponent field + upper fraction + carry; the leading one at bit 20 adds into the exponent field */ |
| j_s.d [blink] |
| bset.mi DBL0H,DBL0H,31 /* delay slot: set the sign bit when the product is negative */ |
| |
| /* N.B. This is optimized for ARC700. |
| ARC600 has very different scheduling / instruction selection criteria. */ |
| |
| /* If one number is denormal, subtract some from the exponent of the other |
| one (if the other exponent is too small, return 0), and normalize the |
| denormal. Then re-run the computation. */ |
| .Lret0_2: /* Zero operand found in .Ldenorm_2: return a zero that keeps only bit 31 of DBL0H (the product sign was folded into DBL0H in .Ldenorm_dbl1). */ |
| lsr_s DBL0H,DBL0H,31 /* shift right then left by 31: clears everything except the sign bit */ |
| asl_s DBL0H,DBL0H,31 |
| j_s.d [blink] |
| mov_s DBL0L,0 /* delay slot: low result word = 0 */ |
| .balign 4 |
| .Ldenorm_dbl0: /* Operand 0 has a zero exponent field: swap the operands so the zero/denormal one sits in DBL1, then share the code below. */ |
| mov_s r12,DBL0L /* swap the low words through r12 */ |
| mov_s DBL0L,DBL1L |
| mov_s DBL1L,r12 |
| mov_s r12,DBL0H /* swap the high words through r12 */ |
| mov_s DBL0H,DBL1H |
| mov_s DBL1H,r12 |
| and r11,DBL0H,r9 /* recompute r11 = exponent field of the new operand 0 */ |
| .Ldenorm_dbl1: /* Here DBL1 has a zero exponent field and r11 holds the exponent field of DBL0. */ |
| brhs r11,r9,.Linf_nan /* the other operand is inf or NaN */ |
| brhs 0x3ca00001,r11,.Lret0 /* other exponent field <= 0x3ca00000 (this includes 0): return a signed zero */ |
| sub_s DBL0H,DBL0H,DBL1H /* this sub / bmsk.f / add sequence clears DBL1H's sign bit and flips DBL0H's sign bit if it was set, */ |
| bmsk.f DBL1H,DBL1H,30 /* so DBL0H now carries the product sign; Z when the remaining bits of DBL1H are all zero */ |
| add_s DBL0H,DBL0H,DBL1H |
| beq.d .Ldenorm_2 /* high word empty: normalize from the low word instead */ |
| norm r12,DBL1H /* delay slot: r12 = normalization count of DBL1H */ |
| sub_s r12,r12,10 /* r12 = left shift count; presumably this brings the leading one to bit 20 - confirm against the NORM definition */ |
| asl r5,r12,20 /* shift count in exponent-field position */ |
| asl_s DBL1H,DBL1H,r12 |
| sub DBL0H,DBL0H,r5 /* compensate: lower the other operand's exponent by the shift count */ |
| neg r5,r12 /* r5 = -shift: complementary right-shift count */ |
| lsr r6,DBL1L,r5 /* bits moving from the low word into the high word */ |
| asl_s DBL1L,DBL1L,r12 |
| b.d __muldf3 /* re-run the multiplication with the normalized operand */ |
| add_s DBL1H,DBL1H,r6 /* delay slot: merge the carried-over bits into DBL1H */ |
| |
| .Lret0: xor_s DBL0H,DBL0H,DBL1H /* Return a signed zero. Bit 31 of DBL0H becomes the XOR of the two sign bits. */ |
| bclr DBL1H,DBL0H,31 /* DBL1H = the same value with bit 31 cleared */ |
| xor_s DBL0H,DBL0H,DBL1H /* cancels every bit except bit 31 */ |
| j_s.d [blink] |
| mov_s DBL0L,0 /* delay slot: low result word = 0 */ |
| |
| .balign 4 |
| .Linf_nan: /* At least one operand has an all-ones exponent field (inf or NaN). NOTE(review): the low result word is always 0, so a NaN whose only fraction bits are in its low word looks like it yields infinity - confirm. */ |
| bclr r12,DBL1H,31 /* r12 = DBL1H without its sign bit */ |
| xor_s DBL1H,DBL1H,DBL0H /* bit 31 of DBL1H = sign of the product */ |
| bclr_s DBL0H,DBL0H,31 /* DBL0H without its sign bit */ |
| max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf |
| or.f 0,DBL0H,DBL0L /* Z when operand 0 is zero */ |
| mov_s DBL0L,0 /* low result word = 0 */ |
| or.ne.f DBL1L,DBL1L,r12 /* only if operand 0 is non-zero: Z when operand 1 is zero */ |
| not_s DBL0H,DBL0L /* DBL0H = ~0 = 0xffffffff, the default result when one operand is zero */ ; inf * 0 -> NaN |
| mov.ne DBL0H,r8 /* neither operand is zero: the result high word is the larger magnitude high word */ |
| tst_s DBL1H,DBL1H /* N = sign of the product */ |
| j_s.d [blink] |
| bset.mi DBL0H,DBL0H,31 /* delay slot: set the sign bit when the product is negative */ |
| |
| /* We have checked for infinity / NaN input before, and transformed |
| denormalized inputs into normalized inputs. Thus, the worst case |
| exponent overflows are: |
| 1 + 1 - 0x400 == 0xc02 : maximum underflow |
| 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow |
| N.B. 0x7e and 0x7f are also values for overflow. |
| |
| If (r12 <= -54), we have an underflow to zero. */ |
| .balign 4 |
| .Linf_denorm: /* In: r12 = out-of-range exponent field, r10 = left-shift count, r7:r5:r4 = fraction product. */ |
| lsr r6,r12,28 /* r6 = top four bits of r12 */ |
| brlo.d r6,0xc,.Linf /* below 0xc: overflow, return infinity; otherwise the exponent is negative */ |
| asr r6,r12,20 /* delay slot: r6 = exponent as a signed number */ |
| add.f r10,r10,r6 /* reduce the left-shift count by the exponent deficit; these flags are used by beq.d below */ |
| brgt.d r10,0,.Lshift_frac /* a positive left shift remains: build a denormal in .Lshift_frac */ |
| mov_s r12,0 /* delay slot: a denormal result has exponent field 0 */ |
| beq.d .Lround_frac /* shift count exactly 0: r7:r5 is already the result, only rounding is left */ |
| add r10,r10,32 /* delay slot: trade 32 bits of right shift for the word move below */ |
| .Lshift32_frac: /* Shift the fraction r7:r5:r4 right by one whole word. */ |
| tst r4,r4 /* are any bits about to be dropped? */ |
| mov r4,r5 |
| bset.ne r4,r4,1 /* keep the dropped bits as a sticky bit (bit 1 of the new r4) */ |
| mov r5,r7 |
| brge.d r10,1,.Lshift_frac /* a positive left shift remains: finish in .Lshift_frac */ |
| mov r7,0 /* delay slot: the top word is now empty */ |
| breq.d r10,0,.Lround_frac /* no shift left to do: only rounding is left */ |
| add r10,r10,32 /* delay slot: account for another whole-word shift */ |
| brgt r10,21,.Lshift32_frac /* shift another word while the adjusted count is still above 21 */ |
| b_s .Lret0 /* otherwise return a signed zero */ |
| |
| .Lround_frac: /* Result bits are r7:r5, r4 holds the discarded bits. NOTE(review): no sign bit is applied on this exit, unlike the other return paths - confirm. */ |
| add.f 0,r4,r4 /* C = msb of r4 (round bit); Z = no bits set below it */ |
| btst.eq r5,0 /* only when Z: Z = the result lsb is 0 */ |
| mov_s DBL0L,r5 |
| mov_s DBL0H,r7 |
| adc.eq.f DBL0L,DBL0L,0 /* NOTE(review): the carry is only added when Z is set, so a discarded part above one half (Z clear) is not rounded up; compare the cmp.eq / adc.f sequence in .Lshift_frac - confirm. */ |
| j_s.d [blink] |
| adc.eq DBL0H,DBL0H,0 /* delay slot: propagate the carry when the low word wrapped to 0 */ |
| |
| .Linf: mov_s DBL0L,0 /* Overflow: return infinity with the sign of the product. The low result word is 0. */ |
| xor.f DBL1H,DBL1H,DBL0H /* N = sign of the product */ |
| mov_s DBL0H,r9 /* r9 = 0x7ff00000: all-ones exponent field, zero fraction */ |
| j_s.d [blink] |
| bset.mi DBL0H,DBL0H,31 /* delay slot: set the sign bit when the product is negative */ |
| ENDFUNC(__muldf3) |
| |
| .balign 4 |
| .L7ff00000: /* Exponent-field mask; per the comment on the ld.as at the top of __muldf3, that hard-coded pc-relative load reads this word into r9. */ |
| .long 0x7ff00000 |