/* libgcc/config/ft32/lib1funcs.S - gcc - Git at Google */

 # ieee754 sf routines for FT32

 /* Copyright (C) 1995-2021 Free Software Foundation, Inc.

 This file is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
 Free Software Foundation; either version 3, or (at your option) any
 later version.

 This file is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 General Public License for more details.

 Under Section 7 of GPL version 3, you are granted additional
 permissions described in the GCC Runtime Library Exception, version
 3.1, as published by the Free Software Foundation.

 You should have received a copy of the GNU General Public License and
 a copy of the GCC Runtime Library Exception along with this program;
 see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 <http://www.gnu.org/licenses/>.  */

 # See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
 # for implementation details of all except division which is detailed below
 #

 #ifdef L_fp_tools
 # Constant pool shared by the soft-float routines below (read with lpm)
 # plus the lookup table used by the ntz macro.
 # NOTE(review): no label here is declared .global although code under
 # other L_* guards references them - confirm how these get linked.
 // .global __cmpsf2_
 nan:            .long 0x7FFFFFFF    # also abs mask
 inf:            .long 0x7F800000    # exponent all ones, fraction zero
 sign_mask:      .long 0x80000000    # bit 31 only
 m_mask:         .long 0x007FFFFF    # low 23 bits (fraction field)
 exp_bias:       .long 127           # not referenced in the code visible here
 edge_case:      .long 0x00FFFFFF    # border case tested in __mulsf3 underflow
 smallest_norm:  .long 0x00800000    # implicit bit
 high_FF:        .long 0xFF000000    # inf << 1; NaN threshold used by __mulsf3
 high_uint:      .long 0xFFFFFFFF    # returned for x < y by the disabled __cmpsf2_

 # 64 one-byte entries, indexed by the 6-bit value the ntz macro derives
 # from its input; entry 0 is 32, which is what a zero input selects.
 ntz_table:
     .byte   32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
     .byte   10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
     .byte   31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
     .byte   30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0

 #endif

 # Supply a few 'missing' instructions

 # rd = ~r1 (bitwise complement), written as xnor with zero
 .macro      not rd,r1
     xnor    \rd,\r1,0
 .endm

 # x = -x (two's complement negate, in place)
 .macro      neg x
     xnor    \x, \x, 0               # x = ~x
     add     \x, \x, 1               # ~x + 1 == -x
 .endm

 # Emit a hand-encoded instruction word that sets $cc from the result of
 # "ashl reg,dist".  Both arguments are plain numbers (this file writes
 # "ashlcc 2,10"), not $rN operands, because they are shifted into the word.
 .macro  ashlcc reg,dist
     .long   (\reg << 15) | (\dist << 4) | 0x5de04008
 .endm


 # converts an unsigned number x to a signed rep based on the bits in sign
 # sign should be 0x00000000 or 0xffffffff.
 # With sign = 0 x is unchanged; with sign = 0xffffffff the two steps give
 # ~(x - 1), which is -x.  The order of the add and the xor matters.
 .macro      to_signed x, sign
     add     \x,\x,\sign    # conditionally decrement x
     xor     \x,\x,\sign    # conditionally complement x
 .endm


 # Build the constant v in r in two steps: ldk supplies v >> 10, then ldl
 # appends the low 10 bits of v.
 .macro  ld32    r,v
     ldk     \r,(\v >> 10)
     ldl     \r,\r,(\v & 0x3ff)
 .endm

 # calculate trailing zero count in x, also uses scr.
 # Using Seal's algorithm
 # The lowest set bit of x is isolated, multiplied by 17 * 65 * 65535 using
 # shifts and adds, and the top 6 bits of the product index ntz_table.
 # x is replaced by the count (32 when x was zero); scr is clobbered.
 .macro      ntz x, scr
     not     \scr, \x
     add     \scr, \scr, 1           # scr = -x
     and     \x, \x, \scr            # x = lowest set bit of x (0 stays 0)
     ashl    \scr, \x, 4
     add     \x, \scr, \x            # x = x * 17
     ashl    \scr, \x, 6
     add     \x, \scr, \x            # x = x * 65
     ashl    \scr, \x, 16
     sub     \x, \scr, \x            # x = x * 65535
     lshr    \x, \x, 26              # keep the top 6 bits as table index
     ldk     \scr, ntz_table
     add     \x, \x, \scr            # address of the table entry
     lpmi.b  \x, \x, 0               # x = table byte = trailing zero count
 .endm

 # calculate leading zero count
 # x is replaced by the count; scr is clobbered (see ntz).
 # NOTE(review): relies on "flip x,x,31" reversing the bit order of the
 # whole word so that leading zeros become trailing zeros - confirm against
 # the FT32 instruction reference.
 .macro      nlz x, scr
     flip    \x, \x, 31
     ntz     \x, \scr
 .endm


 # Round 26 bit mantissa to nearest
 # | 23 bits frac | G | R | S |
 # m is shifted right by 2 and incremented according to its low three bits.
 # 0xc8 is a lookup word: bit (m & 7) of it is the increment.  Bits 3, 6 and 7
 # are set, so the increment is 1 when bit 1 of m is set and either bit 0 or
 # bit 2 is set too (ties go to the even result).
 # s1 and s2 are scratch.
 .macro      round m,  s1, s2
     ldk     \s1,0xc8
     and     \s2,\m,7                # s2 = low three bits of m
     lshr    \s1,\s1,\s2
     and     \s1,\s1,1               # s1 = rounding increment (0 or 1)
     lshr    \m,\m,2                 # drop the two low bits
     add     \m,\m,\s1
 .endm

 # If NZ, set the LSB of reg
 # Must directly follow the instruction that set the condition codes
 # (tst, btst or ashlcc in this file).  Defines the numeric local label 1.
 .macro      sticky reg
     jmpc    z,1f
     or      \reg,\reg,1             # set the sticky bit to 1
 1:
 .endm

 ##########################################################################
 ##########################################################################
 ## addition & subtraction

 #if defined(L_subsf3) || defined(L_addsub_sf)
 # __subsf3: x - y, computed as x + (-y).
 # In: x in $r0, y in $r1.  The sign bit of $r1 is toggled and control
 # transfers to __addsf3, which returns to the original caller.
 # Scratch: $r2.
 .global __subsf3
 __subsf3:
     ldk     $r2,1
     ashl    $r2,$r2,31          # $r2 = 0x80000000 (sign bit)
     xor     $r1,$r1,$r2         # y = -y
     jmp     __addsf3
 #endif

 #if defined(L_addsf3) || defined(L_addsub_sf)
 # __addsf3: single-precision addition, z = x + y.
 # In:  x in $r0, y in $r1.   Out: z in $r0.
 # Registers written: $r0-$r8.
 # Outline: exponents of 0 or 255 are handled first (an operand whose
 # exponent field is 0 is treated as zero), then the operand with the
 # smaller exponent is aligned, the signed mantissas (with 3 extra low bits)
 # are added, and the sum is renormalised, rounded to nearest and repacked.
 # A result exponent of 0 or less gives a signed zero; 255 or more gives a
 # signed infinity.
 .global __addsf3
 __addsf3:
     # x in $r0, y in $r1, result z in $r0       --||| 100 instructions +/- |||--
     # unpack e, calc d
     bextu   $r2,$r0,(8<<5)|23   # ex in r2
     bextu   $r3,$r1,(8<<5)|23   # ey in r3
     sub     $r5,$r2,$r3         # d = ex - ey

     # Special values are 0x00 and 0xff in ex and ey.
     # If (ex&ey) == 0 or (ex|ey) == 255 then there may be
     # a special value, so the slow path below sorts it out.
     tst     $r2,$r3
     jmpc    nz,1f
     jmp     slow
 1:  or      $r4,$r2,$r3
     cmp     $r4,255
     jmpc    nz,no_special_vals
 slow:
     # Check for early exit
     # Taken when at least one exponent is 0 and neither is 255.
     cmp     $r2,0
     jmpc    z,test_if_not_255
     cmp     $r3,0
     jmpc    nz,no_early_exit
 test_if_not_255:
     cmp     $r2,255
     jmpc    z,no_early_exit
     cmp     $r3,255
     jmpc    z,no_early_exit
     or      $r6,$r2,$r3
     cmp     $r6,0
     jmpc    nz,was_not_zero
     # both exponents are 0: result is zero, negative only when both are
     and     $r0,$r0,$r1
     lpm     $r1,sign_mask
     and     $r0,$r0,$r1
     return
 was_not_zero:
     # exactly one exponent is 0: return the other operand unchanged
     cmp     $r2,0
     jmpc    nz,ret_x
     move    $r0,$r1
     return
 ret_x:
     return
 no_early_exit:
     # setup to test for special values
     # ((e - 1) & 0xFE) equals 0xFE exactly when e is 0 or 255
     sub     $r6,$r2,1
     and     $r6,$r6,0xFE
     sub     $r7,$r3,1
     and     $r7,$r7,0xFE
     # test for special values
     cmp     $r6,$r7
     jmpc    gte,ex_spec_is_gte
     move    $r6,$r7
 ex_spec_is_gte:
     cmp     $r6,0xFE
     jmpc    nz,no_special_vals
     cmp     $r5,0
     jmpc    ns,d_gte_0
     # d < 0: both tests below lead to ret_y, which is also the
     # fall-through, so y is returned in every case.
     cmp     $r3,0xFF
     jmpc    z,ret_y
     cmp     $r2,0
     jmpc    z,ret_y
 ret_y:
     move    $r0,$r1
     return
 d_gte_0:
     cmp     $r5,0
     jmpc    z,d_is_0
     cmp     $r2,0xFF
     jmpc    z,ret_x
     cmp     $r3,0
     jmpc    z,ret_x
 d_is_0:
     cmp     $r2,0xFF
     jmpc    nz,no_special_vals
     # both exponents are 255: NaN when either fraction is non-zero or the
     # signs differ, otherwise x (an infinity) is returned as is
     ashl    $r6,$r0,9           # clear all except x frac
     ashl    $r7,$r1,9           # clear all except y frac
     or      $r6,$r6,$r7
     cmp     $r6,0
     jmpc    nz,ret_nan
     lshr    $r4,$r0,31          # sx in r4
     lshr    $r5,$r1,31          # sy in r5
     cmp     $r4,$r5
     jmpc    nz,ret_nan
     return
 ret_nan:
     lpm     $r0,nan
     return
 no_special_vals:
     ldk     $r8,(1<<10)|(9<<5)|26   # setup implicit bit and mask for e
     #----------------------
     ashr    $r4,$r0,31              # sx in r4
     ashl    $r0,$r0,3               # shift mx 3 for GRS bits
     bins    $r0,$r0,$r8             # clear sx, ex and add implicit bit mx
     # change mx to signed mantissa
     to_signed $r0,$r4
     #----------------------
     ashr    $r4,$r1,31              # sy in r4
     ashl    $r1,$r1,3               # shift my 3 for GRS bits
     bins    $r1,$r1,$r8             # clear sy, ey and add implicit bit my
     # change my to signed mantissa
     to_signed $r1,$r4
     #----------------------
     # test if we swap ms based on d sign
     cmp     $r5,0
     jmpc    gte,noswap
     # swap mx & my
     xor     $r0,$r0,$r1
     xor     $r1,$r0,$r1
     xor     $r0,$r0,$r1
     # d positive means that ex>=ey, so ez = ex
     # d negative means that ey>ex, so ez = ey
     move    $r2,$r3
     # |d|
     neg     $r5
 noswap:
                                     # now $r2 = ez = max(ex,ey)
     cmp     $r5,26                  # max necessary alignment shift is 26
     jmpc    lt,under_26
     ldk     $r5,26
 under_26:
     ldk     $r7,-1
     ashl    $r7,$r7,$r5             # create inverse of mask for test of S bit value in discarded my
     not     $r7,$r7
     tst     $r1,$r7                 # determine value of sticky bit
     # shift my >> |d|
     ashr    $r1,$r1,$r5
     sticky  $r1

     # add ms
     add     $r0,$r0,$r1

     # $r4 = sign(mx), mx = |mx|
     ashr    $r4,$r0,31
     xor     $r0,$r0,$r4
     sub     $r0,$r0,$r4

     # realign mantissa using leading zero count
     # (flip + ntz is the nlz macro written out; $r7 = leading zero count)
     flip    $r7,$r0,31
     ntz     $r7,$r8
     ashl    $r0,$r0,$r7
     btst    $r0,(6<<5)|0            # test low bits for sticky again
     lshr    $r0,$r0,6
     sticky  $r0

     # update exponent
     # the implicit bit started at bit 26, i.e. 5 leading zeros
     add     $r2,$r2,5
     sub     $r2,$r2,$r7

     # Round to nearest
     round   $r0,$r7,$r6

     # detect_exp_update
     # a carry out of rounding shows up in bit 24 and bumps the exponent
     lshr    $r6,$r0,24
     add     $r2,$r2,$r6

     # final tests
     # mz == 0? if so, we just bail with a +0
     cmp     $r0,0
     jmpc    nz,msum_not_zero
     ldk     $r0,0
     return
 msum_not_zero:
     # Combined check that (1 <= ez <= 254)
     sub     $r3,$r2,1
     cmp     $r3,254
     jmpc    b,no_special_ret
     # underflow?
     cmp     $r2,0
     jmpc    gt,no_under
     ldk     $r0,0
     jmp     pack_sz
 no_under:
     # overflow?
     cmp     $r2,255
     jmpc    lt,no_special_ret
     ldk     $r0,0x7F8
     ashl    $r0,$r0,20              # 0x7F800000
     jmp     pack_sz
 no_special_ret:
     # Pack ez
     ldl     $r2,$r2,(8<<5)|23
     bins    $r0,$r0,$r2             # width = 8, pos = 23 pack ez
     # Pack sz
 pack_sz:
     ldl     $r4,$r4,(1<<5)|31
     bins    $r0,$r0,$r4             # width = 1, pos = 31 set sz from $r4 (sign of the sum)
     return
 #endif

 ##########################################################################
 ##########################################################################
 ## multiplication

 #ifdef  L_mulsf3
 # __mulsf3: single-precision multiplication, z = x * y.
 # In:  x in $r0, y in $r1.   Out: z in $r0.
 # Registers written: $r0-$r7.
 # Outline: exponents of 0 or 255 are handled first (an operand whose
 # exponent field is 0 is treated as zero), then the two 24-bit mantissas
 # are multiplied, the 48-bit product is reduced to a mantissa with extra
 # low bits plus a sticky bit, rounded to nearest and repacked.
 .global __mulsf3
 __mulsf3:
     # x in $r0, y in $r1, result z in $r0       --||| 61 instructions +/- |||--

     # unpack e
     bextu   $r2,$r0,(8<<5)|23   # ex in r2
     bextu   $r3,$r1,(8<<5)|23   # ey in r3
     # calc result sign
     xor     $r4,$r0,$r1
     lpm     $r5,sign_mask
     and     $r4,$r4,$r5         # sz in r4

     # unpack m add implicit bit
     ldk     $r5,(1<<10)|(9<<5)|23   # setup implicit bit and mask for e
     #----------------------
     bins    $r0,$r0,$r5             # clear sx, ex and add implicit bit mx

     # fast path only when 1 <= ex <= 254 and 1 <= ey <= 254
     sub     $r6,$r2,1
     cmp     $r6,254
     jmpc    b,1f
     jmp     slow_mul
 1:  sub     $r6,$r3,1
     cmp     $r6,254
     jmpc    b,no_special_vals_mul

 slow_mul:
     # Check for early exit
     # An exponent of 0 with no exponent of 255 gives a signed zero.
     cmp     $r2,0
     jmpc    z,op_is_zero
     cmp     $r3,0
     jmpc    nz,no_early_exit_mul
 op_is_zero:
     cmp     $r2,255
     jmpc    z,no_early_exit_mul
     cmp     $r3,255
     jmpc    z,no_early_exit_mul
     move    $r0,$r4
     return
 no_early_exit_mul:
     # setup to test for special values
     # ((e - 1) & 0xFE) equals 0xFE exactly when e is 0 or 255
     sub     $r6,$r2,1
     and     $r6,$r6,0xFE
     sub     $r7,$r3,1
     and     $r7,$r7,0xFE
     # test for special values
     cmp     $r6,$r7
     jmpc    gte,ex_spec_is_gte_ey_mul
     move    $r6,$r7
 ex_spec_is_gte_ey_mul:
     cmp     $r6,0xFE
     jmpc    nz,no_special_vals_mul
     cmp     $r2,0xFF
     jmpc    nz,ex_not_FF_mul
     # ex is 255: NaN when x has a fraction, when ey is 0, or when y is NaN
     ashl    $r6,$r0,9
     cmp     $r6,0
     jmpc    nz,ret_nan
     cmp     $r3,0
     jmpc    z,ret_nan
     ashl    $r6,$r1,1           # y without its sign bit
     lpm     $r7,high_FF
     cmp     $r6,$r7
     jmpc    a,ret_nan
     cmp     $r6,0
     jmpc    z,ret_nan
     # infinity
     lpm     $r0,inf
     or      $r0,$r0,$r4
     return
 ex_not_FF_mul:
     cmp     $r2,0
     jmpc    nz,no_nan_mul
     cmp     $r3,0xFF
     jmpc    nz,no_nan_mul
     jmp     ret_nan
 no_nan_mul:
     # return y with its sign replaced by the result sign
     lpm     $r0,nan
     and     $r0,$r0,$r1
     or      $r0,$r0,$r4
     return

 ret_nan:
     lpm     $r0,nan
     return

 no_special_vals_mul:
     bins    $r1,$r1,$r5         # clear sy, ey and add implicit bit my
     # calc ez
     add     $r3,$r2,$r3
     sub     $r3,$r3,127         # ez in r3

     # (r1,r2) = R0 * R1
     mul     $r2,$r0,$r1         # low 32 bits of the product
     muluh   $r1,$r0,$r1         # high 32 bits of the product

     btst    $r1,(1<<5)|15       # XXX use jmpx
     jmpc    z,mul_z0            # bit 47 of the product clear

     # mz is 1X.XX...X
     # 48-bit product is in (r1,r2). The low 22 bits of r2
     # are discarded.
     lshr    $r0,$r2,22
     ashl    $r1,$r1,10
     or      $r0,$r0,$r1         # r0 = (r1,r2) >> 22
     ashlcc  2,10                # $cc from r2 << 10, i.e. the discarded bits
     sticky  $r0
     add     $r3,$r3,1           # bump exponent

     # Round to nearest
     round   $r0, $r1, $r2
     lshr    $r6,$r0,24          # carry out of rounding
     add     $r3,$r3,$r6

     sub     $r6,$r3,1
     cmp     $r6,254
     jmpc    b,no_special_ret_mul

 special_ret_mul:
     # When the final exponent <= 0, result is flushed to 0 except
     # for the border case 0x00FFFFFF which is promoted to next higher
     # FP no., that is, the smallest "normalized" number.
     cmp     $r3,0
     jmpc    gt,exp_normal
     # Pack ez
     ldl     $r3,$r3,(8<<5)|23
     bins    $r0,$r0,$r3 # width = 8, pos = 23 pack ez
     lpm     $r2,edge_case
     cmp     $r0,$r2
     jmpc    nz,no_edge_case
     lpm     $r0,smallest_norm
     jmp     pack_sz_mul
 no_edge_case:
     ldk     $r0,0
     jmp     pack_sz_mul
 exp_normal:
     # overflow?
     cmp     $r3,255
     jmpc    lt,no_special_ret_mul
     ldk     $r0,0x7F8
     ashl    $r0,$r0,20          # 0x7F800000
     jmp     pack_sz_mul
 no_special_ret_mul:
     # Pack ez
     ldl     $r3,$r3,(8<<5)|23
     bins    $r0,$r0,$r3 # width = 8, pos = 23 pack ez
     # Pack sz
 pack_sz_mul:
     or    $r0,$r0,$r4
     return

 mul_z0:
     # mz is 0X.XX...X
     # 48-bit product is in (r1,r2). The low 21 bits of r2
     # are discarded.
     lshr    $r0,$r2,21
     ashl    $r1,$r1,11
     or      $r0,$r0,$r1         # r0 = (r1,r2) >> 21
     ashlcc  2,11                # $cc from r2 << 11, i.e. the discarded bits
     sticky  $r0
     # Round to nearest
     round   $r0, $r1, $r2
     lshr    $r6,$r0,24          # carry out of rounding
     add     $r3,$r3,$r6

     sub     $r6,$r3,1
     cmp     $r6,254
     jmpc    b,no_special_ret_mul
     jmp     special_ret_mul
 #endif

 ##########################################################################
 ##########################################################################
 ## division

 ## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
 ## for implementation details


 #ifdef  L_divsf3
 # Constants used by the polynomial steps i0..i25 of __divsf3 below; see the
 # paper referenced above this section for how they are derived.
 dc_1: .long             0xffffe7d7
 dc_2: .long             0xffffffe8
 dc_3: .long             0xffbad86f
 dc_4: .long             0xfffbece7
 dc_5: .long             0xf3672b51
 dc_6: .long             0xfd9d3a3e
 dc_7: .long             0x9a3c4390
 dc_8: .long             0xd4d2ce9b
 dc_9: .long             0x1bba92b3
 dc_10: .long            0x525a1a8b
 dc_11: .long            0x0452b1bf
 dc_12: .long            0xFFFFFFC0
 spec_val_test:  .long   0x7F7FFFFF  # not referenced in the code visible here

 # __divsf3: single-precision division, z = x / y.
 # In:  x in $r0, y in $r1.   Out: z in $r0.
 # Registers written: $r0, $r2-$r13; $r13 is saved on entry and restored
 # before every return.
 .global __divsf3
 __divsf3:
     push    $r13
     # x in $r0, y in $r1, result z in $r0       --||| 73 instructions +/- |||-
     bextu   $r10,$r0,(8<<5)|23   # ex in r10
     bextu   $r11,$r1,(8<<5)|23   # ey in r11
     lpm     $r6, m_mask
     and     $r2, $r0, $r6        # mx
     and     $r3, $r1, $r6        # my
     cmp     $r2,$r3
     # NOTE(review): $r30 is read as if it held the flags of the cmp above;
     # confirm which flag bit 4 is against the FT32 reference.
     bextu   $r2,$r30,(1<<5)|4   # c = Tx >= T;
     ashl    $r3,$r3,9           # T = X << 9;
     lpm     $r13, sign_mask
     ashl    $r4,$r0,8           # X8 = X << 8;
     or      $r4,$r4,$r13        # Mx = X8 | 0x80000000;
     lshr    $r5,$r4,$r2         # S = Mx >> c;
     # calc D
     sub     $r2, $r11, $r2
     add     $r12, $r10, 125
     sub     $r2, $r12, $r2      # int D = (Ex + 125) - (Ey - c);
     # calc result sign
     xor     $r12,$r0,$r1
     and     $r12,$r12,$r13      # Sr = ( X ^ Y ) & 0x80000000;
     # check early exit
     # ex is 0 and ey is neither 0 nor 255: return a signed zero
     cmp     $r10, 0
     jmpc    nz, no_early_ret_dev
     cmp     $r11, 0
     jmpc    z, no_early_ret_dev
     cmp     $r11, 255
     jmpc    z, no_early_ret_dev
     move    $r0, $r12
     pop     $r13
     return
 no_early_ret_dev:
  # setup to test for special values
  # ((e - 1) & 0xFE) equals 0xFE exactly when e is 0 or 255
     sub     $r8,$r10,1
     and     $r8,$r8,0xFE
     sub     $r9,$r11,1
     and     $r9,$r9,0xFE
     # test for special values
     cmp     $r8, $r9
     jmpc    gte, absXm1_gte_absYm1
     move    $r8, $r9
 absXm1_gte_absYm1:
     cmp     $r8, 0xFE
     jmpc    nz, no_spec_ret_div
     cmp     $r10, 0xFF
     jmpc    nz, ex_not_FF_div
     # ex is 255: NaN when x has a fraction or ey is 255 too, else infinity
     lpm     $r6, m_mask
     and     $r2, $r0, $r6        # mx
     cmp     $r2, 0
     jmpc    nz, ret_nan_div
     cmp     $r11, 0xFF
     jmpc    z, ret_nan_div
     jmp     ret_inf_div
 ex_not_FF_div:
     cmp     $r11, 0xFF
     jmpc    nz, ey_not_FF_div
     # ey is 255: NaN when y has a fraction, else a signed zero
     ashl    $r13, $r1, 9
     cmp     $r13, 0
     jmpc    nz, ret_nan_div
     move    $r0, $r12
     pop     $r13
     return
 ey_not_FF_div:
     # both exponents 0 gives NaN; otherwise fall into the infinity return
     or      $r10, $r10, $r11
     cmp     $r10, 0
     jmpc    z, ret_nan_div
 ret_inf_div:
     lpm     $r6, inf
     move    $r0, $r6
     or      $r0, $r0, $r12
     pop     $r13
     return
 ret_nan_div:
     lpm     $r0, nan
     pop     $r13
     return

 no_spec_ret_div:
 # check for overflow
     ldk     $r6, 0xFE
     cmp     $r2, $r6
     jmpc    lt, no_overflow_div
     lpm     $r6, inf
     or      $r0, $r12, $r6
     pop     $r13
     return
 no_overflow_div:
 # check for underflow
     cmp     $r2, 0
     jmpc    ns, no_underflow_div
     xnor    $r6, $r6, $r6       # -1
     cmp     $r2, $r6
     jmpc    nz, ret_sr_div
     # NOTE(review): $r4 is compared with 0xFFFFFF00 and then with
     # 0x80000000; it cannot equal both, so the return of 0x00800000 below
     # looks unreachable.  The second compare may have been meant for the
     # divisor mantissa - confirm against the referenced paper.
     ldk     $r7, 0xFF
     xor     $r6, $r6, $r7       # 0xFF ^ -1 = 0xFFFFFF00
     cmp     $r4, $r6
     jmpc    nz, ret_sr_div
     lpm     $r6, sign_mask
     cmp     $r4, $r6
     jmpc    nz, ret_sr_div
     lshr    $r0, $r6, 8
     or      $r0, $r0, $r12
     pop     $r13
     return
 ret_sr_div:
     move    $r0, $r12
     pop     $r13
     return
 no_underflow_div:
     lpm     $r6, dc_1
     muluh   $r7, $r3, $r6       # i0 = mul( T , 0xffffe7d7 );
     lpm     $r6, dc_2
     sub     $r7, $r6, $r7       # i1 = 0xffffffe8 - i0;
     muluh   $r7, $r5, $r7       # i2 = mul( S , i1 );
     add     $r7, $r7, 0x20      # i3 = 0x00000020 + i2;
     muluh   $r8, $r3, $r3       # i4 = mul( T , T );
     muluh   $r9, $r5, $r8       # i5 = mul( S , i4 );
     lpm     $r6, dc_3
     muluh   $r10, $r3, $r6      # i6 = mul( T , 0xffbad86f );
     lpm     $r6, dc_4
     sub     $r10, $r6, $r10     # i7 = 0xfffbece7 - i6;
     muluh   $r10, $r9, $r10     # i8 = mul( i5 , i7 );
     add     $r7, $r7, $r10      # i9 = i3 + i8;
     muluh   $r9, $r8, $r9       # i10 = mul( i4 , i5 );
     lpm     $r6, dc_5
     muluh   $r10, $r3, $r6      # i11 = mul( T , 0xf3672b51 );
     lpm     $r6, dc_6
     sub     $r10, $r6, $r10     # i12 = 0xfd9d3a3e - i11;
     lpm     $r6, dc_7
     muluh   $r11, $r3, $r6      # i13 = mul( T , 0x9a3c4390 );
     lpm     $r6, dc_8
     sub     $r11, $r6, $r11     # i14 = 0xd4d2ce9b - i13
     muluh   $r11, $r8, $r11     # i15 = mul( i4 , i14 );
     add     $r10, $r10, $r11    # i16 = i12 + i15;
     muluh   $r10, $r9, $r10     # i17 = mul( i10 , i16 )
     add     $r7, $r7, $r10      # i18 = i9 + i17;
     muluh   $r10, $r8, $r8      # i19 = mul( i4 , i4 );
     lpm     $r6, dc_9
     muluh   $r11, $r3, $r6      # i20 = mul( T , 0x1bba92b3 );
     lpm     $r6, dc_10
     sub     $r11, $r6, $r11     # i21 = 0x525a1a8b - i20;
     lpm     $r6, dc_11
     muluh   $r8, $r8, $r6       # i22 = mul( i4 , 0x0452b1bf );
     add     $r8, $r11, $r8      # i23 = i21 + i22;
     muluh   $r8, $r10, $r8      # i24 = mul( i19 , i23 );
     muluh   $r8, $r9, $r8       # i25 = mul( i10 , i24 );
     add     $r3, $r7, $r8       # V = i18 + i25;
 # W = V & 0xFFFFFFC0;
     lpm     $r6, dc_12
     and     $r3, $r3, $r6   # W
 # round and pack final values
 # The high word of W * My is compared with S >> 1; when it is smaller, W is
 # raised by 0x40 before its low 7 bits are dropped.  W is then added (not
 # or-ed) into the word that already holds D and the sign.
     ashl    $r0, $r2, 23        # pack D
     or      $r0, $r0, $r12      # pack Sr
     ashl    $r12, $r1, 8
     or      $r12, $r12, $r13    # My
     muluh   $r10, $r3, $r12
     lshr    $r11, $r5, 1
     cmp     $r10, $r11
     jmpc    gte, div_ret_1
     add     $r3, $r3, 0x40
 div_ret_1:
     lshr    $r3, $r3, 7
     add     $r0, $r0, $r3
     pop     $r13
     return
 #endif

 ##########################################################################
 ##########################################################################
 ## Negate

 #ifdef L_negsf
 # __negsf: return -x for the float in $r0 by toggling bit 31.
 # Scratch: $r1.
 .global __negsf
 __negsf:
     ldk     $r1, 1
     ashl    $r1, $r1, 31        # $r1 = 0x80000000
     xor     $r0, $r0, $r1
     return
 #endif

 ##########################################################################
 ##########################################################################
 ## float to int & unsigned int

 #ifdef L_fixsfsi
 # __fixsfsi: convert the float in $r0 to a signed 32-bit integer in $r0,
 # discarding the fractional part.
 #   NaN, or biased exponent below 127               -> 0
 #   biased exponent 158 or above (infinity as well) -> 0x7FFFFFFF, or
 #                                                      0x80000000 when negative
 # Scratch: $r1 (exponent, then shift count), $r2 (sign), $r3.
 .global __fixsfsi
 __fixsfsi:
     lshr    $r2,$r0,31          # $r2 = sign bit
     bextu   $r1,$r0,(8<<5)|23   # $r1 = biased exponent
     lpm     $r3, m_mask
     and     $r0,$r0,$r3         # $r0 = fraction bits
     cmp     $r1,0xFF
     jmpc    nz, fixsf_range
     cmp     $r0,0
     jmpc    z, fixsf_range      # infinity goes on to the saturation path
     ldk     $r0,0               # NaN
     return
 fixsf_range:
     cmp     $r1, 127
     jmpc    gte, fixsf_upper
     ldk     $r0,0               # magnitude below 1.0
     return
 fixsf_upper:
     cmp     $r1, 158
     jmpc    lt, fixsf_convert
     lpm     $r0, nan            # 0x7FFFFFFF, reused as the largest int
     cmp     $r2, 0
     jmpc    z, fixsf_done
     not     $r0, $r0            # negative input: 0x80000000
     return
 fixsf_convert:
     lpm     $r3, smallest_norm
     or      $r0, $r0, $r3       # put back the implicit leading one
     sub     $r1, $r1, 150       # signed shift that removes the scaling
     cmp     $r1, 0
     jmpc    s, fixsf_shift_right
     ashl    $r0, $r0, $r1
     jmp     fixsf_sign
 fixsf_shift_right:
     neg     $r1                 # shift right by the magnitude
     lshr    $r0, $r0, $r1
 fixsf_sign:
     cmp     $r2, 0
     jmpc    z, fixsf_done
     neg     $r0
 fixsf_done:
     return
 #endif

 #ifdef L_fixunssfsi
 # __fixunssfsi: convert the float in $r0 to an unsigned 32-bit integer in
 # $r0, discarding the fractional part.
 #   any input with the sign bit set -> 0
 #   NaN                             -> 0
 #   magnitude below 1.0             -> 0
 # Scratch: $r1 (unbiased exponent, then shift count), $r2 (sign), $r3.
 # NOTE(review): there is no upper clamp; an unbiased exponent of 32 or more
 # (including infinity) reaches the left shift with an out-of-range count.
 .global __fixunssfsi
 __fixunssfsi: # 19 instructions
     lshr    $r2, $r0, 31          # s in r2
     cmp     $r2, 0
     jmpc    z, uint_not_neg
     ldk     $r0, 0
     return
 uint_not_neg:
     bextu   $r1, $r0, (8<<5)|23   # e in r1
     sub     $r1, $r1, 127         # remove the bias
     lpm     $r3, m_mask
     and     $r0, $r0, $r3         # m in r0
     # test nan
     # The bias is already gone, so an exponent field of 0xFF now reads 128.
     # Comparing against 0xFF here could never match.
     cmp     $r1, 128
     jmpc    nz, uint_not_nan
     cmp     $r0, 0
     jmpc    z, uint_not_nan
     ldk     $r0, 0
     return
 uint_not_nan:
     # test edges
     cmp     $r1, 0
     jmpc    ns, uint_not_zero   # lower limit
     ldk     $r0, 0
     return
 uint_not_zero:
     lpm     $r3, smallest_norm
     or      $r0, $r0, $r3       # set implicit bit
     cmp     $r1, 23
     jmpc    lt, shift_uint_right
     sub     $r1, $r1, 23
     ashl    $r0, $r0, $r1
     return
 shift_uint_right:
     ldk     $r3, 23
     sub     $r1, $r3, $r1       # right shift count = 23 - exponent
     lshr    $r0, $r0, $r1
     return
 #endif

 ##########################################################################
 ##########################################################################
 ## int & unsigned int to float


 # i2f: convert the non-zero unsigned integer in x to single-precision
 # exponent and fraction bits, in place (bit 31 of the result is left for
 # the caller).
 #   x            value in, packed exponent + fraction out
 #   s1, s2, s3   scratch registers
 #   lbl          suffix that makes the labels of each expansion unique
 # With nlz = leading zero count of x:
 #   nlz >= 8       value fits in 24 bits, shifted left, no rounding needed
 #   nlz = 6 or 7   shifted left so two extra low bits remain, then rounded
 #   nlz < 6        shifted right with a sticky bit, then rounded
 .macro  i2f x, s1, s2, s3, lbl
     move    \s1, \x
     nlz     \s1, \s2                # s1 = leading zero count
     cmp     \s1, 8
     jmpc    s, float_round\lbl
     sub     \s2, \s1, 8
     ashl    \x, \x, \s2             # top set bit moves to bit 23
     jmp     float_no_round\lbl
 float_round\lbl:
     cmp     \s1, 6
     jmpc    s, float_shift_right\lbl
     sub     \s2, \s1, 6
     ashl    \x, \x, \s2             # top set bit moves to bit 25
     jmp     float_round_and_pack\lbl
 float_shift_right\lbl:
     ldk     \s2, 6
     sub     \s2, \s2, \s1           # right shift count = 6 - nlz
     xnor    \s3, \s3 ,\s3           # 0xFFFFFFFF
     ashl    \s3, \s3 ,\s2           # create inverse of mask for test of S bit value in discarded my
     xnor    \s3, \s3 ,0             # NOT
     tst     \x, \s3                # determine value of sticky bit
     lshr    \x, \x, \s2
     jmpc    z,float_round_and_pack\lbl
     or      \x, \x, 1               # set the sticky bit to 1
 float_round_and_pack\lbl:
     # Or-ing bit 2 into bit 0 and then adding 1 before the shift by 2 rounds
     # up when the two low bits are 11, or are 10 with bit 2 set.
     bextu   \s2, \x, (1<<5)|2      # extract low bit of m
     or      \x, \x, \s2           # or p into r
     add     \x, \x, 1
     lshr    \x, \x, 2
     btst    \x, (1<<5)|24          # test for carry from round
     jmpc    z, float_no_round\lbl
     sub     \s1, \s1, 1             # inc e for carry (actually dec nlz)
     lshr    \x, \x, 1
 float_no_round\lbl:
     ldk     \s2, 158
     sub     \s1, \s2, \s1           # biased exponent = 158 - nlz
     # Pack e
     ldl     \s1, \s1, (8<<5)|23
     bins    \x, \x, \s1
 .endm


 #ifdef L_floatsisf
 # __floatsisf: convert the signed 32-bit integer in $r0 to a float in $r0.
 # Zero is returned unchanged (all-zero bit pattern).  Otherwise the
 # magnitude is converted by i2f and the sign is inserted at bit 31.
 # Scratch: $r1 (sign), $r2-$r4 (used by i2f).
 .global __floatsisf
 __floatsisf:
     cmp     $r0, 0
     jmpc    z, floatsi_done
     ashr    $r1, $r0, 31            # $r1 = 0, or -1 for a negative input
     xor     $r0, $r0, $r1
     sub     $r0, $r0, $r1           # $r0 = magnitude of the input
     i2f     $r0, $r2, $r3, $r4, 1
     ldl     $r1, $r1, (1<<5)|31
     bins    $r0, $r0, $r1           # width = 1, pos = 31: insert the sign
 floatsi_done:
     return
 #endif

 #ifdef L_floatunsisf
 # __floatunsisf: convert the unsigned 32-bit integer in $r0 to a float in
 # $r0.  Zero is returned unchanged; every other value goes through i2f.
 # Scratch: $r1-$r3 (used by i2f).
 .global __floatunsisf
 __floatunsisf:
     cmp     $r0, 0
     jmpc    z, floatunsi_done
     i2f     $r0, $r1, $r2, $r3, 2
 floatunsi_done:
     return
 #endif

 #if 0
 ##########################################################################
 ##########################################################################
 ## float compare
 ## Disabled: everything up to the matching endif is skipped by the
 ## preprocessor.
 ## Three-way compare of the floats in $r0 (x) and $r1 (y), result in $r0:
 ##    1  when x > y, or when either operand is a NaN
 ##    0  when x == y, including when both are zeros of either sign
 ##   -1  (0xFFFFFFFF) when x < y
 ## Scratch: $r2-$r4.


 __cmpsf2_:
     # calc abs vals
     lpm     $r3, nan                # also abs mask
     and     $r2, $r0, $r3
     and     $r3, $r1, $r3
     # test if either abs is nan
     lpm     $r4, inf
     cmp     $r2, $r4
     jmpc    gt, cmp_is_gt
     cmp     $r3, $r4
     jmpc    gt, cmp_is_gt
     # test if both are 0
     or      $r2, $r2, $r3
     cmp     $r2, 0
     jmpc    z, cmp_is_eq
     # test if eq
     cmp     $r0, $r1
     jmpc    z, cmp_is_eq
     # -- if either is pos
     # bit 31 of (x & y) is set only when both operands are negative
     and     $r2, $r0, $r1
     cmp     $r2, 0
     jmpc    s, cmp_both_neg
     cmp     $r0, $r1
     jmpc    gt, cmp_is_gt
     # r0 < r1
     lpm     $r0, high_uint
     return
 cmp_both_neg:
     # both negative: the ordering of the raw words is reversed
     cmp     $r0, $r1
     jmpc    lt, cmp_is_gt
     # r0 < r1
     lpm     $r0, high_uint
     return
 cmp_is_gt:
     ldk     $r0, 1
     return
 cmp_is_eq:
     ldk     $r0, 0
     return
 #endif

 #ifdef  L_udivsi3
 # __udivsi3: unsigned 32-bit division by shift and subtract.
 # In:  $r0 = dividend, $r1 = divisor
 # Out: $r0 = quotient, $r2 = remainder
 # Scratch: $r3.  $r28 is the loop counter and is saved and restored.
 .global __udivsi3
 __udivsi3:
 	push	$r28
 	ldk	$r28,-32	# counts up; the loop runs while bit 31 is set
 	ldk	$r2,0		# running remainder
 udivsi3_step:
 	lshr	$r3,$r0,31	# top bit leaving the dividend ...
 	ashl	$r2,$r2,1
 	or	$r2,$r2,$r3	# ... enters the remainder
 	ashl	$r0,$r0,1	# room for the next quotient bit
 	cmp	$r2,$r1
 	jmpc	b,udivsi3_next	# remainder below divisor: quotient bit stays 0
 	sub	$r2,$r2,$r1
 	add	$r0,$r0,1	# quotient bit is 1
 udivsi3_next:
 	add	$r28,$r28,1
 	jmpx	31,$r28,1,udivsi3_step
 	pop	$r28
 	return
 #endif

 #ifdef	L_umodsi3
 # __umodsi3: unsigned 32-bit remainder.
 # In:  $r0 = dividend, $r1 = divisor.   Out: $r0 = remainder.
 # Relies on __udivsi3 leaving the remainder in $r2.
 .global	__umodsi3
 __umodsi3:
 	call	__udivsi3
 	move	$r0,$r2
 	return
 #endif

 #ifdef	L_divsi3
 # __divsi3: signed 32-bit division.
 # In:  $r0 = dividend, $r1 = divisor.   Out: $r0 = quotient.
 # Divides the magnitudes with __udivsi3 and negates the quotient when the
 # operand signs differ.  $r5 carries the sign across the call, so it
 # depends on __udivsi3 leaving $r5 alone.  Scratch: $r2.
 .global	__divsi3
 __divsi3:
 	xor	$r5,$r0,$r1
 	ashr	$r5,$r5,31	# $r5 = -1 when the signs differ, else 0
 	ashr	$r2,$r1,31
 	xor	$r1,$r1,$r2
 	sub	$r1,$r1,$r2	# $r1 = abs($r1)
 	ashr	$r2,$r0,31
 	xor	$r0,$r0,$r2
 	sub	$r0,$r0,$r2	# $r0 = abs($r0)
 	call	__udivsi3
 	xor	$r0,$r0,$r5
 	sub	$r0,$r0,$r5	# negate the quotient when $r5 = -1
 	return

 #endif

 #ifdef	L_modsi3
 # __modsi3: signed 32-bit remainder; the result takes the sign of the
 # dividend.
 # In:  $r0 = dividend, $r1 = divisor.   Out: $r0 = remainder.
 # $r5 carries the dividend sign across the call to __umodsi3, so it
 # depends on the callee leaving $r5 alone.  Scratch: $r2.
 .global	__modsi3
 __modsi3:
 	ashr	$r5,$r0,31	# $r5 = -1 for a negative dividend, else 0
 	ashr	$r2,$r1,31
 	xor	$r1,$r1,$r2
 	sub	$r1,$r1,$r2	# $r1 = abs($r1)
 	xor	$r0,$r0,$r5
 	sub	$r0,$r0,$r5	# $r0 = abs($r0)
 	call	__umodsi3
 	xor	$r0,$r0,$r5
 	sub	$r0,$r0,$r5	# negate the remainder when $r5 = -1
 	return
 #endif
	# ieee754 sf routines for FT32

	/* Copyright (C) 1995-2021 Free Software Foundation, Inc.

	This file is free software; you can redistribute it and/or modify it
	under the terms of the GNU General Public License as published by the
	Free Software Foundation; either version 3, or (at your option) any
	later version.

	This file is distributed in the hope that it will be useful, but
	WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	General Public License for more details.

	Under Section 7 of GPL version 3, you are granted additional
	permissions described in the GCC Runtime Library Exception, version
	3.1, as published by the Free Software Foundation.

	You should have received a copy of the GNU General Public License and
	a copy of the GCC Runtime Library Exception along with this program;
	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
	<http://www.gnu.org/licenses/>. */

	# See http://www.ens-lyon.fr/LIP/Pub/Rapports/PhD/PhD2006/PhD2006-02.pdf
	# for implementation details of all except division which is detailed below
	#

	#ifdef L_fp_tools
	# Constant pool shared by the soft-float routines below (read with lpm)
	# plus the lookup table used by the ntz macro.
	# NOTE(review): no label here is declared .global although code under
	# other L_* guards references them - confirm how these get linked.
	// .global __cmpsf2_
	nan: .long 0x7FFFFFFF # also abs mask
	inf: .long 0x7F800000 # exponent all ones, fraction zero
	sign_mask: .long 0x80000000 # bit 31 only
	m_mask: .long 0x007FFFFF # low 23 bits (fraction field)
	exp_bias: .long 127 # not referenced in the code visible here
	edge_case: .long 0x00FFFFFF # border case tested in __mulsf3 underflow
	smallest_norm: .long 0x00800000 # implicit bit
	high_FF: .long 0xFF000000 # inf << 1; NaN threshold used by __mulsf3
	high_uint: .long 0xFFFFFFFF # returned for x < y by the disabled __cmpsf2_

	# 64 one-byte entries, indexed by the 6-bit value the ntz macro derives
	# from its input; entry 0 is 32, which is what a zero input selects.
	ntz_table:
	.byte 32,0,1,12,2,6,0,13,3,0,7,0,0,0,0,14
	.byte 10,4,0,0,8,0,0,25,0,0,0,0,0,21,27,15
	.byte 31,11,5,0,0,0,0,0,9,0,0,24,0,0,20,26
	.byte 30,0,0,0,0,23,0,19,29,0,22,18,28,17,16,0

	#endif

	# Supply a few 'missing' instructions

	# rd = ~r1 (bitwise complement), written as xnor with zero
	.macro not rd,r1
	xnor \rd,\r1,0
	.endm

	# x = -x (two's complement negate, in place)
	.macro neg x
	xnor \x, \x, 0 # x = ~x
	add \x, \x, 1 # ~x + 1 == -x
	.endm

	# Emit a hand-encoded instruction word that sets $cc from the result of
	# "ashl reg,dist".  Both arguments are plain numbers (this file writes
	# "ashlcc 2,10"), not $rN operands, because they are shifted into the word.
	# The OR operators had been escaped with a backslash, which is not valid
	# in an assembler expression; they are plain | again.
	.macro ashlcc reg,dist
	.long 0x5de04008 | (\reg << 15) | (\dist << 4)
	.endm


	# converts an unsigned number x to a signed rep based on the bits in sign
	# sign should be 0x00000000 or 0xffffffff.
	# With sign = 0 x is unchanged; with sign = 0xffffffff the two steps give
	# ~(x - 1), which is -x.  The order of the add and the xor matters.
	.macro to_signed x, sign
	add \x,\x,\sign # conditionally decrement x
	xor \x,\x,\sign # conditionally complement x
	.endm


	# Build the constant v in r in two steps: ldk supplies v >> 10, then ldl
	# appends the low 10 bits of v.
	.macro ld32 r,v
	ldk \r,(\v >> 10)
	ldl \r,\r,(\v & 0x3ff)
	.endm

	# calculate trailing zero count in x, also uses scr.
	# Using Seal's algorithm
	# The lowest set bit of x is isolated, multiplied by 17 * 65 * 65535 using
	# shifts and adds, and the top 6 bits of the product index ntz_table.
	# x is replaced by the count (32 when x was zero); scr is clobbered.
	.macro ntz x, scr
	not \scr, \x
	add \scr, \scr, 1 # scr = -x
	and \x, \x, \scr # x = lowest set bit of x (0 stays 0)
	ashl \scr, \x, 4
	add \x, \scr, \x # x = x * 17
	ashl \scr, \x, 6
	add \x, \scr, \x # x = x * 65
	ashl \scr, \x, 16
	sub \x, \scr, \x # x = x * 65535
	lshr \x, \x, 26 # keep the top 6 bits as table index
	ldk \scr, ntz_table
	add \x, \x, \scr # address of the table entry
	lpmi.b \x, \x, 0 # x = table byte = trailing zero count
	.endm

	# calculate leading zero count
	# x is replaced by the count; scr is clobbered (see ntz).
	# NOTE(review): relies on "flip x,x,31" reversing the bit order of the
	# whole word so that leading zeros become trailing zeros - confirm against
	# the FT32 instruction reference.
	.macro nlz x, scr
	flip \x, \x, 31
	ntz \x, \scr
	.endm


	# Round 26 bit mantissa to nearest
	# | 23 bits frac | G | R | S |
	# m is shifted right by 2 and incremented according to its low three bits.
	# 0xc8 is a lookup word: bit (m & 7) of it is the increment.  Bits 3, 6 and 7
	# are set, so the increment is 1 when bit 1 of m is set and either bit 0 or
	# bit 2 is set too (ties go to the even result).
	# s1 and s2 are scratch.
	.macro round m, s1, s2
	ldk \s1,0xc8
	and \s2,\m,7 # s2 = low three bits of m
	lshr \s1,\s1,\s2
	and \s1,\s1,1 # s1 = rounding increment (0 or 1)
	lshr \m,\m,2 # drop the two low bits
	add \m,\m,\s1
	.endm

	# If NZ, set the LSB of reg
	# Must directly follow the instruction that set the condition codes
	# (tst, btst or ashlcc in this file).  Defines the numeric local label 1.
	.macro sticky reg
	jmpc z,1f
	or \reg,\reg,1 # set the sticky bit to 1
	1:
	.endm

	##########################################################################
	##########################################################################
	## addition & subtraction

	#if defined(L_subsf3) || defined(L_addsub_sf)
	# __subsf3: x - y, computed as x + (-y).
	# In: x in $r0, y in $r1.  The sign bit of $r1 is toggled and control
	# transfers to __addsf3, which returns to the original caller.
	# Scratch: $r2.
	# The guard above had its || escaped with backslashes, which the
	# preprocessor rejects; it is a plain || again.
	.global __subsf3
	__subsf3:
	# this is subtraction, so we just change the sign of r1
	lpm $r2,sign_mask
	xor $r1,$r1,$r2
	jmp __addsf3
	#endif

	#if defined(L_addsf3) || defined(L_addsub_sf)
	# __addsf3: single-precision addition, z = x + y.
	# In:  x in $r0, y in $r1.   Out: z in $r0.
	# Registers written: $r0-$r8.
	# Outline: exponents of 0 or 255 are handled first (an operand whose
	# exponent field is 0 is treated as zero), then the operand with the
	# smaller exponent is aligned, the signed mantissas (with 3 extra low bits)
	# are added, and the sum is renormalised, rounded to nearest and repacked.
	# A result exponent of 0 or less gives a signed zero; 255 or more gives a
	# signed infinity.
	# Every | in the guard and in the immediate expressions had been escaped
	# with a backslash, which neither the preprocessor nor the assembler
	# accepts; they are plain | again.
	.global __addsf3
	__addsf3:
	# x in $r0, y in $r1, result z in $r0 --||| 100 instructions +/- |||--
	# unpack e, calc d
	bextu $r2,$r0,(8<<5)|23 # ex in r2
	bextu $r3,$r1,(8<<5)|23 # ey in r3
	sub $r5,$r2,$r3 # d = ex - ey

	# Special values are 0x00 and 0xff in ex and ey.
	# If (ex&ey) == 0 or (ex|ey) == 255 then there may be
	# a special value, so the slow path below sorts it out.
	tst $r2,$r3
	jmpc nz,1f
	jmp slow
	1: or $r4,$r2,$r3
	cmp $r4,255
	jmpc nz,no_special_vals
	slow:
	# Check for early exit
	# Taken when at least one exponent is 0 and neither is 255.
	cmp $r2,0
	jmpc z,test_if_not_255
	cmp $r3,0
	jmpc nz,no_early_exit
	test_if_not_255:
	cmp $r2,255
	jmpc z,no_early_exit
	cmp $r3,255
	jmpc z,no_early_exit
	or $r6,$r2,$r3
	cmp $r6,0
	jmpc nz,was_not_zero
	# both exponents are 0: result is zero, negative only when both are
	and $r0,$r0,$r1
	lpm $r1,sign_mask
	and $r0,$r0,$r1
	return
	was_not_zero:
	# exactly one exponent is 0: return the other operand unchanged
	cmp $r2,0
	jmpc nz,ret_x
	move $r0,$r1
	return
	ret_x:
	return
	no_early_exit:
	# setup to test for special values
	# ((e - 1) & 0xFE) equals 0xFE exactly when e is 0 or 255
	sub $r6,$r2,1
	and $r6,$r6,0xFE
	sub $r7,$r3,1
	and $r7,$r7,0xFE
	# test for special values
	cmp $r6,$r7
	jmpc gte,ex_spec_is_gte
	move $r6,$r7
	ex_spec_is_gte:
	cmp $r6,0xFE
	jmpc nz,no_special_vals
	cmp $r5,0
	jmpc ns,d_gte_0
	# d < 0: both tests below lead to ret_y, which is also the
	# fall-through, so y is returned in every case.
	cmp $r3,0xFF
	jmpc z,ret_y
	cmp $r2,0
	jmpc z,ret_y
	ret_y:
	move $r0,$r1
	return
	d_gte_0:
	cmp $r5,0
	jmpc z,d_is_0
	cmp $r2,0xFF
	jmpc z,ret_x
	cmp $r3,0
	jmpc z,ret_x
	d_is_0:
	cmp $r2,0xFF
	jmpc nz,no_special_vals
	# both exponents are 255: NaN when either fraction is non-zero or the
	# signs differ, otherwise x (an infinity) is returned as is
	ashl $r6,$r0,9 # clear all except x frac
	ashl $r7,$r1,9 # clear all except y frac
	or $r6,$r6,$r7
	cmp $r6,0
	jmpc nz,ret_nan
	lshr $r4,$r0,31 # sx in r4
	lshr $r5,$r1,31 # sy in r5
	cmp $r4,$r5
	jmpc nz,ret_nan
	return
	ret_nan:
	lpm $r0,nan
	return
	no_special_vals:
	ldk $r8,(1<<10)|(9<<5)|26 # setup implicit bit and mask for e
	#----------------------
	ashr $r4,$r0,31 # sx in r4
	ashl $r0,$r0,3 # shift mx 3 for GRS bits
	bins $r0,$r0,$r8 # clear sx, ex and add implicit bit mx
	# change mx to signed mantissa
	to_signed $r0,$r4
	#----------------------
	ashr $r4,$r1,31 # sy in r4
	ashl $r1,$r1,3 # shift my 3 for GRS bits
	bins $r1,$r1,$r8 # clear sy, ey and add implicit bit my
	# change my to signed mantissa
	to_signed $r1,$r4
	#----------------------
	# test if we swap ms based on d sign
	cmp $r5,0
	jmpc gte,noswap
	# swap mx & my
	xor $r0,$r0,$r1
	xor $r1,$r0,$r1
	xor $r0,$r0,$r1
	# d positive means that ex>=ey, so ez = ex
	# d negative means that ey>ex, so ez = ey
	move $r2,$r3
	# |d|
	neg $r5
	noswap:
	# now $r2 = ez = max(ex,ey)
	cmp $r5,26 # max necessary alignment shift is 26
	jmpc lt,under_26
	ldk $r5,26
	under_26:
	ldk $r7,-1
	ashl $r7,$r7,$r5 # create inverse of mask for test of S bit value in discarded my
	not $r7,$r7
	tst $r1,$r7 # determine value of sticky bit
	# shift my >> |d|
	ashr $r1,$r1,$r5
	sticky $r1

	# add ms
	add $r0,$r0,$r1

	# $r4 = sign(mx), mx = |mx|
	ashr $r4,$r0,31
	xor $r0,$r0,$r4
	sub $r0,$r0,$r4

	# realign mantissa using leading zero count
	# (flip + ntz is the nlz macro written out; $r7 = leading zero count)
	flip $r7,$r0,31
	ntz $r7,$r8
	ashl $r0,$r0,$r7
	btst $r0,(6<<5)|0 # test low bits for sticky again
	lshr $r0,$r0,6
	sticky $r0

	# update exponent
	# the implicit bit started at bit 26, i.e. 5 leading zeros
	add $r2,$r2,5
	sub $r2,$r2,$r7

	# Round to nearest
	round $r0,$r7,$r6

	# detect_exp_update
	# a carry out of rounding shows up in bit 24 and bumps the exponent
	lshr $r6,$r0,24
	add $r2,$r2,$r6

	# final tests
	# mz == 0? if so, we just bail with a +0
	cmp $r0,0
	jmpc nz,msum_not_zero
	ldk $r0,0
	return
	msum_not_zero:
	# Combined check that (1 <= ez <= 254)
	sub $r3,$r2,1
	cmp $r3,254
	jmpc b,no_special_ret
	# underflow?
	cmp $r2,0
	jmpc gt,no_under
	ldk $r0,0
	jmp pack_sz
	no_under:
	# overflow?
	cmp $r2,255
	jmpc lt,no_special_ret
	ldk $r0,0x7F8
	ashl $r0,$r0,20 # 0x7F800000
	jmp pack_sz
	no_special_ret:
	# Pack ez
	ldl $r2,$r2,(8<<5)|23
	bins $r0,$r0,$r2 # width = 8, pos = 23 pack ez
	# Pack sz
	pack_sz:
	ldl $r4,$r4,(1<<5)|31
	bins $r0,$r0,$r4 # width = 1, pos = 31 set sz from $r4 (sign of the sum)
	return
	#endif

	##########################################################################
	##########################################################################
	## multiplication

	#ifdef L_mulsf3
	.global __mulsf3
	# __mulsf3 - single precision multiply on raw binary32 bit patterns.
	# In:  $r0 = x, $r1 = y.   Out: $r0 = z = x * y.
	# Registers written in this block: $r0-$r7.
	# Operands with a zero exponent field are handled as zero, and a
	# product whose exponent ends up <= 0 is flushed to a signed zero
	# (see special_ret_mul for the one exception).
	# Uses the round, sticky and ashlcc macros and the L_fp_tools
	# constants, all of which are defined outside this block.
	__mulsf3:
	# x in $r0, y in $r1, result z in $r0 --\|\|\| 61 instructions +/- \|\|\|--

	# unpack e
	bextu $r2,$r0,(8<<5)\|23 # ex in r2
	bextu $r3,$r1,(8<<5)\|23 # ey in r3
	# calc result sign
	xor $r4,$r0,$r1
	lpm $r5,sign_mask
	and $r4,$r4,$r5 # sz in r4

	# unpack m add implicit bit
	ldk $r5,(1<<10)\|(9<<5)\|23 # setup implicit bit and mask for e
	#----------------------
	bins $r0,$r0,$r5 # clear sx, ex and add implicit bit mx

	# Fast path: both exponents in 1..254 (unsigned test of e-1 < 254)
	sub $r6,$r2,1
	cmp $r6,254
	jmpc b,1f
	jmp slow_mul
	1: sub $r6,$r3,1
	cmp $r6,254
	jmpc b,no_special_vals_mul

	slow_mul:
	# Check for early exit
	# Reached when ex or ey is 0 or 255.  A zero exponent on either
	# side, with neither exponent equal to 255, returns a signed zero.
	cmp $r2,0
	jmpc z,op_is_zero
	cmp $r3,0
	jmpc nz,no_early_exit_mul
	op_is_zero:
	cmp $r2,255
	jmpc z,no_early_exit_mul
	cmp $r3,255
	jmpc z,no_early_exit_mul
	move $r0,$r4 # z = signed zero
	return
	no_early_exit_mul:
	# setup to test for special values
	# (e-1) & 0xFE equals 0xFE exactly when e is 0 or 255
	sub $r6,$r2,1
	and $r6,$r6,0xFE
	sub $r7,$r3,1
	and $r7,$r7,0xFE
	# test for special values
	cmp $r6,$r7
	jmpc gte,ex_spec_is_gte_ey_mul
	move $r6,$r7
	ex_spec_is_gte_ey_mul:
	cmp $r6,0xFE
	jmpc nz,no_special_vals_mul
	cmp $r2,0xFF
	jmpc nz,ex_not_FF_mul
	# ex == 0xFF: x is inf or NaN
	ashl $r6,$r0,9 # keep only the fraction of x
	cmp $r6,0
	jmpc nz,ret_nan # x is NaN
	cmp $r3,0
	jmpc z,ret_nan # inf times an operand with ey == 0
	ashl $r6,$r1,1 # drop the sign of y
	lpm $r7,high_FF
	cmp $r6,$r7
	jmpc a,ret_nan # y is NaN
	cmp $r6,0
	jmpc z,ret_nan # y is +/-0
	# infinity
	lpm $r0,inf
	or $r0,$r0,$r4
	return
	ex_not_FF_mul:
	cmp $r2,0
	jmpc nz,no_nan_mul
	cmp $r3,0xFF
	jmpc nz,no_nan_mul
	jmp ret_nan # ex == 0 and ey == 0xFF
	no_nan_mul:
	# Return y with its sign replaced by sz.
	# NOTE(review): on the paths visible here ey is 0xFF at this
	# point, so y is inf or NaN - confirm.
	lpm $r0,nan # 0x7FFFFFFF used as abs mask
	and $r0,$r0,$r1
	or $r0,$r0,$r4
	return

	ret_nan:
	lpm $r0,nan
	return

	no_special_vals_mul:
	bins $r1,$r1,$r5 # clear sy, ey and add implicit bit my
	# calc ez
	add $r3,$r2,$r3
	sub $r3,$r3,127 # ez in r3

	# (r1,r2) = R0 * R1
	mul $r2,$r0,$r1
	muluh $r1,$r0,$r1

	# Bit 15 of the high word is bit 47 of the 48-bit product
	btst $r1,(1<<5)\|15 # XXX use jmpx
	jmpc z,mul_z0

	# mz is 1X.XX...X
	# 48-bit product is in (r1,r2). The low 22 bits of r2
	# are discarded.
	lshr $r0,$r2,22
	ashl $r1,$r1,10
	or $r0,$r0,$r1 # r0 = (r1,r2) >> 22
	# NOTE(review): ashlcc/sticky are defined elsewhere; this looks
	# like it folds the discarded low bits into a sticky bit - confirm.
	ashlcc 2,10
	sticky $r0
	add $r3,$r3,1 # bump exponent

	# Round to nearest
	round $r0, $r1, $r2
	lshr $r6,$r0,24 # carry out of the rounded mantissa
	add $r3,$r3,$r6

	# Combined check that (1 <= ez <= 254)
	sub $r6,$r3,1
	cmp $r6,254
	jmpc b,no_special_ret_mul

	special_ret_mul:
	# When the final exponent <= 0, result is flushed to 0 except
	# for the border case 0x00FFFFFF which is promoted to next higher
	# FP no., that is, the smallest "normalized" number.
	cmp $r3,0
	jmpc gt,exp_normal
	# Pack ez
	ldl $r3,$r3,(8<<5)\|23
	bins $r0,$r0,$r3 # width = 8, pos = 23 pack ez
	lpm $r2,edge_case
	cmp $r0,$r2
	jmpc nz,no_edge_case
	lpm $r0,smallest_norm
	jmp pack_sz_mul
	no_edge_case:
	ldk $r0,0
	jmp pack_sz_mul
	exp_normal:
	# overflow?
	cmp $r3,255
	jmpc lt,no_special_ret_mul
	ldk $r0,0x7F8
	ashl $r0,$r0,20 # 0x7F800000 = infinity
	jmp pack_sz_mul
	no_special_ret_mul:
	# Pack ez
	ldl $r3,$r3,(8<<5)\|23
	bins $r0,$r0,$r3 # width = 8, pos = 23 pack ez
	# Pack sz
	pack_sz_mul:
	or $r0,$r0,$r4
	return

	mul_z0:
	# mz is 0X.XX...X
	# 48-bit product is in (r1,r2). The low 21 bits of r2
	# are discarded.
	lshr $r0,$r2,21
	ashl $r1,$r1,11
	or $r0,$r0,$r1 # r0 = (r1,r2) >> 21
	ashlcc 2,11
	sticky $r0
	# Round to nearest
	round $r0, $r1, $r2
	lshr $r6,$r0,24 # carry out of the rounded mantissa
	add $r3,$r3,$r6

	# Combined check that (1 <= ez <= 254)
	sub $r6,$r3,1
	cmp $r6,254
	jmpc b,no_special_ret_mul
	jmp special_ret_mul
	#endif

	##########################################################################
	##########################################################################
	## division

	## See http://perso.ens-lyon.fr/gilles.villard/BIBLIOGRAPHIE/PDF/arith19.pdf
	## for implementation details




	#ifdef L_divsf3
	# Constants for the polynomial evaluation in no_underflow_div.
	# dc_12 is the mask that truncates V to W.  spec_val_test is not
	# referenced by the code of this routine.
	dc_1: .long 0xffffe7d7
	dc_2: .long 0xffffffe8
	dc_3: .long 0xffbad86f
	dc_4: .long 0xfffbece7
	dc_5: .long 0xf3672b51
	dc_6: .long 0xfd9d3a3e
	dc_7: .long 0x9a3c4390
	dc_8: .long 0xd4d2ce9b
	dc_9: .long 0x1bba92b3
	dc_10: .long 0x525a1a8b
	dc_11: .long 0x0452b1bf
	dc_12: .long 0xFFFFFFC0
	spec_val_test: .long 0x7F7FFFFF

	.global __divsf3
	# __divsf3 - single precision divide on raw binary32 bit patterns.
	# In:  $r0 = x, $r1 = y.   Out: $r0 = z = x / y.
	# Registers written: $r0, $r2-$r12.  $r13 is saved on entry and
	# restored on every return path.
	# A dividend with a zero exponent field is handled as zero, and a
	# quotient whose exponent underflows is flushed to a signed zero,
	# except for the single border case handled in no_overflow_div.
	__divsf3:
	push $r13
	# x in $r0, y in $r1, result z in $r0 --\|\|\| 73 instructions +/- \|\|\|-
	bextu $r10,$r0,(8<<5)\|23 # ex in r10
	bextu $r11,$r1,(8<<5)\|23 # ey in r11
	lpm $r6, m_mask
	and $r2, $r0, $r6 # mx
	and $r3, $r1, $r6 # my
	cmp $r2,$r3
	bextu $r2,$r30,(1<<5)\|4 # c = Tx >= T;
	ashl $r3,$r3,9 # T = X << 9;
	lpm $r13, sign_mask
	ashl $r4,$r0,8 # X8 = X << 8;
	or $r4,$r4,$r13 # Mx = X8 \| 0x80000000;
	lshr $r5,$r4,$r2 # S = Mx >> c;
	# calc D
	sub $r2, $r11, $r2
	add $r12, $r10, 125
	sub $r2, $r12, $r2 # int D = (Ex + 125) - (Ey - c);
	# calc result sign
	xor $r12,$r0,$r1
	and $r12,$r12,$r13 # Sr = ( X ^ Y ) & 0x80000000;
	# check early exit
	# ex == 0 with ey in 1..254 gives a signed zero
	cmp $r10, 0
	jmpc nz, no_early_ret_dev
	cmp $r11, 0
	jmpc z, no_early_ret_dev
	cmp $r11, 255
	jmpc z, no_early_ret_dev
	move $r0, $r12
	pop $r13
	return
	no_early_ret_dev:
	# setup to test for special values
	# (e-1) & 0xFE equals 0xFE exactly when e is 0 or 255
	sub $r8,$r10,1
	and $r8,$r8,0xFE
	sub $r9,$r11,1
	and $r9,$r9,0xFE
	# test for special values
	cmp $r8, $r9
	jmpc gte, absXm1_gte_absYm1
	move $r8, $r9
	absXm1_gte_absYm1:
	cmp $r8, 0xFE
	jmpc nz, no_spec_ret_div
	cmp $r10, 0xFF
	jmpc nz, ex_not_FF_div
	# ex == 0xFF: x is inf or NaN
	lpm $r6, m_mask
	and $r2, $r0, $r6 # mx
	cmp $r2, 0
	jmpc nz, ret_nan_div # x is NaN
	cmp $r11, 0xFF
	jmpc z, ret_nan_div # inf / inf, or y is NaN
	jmp ret_inf_div
	ex_not_FF_div:
	cmp $r11, 0xFF
	jmpc nz, ey_not_FF_div
	ashl $r13, $r1, 9 # fraction of y; $r13 is restored by the pop below
	cmp $r13, 0
	jmpc nz, ret_nan_div # y is NaN
	move $r0, $r12 # finite / inf = signed zero
	pop $r13
	return
	ey_not_FF_div:
	or $r10, $r10, $r11
	cmp $r10, 0
	jmpc z, ret_nan_div # ex == 0 and ey == 0
	ret_inf_div:
	lpm $r6, inf
	move $r0, $r6
	or $r0, $r0, $r12
	pop $r13
	return
	ret_nan_div:
	lpm $r0, nan
	pop $r13
	return

	no_spec_ret_div:
	# check for overflow
	ldk $r6, 0xFE
	cmp $r2, $r6
	jmpc lt, no_overflow_div
	lpm $r6, inf
	or $r0, $r12, $r6
	pop $r13
	return
	no_overflow_div:
	# check for underflow
	cmp $r2, 0
	jmpc ns, no_underflow_div
	# D < 0: the quotient is flushed to a signed zero, except when
	# D == -1, Mx == 0xFFFFFF00 and My == 0x80000000.  That quotient
	# is 1.11...1 * 2^-127, which rounds up to the smallest normal
	# number.  The third test must look at My (built from y); testing
	# Mx a second time can never succeed after Mx == 0xFFFFFF00.
	xnor $r6, $r6, $r6 # -1
	cmp $r2, $r6
	jmpc nz, ret_sr_div
	ldk $r7, 0xFF
	xor $r6, $r6, $r7 # 0xFF ^ -1 = 0xFFFFFF00
	cmp $r4, $r6
	jmpc nz, ret_sr_div
	lpm $r6, sign_mask
	ashl $r7, $r1, 8
	or $r7, $r7, $r6 # My = ( Y << 8 ) with bit 31 set
	cmp $r7, $r6
	jmpc nz, ret_sr_div
	lshr $r0, $r6, 8 # 0x00800000, the smallest normal number
	or $r0, $r0, $r12
	pop $r13
	return
	ret_sr_div:
	move $r0, $r12
	pop $r13
	return
	no_underflow_div:
	# Polynomial evaluation; the iN names in the comments are the
	# intermediate values of the reference algorithm.
	lpm $r6, dc_1
	muluh $r7, $r3, $r6 # i0 = mul( T , 0xffffe7d7 );
	lpm $r6, dc_2
	sub $r7, $r6, $r7 # i1 = 0xffffffe8 - i0;
	muluh $r7, $r5, $r7 # i2 = mul( S , i1 );
	add $r7, $r7, 0x20 # i3 = 0x00000020 + i2;
	muluh $r8, $r3, $r3 # i4 = mul( T , T );
	muluh $r9, $r5, $r8 # i5 = mul( S , i4 );
	lpm $r6, dc_3
	muluh $r10, $r3, $r6 # i6 = mul( T , 0xffbad86f );
	lpm $r6, dc_4
	sub $r10, $r6, $r10 # i7 = 0xfffbece7 - i6;
	muluh $r10, $r9, $r10 # i8 = mul( i5 , i7 );
	add $r7, $r7, $r10 # i9 = i3 + i8;
	muluh $r9, $r8, $r9 # i10 = mul( i4 , i5 );
	lpm $r6, dc_5
	muluh $r10, $r3, $r6 # i11 = mul( T , 0xf3672b51 );
	lpm $r6, dc_6
	sub $r10, $r6, $r10 # i12 = 0xfd9d3a3e - i11;
	lpm $r6, dc_7
	muluh $r11, $r3, $r6 # i13 = mul( T , 0x9a3c4390 );
	lpm $r6, dc_8
	sub $r11, $r6, $r11 # i14 = 0xd4d2ce9b - i13
	muluh $r11, $r8, $r11 # i15 = mul( i4 , i14 );
	add $r10, $r10, $r11 # i16 = i12 + i15;
	muluh $r10, $r9, $r10 # i17 = mul( i10 , i16 )
	add $r7, $r7, $r10 # i18 = i9 + i17;
	muluh $r10, $r8, $r8 # i19 = mul( i4 , i4 );
	lpm $r6, dc_9
	muluh $r11, $r3, $r6 # i20 = mul( T , 0x1bba92b3 );
	lpm $r6, dc_10
	sub $r11, $r6, $r11 # i21 = 0x525a1a8b - i20;
	lpm $r6, dc_11
	muluh $r8, $r8, $r6 # i22 = mul( i4 , 0x0452b1bf );
	add $r8, $r11, $r8 # i23 = i21 + i22;
	muluh $r8, $r10, $r8 # i24 = mul( i19 , i23 );
	muluh $r8, $r9, $r8 # i25 = mul( i10 , i24 );
	add $r3, $r7, $r8 # V = i18 + i25;
	# W = V & 0xFFFFFFC0;
	lpm $r6, dc_12
	and $r3, $r3, $r6 # W
	# round and pack final values
	ashl $r0, $r2, 23 # pack D
	or $r0, $r0, $r12 # pack Sr
	ashl $r12, $r1, 8
	or $r12, $r12, $r13 # My
	muluh $r10, $r3, $r12 # high word of W * My
	lshr $r11, $r5, 1 # S >> 1
	cmp $r10, $r11
	jmpc gte, div_ret_1
	add $r3, $r3, 0x40 # bump W by one unit when W * My falls short of S
	div_ret_1:
	lshr $r3, $r3, 7
	add $r0, $r0, $r3 # add the mantissa, implicit bit included, onto D
	pop $r13
	return
	#endif

	##########################################################################
	##########################################################################
	## Negate

	#ifdef L_negsf
	.global __negsf
	# __negsf - negate a binary32 value by toggling its sign bit.
	# In:  $r0 = x.   Out: $r0 = x with bit 31 inverted.
	# $r1 is overwritten with the sign mask.  No special handling of
	# NaN or zero: only the sign bit changes.
	__negsf:
	lpm $r1, sign_mask # 0x80000000
	xor $r0, $r0, $r1
	return
	#endif

	##########################################################################
	##########################################################################
	## float to int & unsigned int

	#ifdef L_fixsfsi
	.global __fixsfsi
	# __fixsfsi - convert binary32 to signed 32-bit integer, truncating
	# toward zero.
	# In:  $r0 = x (raw bit pattern).   Out: $r0 = (int) x.
	# Registers written: $r0-$r3.
	# NaN and any value with exponent below 127 give 0.  An exponent
	# of 158 or more saturates to 0x7FFFFFFF (positive) or 0x80000000
	# (negative).
	__fixsfsi: # 20 instructions
	lshr $r2,$r0,31 # s in r2
	bextu $r1,$r0,(8<<5)\|23 # e in r1
	lpm $r3, m_mask
	and $r0,$r0,$r3 # m in r0
	# NaN (e == 0xFF with a non-zero fraction) converts to 0
	cmp $r1,0xFF
	jmpc nz, int_not_nan
	cmp $r0,0
	jmpc nz, int_ret_zero
	int_not_nan:
	# lower limit: magnitude below 1.0 truncates to 0
	cmp $r1, 127
	jmpc gte, int_not_zero
	int_ret_zero:
	ldk $r0,0
	return
	int_not_zero:
	# upper limit: magnitude of 2^31 or more saturates
	cmp $r1, 158
	jmpc lt, int_not_max
	lpm $r0, nan # 0x7FFFFFFF
	cmp $r2, 0
	jmpc z, int_positive
	not $r0, $r0 # 0x80000000
	return
	int_not_max:
	lpm $r3, smallest_norm
	or $r0, $r0, $r3 # set implicit bit
	# The 24-bit mantissa has weight 2^(e-150): shift it into place
	sub $r1, $r1, 150
	cmp $r1, 0
	jmpc s, shift_right
	ashl $r0, $r0, $r1
	jmp set_int_sign
	shift_right:
	neg $r1 # shift distance = 150 - e
	lshr $r0, $r0, $r1
	set_int_sign:
	cmp $r2, 0
	jmpc z, int_positive
	neg $r0 # apply the sign
	int_positive:
	return
	#endif

	#ifdef L_fixunssfsi
	.global __fixunssfsi
	# __fixunssfsi - convert binary32 to unsigned 32-bit integer,
	# truncating toward zero.
	# In:  $r0 = x (raw bit pattern).   Out: $r0 = (unsigned) x.
	# Registers written: $r0-$r3.
	# Any input with the sign bit set, NaN, and any value below 1.0
	# give 0.
	# NOTE(review): unlike __fixsfsi there is no upper-limit clamp
	# here; inputs of 2^32 or more (including +inf) reach the left
	# shift with a distance of 9 or more.
	__fixunssfsi: # 19 instructions
	lshr $r2, $r0, 31 # s in r2
	cmp $r2, 0
	jmpc z, uint_not_neg
	ldk $r0, 0
	return
	uint_not_neg:
	bextu $r1, $r0, (8<<5)\|23 # e in r1
	sub $r1, $r1, 127 # remove the bias
	lpm $r3, m_mask
	and $r0, $r0, $r3 # m in r0
	# test nan
	# The bias has already been removed, so the all-ones exponent
	# field 0xFF shows up here as 0xFF - 127 = 128.
	cmp $r1, 128
	jmpc nz, uint_not_nan
	cmp $r0, 0
	jmpc z, uint_not_nan
	ldk $r0, 0
	return
	uint_not_nan:
	# test edges
	cmp $r1, 0
	jmpc ns, uint_not_zero # lower limit
	ldk $r0, 0
	return
	uint_not_zero:
	lpm $r3, smallest_norm
	or $r0, $r0, $r3 # set implicit bit
	# The 24-bit mantissa has weight 2^(e-23) once e is unbiased
	cmp $r1, 23
	jmpc lt, shift_uint_right
	sub $r1, $r1, 23
	ashl $r0, $r0, $r1
	return
	shift_uint_right:
	ldk $r3, 23
	sub $r1, $r3, $r1
	lshr $r0, $r0, $r1
	return
	#endif

	##########################################################################
	##########################################################################
	## int & unsigned int to float


	# i2f x, s1, s2, s3, lbl
	# Convert the unsigned integer in x to a packed binary32 value,
	# left in x with the sign bit clear.  Both callers test for zero
	# before expanding the macro.
	#   s1, s2, s3   scratch registers, overwritten
	#   lbl          suffix appended to the labels generated here
	# Values that need more than 24 bits are rounded to nearest, ties
	# to even.  Uses the nlz macro, defined outside this block
	# (presumably a leading-zero count - confirm).
	.macro i2f x, s1, s2, s3, lbl
	move \s1, \x
	nlz \s1, \s2
	# nlz >= 8: the value fits in 24 bits, shift left, no rounding
	cmp \s1, 8
	jmpc s, float_round\lbl
	sub \s2, \s1, 8
	ashl \x, \x, \s2
	jmp float_no_round\lbl
	float_round\lbl:
	# nlz of 6 or 7: align the top bit to bit 25, two extra low bits
	cmp \s1, 6
	jmpc s, float_shift_right\lbl
	sub \s2, \s1, 6
	ashl \x, \x, \s2
	jmp float_round_and_pack\lbl
	float_shift_right\lbl:
	# nlz < 6: shift right by 6 - nlz, keeping a sticky bit
	ldk \s2, 6
	sub \s2, \s2, \s1
	xnor \s3, \s3 ,\s3 # 0xFFFFFFFF
	ashl \s3, \s3 ,\s2 # create inverse of mask for test of S bit value in discarded my
	xnor \s3, \s3 ,0 # NOT
	tst \x, \s3 # determine value of sticky bit
	lshr \x, \x, \s2
	jmpc z,float_round_and_pack\lbl
	or \x, \x, 1 # set the sticky bit to 1
	float_round_and_pack\lbl:
	# Round to nearest even: OR the mantissa low bit into bit 0, add
	# one and drop the two extra bits.
	bextu \s2, \x, (1<<5)\|2 # extract low bit of m
	or \x, \x, \s2 # or p into r
	add \x, \x, 1
	lshr \x, \x, 2
	btst \x, (1<<5)\|24 # test for carry from round
	jmpc z, float_no_round\lbl
	sub \s1, \s1, 1 # inc e for carry (actually dec nlz)
	lshr \x, \x, 1
	float_no_round\lbl:
	ldk \s2, 158
	sub \s1, \s2, \s1 # e = 158 - nlz
	# Pack e
	ldl \s1, \s1, (8<<5)\|23
	bins \x, \x, \s1 # width = 8, pos = 23, overwrites the implicit bit
	.endm


	#ifdef L_floatsisf
	.global __floatsisf
	# __floatsisf - convert a signed 32-bit integer to binary32.
	# In:  $r0 = integer.   Out: $r0 = binary32 bit pattern.
	# Registers written: $r0-$r4.  Zero is returned unchanged, which
	# is the bit pattern of +0.0.
	__floatsisf: # 32 instructions
	cmp $r0, 0
	jmpc nz, float_not_zero
	return
	float_not_zero:
	ashr $r1, $r0, 31 # s in r1
	xor $r0, $r0, $r1 # cond neg
	sub $r0, $r0, $r1 # $r0 = magnitude of the input
	i2f $r0, $r2, $r3, $r4, 1
	# Insert the sign into bit 31
	ldl $r1, $r1, (1<<5)\|31
	bins $r0, $r0, $r1
	return
	#endif

	#ifdef L_floatunsisf
	.global __floatunsisf
	# __floatunsisf - convert an unsigned 32-bit integer to binary32.
	# In:  $r0 = integer.   Out: $r0 = binary32 bit pattern.
	# Registers written: $r0-$r3.  Zero is returned unchanged, which
	# is the bit pattern of +0.0.
	__floatunsisf: # 26 instructions
	cmp $r0, 0
	jmpc nz, float_not_zero2
	return
	float_not_zero2:
	i2f $r0, $r1, $r2, $r3, 2
	return
	#endif

	#if 0
	##########################################################################
	##########################################################################
	## float compare

	# This routine is compiled out by the surrounding conditional.
	# __cmpsf2_ - three-way compare of two binary32 bit patterns.
	# In:  $r0 = x, $r1 = y.
	# Out: $r0 = 1 when x > y or either operand is NaN,
	#            0 when x == y (both zeros compare equal),
	#            0xFFFFFFFF when x < y.
	# Registers written: $r0, $r2-$r4.

	__cmpsf2_:
	# calc abs vals
	lpm $r3, nan # also abs mask
	and $r2, $r0, $r3
	and $r3, $r1, $r3
	# test if either abs is nan
	lpm $r4, inf
	cmp $r2, $r4
	jmpc gt, cmp_is_gt
	cmp $r3, $r4
	jmpc gt, cmp_is_gt
	# test if both are 0
	or $r2, $r2, $r3
	cmp $r2, 0
	jmpc z, cmp_is_eq
	# test if eq
	cmp $r0, $r1
	jmpc z, cmp_is_eq
	# -- if either is pos
	# With at least one operand non-negative, the signed integer
	# order of the bit patterns matches the floating-point order.
	and $r2, $r0, $r1
	cmp $r2, 0
	jmpc s, cmp_both_neg
	cmp $r0, $r1
	jmpc gt, cmp_is_gt
	# r0 < r1
	lpm $r0, high_uint
	return
	cmp_both_neg:
	# Both negative: the integer order of the patterns is reversed
	cmp $r0, $r1
	jmpc lt, cmp_is_gt
	# r0 < r1
	lpm $r0, high_uint
	return
	cmp_is_gt:
	ldk $r0, 1
	return
	cmp_is_eq:
	ldk $r0, 0
	return
	#endif

	#ifdef L_udivsi3
	.global __udivsi3
	# __udivsi3 - unsigned 32-bit divide by shift and subtract.
	# In:  $r0 = dividend, $r1 = divisor.
	# Out: $r0 = quotient, $r2 = remainder.
	# $r3 is scratch; $r28 is the loop counter, saved and restored.
	# $r1 is left untouched.  A zero divisor is not checked.
	__udivsi3:
	push $r28
	ldk $r28,-32 # counts up to 0: one pass per quotient bit
	ldk $r2,0 # running remainder
	0:
	# Shift the pair $r2:$r0 left by one bit
	lshr $r3,$r0,31 # bit leaving $r0
	ashl $r2,$r2,1
	ashl $r0,$r0,1
	or $r2,$r2,$r3
	# Divisor fits: subtract it and record a quotient bit
	cmp $r2,$r1
	jmpc b,1f
	sub $r2,$r2,$r1
	or $r0,$r0,1 # bit 0 is clear after the shift above
	1:
	add $r28,$r28,1
	jmpx 31,$r28,1,0b # loop while the counter is still negative
	pop $r28
	# $r0: quotient
	# $r2: remainder
	return
	#endif

	#ifdef L_umodsi3
	.global __umodsi3
	# __umodsi3 - unsigned 32-bit remainder.
	# In:  $r0 = dividend, $r1 = divisor.   Out: $r0 = remainder.
	# Calls __udivsi3, which leaves the remainder in $r2, so the
	# registers written are those of __udivsi3.
	__umodsi3:
	call __udivsi3
	move $r0,$r2 # remainder left in $r2 by __udivsi3
	return
	#endif

	#ifdef L_divsi3
	.global __divsi3
	# __divsi3 - signed 32-bit divide, quotient truncated toward zero.
	# In:  $r0 = dividend, $r1 = divisor.   Out: $r0 = quotient.
	# Divides the magnitudes with __udivsi3 and then applies the
	# sign.  $r5 carries the sign across the call, so __udivsi3 must
	# leave $r5 alone.
	__divsi3:
	# $r5 = -1 when the operand signs differ, 0 otherwise
	xor $r5,$r0,$r1
	ashr $r5,$r5,31
	# $r0 = abs($r0)
	ashr $r2,$r0,31
	xor $r0,$r0,$r2
	sub $r0,$r0,$r2
	# $r1 = abs($r1)
	ashr $r2,$r1,31
	xor $r1,$r1,$r2
	sub $r1,$r1,$r2
	call __udivsi3
	# Conditionally negate the quotient
	xor $r0,$r0,$r5
	sub $r0,$r0,$r5
	return

	#endif

	#ifdef L_modsi3
	.global __modsi3
	# __modsi3 - signed 32-bit remainder; the result takes the sign
	# of the dividend.
	# In:  $r0 = dividend, $r1 = divisor.   Out: $r0 = remainder.
	# Takes the remainder of the magnitudes with __umodsi3 and then
	# applies the sign.  $r5 carries the sign across the call, so the
	# callee must leave $r5 alone.
	__modsi3:
	# $r5 = -1 for a negative dividend, 0 otherwise
	ashr $r5,$r0,31
	# $r0 = abs($r0)
	xor $r0,$r0,$r5
	sub $r0,$r0,$r5
	# $r1 = abs($r1)
	ashr $r2,$r1,31
	xor $r1,$r1,$r2
	sub $r1,$r1,$r2
	call __umodsi3
	# Conditionally negate the remainder
	xor $r0,$r0,$r5
	sub $r0,$r0,$r5
	return
	#endif