| /* 64-bit multiplication support for TILEPro. |
| Copyright (C) 2011-2021 Free Software Foundation, Inc. |
| Contributed by Walter Lee (walt@tilera.com) |
| |
| This file is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by the |
| Free Software Foundation; either version 3, or (at your option) any |
| later version. |
| |
| This file is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* 64-bit multiplication support. */ |
| |
| .file "softmpy.S" |
| |
| /* Parameters: the 64-bit operands n0 and n1, each handled as two 32-bit |
| halves. hi0, lo1 and hi1 name r1, r2 and r3 directly. lo0 names r9, |
| which __muldi3 fills from r0 in its first bundle so that r0 can be |
| overwritten while the low half of n0 is still needed. */ |
| #define lo0 r9 /* low 32 bits of n0 */ |
| #define hi0 r1 /* high 32 bits of n0 */ |
| #define lo1 r2 /* low 32 bits of n1 */ |
| #define hi1 r3 /* high 32 bits of n1 */ |
| |
| /* temps: scratch registers used by __muldi3 */ |
| #define result1_a r4 /* first accumulator of terms for the high result word */ |
| #define result1_b r5 /* second accumulator, added into result1_a near the end */ |
| |
| #define tmp0 r6 /* sum of the lo0 and lo1 cross products, later the carry flag of the low-word add */ |
| #define tmp0_left_16 r7 /* copy of tmp0 << 16, compared with r0 to detect that carry */ |
| #define tmp1 r8 /* first cross product, then the cross-sum carry packed with the upper half of tmp0 */ |
| |
| .section .text.__muldi3, "ax" |
| .align 8 |
| .globl __muldi3 |
| .type __muldi3, @function |
| __muldi3: /* Multiply the 64-bit value n0 (hi0:lo0, read from r1:r0) by the |
| 64-bit value n1 (hi1:lo1, read from r3:r2) and keep the low 64 bits of |
| the product. The low word is built in r0 and the high word in r1. |
| r4 through r9 are overwritten as scratch. |
| |
| Notation: lo16(x) and hi16(x) are the low and high 16 bits of a 32-bit |
| register, and every sum is taken modulo 2^32. |
| |
| cross = hi16(lo1) * lo16(lo0) + hi16(lo0) * lo16(lo1) |
| low word = lo16(lo0) * lo16(lo1) + (cross << 16) |
| high word = low 32 bits of lo0 * hi1, plus low 32 bits of hi0 * lo1, |
| plus hi16(lo0) * hi16(lo1), plus (cross >> 16), |
| plus (carry out of cross) << 16, |
| plus the carry out of the low-word add |
| |
| NOTE(review): the per-instruction comments assume that each mulXY_uu |
| form takes the unsigned product of half X of its first source and |
| half Y of its second, that the mulXYa_uu forms add that product to the |
| destination, and that mulhlsa_uu adds the product shifted left by 16. |
| They also assume that all instructions in one bundle read their |
| sources before any destination of that bundle is written. Confirm |
| both points against the TILEPro ISA manual. */ |
| { /* save lo0 and start the cross-product sum */ |
| move lo0, r0 /* so we can write "out r0" while "in r0" alive */ |
| mulhl_uu tmp0, lo1, r0 /* tmp0 = hi16(lo1) * lo16(lo0), reading r0 because lo0 is only being written in this bundle */ |
| } |
| { |
| mulll_uu result1_a, lo1, hi0 /* result1_a = lo16(lo1) * lo16(hi0) */ |
| } |
| { /* finish cross while keeping its first term for the carry test */ |
| move tmp1, tmp0 /* tmp1 = first cross term, the value tmp0 had on entry to this bundle */ |
| mulhla_uu tmp0, lo0, lo1 /* tmp0 += hi16(lo0) * lo16(lo1), giving cross modulo 2^32 */ |
| } |
| { |
| mulhlsa_uu result1_a, lo1, hi0 /* result1_a += (hi16(lo1) * lo16(hi0)) << 16 */ |
| } |
| { |
| mulll_uu result1_b, lo0, hi1 /* result1_b = lo16(lo0) * lo16(hi1) */ |
| slt_u tmp1, tmp0, tmp1 /* tmp1 = 1 if cross wrapped past 32 bits (sum below its first term), else 0 */ |
| } |
| { |
| mulhlsa_uu result1_a, lo0, hi1 /* result1_a += (hi16(lo0) * lo16(hi1)) << 16 */ |
| shli r0, tmp0, 16 /* r0 = cross << 16, the cross contribution to the low word */ |
| } |
| { |
| move tmp0_left_16, r0 /* remember cross << 16 to detect carry out of the low-word add */ |
| mulhha_uu result1_b, lo0, lo1 /* result1_b += hi16(lo0) * hi16(lo1) */ |
| } |
| { |
| mullla_uu r0, lo1, lo0 /* r0 += lo16(lo1) * lo16(lo0), completing the low result word */ |
| shli tmp1, tmp1, 16 /* move the cross carry up to bit 16 */ |
| } |
| { |
| mulhlsa_uu result1_b, hi0, lo1 /* result1_b += (hi16(hi0) * lo16(lo1)) << 16, the last read of hi0 (r1) */ |
| inthh tmp1, tmp1, tmp0 /* NOTE(review): presumably tmp1 = (hi16(tmp1) << 16) + hi16(tmp0), i.e. cross carry in bit 16 above cross >> 16 - confirm inthh operand order */ |
| } |
| { |
| mulhlsa_uu result1_a, hi1, lo0 /* result1_a += (hi16(hi1) * lo16(lo0)) << 16 */ |
| slt_u tmp0, r0, tmp0_left_16 /* tmp0 = 1 if the low-word add wrapped, else 0 */ |
| } |
| /* NOTE: this will stall for a cycle here. Oh well. */ |
| { /* r1 is free now that hi0 is no longer read */ |
| add r1, tmp0, tmp1 /* r1 = low-word carry + packed cross contribution */ |
| add result1_a, result1_a, result1_b /* merge the two high-word accumulators */ |
| } |
| { |
| add r1, r1, result1_a /* r1 = finished high result word */ |
| jrp lr /* jump to the address held in lr */ |
| } |
| .size __muldi3,.-__muldi3 |