| ###################################- |
| # |
| # Copyright 2009, 2010 Free Software Foundation, Inc. |
| # |
| # Contributed by Michael Eager <eager@eagercon.com>. |
| # |
| # This file is free software; you can redistribute it and/or modify it |
| # under the terms of the GNU General Public License as published by the |
| # Free Software Foundation; either version 3, or (at your option) any |
| # later version. |
| # |
| # GCC is distributed in the hope that it will be useful, but WITHOUT |
| # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
| # License for more details. |
| # |
| # Under Section 7 of GPL version 3, you are granted additional |
| # permissions described in the GCC Runtime Library Exception, version |
| # 3.1, as published by the Free Software Foundation. |
| # |
| # You should have received a copy of the GNU General Public License and |
| # a copy of the GCC Runtime Library Exception along with this program; |
| # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| # <http://www.gnu.org/licenses/>. |
| # |
| # muldi3_hard.asm |
| # |
| # Multiply operation for 64 bit integers, for devices with hard multiply |
| # Input : Operand1[H] in Reg r5 |
| # Operand1[L] in Reg r6 |
| # Operand2[H] in Reg r7 |
| # Operand2[L] in Reg r8 |
| # Output: Result[H] in Reg r3 |
| # Result[L] in Reg r4 |
| # |
| # Explanation: |
| # |
| # Each input operand is split into four 16-bit digits as follows |
| # op1 = A B C D |
| # op2 = E F G H |
| # result = D * H |
| # + (C * H + D * G) << 16 |
| # + (B * H + C * G + D * F) << 32 |
| # + (A * H + B * G + C * F + D * E) << 48 |
| # |
| # Only the low 64 bits of the product are kept |
| # |
| ####################################### |
| |
| # muldi3_hardproc |
| # |
| # Returns the low 64 bits of the product of two 64-bit integers. Every |
| # mul below takes two zero-extended 16-bit digits (loaded with lhui). |
| # |
| # In : r5:r6 = operand 1 (high:low), r7:r8 = operand 2 (high:low) |
| # Out : r3:r4 = result (high:low) |
| # Writes: r3, r4, r7-r12; r20-r27 are saved on entry and restored on exit |
| # |
| # Frame layout after the 40-byte adjustment (offsets from r1): |
| # 0..28 saved r20-r27 |
| # 32..38 result halfwords Pos0 (32), Pos1 (34), Pos2 (36), Pos3 (38) |
| # 44..56 copies of r5-r8, written into the caller's frame |
| # |
| # NOTE(review): reading A..H and Pos0..Pos3 by halfword offset relies on |
| # the high half of a stored word sitting at the lower address (big-endian |
| # layout) - confirm before building for a little-endian target. |
| |
| .globl muldi3_hardproc |
| .ent muldi3_hardproc |
| muldi3_hardproc: |
| addi r1,r1,-40 |
| |
| # Save the input operands on the caller's stack so that their 16 bit |
| # halves can be reloaded individually |
| swi r5,r1,44 |
| swi r6,r1,48 |
| swi r7,r1,52 |
| swi r8,r1,56 |
| |
| # Store all the callee saved registers used for the digits A thru H |
| swi r20,r1,0 |
| swi r21,r1,4 |
| swi r22,r1,8 |
| swi r23,r1,12 |
| swi r24,r1,16 |
| swi r25,r1,20 |
| swi r26,r1,24 |
| swi r27,r1,28 |
| |
| # Load all the 16 bit values for A thru H |
| lhui r20,r1,44 # A |
| lhui r21,r1,46 # B |
| lhui r22,r1,48 # C |
| lhui r23,r1,50 # D |
| lhui r24,r1,52 # E |
| lhui r25,r1,54 # F |
| lhui r26,r1,56 # G |
| lhui r27,r1,58 # H |
| |
| # D * H ==> LSB of the result on stack ==> Store1 |
| mul r9,r23,r27 |
| swi r9,r1,36 # Pos2 and Pos3 |
| |
| # Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2 |
| # A carry out of this 32 bit sum belongs in Pos0; it is collected in |
| # r12 and folded into Pos0 in the last step |
| lhui r11,r1,36 # Pos2 |
| mul r9,r22,r27 # C * H |
| mul r10,r23,r26 # D * G |
| add r9,r9,r10 |
| addc r12,r0,r0 # r12 = carry out of C * H + D * G |
| add r9,r9,r11 |
| addc r12,r12,r0 # Accumulate the carry of the second add |
| shi r9,r1,36 # Store Pos2 (low 16 bits of Store2) |
| swi r9,r1,32 # Scratch store to split off the high half |
| lhui r11,r1,32 # r11 = Hi (Store2) |
| shi r11,r1,34 # Store Pos1 |
| |
| # Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1 |
| # No carry is captured here: the adds are not followed by addc |
| mul r9,r21,r27 # B * H |
| mul r10,r22,r26 # C * G |
| mul r7,r23,r25 # D * F |
| add r9,r9,r11 |
| add r9,r9,r10 |
| add r9,r9,r7 |
| swi r9,r1,32 # Pos0 and Pos1 |
| |
| # Hi (Store3) + A * H + B * G + C * F + D * E + carry ==> Store4 ==> Pos0 |
| lhui r11,r1,32 # Pos0 |
| mul r9,r20,r27 # A * H |
| mul r10,r21,r26 # B * G |
| mul r7,r22,r25 # C * F |
| mul r8,r23,r24 # D * E |
| add r9,r9,r11 |
| add r9,r9,r10 |
| add r9,r9,r7 |
| add r9,r9,r8 |
| add r9,r9,r12 # Fold in the carry saved from Store2 |
| shi r9,r1,32 # Only the low 16 bits are stored as Pos0 |
| |
| # Move results to r3 and r4 |
| lwi r3,r1,32 # Hi Part |
| lwi r4,r1,36 # Lo Part |
| |
| # Restore Callee saved registers |
| lwi r20,r1,0 |
| lwi r21,r1,4 |
| lwi r22,r1,8 |
| lwi r23,r1,12 |
| lwi r24,r1,16 |
| lwi r25,r1,20 |
| lwi r26,r1,24 |
| lwi r27,r1,28 |
| |
| # Restore Frame and return; the addi sits in the rtsd delay slot |
| rtsd r15,8 |
| addi r1,r1,40 |
| |
| .end muldi3_hardproc |
| |
| |