blob: 4ef5cf7aae9c270cc3b01dfdd990741564fcb12a [file] [log] [blame]
###################################-
#
# Copyright (C) 2009-2021 Free Software Foundation, Inc.
#
# Contributed by Michael Eager <eager@eagercon.com>.
#
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3, or (at your option) any
# later version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
# License for more details.
#
# Under Section 7 of GPL version 3, you are granted additional
# permissions described in the GCC Runtime Library Exception, version
# 3.1, as published by the Free Software Foundation.
#
# You should have received a copy of the GNU General Public License and
# a copy of the GCC Runtime Library Exception along with this program;
# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
# <http://www.gnu.org/licenses/>.
#
# muldi3_hard.S
#
# Multiply operation for 64 bit integers, for devices with hard multiply
# Input : Operand1[H] in Reg r5
# Operand1[L] in Reg r6
# Operand2[H] in Reg r7
# Operand2[L] in Reg r8
# Output: Result[H] in Reg r3
# Result[L] in Reg r4
#
# Explaination:
#
# Both the input numbers are divided into 16 bit number as follows
# op1 = A B C D
# op2 = E F G H
# result = D * H
# + (C * H + D * G) << 16
# + (B * H + C * G + D * F) << 32
# + (A * H + B * G + C * F + D * E) << 48
#
# Only 64 bits of the output are considered
#
#######################################
/* An executable stack is *not* required for these functions. */
#ifdef __linux__
.section .note.GNU-stack,"",%progbits
.previous
#endif
.globl muldi3_hardproc
.ent muldi3_hardproc
muldi3_hardproc:
addi r1,r1,-40
# Save the input operands on the caller's stack
swi r5,r1,44
swi r6,r1,48
swi r7,r1,52
swi r8,r1,56
# Store all the callee saved registers
sw r20,r1,r0
swi r21,r1,4
swi r22,r1,8
swi r23,r1,12
swi r24,r1,16
swi r25,r1,20
swi r26,r1,24
swi r27,r1,28
# Load all the 16 bit values for A through H
lhui r20,r1,44 # A
lhui r21,r1,46 # B
lhui r22,r1,48 # C
lhui r23,r1,50 # D
lhui r24,r1,52 # E
lhui r25,r1,54 # F
lhui r26,r1,56 # G
lhui r27,r1,58 # H
# D * H ==> LSB of the result on stack ==> Store1
mul r9,r23,r27
swi r9,r1,36 # Pos2 and Pos3
# Hi (Store1) + C * H + D * G ==> Store2 ==> Pos1 and Pos2
# Store the carry generated in position 2 for Pos 3
lhui r11,r1,36 # Pos2
mul r9,r22,r27 # C * H
mul r10,r23,r26 # D * G
add r9,r9,r10
addc r12,r0,r0
add r9,r9,r11
addc r12,r12,r0 # Store the Carry
shi r9,r1,36 # Store Pos2
swi r9,r1,32
lhui r11,r1,32
shi r11,r1,34 # Store Pos1
# Hi (Store2) + B * H + C * G + D * F ==> Store3 ==> Pos0 and Pos1
mul r9,r21,r27 # B * H
mul r10,r22,r26 # C * G
mul r7,r23,r25 # D * F
add r9,r9,r11
add r9,r9,r10
add r9,r9,r7
swi r9,r1,32 # Pos0 and Pos1
# Hi (Store3) + A * H + B * G + C * F + D * E ==> Store3 ==> Pos0
lhui r11,r1,32 # Pos0
mul r9,r20,r27 # A * H
mul r10,r21,r26 # B * G
mul r7,r22,r25 # C * F
mul r8,r23,r24 # D * E
add r9,r9,r11
add r9,r9,r10
add r9,r9,r7
add r9,r9,r8
sext16 r9,r9 # Sign extend the MSB
shi r9,r1,32
# Move results to r3 and r4
lhui r3,r1,32
add r3,r3,r12
shi r3,r1,32
lwi r3,r1,32 # Hi Part
lwi r4,r1,36 # Lo Part
# Restore Callee saved registers
lw r20,r1,r0
lwi r21,r1,4
lwi r22,r1,8
lwi r23,r1,12
lwi r24,r1,16
lwi r25,r1,20
lwi r26,r1,24
lwi r27,r1,28
# Restore Frame and return
rtsd r15,8
addi r1,r1,40
.end muldi3_hardproc