/* Copyright (C) 2000-2021 Free Software Foundation, Inc.
   Contributed by James E. Wilson <wilson@cygnus.com>.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef L__divxf3
// Compute an 80-bit IEEE double-extended quotient.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// farg0 holds the dividend.  farg1 holds the divisor.
//
// __divtf3 is an alternate symbol name for backward compatibility.

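// In outline: frcpa produces an initial approximation y0 ~= 1/farg1
// (good to roughly 8 bits) and sets p6 for the common case.  For
// operands that need special handling (zero, infinity, NaN), frcpa
// instead delivers the final IEEE result in f10 and clears p6, so the
// (p7) path below simply returns f10 unchanged.  The (p6) path refines
// y0 and the initial quotient q0 = farg0 * y0 with fma/fnma steps
// (each fnma forms a residual such as e = 1 - farg1*y), finishing with
// a correctly rounded fma.s0 in the user's dynamic rounding mode.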
        .text
        .align 16
        .global __divxf3
        .proc __divxf3
__divxf3:
#ifdef SHARED
        .global __divtf3
__divtf3:
#endif
        cmp.eq p7, p0 = r0, r0
        frcpa.s0 f10, p6 = farg0, farg1
        ;;
(p6)    cmp.ne p7, p0 = r0, r0
        .pred.rel.mutex p6, p7
(p6)    fnma.s1 f11 = farg1, f10, f1
(p6)    fma.s1 f12 = farg0, f10, f0
        ;;
(p6)    fma.s1 f13 = f11, f11, f0
(p6)    fma.s1 f14 = f11, f11, f11
        ;;
(p6)    fma.s1 f11 = f13, f13, f11
(p6)    fma.s1 f13 = f14, f10, f10
        ;;
(p6)    fma.s1 f10 = f13, f11, f10
(p6)    fnma.s1 f11 = farg1, f12, farg0
        ;;
(p6)    fma.s1 f11 = f11, f10, f12
(p6)    fnma.s1 f12 = farg1, f10, f1
        ;;
(p6)    fma.s1 f10 = f12, f10, f10
(p6)    fnma.s1 f12 = farg1, f11, farg0
        ;;
(p6)    fma.s0 fret0 = f12, f10, f11
(p7)    mov fret0 = f10
        br.ret.sptk rp
        .endp __divxf3
#endif

#ifdef L__divdf3
// Compute a 64-bit IEEE double quotient.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// farg0 holds the dividend.  farg1 holds the divisor.

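// Same scheme as __divxf3: frcpa supplies y0 ~= 1/farg1 and p6 for the
// common case; special operands come back fully resolved in f10 with
// p6 clear.  Because only double precision is required, fewer
// refinement steps suffice, and the final steps use .d completers so
// the result is correctly rounded to double.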
        .text
        .align 16
        .global __divdf3
        .proc __divdf3
__divdf3:
        cmp.eq p7, p0 = r0, r0
        frcpa.s0 f10, p6 = farg0, farg1
        ;;
(p6)    cmp.ne p7, p0 = r0, r0
        .pred.rel.mutex p6, p7
(p6)    fmpy.s1 f11 = farg0, f10
(p6)    fnma.s1 f12 = farg1, f10, f1
        ;;
(p6)    fma.s1 f11 = f12, f11, f11
(p6)    fmpy.s1 f13 = f12, f12
        ;;
(p6)    fma.s1 f10 = f12, f10, f10
(p6)    fma.s1 f11 = f13, f11, f11
        ;;
(p6)    fmpy.s1 f12 = f13, f13
(p6)    fma.s1 f10 = f13, f10, f10
        ;;
(p6)    fma.d.s1 f11 = f12, f11, f11
(p6)    fma.s1 f10 = f12, f10, f10
        ;;
(p6)    fnma.d.s1 f8 = farg1, f11, farg0
        ;;
(p6)    fma.d fret0 = f8, f10, f11
(p7)    mov fret0 = f10
        br.ret.sptk rp
        ;;
        .endp __divdf3
#endif

#ifdef L__divsf3
// Compute a 32-bit IEEE float quotient.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// farg0 holds the dividend.  farg1 holds the divisor.

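// Single precision needs the least refinement: two e = e*e squaring
// steps push the approximate quotient q = farg0*y0 well past 24 bits,
// after which fnorm.s.s0 rounds it to float in the user's rounding
// mode.  As above, the (p7) path returns the special-case result that
// frcpa already placed in f10.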
        .text
        .align 16
        .global __divsf3
        .proc __divsf3
__divsf3:
        cmp.eq p7, p0 = r0, r0
        frcpa.s0 f10, p6 = farg0, farg1
        ;;
(p6)    cmp.ne p7, p0 = r0, r0
        .pred.rel.mutex p6, p7
(p6)    fmpy.s1 f8 = farg0, f10
(p6)    fnma.s1 f9 = farg1, f10, f1
        ;;
(p6)    fma.s1 f8 = f9, f8, f8
(p6)    fmpy.s1 f9 = f9, f9
        ;;
(p6)    fma.s1 f8 = f9, f8, f8
(p6)    fmpy.s1 f9 = f9, f9
        ;;
(p6)    fma.d.s1 f10 = f9, f8, f8
        ;;
(p6)    fnorm.s.s0 fret0 = f10
(p7)    mov fret0 = f10
        br.ret.sptk rp
        ;;
        .endp __divsf3
#endif

#ifdef L__divdi3
// Compute a 64-bit integer quotient.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.

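// IA-64 has no integer divide instruction, so the operands are moved
// to the FP registers, converted to floating point, and divided with
// the same frcpa plus Newton-Raphson pattern used above.  Truncating
// the result back to an integer yields the exact quotient: the 64-bit
// significand of the register format represents any 64-bit integer
// exactly, and the refinement leaves enough accuracy for truncation.
// A zero divisor raises a break fault (break 1) so the OS can deliver
// the divide-by-zero trap.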
        .text
        .align 16
        .global __divdi3
        .proc __divdi3
__divdi3:
        .regstk 2,0,0,0
        // Transfer inputs to FP registers.
        setf.sig f8 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1
        ;;
        // Convert the inputs to FP, so that they won't be treated as unsigned.
        fcvt.xf f8 = f8
        fcvt.xf f9 = f9
(p7)    break 1
        ;;
        // Compute the reciprocal approximation.
        frcpa.s1 f10, p6 = f8, f9
        ;;
        // 3 Newton-Raphson iterations.
(p6)    fnma.s1 f11 = f9, f10, f1
(p6)    fmpy.s1 f12 = f8, f10
        ;;
(p6)    fmpy.s1 f13 = f11, f11
(p6)    fma.s1 f12 = f11, f12, f12
        ;;
(p6)    fma.s1 f10 = f11, f10, f10
(p6)    fma.s1 f11 = f13, f12, f12
        ;;
(p6)    fma.s1 f10 = f13, f10, f10
(p6)    fnma.s1 f12 = f9, f11, f8
        ;;
(p6)    fma.s1 f10 = f12, f10, f11
        ;;
        // Round quotient to an integer.
        fcvt.fx.trunc.s1 f10 = f10
        ;;
        // Transfer result to GP registers.
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __divdi3
#endif

#ifdef L__moddi3
// Compute a 64-bit integer modulus.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// in0 holds the dividend (a).  in1 holds the divisor (b).

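// The modulus is derived from the quotient, computed as in __divdi3:
// once q = trunc(a/b) sits in f10 as an integer, one fixed-point
// multiply-add (xma.l) forms r = q * (-b) + a.  The divisor is negated
// on the integer side (sub in1 = r0, in1) while the FP pipeline is
// still refining the reciprocal, so the negation overlaps the FP work.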
        .text
        .align 16
        .global __moddi3
        .proc __moddi3
__moddi3:
        .regstk 2,0,0,0
        // Transfer inputs to FP registers.
        setf.sig f14 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1
        ;;
        // Convert the inputs to FP, so that they won't be treated as unsigned.
        fcvt.xf f8 = f14
        fcvt.xf f9 = f9
(p7)    break 1
        ;;
        // Compute the reciprocal approximation.
        frcpa.s1 f10, p6 = f8, f9
        ;;
        // 3 Newton-Raphson iterations.
(p6)    fmpy.s1 f12 = f8, f10
(p6)    fnma.s1 f11 = f9, f10, f1
        ;;
(p6)    fma.s1 f12 = f11, f12, f12
(p6)    fmpy.s1 f13 = f11, f11
        ;;
(p6)    fma.s1 f10 = f11, f10, f10
(p6)    fma.s1 f11 = f13, f12, f12
        ;;
        sub in1 = r0, in1
(p6)    fma.s1 f10 = f13, f10, f10
(p6)    fnma.s1 f12 = f9, f11, f8
        ;;
        setf.sig f9 = in1
(p6)    fma.s1 f10 = f12, f10, f11
        ;;
        fcvt.fx.trunc.s1 f10 = f10
        ;;
        // r = q * (-b) + a
        xma.l f10 = f10, f9, f14
        ;;
        // Transfer result to GP registers.
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __moddi3
#endif

#ifdef L__udivdi3
// Compute a 64-bit unsigned integer quotient.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.

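// Identical in structure to __divdi3, except that the conversions use
// fcvt.xuf so the operands are treated as unsigned, and the final
// conversion truncates with fcvt.fxu to produce an unsigned quotient.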
        .text
        .align 16
        .global __udivdi3
        .proc __udivdi3
__udivdi3:
        .regstk 2,0,0,0
        // Transfer inputs to FP registers.
        setf.sig f8 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1
        ;;
        // Convert the inputs to FP, to avoid FP software-assist faults.
        fcvt.xuf.s1 f8 = f8
        fcvt.xuf.s1 f9 = f9
(p7)    break 1
        ;;
        // Compute the reciprocal approximation.
        frcpa.s1 f10, p6 = f8, f9
        ;;
        // 3 Newton-Raphson iterations.
(p6)    fnma.s1 f11 = f9, f10, f1
(p6)    fmpy.s1 f12 = f8, f10
        ;;
(p6)    fmpy.s1 f13 = f11, f11
(p6)    fma.s1 f12 = f11, f12, f12
        ;;
(p6)    fma.s1 f10 = f11, f10, f10
(p6)    fma.s1 f11 = f13, f12, f12
        ;;
(p6)    fma.s1 f10 = f13, f10, f10
(p6)    fnma.s1 f12 = f9, f11, f8
        ;;
(p6)    fma.s1 f10 = f12, f10, f11
        ;;
        // Round quotient to an unsigned integer.
        fcvt.fxu.trunc.s1 f10 = f10
        ;;
        // Transfer result to GP registers.
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __udivdi3
#endif

#ifdef L__umoddi3
// Compute a 64-bit unsigned integer modulus.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// in0 holds the dividend (a).  in1 holds the divisor (b).

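// Unsigned counterpart of __moddi3: compute q = trunc(a/b) through the
// fcvt.xuf/frcpa sequence, then recover the remainder with one xma.l
// as r = q * (-b) + a.  The negated divisor wraps modulo 2**64, which
// is what the fixed-point multiply-add needs for a correct low half.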
        .text
        .align 16
        .global __umoddi3
        .proc __umoddi3
__umoddi3:
        .regstk 2,0,0,0
        // Transfer inputs to FP registers.
        setf.sig f14 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1
        ;;
        // Convert the inputs to FP, to avoid FP software-assist faults.
        fcvt.xuf.s1 f8 = f14
        fcvt.xuf.s1 f9 = f9
(p7)    break 1
        ;;
        // Compute the reciprocal approximation.
        frcpa.s1 f10, p6 = f8, f9
        ;;
        // 3 Newton-Raphson iterations.
(p6)    fmpy.s1 f12 = f8, f10
(p6)    fnma.s1 f11 = f9, f10, f1
        ;;
(p6)    fma.s1 f12 = f11, f12, f12
(p6)    fmpy.s1 f13 = f11, f11
        ;;
(p6)    fma.s1 f10 = f11, f10, f10
(p6)    fma.s1 f11 = f13, f12, f12
        ;;
        sub in1 = r0, in1
(p6)    fma.s1 f10 = f13, f10, f10
(p6)    fnma.s1 f12 = f9, f11, f8
        ;;
        setf.sig f9 = in1
(p6)    fma.s1 f10 = f12, f10, f11
        ;;
        // Round quotient to an unsigned integer.
        fcvt.fxu.trunc.s1 f10 = f10
        ;;
        // r = q * (-b) + a
        xma.l f10 = f10, f9, f14
        ;;
        // Transfer result to GP registers.
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __umoddi3
#endif

#ifdef L__divsi3
// Compute a 32-bit integer quotient.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.

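// 32-bit operands need much less refinement.  The constant 0x0ffdd,
// loaded via setf.exp, is the value 2**-34 (biased exponent 0xffdd
// with an implicit significand of 1.0); folding it into the e*e term
// evidently biases the approximate quotient upward just enough that
// truncation always yields the exact 32-bit result.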
        .text
        .align 16
        .global __divsi3
        .proc __divsi3
__divsi3:
        .regstk 2,0,0,0
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1
        sxt4 in0 = in0
        sxt4 in1 = in1
        ;;
        setf.sig f8 = in0
        setf.sig f9 = in1
(p7)    break 1
        ;;
        mov r2 = 0x0ffdd
        fcvt.xf f8 = f8
        fcvt.xf f9 = f9
        ;;
        setf.exp f11 = r2
        frcpa.s1 f10, p6 = f8, f9
        ;;
(p6)    fmpy.s1 f8 = f8, f10
(p6)    fnma.s1 f9 = f9, f10, f1
        ;;
(p6)    fma.s1 f8 = f9, f8, f8
(p6)    fma.s1 f9 = f9, f9, f11
        ;;
(p6)    fma.s1 f10 = f9, f8, f8
        ;;
        fcvt.fx.trunc.s1 f10 = f10
        ;;
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __divsi3
#endif

#ifdef L__modsi3
// Compute a 32-bit integer modulus.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.

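// As in __moddi3, the remainder falls out of the quotient: compute
// q = trunc(a/b) with the short 32-bit sequence, then one xma.l gives
// r = q * (-b) + a.  f13 keeps the original dividend in significand
// form so it can serve as the xma.l addend at the end.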
        .text
        .align 16
        .global __modsi3
        .proc __modsi3
__modsi3:
        .regstk 2,0,0,0
        mov r2 = 0x0ffdd
        sxt4 in0 = in0
        sxt4 in1 = in1
        ;;
        setf.sig f13 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1
        ;;
        sub in1 = r0, in1
        fcvt.xf f8 = f13
        fcvt.xf f9 = f9
        ;;
        setf.exp f11 = r2
        frcpa.s1 f10, p6 = f8, f9
(p7)    break 1
        ;;
(p6)    fmpy.s1 f12 = f8, f10
(p6)    fnma.s1 f10 = f9, f10, f1
        ;;
        setf.sig f9 = in1
(p6)    fma.s1 f12 = f10, f12, f12
(p6)    fma.s1 f10 = f10, f10, f11
        ;;
(p6)    fma.s1 f10 = f10, f12, f12
        ;;
        fcvt.fx.trunc.s1 f10 = f10
        ;;
        xma.l f10 = f10, f9, f13
        ;;
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __modsi3
#endif

#ifdef L__udivsi3
// Compute a 32-bit unsigned integer quotient.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.

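// Unsigned 32-bit quotient: the same sequence as __divsi3 except that
// the operands are zero-extended (zxt4) before conversion, so they fit
// the signed fcvt.xf safely, and the result is truncated with
// fcvt.fxu to keep the final conversion unsigned.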
        .text
        .align 16
        .global __udivsi3
        .proc __udivsi3
__udivsi3:
        .regstk 2,0,0,0
        mov r2 = 0x0ffdd
        zxt4 in0 = in0
        zxt4 in1 = in1
        ;;
        setf.sig f8 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1
        ;;
        fcvt.xf f8 = f8
        fcvt.xf f9 = f9
(p7)    break 1
        ;;
        setf.exp f11 = r2
        frcpa.s1 f10, p6 = f8, f9
        ;;
(p6)    fmpy.s1 f8 = f8, f10
(p6)    fnma.s1 f9 = f9, f10, f1
        ;;
(p6)    fma.s1 f8 = f9, f8, f8
(p6)    fma.s1 f9 = f9, f9, f11
        ;;
(p6)    fma.s1 f10 = f9, f8, f8
        ;;
        fcvt.fxu.trunc.s1 f10 = f10
        ;;
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __udivsi3
#endif

#ifdef L__umodsi3
// Compute a 32-bit unsigned integer modulus.
//
// This uses the minimum-latency alternative from the Intel IA-64
// Optimization Guide.
//
// in0 holds the dividend.  in1 holds the divisor.

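// Unsigned counterpart of __modsi3: q = trunc(a/b) via the short
// 32-bit sequence, then r = q * (-b) + a with a single xma.l against
// the saved dividend in f13.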
        .text
        .align 16
        .global __umodsi3
        .proc __umodsi3
__umodsi3:
        .regstk 2,0,0,0
        mov r2 = 0x0ffdd
        zxt4 in0 = in0
        zxt4 in1 = in1
        ;;
        setf.sig f13 = in0
        setf.sig f9 = in1
        // Check divide by zero.
        cmp.ne.unc p0,p7=0,in1
        ;;
        sub in1 = r0, in1
        fcvt.xf f8 = f13
        fcvt.xf f9 = f9
        ;;
        setf.exp f11 = r2
        frcpa.s1 f10, p6 = f8, f9
(p7)    break 1
        ;;
(p6)    fmpy.s1 f12 = f8, f10
(p6)    fnma.s1 f10 = f9, f10, f1
        ;;
        setf.sig f9 = in1
(p6)    fma.s1 f12 = f10, f12, f12
(p6)    fma.s1 f10 = f10, f10, f11
        ;;
(p6)    fma.s1 f10 = f10, f12, f12
        ;;
        fcvt.fxu.trunc.s1 f10 = f10
        ;;
        xma.l f10 = f10, f9, f13
        ;;
        getf.sig ret0 = f10
        br.ret.sptk rp
        ;;
        .endp __umodsi3
#endif

#ifdef L__save_stack_nonlocal
// Notes on save/restore stack nonlocal: we read ar.bsp but write
// ar.bspstore.  This is because ar.bsp can be read at all times
// (independent of the RSE mode) but, since it is read-only, we have to
// restore the value via ar.bspstore.  This is OK because
// ar.bsp == ar.bspstore after executing "flushrs".

// void __ia64_save_stack_nonlocal(void *save_area, void *stack_pointer)

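// The 32-byte save area is laid out as: [0] stack pointer, [8] ar.bsp,
// [16] ar.rnat, [24] ar.pfs.  The RSE is put into enforced-lazy mode
// (mode bits cleared in ar.rsc) so that ar.rnat can be read coherently
// after flushrs; eager mode (mode bits 0x3) is set again on the way
// out.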
        .text
        .align 16
        .global __ia64_save_stack_nonlocal
        .proc __ia64_save_stack_nonlocal
__ia64_save_stack_nonlocal:
        { .mmf
          alloc r18 = ar.pfs, 2, 0, 0, 0
          mov r19 = ar.rsc
          ;;
        }
        { .mmi
          flushrs
          st8 [in0] = in1, 24
          and r19 = 0x1c, r19
          ;;
        }
        { .mmi
          st8 [in0] = r18, -16
          mov ar.rsc = r19
          or r19 = 0x3, r19
          ;;
        }
        { .mmi
          mov r16 = ar.bsp
          mov r17 = ar.rnat
          adds r2 = 8, in0
          ;;
        }
        { .mmi
          st8 [in0] = r16
          st8 [r2] = r17
        }
        { .mib
          mov ar.rsc = r19
          br.ret.sptk.few rp
          ;;
        }
        .endp __ia64_save_stack_nonlocal
#endif

#ifdef L__nonlocal_goto
// void __ia64_nonlocal_goto(void *target_label, void *save_area,
//                           void *static_chain);

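// Restores the state captured by __ia64_save_stack_nonlocal: sp from
// the save area, then ar.bspstore, ar.rnat and ar.pfs.  loadrs (with
// the RSC.loadrs field cleared by the "and") and invala discard stale
// stacked registers and ALAT entries so they are refetched from the
// restored backing store; control then transfers to target_label
// through rp, with the static chain in r15 (mov.ret prepares rp for
// the br.ret that performs the transfer).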
        .text
        .align 16
        .global __ia64_nonlocal_goto
        .proc __ia64_nonlocal_goto
__ia64_nonlocal_goto:
        { .mmi
          alloc r20 = ar.pfs, 3, 0, 0, 0
          ld8 r12 = [in1], 8
          mov.ret.sptk rp = in0, .L0
          ;;
        }
        { .mmf
          ld8 r16 = [in1], 8
          mov r19 = ar.rsc
          ;;
        }
        { .mmi
          flushrs
          ld8 r17 = [in1], 8
          and r19 = 0x1c, r19
          ;;
        }
        { .mmi
          ld8 r18 = [in1]
          mov ar.rsc = r19
          or r19 = 0x3, r19
          ;;
        }
        { .mmi
          mov ar.bspstore = r16
          ;;
          mov ar.rnat = r17
          ;;
        }
        { .mmi
          loadrs
          invala
          mov r15 = in2
          ;;
        }
.L0:    { .mib
          mov ar.rsc = r19
          mov ar.pfs = r18
          br.ret.sptk.few rp
          ;;
        }
        .endp __ia64_nonlocal_goto
#endif

#ifdef L__restore_stack_nonlocal
// This is mostly the same as nonlocal_goto above.
// ??? This has not been tested yet.

// void __ia64_restore_stack_nonlocal(void *save_area)

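// Same restore sequence as __ia64_nonlocal_goto, minus the static
// chain and with the save area as the only argument; it returns to its
// caller instead of branching to a saved label.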
        .text
        .align 16
        .global __ia64_restore_stack_nonlocal
        .proc __ia64_restore_stack_nonlocal
__ia64_restore_stack_nonlocal:
        { .mmf
          alloc r20 = ar.pfs, 4, 0, 0, 0
          ld8 r12 = [in0], 8
          ;;
        }
        { .mmb
          ld8 r16 = [in0], 8
          mov r19 = ar.rsc
          ;;
        }
        { .mmi
          flushrs
          ld8 r17 = [in0], 8
          and r19 = 0x1c, r19
          ;;
        }
        { .mmf
          ld8 r18 = [in0]
          mov ar.rsc = r19
          ;;
        }
        { .mmi
          mov ar.bspstore = r16
          ;;
          mov ar.rnat = r17
          or r19 = 0x3, r19
          ;;
        }
        { .mmf
          loadrs
          invala
          ;;
        }
.L0:    { .mib
          mov ar.rsc = r19
          mov ar.pfs = r18
          br.ret.sptk.few rp
          ;;
        }
        .endp __ia64_restore_stack_nonlocal
#endif

#ifdef L__trampoline
// Implement the nested function trampoline.  This is out of line
// so that we don't have to bother with flushing the icache, as
// well as making the on-stack trampoline smaller.
//
// The trampoline has the following form:
//
//            +-------------------+ >
//  TRAMP:    | __ia64_trampoline | |
//            +-------------------+ > fake function descriptor
//            | TRAMP+16          | |
//            +-------------------+ >
//            | target descriptor |
//            +-------------------+
//            | static link       |
//            +-------------------+

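// When a call goes through the fake descriptor, the caller loads gp
// (r1) with TRAMP+16, i.e. the address of the target-descriptor slot.
// The code below therefore finds the real descriptor and the static
// link through r1: it loads the static link into r15 (the static chain
// register), installs the target's entry point and gp from the real
// descriptor, and branches.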
        .text
        .align 16
        .global __ia64_trampoline
        .proc __ia64_trampoline
__ia64_trampoline:
        { .mmi
          ld8 r2 = [r1], 8
          ;;
          ld8 r15 = [r1]
        }
        { .mmi
          ld8 r3 = [r2], 8
          ;;
          ld8 r1 = [r2]
          mov b6 = r3
        }
        { .bbb
          br.sptk.many b6
          ;;
        }
        .endp __ia64_trampoline
#endif

#ifdef SHARED
// Thunks for backward compatibility.
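// These keep the old TFmode entry points alive from when GCC called
// the 80-bit type TFmode rather than XFmode.  Each thunk simply
// tail-branches to the corresponding XFmode routine, which then
// returns directly to the original caller.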
#ifdef L_fixtfdi
        .text
        .align 16
        .global __fixtfti
        .proc __fixtfti
__fixtfti:
        { .bbb
          br.sptk.many __fixxfti
          ;;
        }
        .endp __fixtfti
#endif
#ifdef L_fixunstfdi
        .align 16
        .global __fixunstfti
        .proc __fixunstfti
__fixunstfti:
        { .bbb
          br.sptk.many __fixunsxfti
          ;;
        }
        .endp __fixunstfti
#endif
#ifdef L_floatditf
        .align 16
        .global __floattitf
        .proc __floattitf
__floattitf:
        { .bbb
          br.sptk.many __floattixf
          ;;
        }
        .endp __floattitf
#endif
#endif