| ;; Copyright (C) 2007-2021 Free Software Foundation, Inc. |
| ;; |
| ;; This file is part of GCC. |
| ;; |
| ;; GCC is free software; you can redistribute it and/or modify |
| ;; it under the terms of the GNU General Public License as published by |
| ;; the Free Software Foundation; either version 3, or (at your option) |
| ;; any later version. |
| ;; |
| ;; GCC is distributed in the hope that it will be useful, |
| ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| ;; GNU General Public License for more details. |
| ;; |
| ;; You should have received a copy of the GNU General Public License |
| ;; along with GCC; see the file COPYING3. If not see |
| ;; <http://www.gnu.org/licenses/>. |
| ;; |
| ;; ......................... |
| ;; |
| ;; DFA-based pipeline description for MIPS64 model R20Kc. |
| ;; Contributed by Jason Eckhardt (jle@cygnus.com). |
| ;; |
| ;; The R20Kc is a dual-issue processor that can generally bundle |
| ;; instructions as follows: |
| ;; 1. integer with integer |
| ;; 2. integer with fp |
| ;; 3. fp with fpload/fpstore |
| ;; |
| ;; Of course, there are various restrictions. |
| ;; Reference: |
| ;; "Ruby (R20K) Technical Specification Rev. 1.2, December 28, 1999." |
| ;; |
| ;; ......................... |
| |
| ;; Use three automata to isolate long latency operations, reducing space. |
| ;; "r20kc_idiv" and "r20kc_fdiv" hold only the iteration units of the integer |
| ;; multiply/divide and floating-point multiply/divide/sqrt operations; every |
| ;; other unit is placed in "r20kc_other". |
(define_automaton "r20kc_other, r20kc_fdiv, r20kc_idiv") |
| |
| ;; |
| ;; Describe the resources. |
| ;; |
| |
| ;; Global. |
| ;; The two issue slots. They are claimed one at a time through "r20kc_iss" |
| ;; or both together through "r20kc_single_dispatch". |
| (define_cpu_unit "r20kc_iss0, r20kc_iss1" "r20kc_other") |
| |
| ;; Integer execution unit (pipeline A). |
| ;; "r20kc_ixua_addsub_agen" is used by "r20kc_iaddsub", "r20kc_fpmove" and |
| ;; "r20kc_imem"; "r20kc_ixua_shift" is used by "r20kc_ishift". |
| (define_cpu_unit "r20kc_ixua_addsub_agen" "r20kc_other") |
| (define_cpu_unit "r20kc_ixua_shift" "r20kc_other") |
| |
| ;; The two pipeline A units cannot be reserved in the same cycle. |
| (exclusion_set "r20kc_ixua_addsub_agen" "r20kc_ixua_shift") |
| |
| ;; Integer execution unit (pipeline B). |
| ;; "r20kc_ixub_mpydiv_iter" lives in its own automaton ("r20kc_idiv") because |
| ;; it is the unit that stays reserved for many cycles during multiplies, |
| ;; divides and HI/LO moves. |
| (define_cpu_unit "r20kc_ixub_addsub" "r20kc_other") |
| (define_cpu_unit "r20kc_ixub_branch" "r20kc_other") |
| (define_cpu_unit "r20kc_ixub_mpydiv" "r20kc_other") |
| (define_cpu_unit "r20kc_ixub_mpydiv_iter" "r20kc_idiv") |
| |
| ;; The add/sub, branch and multiply/divide units of pipeline B are pairwise |
| ;; exclusive: at most one of them can be reserved in any cycle. |
| (exclusion_set "r20kc_ixub_addsub" "r20kc_ixub_branch, r20kc_ixub_mpydiv") |
| (exclusion_set "r20kc_ixub_branch" "r20kc_ixub_mpydiv") |
| |
| ;; Cache / memory interface. |
| ;; Reserved by every memory access ("r20kc_imem") and held for additional |
| ;; cycles after stores ("r20kc_icache"). |
| (define_cpu_unit "r20kc_cache" "r20kc_other") |
| |
| ;; Floating-point unit. |
| ;; The "_iter" units are kept in the separate "r20kc_fdiv" automaton; they |
| ;; are the ones that remain reserved while a multiply, divide or square root |
| ;; is in progress. |
| (define_cpu_unit "r20kc_fpu_add" "r20kc_other") |
| (define_cpu_unit "r20kc_fpu_mpy" "r20kc_other") |
| (define_cpu_unit "r20kc_fpu_mpy_iter" "r20kc_fdiv") |
| (define_cpu_unit "r20kc_fpu_divsqrt" "r20kc_other") |
| (define_cpu_unit "r20kc_fpu_divsqrt_iter" "r20kc_fdiv") |
| |
| ;; The add, multiply and divide/sqrt units are pairwise exclusive: at most |
| ;; one of them can be reserved in any cycle. |
| (exclusion_set "r20kc_fpu_add" "r20kc_fpu_mpy, r20kc_fpu_divsqrt") |
| (exclusion_set "r20kc_fpu_mpy" "r20kc_fpu_divsqrt") |
| |
| ;; No further insn can be issued in a cycle once a branch has been issued: |
| ;; neither issue slot may be reserved while "r20kc_ixub_branch" is reserved. |
| (absence_set "r20kc_iss0,r20kc_iss1" "r20kc_ixub_branch") |
| |
| ;; |
| ;; Define reservations for unit name mnemonics or combinations. |
| ;; |
| |
| ;; Either one of the two issue slots. |
| (define_reservation "r20kc_iss" |
| "r20kc_iss0|r20kc_iss1") |
| ;; Both issue slots at once, so nothing else can issue in the same cycle. |
| (define_reservation "r20kc_single_dispatch" |
| "r20kc_iss0+r20kc_iss1") |
| ;; An issue slot plus the add/sub unit of pipeline A or of pipeline B. |
| (define_reservation "r20kc_iaddsub" |
| "r20kc_iss+(r20kc_ixua_addsub_agen|r20kc_ixub_addsub)") |
| ;; An issue slot plus the shifter, which exists only in pipeline A. |
| (define_reservation "r20kc_ishift" |
| "r20kc_iss+r20kc_ixua_shift") |
| ;; An issue slot plus the pipeline A add/sub/agen unit; used for moves |
| ;; to and from the fp coprocessor. |
| (define_reservation "r20kc_fpmove" |
| "r20kc_iss+r20kc_ixua_addsub_agen") |
| ;; A memory access: issue slot, pipeline A add/sub/agen unit and the cache, |
| ;; all in the same cycle. |
| (define_reservation "r20kc_imem" |
| "r20kc_iss+r20kc_ixua_addsub_agen+r20kc_cache") |
| ;; The cache alone; appended to store reservations to keep it busy longer. |
| (define_reservation "r20kc_icache" |
| "r20kc_cache") |
| ;; An issue slot plus the pipeline B multiply/divide unit. |
| (define_reservation "r20kc_impydiv" |
| "r20kc_iss+r20kc_ixub_mpydiv") |
| ;; The multiply/divide iteration unit alone; repeated to model occupancy. |
| (define_reservation "r20kc_impydiv_iter" |
| "r20kc_ixub_mpydiv_iter") |
| ;; An issue slot plus the pipeline B branch unit. |
| (define_reservation "r20kc_ibranch" |
| "r20kc_iss+r20kc_ixub_branch") |
| |
| ;; An issue slot plus the fp add unit. |
| (define_reservation "r20kc_fpadd" |
| "r20kc_iss+r20kc_fpu_add") |
| ;; An issue slot plus the fp multiply unit. |
| (define_reservation "r20kc_fpmpy" |
| "r20kc_iss+r20kc_fpu_mpy") |
| ;; The fp multiply iteration unit alone; repeated to model occupancy. |
| (define_reservation "r20kc_fpmpy_iter" |
| "r20kc_fpu_mpy_iter") |
| ;; An issue slot plus the fp divide/sqrt unit. |
| (define_reservation "r20kc_fpdivsqrt" |
| "r20kc_iss+r20kc_fpu_divsqrt") |
| ;; The fp divide/sqrt iteration unit alone; repeated to model occupancy. |
| (define_reservation "r20kc_fpdivsqrt_iter" |
| "r20kc_fpu_divsqrt_iter") |
| |
| ;; |
| ;; Describe instruction reservations for integer operations. |
| ;; |
| |
| ;; Conditional moves always force single-dispatch. |
| ;; Integer form (mode is neither SF nor DF): latency 1, with both issue |
| ;; slots reserved for the issue cycle. |
| (define_insn_reservation "r20kc_cond_move_int" 1 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "condmove") |
| (eq_attr "mode" "!SF,DF"))) |
| "r20kc_single_dispatch") |
| |
| ;; Floating-point conditional move (mode SF or DF): latency 4, also issued |
| ;; with both issue slots reserved. |
| (define_insn_reservation "r20kc_cond_move_fp" 4 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "condmove") |
| (eq_attr "mode" "SF,DF"))) |
| "r20kc_single_dispatch") |
| |
| ;; Simple integer insns (move, arith, const, nop): latency 1, executable on |
| ;; the add/sub unit of either integer pipeline. |
| (define_insn_reservation "r20kc_int_other" 1 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "move,arith,const,nop")) |
| "r20kc_iaddsub") |
| |
| ;; Shifts can only execute on ixu pipeline A. |
| ;; Latency 1; needs an issue slot and the pipeline A shifter. |
| (define_insn_reservation "r20kc_int_shift" 1 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "shift")) |
| "r20kc_ishift") |
| |
| ;; Integer loads and prefetches: latency 2; one cycle on an issue slot, the |
| ;; pipeline A add/sub/agen unit and the cache. |
| (define_insn_reservation "r20kc_ld" 2 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "load,prefetch,prefetchx")) |
| "r20kc_imem") |
| |
| |
| ;; A load immediately following a store will stall, so |
| ;; say that a store uses the cache for an extra cycle. |
| ;; Latency 2; the memory access in the first cycle is followed by one more |
| ;; cycle in which only the cache is reserved. |
| (define_insn_reservation "r20kc_st" 2 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "store")) |
| "r20kc_imem,r20kc_icache") |
| |
| ;; Floating-point loads: latency 3; same one-cycle resource use as an |
| ;; integer load. |
| (define_insn_reservation "r20kc_fld" 3 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "fpload")) |
| "r20kc_imem") |
| |
| ;; Floating-point stores: latency 3; the memory access in the first cycle is |
| ;; followed by two further cycles in which only the cache is reserved. |
| (define_insn_reservation "r20kc_ffst" 3 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "fpstore")) |
| "r20kc_imem,r20kc_icache*2") |
| |
| ;; Integer divide latency is between 13 and 42 cycles for DIV[U] and between |
| ;; 13 and 72 cycles for DDIV[U]. This depends on the value of the inputs |
| ;; so we just choose the worst case latency. |
| ;; Word divide: an issue slot and the multiply/divide unit are taken in the |
| ;; first cycle while the iteration unit is reserved for all 42 cycles. |
| (define_insn_reservation "r20kc_idiv_si" 42 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "idiv") |
| (eq_attr "mode" "SI"))) |
| "r20kc_impydiv+(r20kc_impydiv_iter*42)") |
| |
| ;; Double-word divide: worst-case latency 72, with the iteration unit |
| ;; reserved for all 72 cycles. |
| (define_insn_reservation "r20kc_idiv_di" 72 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "idiv") |
| (eq_attr "mode" "DI"))) |
| "r20kc_impydiv+(r20kc_impydiv_iter*72)") |
| |
| ;; Integer multiply latency is 4 or 7 cycles for word and double-word |
| ;; respectively. |
| ;; Word multiply: latency 4, but the iteration unit is reserved for only 2 |
| ;; cycles, so another multiply can start before the result is available. |
| (define_insn_reservation "r20kc_impy_si" 4 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "imadd,imul,imul3") |
| (eq_attr "mode" "SI"))) |
| "r20kc_impydiv+(r20kc_impydiv_iter*2)") |
| |
| ;; Double-word multiply: latency 7, with the iteration unit reserved for |
| ;; all 7 cycles. |
| (define_insn_reservation "r20kc_impy_di" 7 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "imadd,imul,imul3") |
| (eq_attr "mode" "DI"))) |
| "r20kc_impydiv+(r20kc_impydiv_iter*7)") |
| |
| ;; Move to/from HI/LO. |
| ;; Moving to HI/LO has a 3 cycle latency while moving from only has a 1 |
| ;; cycle latency. Repeat rate is 3 for both. |
| ;; The repeat rate is modelled by reserving the iteration unit for 3 cycles. |
| (define_insn_reservation "r20kc_imthilo" 3 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "mthi,mtlo")) |
| "r20kc_impydiv+(r20kc_impydiv_iter*3)") |
| |
| ;; Move from HI/LO: latency 1, but the iteration unit is still reserved for |
| ;; 3 cycles, the same occupancy as a move to HI/LO. |
| (define_insn_reservation "r20kc_imfhilo" 1 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "mfhi,mflo")) |
| "r20kc_impydiv+(r20kc_impydiv_iter*3)") |
| |
| ;; Move to fp coprocessor. |
| ;; Latency 3; needs an issue slot and the pipeline A add/sub/agen unit. |
| (define_insn_reservation "r20kc_ixfer_mt" 3 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "mtc")) |
| "r20kc_fpmove") |
| |
| ;; Move from fp coprocessor. |
| ;; Latency 2; needs an issue slot and the pipeline A add/sub/agen unit. |
| (define_insn_reservation "r20kc_ixfer_mf" 2 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "mfc")) |
| "r20kc_fpmove") |
| |
| ;; Assume branch predicted correctly. |
| ;; Branches, jumps and calls: latency 1; needs an issue slot and the |
| ;; pipeline B branch unit. |
| (define_insn_reservation "r20kc_ibr" 1 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "branch,jump,call")) |
| "r20kc_ibranch") |
| |
| ;; |
| ;; Describe instruction reservations for the floating-point operations. |
| ;; |
| ;; Simple fp operations (move, add, abs, neg, compare): latency 4; needs an |
| ;; issue slot and the fp add unit for one cycle. |
| (define_insn_reservation "r20kc_fp_other" 4 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "fmove,fadd,fabs,fneg,fcmp")) |
| "r20kc_fpadd") |
| |
| ;; Conversions with cnv_mode I2S, I2D or S2D: latency 4 on the fp add unit. |
| (define_insn_reservation "r20kc_fp_cvt_a" 4 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "fcvt") |
| (eq_attr "cnv_mode" "I2S,I2D,S2D"))) |
| "r20kc_fpadd") |
| |
| ;; Conversions with cnv_mode D2S or S2I: latency 5 on the fp add unit. |
| ;; NOTE(review): neither this reservation nor r20kc_fp_cvt_a lists a D2I |
| ;; conversion mode; if cnv_mode has such a value, those insns match no |
| ;; reservation in this file -- confirm whether that is intended. |
| (define_insn_reservation "r20kc_fp_cvt_b" 5 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "fcvt") |
| (eq_attr "cnv_mode" "D2S,S2I"))) |
| "r20kc_fpadd") |
| |
| ;; Double-precision divide and square root: latency 32. An issue slot and |
| ;; the divide/sqrt unit are taken in the first cycle while the iteration |
| ;; unit is reserved for all 32 cycles. |
| (define_insn_reservation "r20kc_fp_divsqrt_df" 32 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "fdiv,fsqrt") |
| (eq_attr "mode" "DF"))) |
| "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*32)") |
| |
| ;; Single-precision divide and square root: latency 17, with the iteration |
| ;; unit reserved for all 17 cycles. |
| (define_insn_reservation "r20kc_fp_divsqrt_sf" 17 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "fdiv,fsqrt") |
| (eq_attr "mode" "SF"))) |
| "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*17)") |
| |
| ;; Double-precision reciprocal square root: latency 35, with the iteration |
| ;; unit reserved for all 35 cycles. |
| (define_insn_reservation "r20kc_fp_rsqrt_df" 35 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "frsqrt") |
| (eq_attr "mode" "DF"))) |
| "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*35)") |
| |
| ;; Single-precision reciprocal square root: latency 17, with the iteration |
| ;; unit reserved for all 17 cycles. |
| (define_insn_reservation "r20kc_fp_rsqrt_sf" 17 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "frsqrt") |
| (eq_attr "mode" "SF"))) |
| "r20kc_fpdivsqrt+(r20kc_fpdivsqrt_iter*17)") |
| |
| ;; Single-precision multiply and multiply-add: latency 4; the multiply unit |
| ;; and its iteration unit are each reserved for one cycle. |
| (define_insn_reservation "r20kc_fp_mpy_sf" 4 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "fmul,fmadd") |
| (eq_attr "mode" "SF"))) |
| "r20kc_fpmpy+r20kc_fpmpy_iter") |
| |
| ;; Double-precision multiply and multiply-add: latency 5; the iteration unit |
| ;; is reserved for two cycles, one more than the single-precision form. |
| (define_insn_reservation "r20kc_fp_mpy_df" 5 |
| (and (eq_attr "cpu" "20kc") |
| (and (eq_attr "type" "fmul,fmadd") |
| (eq_attr "mode" "DF"))) |
| "r20kc_fpmpy+(r20kc_fpmpy_iter*2)") |
| |
| ;; Force single-dispatch for unknown, multi, atomic and syncloop insns. |
| ;; Latency 1, with both issue slots reserved for the issue cycle. |
| (define_insn_reservation "r20kc_unknown" 1 |
| (and (eq_attr "cpu" "20kc") |
| (eq_attr "type" "unknown,multi,atomic,syncloop")) |
| "r20kc_single_dispatch") |