;; DFA-based pipeline description for MIPS32 model 74k.
;; Contributed by MIPS Technologies and CodeSourcery.
;;
;; Reference:
;;   "MIPS32 74K Microarchitecture Specification Rev. 01.02 Jun 15, 2006"
;;   "MIPS32 74Kf Processor Core Datasheet Jun 2, 2006"
;;
;; Copyright (C) 2007-2021 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;; GCC is distributed in the hope that it will be useful, but WITHOUT ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ;; License for more details.
;; You should have received a copy of the GNU General Public License ;; along with GCC; see the file COPYING3. If not see ;; http://www.gnu.org/licenses/.
;; Four automata: the MDU, ALU and AGEN pipes, plus the FPU.
(define_automaton "r74k_mdu_pipe, r74k_alu_pipe, r74k_agen_pipe, r74k_fpu")

;; Functional units.  The second operand names the automaton that
;; tracks the unit; both FPU units share the r74k_fpu automaton.
(define_cpu_unit "r74k_mul" "r74k_mdu_pipe")
(define_cpu_unit "r74k_alu" "r74k_alu_pipe")
(define_cpu_unit "r74k_agen" "r74k_agen_pipe")
(define_cpu_unit "r74k_fpu_arith" "r74k_fpu")
(define_cpu_unit "r74k_fpu_ldst" "r74k_fpu")
;; -------------------------------------------------------------- ;; Producers ;; --------------------------------------------------------------
;; ALU: Logicals/Arithmetics
;; - Logicals, move (addu/addiu with rt = 0), Set less than,
;;   sign extend - 1 cycle
(define_insn_reservation "r74k_int_logical" 1
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "logical,move,signext,slt"))
  "r74k_alu")
;; - Arithmetics (arith, const, shift and clz types) - 2 cycles
(define_insn_reservation "r74k_int_arith" 2
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "arith,const,shift,clz"))
  "r74k_alu")
;; nop - zero latency, reserves no functional unit.
(define_insn_reservation "r74k_int_nop" 0
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "nop"))
  "nothing")
;; Conditional moves - 4 cycles; the reservation repeats the AGEN unit
;; twice.
(define_insn_reservation "r74k_int_cmove" 4
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "condmove"))
  "r74k_agen*2")
;; MDU: fully pipelined multiplier
;; mult - delivers result to hi/lo in 4 cycles (pipelined)
(define_insn_reservation "r74k_int_mult" 4
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "imul"))
  "r74k_alu+r74k_mul")
;; madd, msub - delivers result to hi/lo in 4 cycles (pipelined)
(define_insn_reservation "r74k_int_madd" 4
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "imadd"))
  "r74k_alu+r74k_mul")
;; mul - delivers result to general register in 7 cycles
(define_insn_reservation "r74k_int_mul3" 7
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "imul3"))
  "r74k_alu+r74k_mul")
;; mfhi, mflo, mflhxu - deliver result to gpr in 7 cycles
(define_insn_reservation "r74k_int_mfhilo" 7
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "mfhi,mflo"))
  "r74k_alu+r74k_mul")
;; mthi, mtlo, mtlhx - deliver result to hi/lo, thence madd, handled as bypass
(define_insn_reservation "r74k_int_mthilo" 7
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "mthi,mtlo"))
  "r74k_alu+r74k_mul")
;; div - default to 50 cycles for 32-bit operands.  Faster for 8-bit
;;       operands, but those are tricky to identify.
;; The reservation string repeats r74k_mul 50 times, matching the latency.
(define_insn_reservation "r74k_int_div" 50
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "idiv"))
  "r74k_alu+r74k_mul*50")
;; call - 1 cycle on the AGEN unit
(define_insn_reservation "r74k_int_call" 1
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "call"))
  "r74k_agen")
;; branch/jump - 1 cycle on the AGEN unit
(define_insn_reservation "r74k_int_jump" 1
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "branch,jump"))
  "r74k_agen")
;; loads: lb, lbu, lh, lhu, ll, lw, lwl, lwr, lwpc, lwxs
;; prefetch: prefetch, prefetchx
;; Both are modelled with the same 3 cycle latency on the AGEN unit.
(define_insn_reservation "r74k_int_load" 3
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "load,prefetch,prefetchx"))
  "r74k_agen")
;; stores - 1 cycle on the AGEN unit
(define_insn_reservation "r74k_int_store" 1
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "store"))
  "r74k_agen")
;; Unknowns - Currently these include blockage, consttable and alignment
;;            rtls.  They do not really affect scheduling latency
;;            (blockage affects scheduling via log links, but that is
;;            not used here).
;; The "atomic" and "syncloop" types are given the same reservation.
(define_insn_reservation "r74k_unknown" 1
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "unknown,atomic,syncloop"))
  "r74k_alu")
;; "multi" type insns - modelled with a 10 cycle latency, with the ALU
;; and AGEN units reserved together and that pair repeated 10 times.
(define_insn_reservation "r74k_multi" 10
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "multi"))
  "(r74k_alu+r74k_agen)*10")
;; -------------------------------------------------------------- ;; Bypass to Consumer ;; --------------------------------------------------------------
;; load->next use :  3 cycles (Default)
;; load->load base:  4 cycles
;; load->store base: 4 cycles
;; The store bypass is guarded by "!mips_store_data_bypass_p", a predicate
;; defined outside this file.
(define_bypass 4 "r74k_int_load" "r74k_int_load")
(define_bypass 4 "r74k_int_load" "r74k_int_store"
  "!mips_store_data_bypass_p")
;; logical/move/slt/signext->next use :  1 cycle (Default)
;; logical/move/slt/signext->load base:  2 cycles
;; logical/move/slt/signext->store base: 2 cycles
(define_bypass 2 "r74k_int_logical" "r74k_int_load")
(define_bypass 2 "r74k_int_logical" "r74k_int_store"
  "!mips_store_data_bypass_p")
;; arith->next use :  2 cycles (Default)
;; arith->load base:  3 cycles
;; arith->store base: 3 cycles
(define_bypass 3 "r74k_int_arith" "r74k_int_load")
(define_bypass 3 "r74k_int_arith" "r74k_int_store"
  "!mips_store_data_bypass_p")
;; cmove->next use :  4 cycles (Default)
;; cmove->load base:  5 cycles
;; cmove->store base: 5 cycles
(define_bypass 5 "r74k_int_cmove" "r74k_int_load")
(define_bypass 5 "r74k_int_cmove" "r74k_int_store"
  "!mips_store_data_bypass_p")
;; mult/madd/msub->int_mfhilo : 4 cycles (default)
;; mult->madd/msub            : 1 cycle
;; madd/msub->madd/msub       : 1 cycle
;; Both bypasses apply only when the guard mips_linked_madd_p (defined
;; outside this file) accepts the producer/consumer pair.
(define_bypass 1 "r74k_int_mult,r74k_int_mul3" "r74k_int_madd"
  "mips_linked_madd_p")
(define_bypass 1 "r74k_int_madd" "r74k_int_madd"
  "mips_linked_madd_p")
;; -------------------------------------------------------------- ;; DSP instructions ;; --------------------------------------------------------------
;; Non-saturating DSP ALU insns have the same latency as normal ALU
;; arithmetic operations (2 cycles).
(define_insn_reservation "r74k_dsp_alu" 2
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "dspalu"))
  "r74k_alu")
;; Saturating DSP ALU insns take an extra cycle (3 cycles).
(define_insn_reservation "r74k_dsp_alu_sat" 3
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "dspalusat"))
  "r74k_alu")
;; dpaq_s, dpau, dpsq_s, dpsu, maq_s, mulsaq
;; - delivers result to hi/lo in 6 cycles (bypass at M4)
(define_insn_reservation "r74k_dsp_mac" 6
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "dspmac"))
  "r74k_alu+r74k_mul")
;; dpaq_sa, dpsq_sa, maq_sa
;; - delivers result to hi/lo in 7 cycles (bypass at WB)
(define_insn_reservation "r74k_dsp_mac_sat" 7
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "dspmacsat"))
  "r74k_alu+r74k_mul")
;; extp, extpdp, extpdpv, extpv, extr, extrv
;; - same latency as "mul" (7 cycles)
(define_insn_reservation "r74k_dsp_acc_ext" 7
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "accext"))
  "r74k_alu+r74k_mul")
;; mthlip, shilo, shilov
;; - same latency as "mul" (7 cycles)
(define_insn_reservation "r74k_dsp_acc_mod" 7
  (and (eq_attr "cpu" "74kc,74kf2_1,74kf1_1,74kf3_2")
       (eq_attr "type" "accmod"))
  "r74k_alu+r74k_mul")
;; dspalu ->load/store base ;; dspalusat->load/store base ;; - we should never see these in real life.
;; dsp_mac->dsp_mac     : 1 cycle (repeat rate of 1)
;; dsp_mac->dsp_mac_sat : 1 cycle (repeat rate of 1)
(define_bypass 1 "r74k_dsp_mac" "r74k_dsp_mac")
(define_bypass 1 "r74k_dsp_mac" "r74k_dsp_mac_sat")

;; dsp_mac_sat->dsp_mac_sat : 2 cycles (repeat rate of 2)
;; dsp_mac_sat->dsp_mac     : 2 cycles (repeat rate of 2)
(define_bypass 2 "r74k_dsp_mac_sat" "r74k_dsp_mac_sat")
(define_bypass 2 "r74k_dsp_mac_sat" "r74k_dsp_mac")
;; mult->dsp_mac, mult->dsp_mac_sat : 1 cycle (unguarded)
(define_bypass 1 "r74k_int_mult" "r74k_dsp_mac")
(define_bypass 1 "r74k_int_mult" "r74k_dsp_mac_sat")

;; mul3->dsp_mac, mul3->dsp_mac_sat : 1 cycle, only when the guard
;; mips_linked_madd_p accepts the pair.
(define_bypass 1 "r74k_int_mul3" "r74k_dsp_mac" "mips_linked_madd_p")
(define_bypass 1 "r74k_int_mul3" "r74k_dsp_mac_sat" "mips_linked_madd_p")
;; Assuming the following is true (bypass at M4)
;;   AP AF AM MB M1 M2 M3 M4 WB GR GC
;;               AP AF AM MB M1 M2 M3 M4 WB GR GC
;; dsp_mac->dsp_acc_ext : 4 cycles
;; dsp_mac->dsp_acc_mod : 4 cycles
(define_bypass 4 "r74k_dsp_mac" "r74k_dsp_acc_ext")
(define_bypass 4 "r74k_dsp_mac" "r74k_dsp_acc_mod")
;; Assuming the following is true (bypass at WB)
;;   AP AF AM MB M1 M2 M3 M4 WB GR GC
;;                  AP AF AM MB M1 M2 M3 M4 WB GR GC
;; dsp_mac_sat->dsp_acc_ext : 5 cycles
;; dsp_mac_sat->dsp_acc_mod : 5 cycles
(define_bypass 5 "r74k_dsp_mac_sat" "r74k_dsp_acc_ext")
(define_bypass 5 "r74k_dsp_mac_sat" "r74k_dsp_acc_mod")
;; -------------------------------------------------------------- ;; Floating Point Instructions ;; --------------------------------------------------------------
;; 74Kf FPU runs at 1:1, 2:1 or 3:2 core/FPU clock ratio; these are
;; modelled by the "cpu" attribute values 74kf1_1, 74kf2_1 and 74kf3_2.
;; fadd, fabs, fneg
;; One reservation per FPU clock-ratio variant: latency 4 / 8 / 6 for
;; 74kf1_1 / 74kf2_1 / 74kf3_2.
(define_insn_reservation "r74kf1_1_fadd" 4
  (and (eq_attr "cpu" "74kf1_1")
       (eq_attr "type" "fadd,fabs,fneg"))
  "r74k_fpu_arith")

(define_insn_reservation "r74kf2_1_fadd" 8
  (and (eq_attr "cpu" "74kf2_1")
       (eq_attr "type" "fadd,fabs,fneg"))
  "r74k_fpu_arith*2")

(define_insn_reservation "r74kf3_2_fadd" 6
  (and (eq_attr "cpu" "74kf3_2")
       (eq_attr "type" "fadd,fabs,fneg"))
  "r74k_fpu_arith")
;; fmove, fcmove
;; Latency 4 / 8 / 6 for 74kf1_1 / 74kf2_1 / 74kf3_2.
(define_insn_reservation "r74kf1_1_fmove" 4
  (and (eq_attr "cpu" "74kf1_1")
       (eq_attr "type" "fmove"))
  "r74k_fpu_arith")

(define_insn_reservation "r74kf2_1_fmove" 8
  (and (eq_attr "cpu" "74kf2_1")
       (eq_attr "type" "fmove"))
  "r74k_fpu_arith*2")

(define_insn_reservation "r74kf3_2_fmove" 6
  (and (eq_attr "cpu" "74kf3_2")
       (eq_attr "type" "fmove"))
  "r74k_fpu_arith")
;; fload
;; FP loads reserve the AGEN unit together with the FPU load/store unit.
;; Latency 4 / 8 / 6 for 74kf1_1 / 74kf2_1 / 74kf3_2.
(define_insn_reservation "r74kf1_1_fload" 4
  (and (eq_attr "cpu" "74kf1_1")
       (eq_attr "type" "fpload,fpidxload"))
  "r74k_agen+r74k_fpu_ldst")

(define_insn_reservation "r74kf2_1_fload" 8
  (and (eq_attr "cpu" "74kf2_1")
       (eq_attr "type" "fpload,fpidxload"))
  "r74k_agen+(r74k_fpu_ldst*2)")

(define_insn_reservation "r74kf3_2_fload" 6
  (and (eq_attr "cpu" "74kf3_2")
       (eq_attr "type" "fpload,fpidxload"))
  "r74k_agen+r74k_fpu_ldst")
;; fstore
;; FP stores reserve the AGEN unit together with the FPU load/store unit.
;; Latency 1 / 2 / 1 for 74kf1_1 / 74kf2_1 / 74kf3_2.
(define_insn_reservation "r74kf1_1_fstore" 1
  (and (eq_attr "cpu" "74kf1_1")
       (eq_attr "type" "fpstore,fpidxstore"))
  "r74k_agen+r74k_fpu_ldst")

(define_insn_reservation "r74kf2_1_fstore" 2
  (and (eq_attr "cpu" "74kf2_1")
       (eq_attr "type" "fpstore,fpidxstore"))
  "r74k_agen+(r74k_fpu_ldst*2)")

(define_insn_reservation "r74kf3_2_fstore" 1
  (and (eq_attr "cpu" "74kf3_2")
       (eq_attr "type" "fpstore,fpidxstore"))
  "r74k_agen+r74k_fpu_ldst")
;; fmul, fmadd
;; Single precision: latency 4 / 8 / 6 for 74kf1_1 / 74kf2_1 / 74kf3_2.
(define_insn_reservation "r74kf1_1_fmul_sf" 4
  (and (eq_attr "cpu" "74kf1_1")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "SF")))
  "r74k_fpu_arith")

(define_insn_reservation "r74kf2_1_fmul_sf" 8
  (and (eq_attr "cpu" "74kf2_1")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "SF")))
  "r74k_fpu_arith*2")

(define_insn_reservation "r74kf3_2_fmul_sf" 6
  (and (eq_attr "cpu" "74kf3_2")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "SF")))
  "r74k_fpu_arith")

;; Double precision: latency 5 / 10 / 7, with the arithmetic unit
;; reserved for twice as many repetitions as the single-precision case.
(define_insn_reservation "r74kf1_1_fmul_df" 5
  (and (eq_attr "cpu" "74kf1_1")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "DF")))
  "r74k_fpu_arith*2")

(define_insn_reservation "r74kf2_1_fmul_df" 10
  (and (eq_attr "cpu" "74kf2_1")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "DF")))
  "r74k_fpu_arith*4")

(define_insn_reservation "r74kf3_2_fmul_df" 7
  (and (eq_attr "cpu" "74kf3_2")
       (and (eq_attr "type" "fmul,fmadd")
	    (eq_attr "mode" "DF")))
  "r74k_fpu_arith*2")
;; fdiv, fsqrt
;; Single precision: latency 17 / 34 / 25 for 74kf1_1 / 74kf2_1 / 74kf3_2.
(define_insn_reservation "r74kf1_1_fdiv_sf" 17
  (and (eq_attr "cpu" "74kf1_1")
       (and (eq_attr "type" "fdiv,fsqrt")
	    (eq_attr "mode" "SF")))
  "r74k_fpu_arith*14")

(define_insn_reservation "r74kf2_1_fdiv_sf" 34
  (and (eq_attr "cpu" "74kf2_1")
       (and (eq_attr "type" "fdiv,fsqrt")
	    (eq_attr "mode" "SF")))
  "r74k_fpu_arith*28")

(define_insn_reservation "r74kf3_2_fdiv_sf" 25
  (and (eq_attr "cpu" "74kf3_2")
       (and (eq_attr "type" "fdiv,fsqrt")
	    (eq_attr "mode" "SF")))
  "r74k_fpu_arith*14")

;; Double precision: latency 32 / 64 / 48.
(define_insn_reservation "r74kf1_1_fdiv_df" 32
  (and (eq_attr "cpu" "74kf1_1")
       (and (eq_attr "type" "fdiv,fsqrt")
	    (eq_attr "mode" "DF")))
  "r74k_fpu_arith*29")

(define_insn_reservation "r74kf2_1_fdiv_df" 64
  (and (eq_attr "cpu" "74kf2_1")
       (and (eq_attr "type" "fdiv,fsqrt")
	    (eq_attr "mode" "DF")))
  "r74k_fpu_arith*58")

(define_insn_reservation "r74kf3_2_fdiv_df" 48
  (and (eq_attr "cpu" "74kf3_2")
       (and (eq_attr "type" "fdiv,fsqrt")
	    (eq_attr "mode" "DF")))
  "r74k_fpu_arith*29")
;; frsqrt
;; Single precision: latency 17 / 34 / 25 for 74kf1_1 / 74kf2_1 / 74kf3_2.
(define_insn_reservation "r74kf1_1_frsqrt_sf" 17
  (and (eq_attr "cpu" "74kf1_1")
       (and (eq_attr "type" "frsqrt")
	    (eq_attr "mode" "SF")))
  "r74k_fpu_arith*14")

(define_insn_reservation "r74kf2_1_frsqrt_sf" 34
  (and (eq_attr "cpu" "74kf2_1")
       (and (eq_attr "type" "frsqrt")
	    (eq_attr "mode" "SF")))
  "r74k_fpu_arith*28")

(define_insn_reservation "r74kf3_2_frsqrt_sf" 25
  (and (eq_attr "cpu" "74kf3_2")
       (and (eq_attr "type" "frsqrt")
	    (eq_attr "mode" "SF")))
  "r74k_fpu_arith*14")

;; Double precision: latency 36 / 72 / 54.
(define_insn_reservation "r74kf1_1_frsqrt_df" 36
  (and (eq_attr "cpu" "74kf1_1")
       (and (eq_attr "type" "frsqrt")
	    (eq_attr "mode" "DF")))
  "r74k_fpu_arith*31")

(define_insn_reservation "r74kf2_1_frsqrt_df" 72
  (and (eq_attr "cpu" "74kf2_1")
       (and (eq_attr "type" "frsqrt")
	    (eq_attr "mode" "DF")))
  "r74k_fpu_arith*62")

(define_insn_reservation "r74kf3_2_frsqrt_df" 54
  (and (eq_attr "cpu" "74kf3_2")
       (and (eq_attr "type" "frsqrt")
	    (eq_attr "mode" "DF")))
  "r74k_fpu_arith*31")
;; fcmp
;; Latency 4 / 8 / 6 for 74kf1_1 / 74kf2_1 / 74kf3_2.
(define_insn_reservation "r74kf1_1_fcmp" 4
  (and (eq_attr "cpu" "74kf1_1")
       (eq_attr "type" "fcmp"))
  "r74k_fpu_arith")

(define_insn_reservation "r74kf2_1_fcmp" 8
  (and (eq_attr "cpu" "74kf2_1")
       (eq_attr "type" "fcmp"))
  "r74k_fpu_arith*2")

(define_insn_reservation "r74kf3_2_fcmp" 6
  (and (eq_attr "cpu" "74kf3_2")
       (eq_attr "type" "fcmp"))
  "r74k_fpu_arith")
;; fcvt
;; Latency 4 / 8 / 6 for 74kf1_1 / 74kf2_1 / 74kf3_2.
(define_insn_reservation "r74kf1_1_fcvt" 4
  (and (eq_attr "cpu" "74kf1_1")
       (eq_attr "type" "fcvt"))
  "r74k_fpu_arith")

(define_insn_reservation "r74kf2_1_fcvt" 8
  (and (eq_attr "cpu" "74kf2_1")
       (eq_attr "type" "fcvt"))
  "r74k_fpu_arith*2")

(define_insn_reservation "r74kf3_2_fcvt" 6
  (and (eq_attr "cpu" "74kf3_2")
       (eq_attr "type" "fcvt"))
  "r74k_fpu_arith")
;; fxfer (MTC1, DMTC1: latency is 4) (MFC1, DMFC1: latency is 1)
;; The latencies quoted above are those of the 74kf1_1 variant.
;; To coprocessor 1 ("mtc"): latency 4 / 8 / 6 for 74kf1_1 / 74kf2_1 /
;; 74kf3_2.
(define_insn_reservation "r74kf1_1_fxfer_to_c1" 4
  (and (eq_attr "cpu" "74kf1_1")
       (eq_attr "type" "mtc"))
  "r74k_fpu_arith")

(define_insn_reservation "r74kf2_1_fxfer_to_c1" 8
  (and (eq_attr "cpu" "74kf2_1")
       (eq_attr "type" "mtc"))
  "r74k_fpu_arith*2")

(define_insn_reservation "r74kf3_2_fxfer_to_c1" 6
  (and (eq_attr "cpu" "74kf3_2")
       (eq_attr "type" "mtc"))
  "r74k_fpu_arith")

;; From coprocessor 1 ("mfc"): latency 1 / 2 / 1.
(define_insn_reservation "r74kf1_1_fxfer_from_c1" 1
  (and (eq_attr "cpu" "74kf1_1")
       (eq_attr "type" "mfc"))
  "r74k_fpu_arith")

(define_insn_reservation "r74kf2_1_fxfer_from_c1" 2
  (and (eq_attr "cpu" "74kf2_1")
       (eq_attr "type" "mfc"))
  "r74k_fpu_arith*2")

(define_insn_reservation "r74kf3_2_fxfer_from_c1" 1
  (and (eq_attr "cpu" "74kf3_2")
       (eq_attr "type" "mfc"))
  "r74k_fpu_arith")