;; Machine description for NVPTX. ;; Copyright (C) 2014-2021 Free Software Foundation, Inc. ;; Contributed by Bernd Schmidt bernds@codesourcery.com ;; ;; This file is part of GCC. ;; ;; GCC is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; ;; GCC is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with GCC; see the file COPYING3. If not see ;; http://www.gnu.org/licenses/.

;; Codes for the (non-volatile) unspec operations used by the patterns
;; in this file.
(define_c_enum "unspec" [
   UNSPEC_ARG_REG

   UNSPEC_COPYSIGN
   UNSPEC_LOG2
   UNSPEC_EXP2
   UNSPEC_SIN
   UNSPEC_COS

   UNSPEC_FPINT_FLOOR
   UNSPEC_FPINT_BTRUNC
   UNSPEC_FPINT_CEIL
   UNSPEC_FPINT_NEARBYINT

   UNSPEC_BITREV

   UNSPEC_ALLOCA

   UNSPEC_SET_SOFTSTACK

   UNSPEC_DIM_SIZE

   UNSPEC_BIT_CONV

   UNSPEC_VOTE_BALLOT

   UNSPEC_LANEID

   UNSPEC_SHUFFLE
   UNSPEC_BR_UNIFIED
])

;; Codes for the unspec_volatile operations.
(define_c_enum "unspecv" [
   UNSPECV_LOCK
   UNSPECV_CAS
   UNSPECV_XCHG
   UNSPECV_BARSYNC
   UNSPECV_MEMBAR
   UNSPECV_MEMBAR_CTA
   UNSPECV_DIM_POS

   UNSPECV_FORK
   UNSPECV_FORKED
   UNSPECV_JOINING
   UNSPECV_JOIN

   UNSPECV_NOUNROLL

   UNSPECV_SIMT_ENTER
   UNSPECV_SIMT_EXIT

   UNSPECV_RED_PART
])

;; Boolean insn attributes; both default to "false" and are overridden
;; per pattern with set_attr.
(define_attr "subregs_ok" "false,true"
  (const_string "false"))

(define_attr "atomic" "false,true"
  (const_string "false"))

;; The nvptx operand predicates, in general, don't permit subregs and
;; only literal constants, which differ from the generic ones, which
;; permit subregs and symbolic constants (as appropriate)
(define_predicate "nvptx_register_operand"
  (match_code "reg")
{
  return register_operand (op, mode);
})

;; A REG accepted by register_operand, or a MEM accepted by memory_operand.
(define_predicate "nvptx_nonimmediate_operand"
  (match_code "mem,reg")
{
  return (REG_P (op) ? register_operand (op, mode)
          : memory_operand (op, mode));
})

;; A REG accepted by register_operand, or a const_int/const_double
;; accepted by immediate_operand.
(define_predicate "nvptx_nonmemory_operand"
  (match_code "reg,const_int,const_double")
{
  return (REG_P (op) ? register_operand (op, mode)
          : immediate_operand (op, mode));
})

;; The integer constant zero.
(define_predicate "const0_operand"
  (and (match_code "const_int")
       (match_test "op == const0_rtx")))

;; True if this operator is valid for predication.
(define_predicate "predicate_operator"
  (match_code "eq,ne"))

;; Matches only the NE comparison code.
(define_predicate "ne_operator"
  (match_code "ne"))

;; Comparison codes accepted by the integer compare patterns
;; (signed and unsigned orderings).
(define_predicate "nvptx_comparison_operator"
  (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))

;; Comparison codes accepted by the floating-point compare patterns
;; (ordered, unordered-or-X, and the ordered/unordered tests).
(define_predicate "nvptx_float_comparison_operator"
  (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))

;; A constant vector lane index in the range [0, 3].
(define_predicate "nvptx_vector_index_operand"
  (and (match_code "const_int")
       (match_test "UINTVAL (op) < 4")))

;; Test for a valid operand for a call instruction.
(define_predicate "call_insn_operand"
  (match_code "symbol_ref,reg")
{
  return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
})

;; Return true if OP is a call with parallel USEs of the argument
;; pseudos.
(define_predicate "call_operation"
  (match_code "parallel")
{
  int arg_end = XVECLEN (op, 0);

  /* Element 0 is not inspected here; every following element must be
     a USE of a register.  */
  for (int i = 1; i < arg_end; i++)
    {
      rtx elt = XVECEXP (op, 0, i);

      if (GET_CODE (elt) != USE || !REG_P (XEXP (elt, 0)))
        return false;
    }

  return true;
})

;; Test for a function symbol ref operand
(define_predicate "symbol_ref_function_operand"
  (match_code "symbol_ref")
{
  return SYMBOL_REF_FUNCTION_P (op);
})

;; Insns are predicable unless a pattern sets this attribute to "false".
(define_attr "predicable" "false,true"
  (const_string "true"))

;; Predicated execution: the condition is an EQ/NE test of a BImode
;; register against zero.
(define_cond_exec
  [(match_operator 0 "predicate_operator"
      [(match_operand:BI 1 "nvptx_register_operand" "")
       (match_operand:BI 2 "const0_operand" "")])]
  ""
  ""
  )

;; Operand constraints.
(define_constraint "P0"
  "An integer with the value 0."
  (and (match_code "const_int")
       (match_test "ival == 0")))

(define_constraint "P1"
  "An integer with the value 1."
  (and (match_code "const_int")
       (match_test "ival == 1")))

(define_constraint "Pn"
  "An integer with the value -1."
  (and (match_code "const_int")
       (match_test "ival == -1")))

(define_constraint "R"
  "A pseudo register."
  (match_code "reg"))

(define_constraint "Ia"
  "Any integer constant."
  (and (match_code "const_int") (match_test "true")))

;; Mode iterators.  Each name lists the modes it ranges over.
(define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
(define_mode_iterator QHSDIM [QI HI SI DI])
(define_mode_iterator HSDIM [HI SI DI])
(define_mode_iterator BHSDIM [BI HI SI DI])
(define_mode_iterator SDIM [SI DI])
(define_mode_iterator SDISDFM [SI DI SF DF])
(define_mode_iterator QHIM [QI HI])
(define_mode_iterator QHSIM [QI HI SI])
(define_mode_iterator SDFM [SF DF])
(define_mode_iterator SDCM [SC DC])
(define_mode_iterator BITS [SI SF])
(define_mode_iterator BITD [DI DF])
(define_mode_iterator VECIM [V2SI V2DI])

;; This mode iterator allows :P to be used for patterns that operate on
;; pointer-sized quantities.  Exactly one of the two alternatives will match.
(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])

;; Define element mode for each vector mode.
(define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
(define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])

;; We should get away with not defining memory alternatives, since we don't
;; get variables in this mode and pseudos are never spilled.
;; One output line per constraint alternative: register copy, constant 0,
;; constant -1.
(define_insn "movbi"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
        (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
  ""
  "@
   %.\tmov%t0\t%0, %1;
   %.\tsetp.eq.u32\t%0, 1, 0;
   %.\tsetp.eq.u32\t%0, 1, 1;")

;; Vector moves: register/immediate to register, load, and store.
;; <mode> in the name keeps the per-mode instances distinct.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  if (which_alternative == 1)
    return "%.\tld%A1%u1\t%0, %1;";
  if (which_alternative == 2)
    return "%.\tst%A0%u0\t%0, %1;";

  /* Alternative 0: register or immediate source.  */
  return nvptx_output_mov_insn (operands[0], operands[1]);
}
  [(set_attr "subregs_ok" "true")])

;; Integer moves: register/immediate to register, load, and store.
;; <mode> in the name keeps the per-mode instances distinct.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  if (which_alternative == 1)
    return "%.\tld%A1%u1\t%0, %1;";
  if (which_alternative == 2)
    return "%.\tst%A0%u0\t%0, %1;";

  /* Alternative 0: register or immediate source.  */
  return nvptx_output_mov_insn (operands[0], operands[1]);
}
  [(set_attr "subregs_ok" "true")])

;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up.
;; The symbol is loaded first, then the constant offset is added.
(define_split
  [(set (match_operand:DI 0 "nvptx_register_operand")
        (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand")
                           (match_operand 2 "const_int_operand"))))]
  ""
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
  ]
  "")

;; Floating-point moves: register/constant to register, load, and store.
;; <mode> in the name keeps the per-mode instances distinct.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  if (which_alternative == 1)
    return "%.\tld%A1%u0\t%0, %1;";
  if (which_alternative == 2)
    return "%.\tst%A0%u1\t%0, %1;";

  /* Alternative 0: register or constant source.  */
  return nvptx_output_mov_insn (operands[0], operands[1]);
}
  [(set_attr "subregs_ok" "true")])

;; Copy incoming argument register %ar<N> (N is constant operand 1) into
;; a pseudo.  QI/HI destinations go through a cvt from u32.
(define_insn "load_arg_reg<mode>"
  [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
        (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
                     UNSPEC_ARG_REG))]
  ""
  "%.\tcvt%t0.u32\t%0, %%ar%1;")

;; SI/DI/SF/DF destinations use a plain mov.
(define_insn "load_arg_reg<mode>"
  [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
        (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
                        UNSPEC_ARG_REG))]
  ""
  "%.\tmov%t0\t%0, %%ar%1;")

;; Vector move expander.  A store whose source is not a register is
;; routed through a fresh pseudo of the same mode.
(define_expand "mov<mode>"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
        (match_operand:VECIM 1 "general_operand" ""))]
  ""
{
  if (MEM_P (operands[0]) && !REG_P (operands[1]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_move_insn (tmp, operands[1]);
      emit_move_insn (operands[0], tmp);
      DONE;
    }
})

;; Scalar move expander.  A store whose source is not a register is
;; routed through a fresh pseudo of the same mode.  Label values are
;; rejected with a "sorry" diagnostic.
(define_expand "mov<mode>"
  [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
        (match_operand:QHSDISDFM 1 "general_operand" ""))]
  ""
{
  if (MEM_P (operands[0]) && !REG_P (operands[1]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_move_insn (tmp, operands[1]);
      emit_move_insn (operands[0], tmp);
      DONE;
    }

  if (GET_CODE (operands[1]) == LABEL_REF)
    sorry ("target cannot support label values");
})

;; Zero extensions.  Alternative 0 converts a register, alternative 1
;; is an extending load from memory.
(define_insn "zero_extendqihi2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
        (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\tcvt.u16.u%T1\t%0, %1;
   %.\tld%A1.u8\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "zero_extend<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
        (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\tcvt.u32.u%T1\t%0, %1;
   %.\tld%A1.u%T1\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "zero_extend<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
        (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\tcvt.u64.u%T1\t%0, %1;
   %.\tld%A1%u1\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Sign extensions.  QI->HI is register-only; the SI and DI forms also
;; accept a memory source (alternative 1, an extending load).
(define_insn "extendqihi2"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
        (sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt.s16.s8\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "extend<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
        (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\tcvt.s32.s%T1\t%0, %1;
   %.\tld%A1.s%T1\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "extend<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
        (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
  ""
  "@
   %.\tcvt.s64.s%T1\t%0, %1;
   %.\tld%A1.s%T1\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Truncations.  Alternative 0 writes a register, alternative 1 is a
;; narrowing store to memory.
(define_insn "trunchiqi2"
  [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
        (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
  ""
  "@
   %.\tcvt%t0.u16\t%0, %1;
   %.\tst%A0.u8\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

(define_insn "truncsi<mode>2"
  [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
        (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
  ""
{
  if (which_alternative == 1)
    return "%.\tst%A0.u%T0\t%0, %1;";
  /* SI -> QI in registers is emitted as a mov rather than a cvt.  */
  if (GET_MODE (operands[0]) == QImode)
    return "%.\tmov%t0\t%0, %1;";
  return "%.\tcvt%t0.u32\t%0, %1;";
}
  [(set_attr "subregs_ok" "true")])

(define_insn "truncdi<mode>2"
  [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
        (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
  ""
  "@
   %.\tcvt%t0.u64\t%0, %1;
   %.\tst%A0.u%T0\t%0, %1;"
  [(set_attr "subregs_ok" "true")])

;; Integer arithmetic

;; Integer addition; operand 2 may be a register or an immediate.
(define_insn "add<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tadd%t0\t%0, %1, %2;")

;; Three-input SImode forms: (op1 + op2) + op3 and (op1 - op2) + op3,
;; each emitted as a single instruction.
(define_insn "*vadd_addsi4"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
                          (match_operand:SI 2 "nvptx_register_operand" "R"))
                 (match_operand:SI 3 "nvptx_register_operand" "R")))]
  ""
  "%.\tvadd%t0%t1%t2.add\t%0, %1, %2, %3;")

(define_insn "*vsub_addsi4"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
                           (match_operand:SI 2 "nvptx_register_operand" "R"))
                 (match_operand:SI 3 "nvptx_register_operand" "R")))]
  ""
  "%.\tvsub%t0%t1%t2.add\t%0, %1, %2, %3;")

;; Integer subtract (register operands only), multiply (low part of the
;; product) and multiply-add.
(define_insn "sub<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                     (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
  ""
  "%.\tsub%t0\t%0, %1, %2;")

(define_insn "mul<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tmul.lo%t0\t%0, %1, %2;")

(define_insn "*mad<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                                (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
                    (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tmad.lo%t0\t%0, %1, %2, %3;")

;; Signed and unsigned integer division and remainder.  Operand 1 must be
;; a register (its predicate is nvptx_register_operand), so its
;; constraint is "R" in all four patterns; operand 2 may be an immediate.
(define_insn "div<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tdiv.s%T0\t%0, %1, %2;")

(define_insn "udiv<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tdiv.u%T0\t%0, %1, %2;")

(define_insn "mod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\trem.s%T0\t%0, %1, %2;")

(define_insn "umod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\trem.u%T0\t%0, %1, %2;")

;; Signed and unsigned integer minimum and maximum.
(define_insn "smin<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tmin.s%T0\t%0, %1, %2;")

(define_insn "umin<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tmin.u%T0\t%0, %1, %2;")

(define_insn "smax<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tmax.s%T0\t%0, %1, %2;")

(define_insn "umax<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tmax.u%T0\t%0, %1, %2;")

;; Integer unary operations: absolute value, negation, bitwise complement.
(define_insn "abs<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tabs.s%T0\t%0, %1;")

(define_insn "neg<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tneg.s%T0\t%0, %1;")

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tnot.b%T0\t%0, %1;")

;; Bit manipulation on SI/DI inputs.  clz, ctz and popcount always
;; produce an SImode result.
(define_insn "bitrev<mode>2"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
                     UNSPEC_BITREV))]
  ""
  "%.\tbrev.b%T0\t%0, %1;")

(define_insn "clz<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tclz.b%T1\t%0, %1;")

;; ctz is expanded as clz of the bit-reversed input.
(define_expand "ctz<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
  ""
{
  rtx tmpreg = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_bitrev<mode>2 (tmpreg, operands[1]));
  emit_insn (gen_clz<mode>2 (operands[0], tmpreg));
  DONE;
})

(define_insn "popcount<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tpopc.b%T1\t%0, %1;")

;; Multiplication variants

;; Widening multiplies: both inputs are sign- or zero-extended to twice
;; their width and the full product is produced.
(define_insn "mulhisi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (mult:SI (sign_extend:SI
                  (match_operand:HI 1 "nvptx_register_operand" "R"))
                 (sign_extend:SI
                  (match_operand:HI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\tmul.wide.s16\t%0, %1, %2;")

(define_insn "mulsidi3"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (mult:DI (sign_extend:DI
                  (match_operand:SI 1 "nvptx_register_operand" "R"))
                 (sign_extend:DI
                  (match_operand:SI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\tmul.wide.s32\t%0, %1, %2;")

(define_insn "umulhisi3"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (mult:SI (zero_extend:SI
                  (match_operand:HI 1 "nvptx_register_operand" "R"))
                 (zero_extend:SI
                  (match_operand:HI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\tmul.wide.u16\t%0, %1, %2;")

(define_insn "umulsidi3"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (mult:DI (zero_extend:DI
                  (match_operand:SI 1 "nvptx_register_operand" "R"))
                 (zero_extend:DI
                  (match_operand:SI 2 "nvptx_register_operand" "R"))))]
  ""
  "%.\tmul.wide.u32\t%0, %1, %2;")

;; High-part multiplies: the upper half of the double-width product,
;; expressed as a widening multiply shifted right by the input width
;; and truncated.
(define_insn "smulhi3_highpart"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
        (truncate:HI
         (lshiftrt:SI
          (mult:SI (sign_extend:SI
                    (match_operand:HI 1 "nvptx_register_operand" "R"))
                   (sign_extend:SI
                    (match_operand:HI 2 "nvptx_register_operand" "R")))
          (const_int 16))))]
  ""
  "%.\tmul.hi.s16\t%0, %1, %2;")

(define_insn "smulsi3_highpart"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (truncate:SI
         (lshiftrt:DI
          (mult:DI (sign_extend:DI
                    (match_operand:SI 1 "nvptx_register_operand" "R"))
                   (sign_extend:DI
                    (match_operand:SI 2 "nvptx_register_operand" "R")))
          (const_int 32))))]
  ""
  "%.\tmul.hi.s32\t%0, %1, %2;")

(define_insn "umulhi3_highpart"
  [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
        (truncate:HI
         (lshiftrt:SI
          (mult:SI (zero_extend:SI
                    (match_operand:HI 1 "nvptx_register_operand" "R"))
                   (zero_extend:SI
                    (match_operand:HI 2 "nvptx_register_operand" "R")))
          (const_int 16))))]
  ""
  "%.\tmul.hi.u16\t%0, %1, %2;")

(define_insn "umulsi3_highpart"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (truncate:SI
         (lshiftrt:DI
          (mult:DI (zero_extend:DI
                    (match_operand:SI 1 "nvptx_register_operand" "R"))
                   (zero_extend:DI
                    (match_operand:SI 2 "nvptx_register_operand" "R")))
          (const_int 32))))]
  ""
  "%.\tmul.hi.u32\t%0, %1, %2;")

;; Shifts

;; Shifts.  The shift count (operand 2) is always SImode, whatever the
;; mode of the shifted value.
(define_insn "ashl<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                      (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tshl.b%T0\t%0, %1, %2;")

(define_insn "ashr<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                        (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tshr.s%T0\t%0, %1, %2;")

(define_insn "lshr<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                        (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tshr.u%T0\t%0, %1, %2;")

;; Logical operations

;; Bitwise AND, OR and XOR.  These also cover BImode.
(define_insn "and<mode>3"
  [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
        (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tand.b%T0\t%0, %1, %2;")

(define_insn "ior<mode>3"
  [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
        (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tor.b%T0\t%0, %1, %2;")

(define_insn "xor<mode>3"
  [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
        (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\txor.b%T0\t%0, %1, %2;")

;; Comparisons and branches

;; Compare two integer values and set a BImode predicate register.
(define_insn "*cmp<mode>"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (match_operator:BI 1 "nvptx_comparison_operator"
           [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
            (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
  ""
  "%.\tsetp%c1\t%0, %2, %3;")

;; Likewise for floating-point values.
(define_insn "*cmp<mode>"
  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (match_operator:BI 1 "nvptx_float_comparison_operator"
           [(match_operand:SDFM 2 "nvptx_register_operand" "R")
            (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
  ""
  "%.\tsetp%c1\t%0, %2, %3;")

;; Unconditional branch to a label.
(define_insn "jump"
  [(set (pc)
        (label_ref (match_operand 0 "" "")))]
  ""
  "%.\tbra\t%l0;")

;; Conditional branches on a BImode register being non-zero (br_true)
;; or zero (br_false).  They are marked not predicable.
(define_insn "br_true"
  [(set (pc)
        (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
                          (const_int 0))
                      (label_ref (match_operand 1 "" ""))
                      (pc)))]
  ""
  "%j0\tbra\t%l1;"
  [(set_attr "predicable" "false")])

(define_insn "br_false"
  [(set (pc)
        (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
                          (const_int 0))
                      (label_ref (match_operand 1 "" ""))
                      (pc)))]
  ""
  "%J0\tbra\t%l1;"
  [(set_attr "predicable" "false")])

;; unified conditional branch
;; Same shape as br_true/br_false, but the condition register is wrapped
;; in UNSPEC_BR_UNIFIED and the output uses bra.uni.
(define_insn "br_true_uni"
  [(set (pc) (if_then_else
        (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
                       UNSPEC_BR_UNIFIED) (const_int 0))
        (label_ref (match_operand 1 "" "")) (pc)))]
  ""
  "%j0\tbra.uni\t%l1;"
  [(set_attr "predicable" "false")])

(define_insn "br_false_uni"
  [(set (pc) (if_then_else
        (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
                       UNSPEC_BR_UNIFIED) (const_int 0))
        (label_ref (match_operand 1 "" "")) (pc)))]
  ""
  "%J0\tbra.uni\t%l1;"
  [(set_attr "predicable" "false")])

;; Compare-and-branch expanders.  nvptx_expand_compare returns a new
;; comparison rtx; operands 0-2 are replaced by it and its two arguments
;; before the template is emitted.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else (match_operator 0 "nvptx_comparison_operator"
                       [(match_operand:HSDIM 1 "nvptx_register_operand" "")
                        (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
{
  rtx t = nvptx_expand_compare (operands[0]);
  operands[0] = t;
  operands[1] = XEXP (t, 0);
  operands[2] = XEXP (t, 1);
})

(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
                       [(match_operand:SDFM 1 "nvptx_register_operand" "")
                        (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
{
  rtx t = nvptx_expand_compare (operands[0]);
  operands[0] = t;
  operands[1] = XEXP (t, 0);
  operands[2] = XEXP (t, 1);
})

;; Branch on an EQ/NE test of a BImode register against zero; the
;; template is emitted unchanged.
(define_expand "cbranchbi4"
  [(set (pc)
        (if_then_else (match_operator 0 "predicate_operator"
                       [(match_operand:BI 1 "nvptx_register_operand" "")
                        (match_operand:BI 2 "const0_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
  "")

;; Conditional stores

;; Convert a BImode predicate into an SImode value: selects -1 when the
;; predicate is set and 0 otherwise.
(define_insn "setcc_from_bi"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
               (const_int 0)))]
  ""
  "%.\tselp%t0 %0,-1,0,%1;")

;; Conditional select on a BImode predicate.  sel_true picks operand 2
;; when the predicate is non-zero; sel_false tests for zero and therefore
;; swaps operands 2 and 3 in the output.
(define_insn "sel_true<mode>"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (if_then_else:HSDIM
          (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
          (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
          (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tselp%t0\t%0, %2, %3, %1;")

(define_insn "sel_true<mode>"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (if_then_else:SDFM
          (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
          (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
          (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\tselp%t0\t%0, %2, %3, %1;")

(define_insn "sel_false<mode>"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (if_then_else:HSDIM
          (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
          (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
          (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\tselp%t0\t%0, %3, %2, %1;")

(define_insn "sel_false<mode>"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (if_then_else:SDFM
          (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
          (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
          (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\tselp%t0\t%0, %3, %2, %1;")

;; Store the result of a comparison directly into an SImode (setcc_int)
;; or SFmode (setcc_float) register, for integer and floating-point
;; comparison operands.
(define_insn "setcc_int<mode>"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (match_operator:SI 1 "nvptx_comparison_operator"
          [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
           (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
  ""
  "%.\tset%t0%c1\t%0, %2, %3;")

(define_insn "setcc_int<mode>"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (match_operator:SI 1 "nvptx_float_comparison_operator"
          [(match_operand:SDFM 2 "nvptx_register_operand" "R")
           (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
  ""
  "%.\tset%t0%c1\t%0, %2, %3;")

(define_insn "setcc_float<mode>"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (match_operator:SF 1 "nvptx_comparison_operator"
          [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
           (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
  ""
  "%.\tset%t0%c1\t%0, %2, %3;")

(define_insn "setcc_float<mode>"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (match_operator:SF 1 "nvptx_float_comparison_operator"
          [(match_operand:SDFM 2 "nvptx_register_operand" "R")
           (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
  ""
  "%.\tset%t0%c1\t%0, %2, %3;")

;; cstore expanders.  Each emits its template unchanged, producing an
;; SImode result from the comparison.
(define_expand "cstorebi4"
  [(set (match_operand:SI 0 "nvptx_register_operand")
        (match_operator:SI 1 "ne_operator"
         [(match_operand:BI 2 "nvptx_register_operand")
          (match_operand:BI 3 "const0_operand")]))]
  ""
  "")

(define_expand "cstore<mode>4"
  [(set (match_operand:SI 0 "nvptx_register_operand")
        (match_operator:SI 1 "nvptx_comparison_operator"
          [(match_operand:HSDIM 2 "nvptx_register_operand")
           (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
  ""
  "")

(define_expand "cstore<mode>4"
  [(set (match_operand:SI 0 "nvptx_register_operand")
        (match_operator:SI 1 "nvptx_float_comparison_operator"
          [(match_operand:SDFM 2 "nvptx_register_operand")
           (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
  ""
  "")

;; Calls

;; Call insns, one instance per pointer mode.  The enclosing PARALLEL is
;; checked by call_operation; output is produced by
;; nvptx_output_call_insn, which receives the result register (or
;; NULL_RTX when there is none) and the callee.
(define_insn "call_insn_<mode>"
  [(match_parallel 2 "call_operation"
    [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
           (match_operand 1))])]
  ""
{
  return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
})

(define_insn "call_value_insn_<mode>"
  [(match_parallel 3 "call_operation"
    [(set (match_operand 0 "nvptx_register_operand" "=R")
          (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
                (match_operand 2)))])]
  ""
{
  return nvptx_output_call_insn (insn, operands[0], operands[1]);
})

;; Call expanders; all of the work is delegated to nvptx_expand_call.
(define_expand "call"
 [(match_operand 0 "" "")]
 ""
{
  nvptx_expand_call (NULL_RTX, operands[0]);
  DONE;
})

(define_expand "call_value"
  [(match_operand 0 "" "")
   (match_operand 1 "" "")]
 ""
{
  nvptx_expand_call (operands[0], operands[1]);
  DONE;
})

;; Floating point arithmetic.

;; Floating-point add, subtract (register operands only) and multiply.
(define_insn "add<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\tadd%t0\t%0, %1, %2;")

(define_insn "sub<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                    (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
  ""
  "%.\tsub%t0\t%0, %1, %2;")

(define_insn "mul<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\tmul%t0\t%0, %1, %2;")

;; Fused multiply-add: op0 = op1 * op2 + op3.
(define_insn "fma<mode>4"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
                  (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\tfma%#%t0\t%0, %1, %2, %3;")

;; Reciprocal: a division whose dividend (operand 2) is the constant 1.0,
;; emitted as rcp of the divisor.
(define_insn "*recip<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (div:SDFM
          (match_operand:SDFM 2 "const_double_operand" "F")
          (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  "CONST_DOUBLE_P (operands[2])
   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
  "%.\trcp%#%t0\t%0, %1;")

;; General floating-point division.
(define_insn "div<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\tdiv%#%t0\t%0, %1, %2;")

;; copysign.  Note that the output template lists operand 2 before
;; operand 1.
(define_insn "copysign<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
                      (match_operand:SDFM 2 "nvptx_register_operand" "R")]
                      UNSPEC_COPYSIGN))]
  ""
  "%.\tcopysign%t0\t%0, %2, %1;")

;; Floating-point minimum and maximum.
(define_insn "smin<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\tmin%t0\t%0, %1, %2;")

(define_insn "smax<mode>3"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
                   (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
  ""
  "%.\tmax%t0\t%0, %1, %2;")

;; Floating-point unary operations: absolute value, negation, square root.
(define_insn "abs<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tabs%t0\t%0, %1;")

(define_insn "neg<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tneg%t0\t%0, %1;")

(define_insn "sqrt<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tsqrt%#%t0\t%0, %1;")

;; Combined sine/cosine, enabled only under -funsafe-math-optimizations.
;; Operand 0 receives the cosine and operand 1 the sine of operand 2.
;; The input is passed through make_safe_from against operand 0 because
;; operand 0 is written before the input is read a second time.
(define_expand "sincossf3"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
                   UNSPEC_COS))
   (set (match_operand:SF 1 "nvptx_register_operand" "=R")
        (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
  "flag_unsafe_math_optimizations"
{
  operands[2] = make_safe_from (operands[2], operands[0]);
})

;; Approximate single-precision math instructions, enabled only under
;; -funsafe-math-optimizations.
(define_insn "sinsf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
                   UNSPEC_SIN))]
  "flag_unsafe_math_optimizations"
  "%.\tsin.approx%t0\t%0, %1;")

(define_insn "cossf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
                   UNSPEC_COS))]
  "flag_unsafe_math_optimizations"
  "%.\tcos.approx%t0\t%0, %1;")

(define_insn "log2sf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
                   UNSPEC_LOG2))]
  "flag_unsafe_math_optimizations"
  "%.\tlg2.approx%t0\t%0, %1;")

(define_insn "exp2sf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
                   UNSPEC_EXP2))]
  "flag_unsafe_math_optimizations"
  "%.\tex2.approx%t0\t%0, %1;")

;; Conversions involving floating point

;; SF <-> DF conversions.  Only the narrowing direction carries the %#
;; modifier in its output template.
(define_insn "extendsfdf2"
  [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
        (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt%t0%t1\t%0, %1;")

(define_insn "truncdfsf2"
  [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt%#%t0%t1\t%0, %1;")

;; Integer (SI or DI, signed or unsigned) to floating-point conversions.
(define_insn "floatunssi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt%#%t0.u%T1\t%0, %1;")

(define_insn "floatsi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt%#%t0.s%T1\t%0, %1;")

(define_insn "floatunsdi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt%#%t0.u%T1\t%0, %1;")

(define_insn "floatdi<mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt%#%t0.s%T1\t%0, %1;")

;; Floating-point to integer (SI or DI, signed or unsigned) conversions,
;; all emitted with the .rzi modifier.
(define_insn "fixuns_trunc<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt.rzi.u%T0%t1\t%0, %1;")

(define_insn "fix_trunc<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt.rzi.s%T0%t1\t%0, %1;")

(define_insn "fixuns_trunc<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt.rzi.u%T0%t1\t%0, %1;")

(define_insn "fix_trunc<mode>di2"
  [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
        (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
  ""
  "%.\tcvt.rzi.s%T0%t1\t%0, %1;")

(define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT]) (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR “floor”) (UNSPEC_FPINT_BTRUNC “btrunc”) (UNSPEC_FPINT_CEIL “ceil”) (UNSPEC_FPINT_NEARBYINT “nearbyint”)]) (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR “.rmi”) (UNSPEC_FPINT_BTRUNC “.rzi”) (UNSPEC_FPINT_CEIL “.rpi”) (UNSPEC_FPINT_NEARBYINT “%#i”)])

;; floor/btrunc/ceil/nearbyint on SDFM values: same-mode cvt whose
;; rounding modifier comes from fpint_roundingmode.  Both the name and
;; the template need the <...> iterator substitutions; without them the
;; attribute names would be emitted literally.
(define_insn "<FPINT:fpint_name><SDFM:mode>2"
  [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
        (unspec:SDFM
          [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
          FPINT))]
  ""
  "%.\tcvt<FPINT:fpint_roundingmode>%t0%t1\t%0, %1;")

;; Floating-point to integer conversions with floor/ceil rounding.
(define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR
                             UNSPEC_FPINT_CEIL])

;; Pattern-name stem for each FPINT2 unspec.
(define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
                              (UNSPEC_FPINT_CEIL "lceil")])

;; Text placed directly after "cvt" for each FPINT2 unspec.
(define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
                                      (UNSPEC_FPINT_CEIL ".rpi")])

;; lfloor/lceil: SDFM floating-point input to signed SDIM integer result.
;; Name follows l{floor,ceil}<float-mode><int-mode>2; the <...>
;; substitutions are required in both the name and the template.
(define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (unspec:SDIM
          [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
          FPINT2))]
  ""
  "%.\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\t%0, %1;")

;; Vector operations

;; Write scalar operand 1 into the .x component of vector operand 0
;; (vec_merge mask 1), leaving the other components unchanged.
;; NOTE(review): <VECELEM> is expected to be the element-mode attribute
;; defined alongside the VECIM iterator -- confirm against that definition.
(define_insn "*vec_set<mode>_0"
  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
        (vec_merge:VECIM
          (vec_duplicate:VECIM
            (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
          (match_dup 0)
          (const_int 1)))]
  ""
  "%.\tmov%t1\t%0.x, %1;")

;; Write scalar operand 1 into the .y component of vector operand 0
;; (vec_merge mask 2), leaving the other components unchanged.
;; NOTE(review): <VECELEM> is expected to be the element-mode attribute
;; defined alongside the VECIM iterator -- confirm against that definition.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
        (vec_merge:VECIM
          (vec_duplicate:VECIM
            (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
          (match_dup 0)
          (const_int 2)))]
  ""
  "%.\tmov%t1\t%0.y, %1;")

;; Write scalar operand 1 into the .z component of vector operand 0
;; (vec_merge mask 4), leaving the other components unchanged.
;; NOTE(review): <VECELEM> is expected to be the element-mode attribute
;; defined alongside the VECIM iterator -- confirm against that definition.
(define_insn "*vec_set<mode>_2"
  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
        (vec_merge:VECIM
          (vec_duplicate:VECIM
            (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
          (match_dup 0)
          (const_int 4)))]
  ""
  "%.\tmov%t1\t%0.z, %1;")

;; Write scalar operand 1 into the .w component of vector operand 0
;; (vec_merge mask 8), leaving the other components unchanged.
;; NOTE(review): <VECELEM> is expected to be the element-mode attribute
;; defined alongside the VECIM iterator -- confirm against that definition.
(define_insn "*vec_set<mode>_3"
  [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
        (vec_merge:VECIM
          (vec_duplicate:VECIM
            (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
          (match_dup 0)
          (const_int 8)))]
  ""
  "%.\tmov%t1\t%0.w, %1;")

;; Standard vec_set<mode> expander: store scalar operand 1 into element
;; operand 2 of vector operand 0.  The element index is turned into a
;; one-hot vec_merge mask so that one of the *vec_set insns matches.
;; NOTE(review): <VECELEM> is expected to be the element-mode attribute
;; defined alongside the VECIM iterator -- confirm against that definition.
(define_expand "vec_set<mode>"
  [(match_operand:VECIM 0 "nvptx_register_operand")
   (match_operand:<VECELEM> 1 "nvptx_register_operand")
   (match_operand:SI 2 "nvptx_vector_index_operand")]
  ""
{
  enum machine_mode mode = GET_MODE (operands[0]);
  /* Element N is selected by bit N of the vec_merge mask.  */
  int mask = 1 << INTVAL (operands[2]);
  rtx tmp = gen_rtx_VEC_DUPLICATE (mode, operands[1]);
  tmp = gen_rtx_VEC_MERGE (mode, tmp, operands[0], GEN_INT (mask));
  emit_insn (gen_rtx_SET (operands[0], tmp));
  DONE;
})

;; Extract element operand 2 of vector operand 1 into scalar operand 0.
;; The template is chosen by the constant index: 0..3 -> .x/.y/.z/.w.
;; NOTE(review): <VECELEM>/<Vecelem> are expected to be the upper- and
;; lower-case element-mode attributes defined alongside the VECIM
;; iterator -- confirm against those definitions.
(define_insn "vec_extract<mode><Vecelem>"
  [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
        (vec_select:<VECELEM>
          (match_operand:VECIM 1 "nvptx_register_operand" "R")
          (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
  ""
{
  static const char *const asms[4] = {
    "%.\tmov%t0\t%0, %1.x;",
    "%.\tmov%t0\t%0, %1.y;",
    "%.\tmov%t0\t%0, %1.z;",
    "%.\tmov%t0\t%0, %1.w;"
  };
  return asms[INTVAL (operands[2])];
})

;; Miscellaneous

;; No-op insn; its output template is empty.
(define_insn "nop"
  [(const_int 0)]
  ""
  "")

;; Emit a bare "exit;" instruction.
(define_insn "exit"
  [(const_int 1)]
  ""
  "exit;")

;; A nop that still emits an instruction: a brace-scoped mov.u32 between
;; two registers declared locally inside the block.
(define_insn "fake_nop"
  [(const_int 2)]
  ""
  "{ .reg .u32 %%nop_src; .reg .u32 %%nop_dst; mov.u32 %%nop_dst, %%nop_src; }")

;; Function return; the assembly text is produced by nvptx_output_return.
(define_insn "return"
  [(return)]
  ""
{
  return nvptx_output_return ();
}
  [(set_attr "predicable" "false")])

;; Function epilogue: with soft stacks, first emit set_softstack on
;; SOFTSTACK_PREV_REGNUM, then emit the return jump.
(define_expand "epilogue"
  [(clobber (const_int 0))]
  ""
{
  if (TARGET_SOFT_STACK)
    emit_insn (gen_set_softstack (Pmode,
                                  gen_rtx_REG (Pmode, SOFTSTACK_PREV_REGNUM)));
  emit_jump_insn (gen_return ());
  DONE;
})

;; Nonlocal goto is unsupported: report it with sorry () and emit a nop
;; so that expansion still produces an insn.
(define_expand "nonlocal_goto"
  [(match_operand 0 "" "")
   (match_operand 1 "" "")
   (match_operand 2 "" "")
   (match_operand 3 "" "")]
  ""
{
  sorry ("target cannot support nonlocal goto.");
  emit_insn (gen_nop ());
  DONE;
})

;; Receiver side of a nonlocal goto: unsupported, diagnosed with sorry ().
;; There is no DONE, so the (const_int 0) template is still emitted.
(define_expand "nonlocal_goto_receiver"
  [(const_int 0)]
  ""
{
  sorry ("target cannot support nonlocal goto.");
})

;; Dynamic stack allocation.  With soft stacks: subtract operand 1 from
;; the stack pointer, emit set_softstack for the new value, and return
;; virtual_stack_dynamic_rtx in operand 0.  Otherwise alloca is diagnosed
;; as unsupported.
(define_expand "allocate_stack"
  [(match_operand 0 "nvptx_register_operand")
   (match_operand 1 "nvptx_register_operand")]
  ""
{
  if (TARGET_SOFT_STACK)
    {
      emit_move_insn (stack_pointer_rtx,
                      gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
      emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
      emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
      DONE;
    }
  /* The ptx documentation specifies an alloca intrinsic (for 32 bit
     only) but notes it is not implemented.  The assembler emits a
     confused error message.  Issue a blunt one now instead.  */
  sorry ("target cannot support alloca.");
  emit_insn (gen_nop ());
  DONE;
})

;; Soft-stack update taking the P-mode register in operand 0; the text
;; comes from nvptx_output_set_softstack.  This is a parameterized ('@')
;; pattern, so the name must contain <mode>: callers in this file invoke
;; it as gen_set_softstack (Pmode, ...).
(define_insn "@set_softstack_<mode>"
  [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
           UNSPEC_SET_SOFTSTACK)]
  "TARGET_SOFT_STACK"
{
  return nvptx_output_set_softstack (REGNO (operands[0]));
})

;; Restore the stack pointer at block exit.  With soft stacks, copy
;; operand 1 to operand 0 and emit set_softstack for it; otherwise emit
;; nothing.
(define_expand "restore_stack_block"
  [(match_operand 0 "register_operand" "")
   (match_operand 1 "register_operand" "")]
  ""
{
  if (TARGET_SOFT_STACK)
    {
      emit_move_insn (operands[0], operands[1]);
      emit_insn (gen_set_softstack (Pmode, operands[0]));
    }
  DONE;
})

;; Function-level stack restore: expands to nothing.
(define_expand "restore_stack_function"
  [(match_operand 0 "register_operand" "")
   (match_operand 1 "register_operand" "")]
  ""
{
  DONE;
})

;; Unconditional trap, emitted as "trap" followed by "exit".
(define_insn "trap"
  [(trap_if (const_int 1) (const_int 0))]
  ""
  "trap; exit;")

;; Trap when BImode operand 0 is nonzero; both emitted instructions carry
;; the %j0 prefix.
(define_insn "trap_if_true"
  [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
                (const_int 0))
            (const_int 0))]
  ""
  "%j0 trap; %j0 exit;"
  [(set_attr "predicable" "false")])

;; Trap when BImode operand 0 is zero; both emitted instructions carry
;; the %J0 prefix.
(define_insn "trap_if_false"
  [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
                (const_int 0))
            (const_int 0))]
  ""
  "%J0 trap; %J0 exit;"
  [(set_attr "predicable" "false")])

;; Conditional trap on an SDIM comparison: expand the comparison with
;; nvptx_expand_compare and trap if the result is true.  The standard
;; name is ctrap<mode>4, so <mode> is required for the SDIM iterator.
(define_expand "ctrap<mode>4"
  [(trap_if (match_operator 0 "nvptx_comparison_operator"
              [(match_operand:SDIM 1 "nvptx_register_operand")
               (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
            (match_operand 3 "const0_operand"))]
  ""
{
  rtx t = nvptx_expand_compare (operands[0]);
  emit_insn (gen_trap_if_true (t));
  DONE;
})

;; Load the size of the dimension selected by constant operand 1 into
;; operand 0.  The table must be an array of string pointers
;; (const char *const), indexed by the operand value.
(define_insn "oacc_dim_size"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
                   UNSPEC_DIM_SIZE))]
  ""
{
  static const char *const asms[] =
    { /* Must match oacc_loop_levels ordering.  */
      "%.\tmov.u32\t%0, %%nctaid.x;",   /* gang */
      "%.\tmov.u32\t%0, %%ntid.y;",     /* worker */
      "%.\tmov.u32\t%0, %%ntid.x;",     /* vector */
    };
  return asms[INTVAL (operands[1])];
})

;; Load the position within the dimension selected by constant operand 1
;; into operand 0.  The table must be an array of string pointers
;; (const char *const), indexed by the operand value.
(define_insn "oacc_dim_pos"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
                            UNSPECV_DIM_POS))]
  ""
{
  static const char *const asms[] =
    { /* Must match oacc_loop_levels ordering.  */
      "%.\tmov.u32\t%0, %%ctaid.x;",    /* gang */
      "%.\tmov.u32\t%0, %%tid.y;",      /* worker */
      "%.\tmov.u32\t%0, %%tid.x;",      /* vector */
    };
  return asms[INTVAL (operands[1])];
})

;; Marker insn; emits only the assembly comment "// fork N".
(define_insn "nvptx_fork"
  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
                       UNSPECV_FORK)]
  ""
  "// fork %0;"
  [(set_attr "predicable" "false")])

;; Marker insn; emits only the assembly comment "// forked N".
(define_insn "nvptx_forked"
  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
                       UNSPECV_FORKED)]
  ""
  "// forked %0;"
  [(set_attr "predicable" "false")])

;; Marker insn; emits only the assembly comment "// joining N".
(define_insn "nvptx_joining"
  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
                       UNSPECV_JOINING)]
  ""
  "// joining %0;"
  [(set_attr "predicable" "false")])

;; Marker insn; emits only the assembly comment "// join N".
(define_insn "nvptx_join"
  [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
                       UNSPECV_JOIN)]
  ""
  "// join %0;"
  [(set_attr "predicable" "false")])

;; OpenACC fork: unless operand 0 is const0_rtx, copy operand 1 into it,
;; then let nvptx_expand_oacc_fork handle the level given by operand 2.
(define_expand "oacc_fork"
  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
        (match_operand:SI 1 "general_operand" ""))
   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
                       UNSPECV_FORKED)]
  ""
{
  if (operands[0] != const0_rtx)
    emit_move_insn (operands[0], operands[1]);
  nvptx_expand_oacc_fork (INTVAL (operands[2]));
  DONE;
})

;; OpenACC join: unless operand 0 is const0_rtx, copy operand 1 into it,
;; then let nvptx_expand_oacc_join handle the level given by operand 2.
(define_expand "oacc_join"
  [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
        (match_operand:SI 1 "general_operand" ""))
   (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
                       UNSPECV_JOIN)]
  ""
{
  if (operands[0] != const0_rtx)
    emit_move_insn (operands[0], operands[1]);
  nvptx_expand_oacc_join (INTVAL (operands[2]));
  DONE;
})

;; Only 32-bit shuffles exist.  One pattern per mode in BITS, hence the
;; <mode> in the name.  Operand 2 is the lane/offset argument and
;; operand 3 a constant selecting the shuffle kind (printed via %S3).
;; With TARGET_PTX_6_3 the .sync form with a full member mask is used.
(define_insn "nvptx_shuffle<mode>"
  [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
        (unspec:BITS
          [(match_operand:BITS 1 "nvptx_register_operand" "R")
           (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SI 3 "const_int_operand" "n")]
          UNSPEC_SHUFFLE))]
  ""
{
  if (TARGET_PTX_6_3)
    return "%.\tshfl.sync%S3.b32\t%0, %1, %2, 31, 0xffffffff;";
  else
    return "%.\tshfl%S3.b32\t%0, %1, %2, 31;";
})

;; Ballot vote: combine BImode predicate operand 1 into SImode operand 0.
;; With TARGET_PTX_6_3 the .sync form with a full member mask is used.
(define_insn "nvptx_vote_ballot"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
                   UNSPEC_VOTE_BALLOT))]
  ""
{
  if (TARGET_PTX_6_3)
    return "%.\tvote.sync.ballot.b32\t%0, %1, 0xffffffff;";
  else
    return "%.\tvote.ballot.b32\t%0, %1;";
})

;; Patterns for OpenMP SIMD-via-SIMT lowering

;; SIMT region entry; the text comes from nvptx_output_simt_enter.
;; Parameterized ('@') pattern over P, so the name must contain <mode>:
;; the omp_simt_enter expander calls gen_omp_simt_enter (Pmode, ...).
(define_insn "@omp_simt_enter_<mode>"
  [(set (match_operand:P 0 "nvptx_register_operand" "=R")
        (unspec_volatile:P
          [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
           (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
          UNSPECV_SIMT_ENTER))]
  ""
{
  return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
})

;; Expander for SIMT region entry.  Records in cfun->machine the largest
;; size (operand 1; HOST_WIDE_INT_M1U when it is not constant) and
;; alignment (operand 2) seen so far, sets has_simtreg, then emits the
;; P-mode omp_simt_enter insn.
(define_expand "omp_simt_enter"
  [(match_operand 0 "nvptx_register_operand" "=R")
   (match_operand 1 "nvptx_nonmemory_operand" "Ri")
   (match_operand 2 "const_int_operand" "n")]
  ""
{
  if (!CONST_INT_P (operands[1]))
    cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
  else
    cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
                                          cfun->machine->simt_stack_size);
  cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
                                         cfun->machine->simt_stack_align);
  cfun->machine->has_simtreg = true;
  emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1],
                                 operands[2]));
  DONE;
})

;; Expander for SIMT region exit: forwards operand 0 to the P-mode insn.
(define_expand "omp_simt_exit"
  [(match_operand 0 "nvptx_register_operand" "R")]
  ""
{
  emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
  DONE;
})

;; SIMT region exit; the text comes from nvptx_output_simt_exit.
;; Parameterized ('@') pattern over P, so the name must contain <mode>:
;; the omp_simt_exit expander calls gen_omp_simt_exit (Pmode, ...).
(define_insn "@omp_simt_exit_<mode>"
  [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
                    UNSPECV_SIMT_EXIT)]
  ""
{
  return nvptx_output_simt_exit (operands[0]);
})

;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to the lane index,
;; read from %laneid.
(define_insn "omp_simt_lane"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
  ""
  "%.\tmov.u32\t%0, %%laneid;")

;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
;; place a compiler barrier to disallow unrolling/peeling the containing
;; loop.
(define_expand "omp_simt_ordered"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  emit_move_insn (operands[0], operands[1]);
  emit_insn (gen_nvptx_nounroll ());
  DONE;
})

;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
;; across lanes, via nvptx_gen_shuffle with SHUFFLE_BFLY.
(define_expand "omp_simt_xchg_bfly"
  [(match_operand 0 "nvptx_register_operand" "=R")
   (match_operand 1 "nvptx_register_operand" "R")
   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
  ""
{
  emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
                                SHUFFLE_BFLY));
  DONE;
})

;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast the value in operand 1
;; from the lane given by the index in operand 2 to operand 0 in all
;; lanes, via nvptx_gen_shuffle with SHUFFLE_IDX.
(define_expand "omp_simt_xchg_idx"
  [(match_operand 0 "nvptx_register_operand" "=R")
   (match_operand 1 "nvptx_register_operand" "R")
   (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
  ""
{
  emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
                                SHUFFLE_IDX));
  DONE;
})

;; Implement IFN_GOMP_SIMT_VOTE_ANY:
;; set operand 0 to zero iff all lanes supply zero in operand 1.
;; Done by ballot-voting on the predicate (operand 1 != 0).
(define_expand "omp_simt_vote_any"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  rtx pred = gen_reg_rtx (BImode);
  emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
  emit_insn (gen_nvptx_vote_ballot (operands[0], pred));
  DONE;
})

;; Implement IFN_GOMP_SIMT_LAST_LANE:
;; set operand 0 to the lowest lane index that passed non-zero in
;; operand 1.  Computed as count-trailing-zeros of the ballot of
;; (operand 1 != 0).
(define_expand "omp_simt_last_lane"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  rtx pred = gen_reg_rtx (BImode);
  rtx tmp = gen_reg_rtx (SImode);
  emit_move_insn (pred, gen_rtx_NE (BImode, operands[1], const0_rtx));
  emit_insn (gen_nvptx_vote_ballot (tmp, pred));
  emit_insn (gen_ctzsi2 (operands[0], tmp));
  DONE;
})

;; Extract the parts of a 64-bit object (operand 2, any BITD mode) into
;; two 32-bit ints (operands 0 and 1) with a single mov.b64.
;; The pattern iterates over BITD, so the name needs <mode>.
;; NOTE(review): callers outside this chunk must use the matching
;; gen_unpack<mode>si2 names -- confirm.
(define_insn "unpack<mode>si2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
        (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
                    (const_int 0)] UNSPEC_BIT_CONV))
   (set (match_operand:SI 1 "nvptx_register_operand" "=R")
        (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
  ""
  "%.\tmov.b64\t{%0,%1}, %2;")

;; Pack two 32-bit ints (operands 1 and 2) into a 64-bit object
;; (operand 0, any BITD mode) with a single mov.b64.
;; The pattern iterates over BITD, so the name needs <mode>.
;; NOTE(review): callers outside this chunk must use the matching
;; gen_packsi<mode>2 names -- confirm.
(define_insn "packsi<mode>2"
  [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
        (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
                      (match_operand:SI 2 "nvptx_register_operand" "R")]
                     UNSPEC_BIT_CONV))]
  ""
  "%.\tmov.b64\t%0, {%1,%2};")

;; Atomic insns.

;; Standard atomic_compare_and_swap<mode> expander for SDIM modes.
;; Emits the CAS insn below, then derives the boolean success result by
;; comparing the returned old value (operand 1) with the expected value
;; (operand 3).  Only the success memory model (operand 6) is passed on.
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:SI 0 "nvptx_register_operand")        ;; bool success output
   (match_operand:SDIM 1 "nvptx_register_operand")      ;; oldval output
   (match_operand:SDIM 2 "memory_operand")              ;; memory
   (match_operand:SDIM 3 "nvptx_register_operand")      ;; expected input
   (match_operand:SDIM 4 "nvptx_register_operand")      ;; newval input
   (match_operand:SI 5 "const_int_operand")             ;; is_weak
   (match_operand:SI 6 "const_int_operand")             ;; success model
   (match_operand:SI 7 "const_int_operand")]            ;; failure model
  ""
{
  emit_insn (gen_atomic_compare_and_swap<mode>_1
             (operands[1], operands[2], operands[3], operands[4],
              operands[6]));

  rtx cond = gen_reg_rtx (BImode);
  emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3]));
  emit_insn (gen_sel_truesi (operands[0], cond, GEN_INT (1), GEN_INT (0)));
  DONE;
})

;; The CAS instruction itself: operand 0 receives the previous contents
;; of memory operand 1; operands 2 and 3 are the compare and swap values
;; and operand 4 the memory model.  The final text is produced by
;; nvptx_output_atomic_insn from the template below.
(define_insn "atomic_compare_and_swap<mode>_1"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (unspec_volatile:SDIM
          [(match_operand:SDIM 1 "memory_operand" "+m")
           (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
           (match_operand:SI 4 "const_int_operand")]
          UNSPECV_CAS))
   (set (match_dup 1)
        (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
  ""
{
  const char *t
    = "%.\tatom%A1.cas.b%T0\t%0, %1, %2, %3;";
  return nvptx_output_atomic_insn (t, operands, 1, 4);
}
  [(set_attr "atomic" "true")])

;; Standard atomic_exchange<mode> for SDIM modes: store operand 2 to
;; memory operand 1 and return the previous contents in operand 0.
;; The final text is produced by nvptx_output_atomic_insn.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")    ;; output
        (unspec_volatile:SDIM
          [(match_operand:SDIM 1 "memory_operand" "+m")         ;; memory
           (match_operand:SI 3 "const_int_operand")]            ;; model
          UNSPECV_XCHG))
   (set (match_dup 1)
        (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
  ""
{
  const char *t
    = "%.\tatom%A1.exch.b%T0\t%0, %1, %2;";
  return nvptx_output_atomic_insn (t, operands, 1, 3);
}
  [(set_attr "atomic" "true")])

;; Standard atomic_fetch_add<mode> for SDIM modes: add operand 2 to
;; memory operand 1 and return the previous contents in operand 0.
;; The final text is produced by nvptx_output_atomic_insn.
(define_insn "atomic_fetch_add<mode>"
  [(set (match_operand:SDIM 1 "memory_operand" "+m")
        (unspec_volatile:SDIM
          [(plus:SDIM (match_dup 1)
                      (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
           (match_operand:SI 3 "const_int_operand")]            ;; model
          UNSPECV_LOCK))
   (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (match_dup 1))]
  ""
{
  const char *t
    = "%.\tatom%A1.add%t0\t%0, %1, %2;";
  return nvptx_output_atomic_insn (t, operands, 1, 3);
}
  [(set_attr "atomic" "true")])

;; SFmode atomic fetch-and-add: add operand 2 to memory operand 1 and
;; return the previous contents in operand 0.
(define_insn "atomic_fetch_addsf"
  [(set (match_operand:SF 1 "memory_operand" "+m")
        (unspec_volatile:SF
          [(plus:SF (match_dup 1)
                    (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
           (match_operand:SI 3 "const_int_operand")]            ;; model
          UNSPECV_LOCK))
   (set (match_operand:SF 0 "nvptx_register_operand" "=R")
        (match_dup 1))]
  ""
{
  const char *t
    = "%.\tatom%A1.add%t0\t%0, %1, %2;";
  return nvptx_output_atomic_insn (t, operands, 1, 3);
}
  [(set_attr "atomic" "true")])

;; Bitwise logical RTL codes and their spelling in pattern names and
;; instruction text.
(define_code_iterator any_logic [and ior xor])
(define_code_attr logic [(and "and")
                         (ior "or")
                         (xor "xor")])

;; Standard atomic_fetch_{and,or,xor}<mode> for SDIM modes: apply the
;; logical operation to memory operand 1 with operand 2 and return the
;; previous contents in operand 0.  Enabled for SImode always, for other
;; modes only with TARGET_SM35.  The <logic>/<mode>/<MODE> substitutions
;; are required: without them all instances share one name, the
;; instruction has no operation suffix, and the condition names an
;; undefined identifier.
(define_insn "atomic_fetch_<logic><mode>"
  [(set (match_operand:SDIM 1 "memory_operand" "+m")
        (unspec_volatile:SDIM
          [(any_logic:SDIM (match_dup 1)
                           (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
           (match_operand:SI 3 "const_int_operand")]            ;; model
          UNSPECV_LOCK))
   (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
        (match_dup 1))]
  "<MODE>mode == SImode || TARGET_SM35"
{
  const char *t
    = "%.\tatom%A1.b%T0.<logic>\t%0, %1, %2;";
  return nvptx_output_atomic_insn (t, operands, 1, 3);
}
  [(set_attr "atomic" "true")])

;; atomic_test_and_set is implemented as a call to the library routine
;; __atomic_test_and_set_1, passing the address of memory operand 1 and
;; the memory model; the SImode result lands in operand 0.
(define_expand "atomic_test_and_set"
  [(match_operand:SI 0 "nvptx_register_operand")        ;; bool success output
   (match_operand:QI 1 "memory_operand")                ;; memory
   (match_operand:SI 2 "const_int_operand")]            ;; model
  ""
{
  rtx libfunc;
  rtx addr;
  libfunc = init_one_libfunc ("__atomic_test_and_set_1");
  addr = convert_memory_address (ptr_mode, XEXP (operands[1], 0));
  emit_library_call_value (libfunc, operands[0], LCT_NORMAL, SImode,
                           addr, ptr_mode,
                           operands[2], SImode);
  DONE;
})

;; bar.sync on barrier operand 0.  Constant operand 1 is printed as a
;; second argument only when it is nonzero.
(define_insn "nvptx_barsync"
  [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
                     (match_operand:SI 1 "const_int_operand")]
                    UNSPECV_BARSYNC)]
  ""
{
  if (INTVAL (operands[1]) == 0)
    return "\tbar.sync\t%0;";
  else
    return "\tbar.sync\t%0, %1;";
}
  [(set_attr "predicable" "false")])

;; Full memory barrier expander.  Operand 0 is created here as a volatile
;; BLKmode MEM of a scratch address, so that the barrier is modelled as
;; reading and writing memory.
(define_expand "memory_barrier"
  [(set (match_dup 0)
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
;; (corresponding to cuda functions threadfence_block, threadfence and
;; threadfence_system).  For the insn memory_barrier we use membar.sys.
;; This may be overconservative, but before using membar.gl instead we'll
;; need to explain in detail why it's safe to use.  For now, use
;; membar.sys.
(define_insn "*memory_barrier"
  [(set (match_operand:BLK 0 "" "")
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
  ""
  "\tmembar.sys;"
  [(set_attr "predicable" "false")])

;; CTA-scope memory barrier expander.  Operand 0 is created here as a
;; volatile BLKmode MEM of a scratch address.
(define_expand "nvptx_membar_cta"
  [(set (match_dup 0)
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
  ""
{
  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
  MEM_VOLATILE_P (operands[0]) = 1;
})

;; Matches the RTL built by the nvptx_membar_cta expander; emits
;; membar.cta.
(define_insn "*nvptx_membar_cta"
  [(set (match_operand:BLK 0 "" "")
        (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
  ""
  "\tmembar.cta;"
  [(set_attr "predicable" "false")])

;; Emit a .pragma "nounroll" directive.
(define_insn "nvptx_nounroll"
  [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
  ""
  "\t.pragma \"nounroll\";"
  [(set_attr "predicable" "false")])

;; Set DImode operand 0 from constant operand 1; the text is produced by
;; nvptx_output_red_partition.
(define_insn "nvptx_red_partition"
  [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
        (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
                            UNSPECV_RED_PART))]
  ""
{
  return nvptx_output_red_partition (operands[0], operands[1]);
}
  [(set_attr "predicable" "false")])