| /* Medium-level subroutines: convert bit-field store and extract |
| and shifts, multiplies and divides to rtl instructions. |
| Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, |
| 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 |
| Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "toplev.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "tm_p.h" |
| #include "flags.h" |
| #include "insn-config.h" |
| #include "expr.h" |
| #include "optabs.h" |
| #include "real.h" |
| #include "recog.h" |
| #include "langhooks.h" |
| #include "df.h" |
| #include "target.h" |
| |
| static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, rtx); |
| static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, rtx); |
| static rtx extract_fixed_bit_field (enum machine_mode, rtx, |
| unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, rtx, int); |
| static rtx mask_rtx (enum machine_mode, int, int, int); |
| static rtx lshift_value (enum machine_mode, rtx, int, int); |
| static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, int); |
| static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx); |
| static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); |
| static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT); |
| |
/* Test whether a value is zero or a power of two.  */
| #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0) |
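/* For example, x == 8 gives (8 & 7) == 0 and x == 0 trivially passes,
   while x == 12 gives (12 & 11) == 8, which is nonzero.  This is the
   usual "clear the lowest set bit" trick.  */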
| |
| /* Nonzero means divides or modulus operations are relatively cheap for |
| powers of two, so don't use branches; emit the operation instead. |
| Usually, this will mean that the MD file will emit non-branch |
| sequences. */ |
| |
| static bool sdiv_pow2_cheap[2][NUM_MACHINE_MODES]; |
| static bool smod_pow2_cheap[2][NUM_MACHINE_MODES]; |
| |
| #ifndef SLOW_UNALIGNED_ACCESS |
| #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT |
| #endif |
| |
| /* For compilers that support multiple targets with different word sizes, |
| MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example |
| is the H8/300(H) compiler. */ |
| |
| #ifndef MAX_BITS_PER_WORD |
| #define MAX_BITS_PER_WORD BITS_PER_WORD |
| #endif |
| |
| /* Reduce conditional compilation elsewhere. */ |
| #ifndef HAVE_insv |
| #define HAVE_insv 0 |
| #define CODE_FOR_insv CODE_FOR_nothing |
| #define gen_insv(a,b,c,d) NULL_RTX |
| #endif |
| #ifndef HAVE_extv |
| #define HAVE_extv 0 |
| #define CODE_FOR_extv CODE_FOR_nothing |
| #define gen_extv(a,b,c,d) NULL_RTX |
| #endif |
| #ifndef HAVE_extzv |
| #define HAVE_extzv 0 |
| #define CODE_FOR_extzv CODE_FOR_nothing |
| #define gen_extzv(a,b,c,d) NULL_RTX |
| #endif |
| |
/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  The first index of each array selects the
   cost when optimizing for size (0) or for speed (1).  */
| static int zero_cost[2]; |
| static int add_cost[2][NUM_MACHINE_MODES]; |
| static int neg_cost[2][NUM_MACHINE_MODES]; |
| static int shift_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; |
| static int shiftadd_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; |
| static int shiftsub_cost[2][NUM_MACHINE_MODES][MAX_BITS_PER_WORD]; |
| static int mul_cost[2][NUM_MACHINE_MODES]; |
| static int sdiv_cost[2][NUM_MACHINE_MODES]; |
| static int udiv_cost[2][NUM_MACHINE_MODES]; |
| static int mul_widen_cost[2][NUM_MACHINE_MODES]; |
| static int mul_highpart_cost[2][NUM_MACHINE_MODES]; |
| |
| void |
| init_expmed (void) |
| { |
| struct |
| { |
| struct rtx_def reg; rtunion reg_fld[2]; |
| struct rtx_def plus; rtunion plus_fld1; |
| struct rtx_def neg; |
| struct rtx_def mult; rtunion mult_fld1; |
| struct rtx_def sdiv; rtunion sdiv_fld1; |
| struct rtx_def udiv; rtunion udiv_fld1; |
| struct rtx_def zext; |
| struct rtx_def sdiv_32; rtunion sdiv_32_fld1; |
| struct rtx_def smod_32; rtunion smod_32_fld1; |
| struct rtx_def wide_mult; rtunion wide_mult_fld1; |
| struct rtx_def wide_lshr; rtunion wide_lshr_fld1; |
| struct rtx_def wide_trunc; |
| struct rtx_def shift; rtunion shift_fld1; |
| struct rtx_def shift_mult; rtunion shift_mult_fld1; |
| struct rtx_def shift_add; rtunion shift_add_fld1; |
| struct rtx_def shift_sub; rtunion shift_sub_fld1; |
| } all; |
| |
| rtx pow2[MAX_BITS_PER_WORD]; |
| rtx cint[MAX_BITS_PER_WORD]; |
| int m, n; |
| enum machine_mode mode, wider_mode; |
| int speed; |
| |
| |
| for (m = 1; m < MAX_BITS_PER_WORD; m++) |
| { |
| pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m); |
| cint[m] = GEN_INT (m); |
| } |
| memset (&all, 0, sizeof all); |
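  /* The members of ALL are stack-allocated RTL skeletons: the code below
     fills in their codes, modes and operands and hands them to rtx_cost,
     so the cost tables can be initialized without allocating real rtxes.  */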
| |
| PUT_CODE (&all.reg, REG); |
| /* Avoid using hard regs in ways which may be unsupported. */ |
| SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1); |
| |
| PUT_CODE (&all.plus, PLUS); |
| XEXP (&all.plus, 0) = &all.reg; |
| XEXP (&all.plus, 1) = &all.reg; |
| |
| PUT_CODE (&all.neg, NEG); |
| XEXP (&all.neg, 0) = &all.reg; |
| |
| PUT_CODE (&all.mult, MULT); |
| XEXP (&all.mult, 0) = &all.reg; |
| XEXP (&all.mult, 1) = &all.reg; |
| |
| PUT_CODE (&all.sdiv, DIV); |
| XEXP (&all.sdiv, 0) = &all.reg; |
| XEXP (&all.sdiv, 1) = &all.reg; |
| |
| PUT_CODE (&all.udiv, UDIV); |
| XEXP (&all.udiv, 0) = &all.reg; |
| XEXP (&all.udiv, 1) = &all.reg; |
| |
| PUT_CODE (&all.sdiv_32, DIV); |
| XEXP (&all.sdiv_32, 0) = &all.reg; |
| XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32); |
| |
| PUT_CODE (&all.smod_32, MOD); |
| XEXP (&all.smod_32, 0) = &all.reg; |
| XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1); |
| |
| PUT_CODE (&all.zext, ZERO_EXTEND); |
| XEXP (&all.zext, 0) = &all.reg; |
| |
| PUT_CODE (&all.wide_mult, MULT); |
| XEXP (&all.wide_mult, 0) = &all.zext; |
| XEXP (&all.wide_mult, 1) = &all.zext; |
| |
| PUT_CODE (&all.wide_lshr, LSHIFTRT); |
| XEXP (&all.wide_lshr, 0) = &all.wide_mult; |
| |
| PUT_CODE (&all.wide_trunc, TRUNCATE); |
| XEXP (&all.wide_trunc, 0) = &all.wide_lshr; |
| |
| PUT_CODE (&all.shift, ASHIFT); |
| XEXP (&all.shift, 0) = &all.reg; |
| |
| PUT_CODE (&all.shift_mult, MULT); |
| XEXP (&all.shift_mult, 0) = &all.reg; |
| |
| PUT_CODE (&all.shift_add, PLUS); |
| XEXP (&all.shift_add, 0) = &all.shift_mult; |
| XEXP (&all.shift_add, 1) = &all.reg; |
| |
| PUT_CODE (&all.shift_sub, MINUS); |
| XEXP (&all.shift_sub, 0) = &all.shift_mult; |
| XEXP (&all.shift_sub, 1) = &all.reg; |
| |
| for (speed = 0; speed < 2; speed++) |
| { |
| crtl->maybe_hot_insn_p = speed; |
| zero_cost[speed] = rtx_cost (const0_rtx, 0, speed); |
| |
| for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); |
| mode != VOIDmode; |
| mode = GET_MODE_WIDER_MODE (mode)) |
| { |
| PUT_MODE (&all.reg, mode); |
| PUT_MODE (&all.plus, mode); |
| PUT_MODE (&all.neg, mode); |
| PUT_MODE (&all.mult, mode); |
| PUT_MODE (&all.sdiv, mode); |
| PUT_MODE (&all.udiv, mode); |
| PUT_MODE (&all.sdiv_32, mode); |
| PUT_MODE (&all.smod_32, mode); |
| PUT_MODE (&all.wide_trunc, mode); |
| PUT_MODE (&all.shift, mode); |
| PUT_MODE (&all.shift_mult, mode); |
| PUT_MODE (&all.shift_add, mode); |
| PUT_MODE (&all.shift_sub, mode); |
| |
| add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed); |
| neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed); |
| mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed); |
| sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed); |
| udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed); |
| |
| sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed) |
| <= 2 * add_cost[speed][mode]); |
| smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed) |
| <= 4 * add_cost[speed][mode]); |
| |
| wider_mode = GET_MODE_WIDER_MODE (mode); |
| if (wider_mode != VOIDmode) |
| { |
| PUT_MODE (&all.zext, wider_mode); |
| PUT_MODE (&all.wide_mult, wider_mode); |
| PUT_MODE (&all.wide_lshr, wider_mode); |
| XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode)); |
| |
| mul_widen_cost[speed][wider_mode] |
| = rtx_cost (&all.wide_mult, SET, speed); |
| mul_highpart_cost[speed][mode] |
| = rtx_cost (&all.wide_trunc, SET, speed); |
| } |
| |
| shift_cost[speed][mode][0] = 0; |
| shiftadd_cost[speed][mode][0] = shiftsub_cost[speed][mode][0] |
| = add_cost[speed][mode]; |
| |
| n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode)); |
| for (m = 1; m < n; m++) |
| { |
| XEXP (&all.shift, 1) = cint[m]; |
| XEXP (&all.shift_mult, 1) = pow2[m]; |
| |
| shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed); |
| shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed); |
| shiftsub_cost[speed][mode][m] = rtx_cost (&all.shift_sub, SET, speed); |
| } |
| } |
| } |
| default_rtl_profile (); |
| } |
| |
| /* Return an rtx representing minus the value of X. |
| MODE is the intended mode of the result, |
| useful if X is a CONST_INT. */ |
| |
| rtx |
| negate_rtx (enum machine_mode mode, rtx x) |
| { |
| rtx result = simplify_unary_operation (NEG, mode, x, mode); |
| |
| if (result == 0) |
| result = expand_unop (mode, neg_optab, x, NULL_RTX, 0); |
| |
| return result; |
| } |
| |
| /* Report on the availability of insv/extv/extzv and the desired mode |
| of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo |
| is false; else the mode of the specified operand. If OPNO is -1, |
| all the caller cares about is whether the insn is available. */ |
| enum machine_mode |
| mode_for_extraction (enum extraction_pattern pattern, int opno) |
| { |
| const struct insn_data *data; |
| |
| switch (pattern) |
| { |
| case EP_insv: |
| if (HAVE_insv) |
| { |
| data = &insn_data[CODE_FOR_insv]; |
| break; |
| } |
| return MAX_MACHINE_MODE; |
| |
| case EP_extv: |
| if (HAVE_extv) |
| { |
| data = &insn_data[CODE_FOR_extv]; |
| break; |
| } |
| return MAX_MACHINE_MODE; |
| |
| case EP_extzv: |
| if (HAVE_extzv) |
| { |
| data = &insn_data[CODE_FOR_extzv]; |
| break; |
| } |
| return MAX_MACHINE_MODE; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (opno == -1) |
| return VOIDmode; |
| |
| /* Everyone who uses this function used to follow it with |
| if (result == VOIDmode) result = word_mode; */ |
| if (data->operand[opno].mode == VOIDmode) |
| return word_mode; |
| return data->operand[opno].mode; |
| } |
| |
| /* Return true if X, of mode MODE, matches the predicate for operand |
| OPNO of instruction ICODE. Allow volatile memories, regardless of |
| the ambient volatile_ok setting. */ |
| |
| static bool |
| check_predicate_volatile_ok (enum insn_code icode, int opno, |
| rtx x, enum machine_mode mode) |
| { |
| bool save_volatile_ok, result; |
| |
| save_volatile_ok = volatile_ok; |
| result = insn_data[(int) icode].operand[opno].predicate (x, mode); |
| volatile_ok = save_volatile_ok; |
| return result; |
| } |
| |
| /* A subroutine of store_bit_field, with the same arguments. Return true |
| if the operation could be implemented. |
| |
| If FALLBACK_P is true, fall back to store_fixed_bit_field if we have |
| no other way of implementing the operation. If FALLBACK_P is false, |
| return false instead. */ |
| |
| static bool |
| store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, |
| unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, |
| rtx value, bool fallback_p) |
| { |
| unsigned int unit |
| = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; |
| unsigned HOST_WIDE_INT offset, bitpos; |
| rtx op0 = str_rtx; |
| int byte_offset; |
| rtx orig_value; |
| |
| enum machine_mode op_mode = mode_for_extraction (EP_insv, 3); |
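  /* OP_MODE is the mode the insv pattern wants for the value operand
     (operand 3), or MAX_MACHINE_MODE if the target has no insv pattern.  */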
| |
| while (GET_CODE (op0) == SUBREG) |
| { |
| /* The following line once was done only if WORDS_BIG_ENDIAN, |
| but I think that is a mistake. WORDS_BIG_ENDIAN is |
| meaningful at a much higher level; when structures are copied |
| between memory and regs, the higher-numbered regs |
| always get higher addresses. */ |
| int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))); |
| int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0)); |
| |
| byte_offset = 0; |
| |
| /* Paradoxical subregs need special handling on big endian machines. */ |
| if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size) |
| { |
| int difference = inner_mode_size - outer_mode_size; |
| |
| if (WORDS_BIG_ENDIAN) |
| byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD; |
| if (BYTES_BIG_ENDIAN) |
| byte_offset += difference % UNITS_PER_WORD; |
| } |
| else |
| byte_offset = SUBREG_BYTE (op0); |
| |
| bitnum += byte_offset * BITS_PER_UNIT; |
| op0 = SUBREG_REG (op0); |
| } |
| |
| /* No action is needed if the target is a register and if the field |
| lies completely outside that register. This can occur if the source |
| code contains an out-of-bounds access to a small array. */ |
| if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) |
| return true; |
| |
| /* Use vec_set patterns for inserting parts of vectors whenever |
| available. */ |
| if (VECTOR_MODE_P (GET_MODE (op0)) |
| && !MEM_P (op0) |
| && (optab_handler (vec_set_optab, GET_MODE (op0))->insn_code |
| != CODE_FOR_nothing) |
| && fieldmode == GET_MODE_INNER (GET_MODE (op0)) |
| && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) |
| && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) |
| { |
| enum machine_mode outermode = GET_MODE (op0); |
| enum machine_mode innermode = GET_MODE_INNER (outermode); |
| int icode = (int) optab_handler (vec_set_optab, outermode)->insn_code; |
| int pos = bitnum / GET_MODE_BITSIZE (innermode); |
| rtx rtxpos = GEN_INT (pos); |
| rtx src = value; |
| rtx dest = op0; |
| rtx pat, seq; |
| enum machine_mode mode0 = insn_data[icode].operand[0].mode; |
| enum machine_mode mode1 = insn_data[icode].operand[1].mode; |
| enum machine_mode mode2 = insn_data[icode].operand[2].mode; |
| |
| start_sequence (); |
| |
| if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) |
| src = copy_to_mode_reg (mode1, src); |
| |
      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);
| |
| /* We could handle this, but we should always be called with a pseudo |
| for our targets and all insns should take them as outputs. */ |
| gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0) |
| && (*insn_data[icode].operand[1].predicate) (src, mode1) |
| && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)); |
| pat = GEN_FCN (icode) (dest, src, rtxpos); |
| seq = get_insns (); |
| end_sequence (); |
| if (pat) |
| { |
| emit_insn (seq); |
| emit_insn (pat); |
| return true; |
| } |
| } |
| |
  /* If the target is a register, overwriting the entire object or storing
     a full-word or multi-word field can be done with just a SUBREG.
| |
| If the target is memory, storing any naturally aligned field can be |
| done with a simple store. For targets that support fast unaligned |
| memory, any naturally sized, unit aligned field can be done directly. */ |
| |
| offset = bitnum / unit; |
| bitpos = bitnum % unit; |
| byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT |
| + (offset * UNITS_PER_WORD); |
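  /* For example, with BITS_PER_WORD == 32 (UNITS_PER_WORD == 4) and a
     register destination, BITNUM == 37 gives OFFSET == 1, BITPOS == 5 and
     BYTE_OFFSET == 4: bit 5 of the second word.  */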
| |
| if (bitpos == 0 |
| && bitsize == GET_MODE_BITSIZE (fieldmode) |
| && (!MEM_P (op0) |
| ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD |
| || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode)) |
| && byte_offset % GET_MODE_SIZE (fieldmode) == 0) |
| : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0)) |
| || (offset * BITS_PER_UNIT % bitsize == 0 |
| && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0)))) |
| { |
| if (MEM_P (op0)) |
| op0 = adjust_address (op0, fieldmode, offset); |
| else if (GET_MODE (op0) != fieldmode) |
| op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0), |
| byte_offset); |
| emit_move_insn (op0, value); |
| return true; |
| } |
| |
| /* Make sure we are playing with integral modes. Pun with subregs |
| if we aren't. This must come after the entire register case above, |
| since that case is valid for any mode. The following cases are only |
| valid for integral modes. */ |
| { |
| enum machine_mode imode = int_mode_for_mode (GET_MODE (op0)); |
| if (imode != GET_MODE (op0)) |
| { |
| if (MEM_P (op0)) |
| op0 = adjust_address (op0, imode, 0); |
| else |
| { |
| gcc_assert (imode != BLKmode); |
| op0 = gen_lowpart (imode, op0); |
| } |
| } |
| } |
| |
| /* We may be accessing data outside the field, which means |
| we can alias adjacent data. */ |
| if (MEM_P (op0)) |
| { |
| op0 = shallow_copy_rtx (op0); |
| set_mem_alias_set (op0, 0); |
| set_mem_expr (op0, 0); |
| } |
| |
| /* If OP0 is a register, BITPOS must count within a word. |
| But as we have it, it counts within whatever size OP0 now has. |
| On a bigendian machine, these are not the same, so convert. */ |
| if (BYTES_BIG_ENDIAN |
| && !MEM_P (op0) |
| && unit > GET_MODE_BITSIZE (GET_MODE (op0))) |
| bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); |
| |
| /* Storing an lsb-aligned field in a register |
| can be done with a movestrict instruction. */ |
| |
| if (!MEM_P (op0) |
| && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0) |
| && bitsize == GET_MODE_BITSIZE (fieldmode) |
| && (optab_handler (movstrict_optab, fieldmode)->insn_code |
| != CODE_FOR_nothing)) |
| { |
| int icode = optab_handler (movstrict_optab, fieldmode)->insn_code; |
| rtx insn; |
| rtx start = get_last_insn (); |
| rtx arg0 = op0; |
| |
| /* Get appropriate low part of the value being stored. */ |
| if (GET_CODE (value) == CONST_INT || REG_P (value)) |
| value = gen_lowpart (fieldmode, value); |
| else if (!(GET_CODE (value) == SYMBOL_REF |
| || GET_CODE (value) == LABEL_REF |
| || GET_CODE (value) == CONST)) |
| value = convert_to_mode (fieldmode, value, 0); |
| |
| if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode)) |
| value = copy_to_mode_reg (fieldmode, value); |
| |
| if (GET_CODE (op0) == SUBREG) |
| { |
| /* Else we've got some float mode source being extracted into |
| a different float mode destination -- this combination of |
| subregs results in Severe Tire Damage. */ |
| gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode |
| || GET_MODE_CLASS (fieldmode) == MODE_INT |
| || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT); |
| arg0 = SUBREG_REG (op0); |
| } |
| |
| insn = (GEN_FCN (icode) |
| (gen_rtx_SUBREG (fieldmode, arg0, |
| (bitnum % BITS_PER_WORD) / BITS_PER_UNIT |
| + (offset * UNITS_PER_WORD)), |
| value)); |
| if (insn) |
| { |
| emit_insn (insn); |
| return true; |
| } |
| delete_insns_since (start); |
| } |
| |
| /* Handle fields bigger than a word. */ |
| |
| if (bitsize > BITS_PER_WORD) |
| { |
| /* Here we transfer the words of the field |
| in the order least significant first. |
| This is because the most significant word is the one which may |
| be less than full. |
| However, only do that if the value is not BLKmode. */ |
| |
| unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode; |
| unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
| unsigned int i; |
| rtx last; |
| |
| /* This is the mode we must force value to, so that there will be enough |
| subwords to extract. Note that fieldmode will often (always?) be |
| VOIDmode, because that is what store_field uses to indicate that this |
| is a bit field, but passing VOIDmode to operand_subword_force |
| is not allowed. */ |
| fieldmode = GET_MODE (value); |
| if (fieldmode == VOIDmode) |
| fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT); |
| |
| last = get_last_insn (); |
| for (i = 0; i < nwords; i++) |
| { |
| /* If I is 0, use the low-order word in both field and target; |
| if I is 1, use the next to lowest word; and so on. */ |
| unsigned int wordnum = (backwards ? nwords - i - 1 : i); |
| unsigned int bit_offset = (backwards |
| ? MAX ((int) bitsize - ((int) i + 1) |
| * BITS_PER_WORD, |
| 0) |
| : (int) i * BITS_PER_WORD); |
| rtx value_word = operand_subword_force (value, wordnum, fieldmode); |
| |
| if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD, |
| bitsize - i * BITS_PER_WORD), |
| bitnum + bit_offset, word_mode, |
| value_word, fallback_p)) |
| { |
| delete_insns_since (last); |
| return false; |
| } |
| } |
| return true; |
| } |
| |
  /* From here on we can assume that the field to be stored in fits within
     a single word of the destination, since it is shorter than a word.  */
| |
| /* OFFSET is the number of words or bytes (UNIT says which) |
| from STR_RTX to the first word or byte containing part of the field. */ |
| |
| if (!MEM_P (op0)) |
| { |
| if (offset != 0 |
| || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) |
| { |
| if (!REG_P (op0)) |
| { |
| /* Since this is a destination (lvalue), we can't copy |
| it to a pseudo. We can remove a SUBREG that does not |
| change the size of the operand. Such a SUBREG may |
| have been added above. */ |
| gcc_assert (GET_CODE (op0) == SUBREG |
| && (GET_MODE_SIZE (GET_MODE (op0)) |
| == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))))); |
| op0 = SUBREG_REG (op0); |
| } |
| op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), |
| op0, (offset * UNITS_PER_WORD)); |
| } |
| offset = 0; |
| } |
| |
| /* If VALUE has a floating-point or complex mode, access it as an |
| integer of the corresponding size. This can occur on a machine |
| with 64 bit registers that uses SFmode for float. It can also |
| occur for unaligned float or complex fields. */ |
| orig_value = value; |
| if (GET_MODE (value) != VOIDmode |
| && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT |
| && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT) |
| { |
| value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value))); |
| emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value); |
| } |
| |
| /* Now OFFSET is nonzero only if OP0 is memory |
| and is therefore always measured in bytes. */ |
| |
| if (HAVE_insv |
| && GET_MODE (value) != BLKmode |
| && bitsize > 0 |
| && GET_MODE_BITSIZE (op_mode) >= bitsize |
| && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG) |
| && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))) |
| && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize), |
| VOIDmode) |
| && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode)) |
| { |
| int xbitpos = bitpos; |
| rtx value1; |
| rtx xop0 = op0; |
| rtx last = get_last_insn (); |
| rtx pat; |
| |
| /* Add OFFSET into OP0's address. */ |
| if (MEM_P (xop0)) |
| xop0 = adjust_address (xop0, byte_mode, offset); |
| |
| /* If xop0 is a register, we need it in OP_MODE |
| to make it acceptable to the format of insv. */ |
| if (GET_CODE (xop0) == SUBREG) |
| /* We can't just change the mode, because this might clobber op0, |
| and we will need the original value of op0 if insv fails. */ |
| xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0)); |
| if (REG_P (xop0) && GET_MODE (xop0) != op_mode) |
| xop0 = gen_rtx_SUBREG (op_mode, xop0, 0); |
| |
| /* On big-endian machines, we count bits from the most significant. |
| If the bit field insn does not, we must invert. */ |
| |
| if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) |
| xbitpos = unit - bitsize - xbitpos; |
| |
| /* We have been counting XBITPOS within UNIT. |
| Count instead within the size of the register. */ |
| if (BITS_BIG_ENDIAN && !MEM_P (xop0)) |
| xbitpos += GET_MODE_BITSIZE (op_mode) - unit; |
| |
| unit = GET_MODE_BITSIZE (op_mode); |
| |
| /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */ |
| value1 = value; |
| if (GET_MODE (value) != op_mode) |
| { |
| if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize) |
| { |
| /* Optimization: Don't bother really extending VALUE |
| if it has all the bits we will actually use. However, |
| if we must narrow it, be sure we do it correctly. */ |
| |
| if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode)) |
| { |
| rtx tmp; |
| |
| tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0); |
| if (! tmp) |
| tmp = simplify_gen_subreg (op_mode, |
| force_reg (GET_MODE (value), |
| value1), |
| GET_MODE (value), 0); |
| value1 = tmp; |
| } |
| else |
| value1 = gen_lowpart (op_mode, value1); |
| } |
| else if (GET_CODE (value) == CONST_INT) |
| value1 = gen_int_mode (INTVAL (value), op_mode); |
| else |
| /* Parse phase is supposed to make VALUE's data type |
| match that of the component reference, which is a type |
| at least as wide as the field; so VALUE should have |
| a mode that corresponds to that type. */ |
| gcc_assert (CONSTANT_P (value)); |
| } |
| |
| /* If this machine's insv insists on a register, |
| get VALUE1 into a register. */ |
| if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate) |
| (value1, op_mode))) |
| value1 = force_reg (op_mode, value1); |
| |
| pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1); |
| if (pat) |
| { |
| emit_insn (pat); |
| |
| /* If the mode of the insertion is wider than the mode of the |
| target register we created a paradoxical subreg for the |
| target. Truncate the paradoxical subreg of the target to |
| itself properly. */ |
| if (!TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (op0)), |
| GET_MODE_BITSIZE (op_mode)) |
| && (REG_P (xop0) |
| || GET_CODE (xop0) == SUBREG)) |
| convert_move (op0, xop0, true); |
| return true; |
| } |
| delete_insns_since (last); |
| } |
| |
| /* If OP0 is a memory, try copying it to a register and seeing if a |
| cheap register alternative is available. */ |
| if (HAVE_insv && MEM_P (op0)) |
| { |
| enum machine_mode bestmode; |
| |
| /* Get the mode to use for inserting into this field. If OP0 is |
| BLKmode, get the smallest mode consistent with the alignment. If |
| OP0 is a non-BLKmode object that is no wider than OP_MODE, use its |
| mode. Otherwise, use the smallest mode containing the field. */ |
| |
| if (GET_MODE (op0) == BLKmode |
| || (op_mode != MAX_MACHINE_MODE |
| && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode))) |
| bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), |
| (op_mode == MAX_MACHINE_MODE |
| ? VOIDmode : op_mode), |
| MEM_VOLATILE_P (op0)); |
| else |
| bestmode = GET_MODE (op0); |
| |
| if (bestmode != VOIDmode |
| && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode) |
| && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) |
| && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) |
| { |
| rtx last, tempreg, xop0; |
| unsigned HOST_WIDE_INT xoffset, xbitpos; |
| |
| last = get_last_insn (); |
| |
| /* Adjust address to point to the containing unit of |
| that mode. Compute the offset as a multiple of this unit, |
| counting in bytes. */ |
| unit = GET_MODE_BITSIZE (bestmode); |
| xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); |
| xbitpos = bitnum % unit; |
| xop0 = adjust_address (op0, bestmode, xoffset); |
| |
| /* Fetch that unit, store the bitfield in it, then store |
| the unit. */ |
| tempreg = copy_to_reg (xop0); |
| if (store_bit_field_1 (tempreg, bitsize, xbitpos, |
| fieldmode, orig_value, false)) |
| { |
| emit_move_insn (xop0, tempreg); |
| return true; |
| } |
| delete_insns_since (last); |
| } |
| } |
| |
| if (!fallback_p) |
| return false; |
| |
| store_fixed_bit_field (op0, offset, bitsize, bitpos, value); |
| return true; |
| } |
| |
| /* Generate code to store value from rtx VALUE |
| into a bit-field within structure STR_RTX |
| containing BITSIZE bits starting at bit BITNUM. |
| FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */ |
| |
| void |
| store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, |
| unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode, |
| rtx value) |
| { |
| if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true)) |
| gcc_unreachable (); |
| } |
| |
| /* Use shifts and boolean operations to store VALUE |
| into a bit field of width BITSIZE |
   in the memory location specified by OP0, offset by OFFSET bytes.
| (OFFSET must be 0 if OP0 is a register.) |
| The field starts at position BITPOS within the byte. |
| (If OP0 is a register, it may be a full word or a narrower mode, |
| but BITPOS still counts within a full word, |
| which is significant on bigendian machines.) */ |
| |
| static void |
| store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset, |
| unsigned HOST_WIDE_INT bitsize, |
| unsigned HOST_WIDE_INT bitpos, rtx value) |
| { |
| enum machine_mode mode; |
| unsigned int total_bits = BITS_PER_WORD; |
| rtx temp; |
| int all_zero = 0; |
| int all_one = 0; |
| |
| /* There is a case not handled here: |
| a structure with a known alignment of just a halfword |
| and a field split across two aligned halfwords within the structure. |
| Or likewise a structure with a known alignment of just a byte |
| and a field split across two bytes. |
| Such cases are not supposed to be able to occur. */ |
| |
| if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
| { |
| gcc_assert (!offset); |
| /* Special treatment for a bit field split across two registers. */ |
| if (bitsize + bitpos > BITS_PER_WORD) |
| { |
| store_split_bit_field (op0, bitsize, bitpos, value); |
| return; |
| } |
| } |
| else |
| { |
| /* Get the proper mode to use for this field. We want a mode that |
| includes the entire field. If such a mode would be larger than |
| a word, we won't be doing the extraction the normal way. |
| We don't want a mode bigger than the destination. */ |
| |
| mode = GET_MODE (op0); |
| if (GET_MODE_BITSIZE (mode) == 0 |
| || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode)) |
| mode = word_mode; |
| mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, |
| MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0)); |
| |
| if (mode == VOIDmode) |
| { |
| /* The only way this should occur is if the field spans word |
| boundaries. */ |
| store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT, |
| value); |
| return; |
| } |
| |
| total_bits = GET_MODE_BITSIZE (mode); |
| |
| /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to |
| be in the range 0 to total_bits-1, and put any excess bytes in |
| OFFSET. */ |
| if (bitpos >= total_bits) |
| { |
| offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT); |
| bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT) |
| * BITS_PER_UNIT); |
| } |
| |
| /* Get ref to an aligned byte, halfword, or word containing the field. |
| Adjust BITPOS to be position within a word, |
| and OFFSET to be the offset of that word. |
| Then alter OP0 to refer to that word. */ |
| bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT; |
| offset -= (offset % (total_bits / BITS_PER_UNIT)); |
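      /* As a worked example: suppose total_bits == 32 and we arrive here
         with OFFSET == 5 and BITPOS == 37.  The excess-bit adjustment
         above yields OFFSET == 9, BITPOS == 5, and the realignment just
         above then yields BITPOS == 13, OFFSET == 8, so the field is
         addressed within the aligned 32-bit unit at byte 8.  */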
| op0 = adjust_address (op0, mode, offset); |
| } |
| |
| mode = GET_MODE (op0); |
| |
| /* Now MODE is either some integral mode for a MEM as OP0, |
| or is a full-word for a REG as OP0. TOTAL_BITS corresponds. |
| The bit field is contained entirely within OP0. |
| BITPOS is the starting bit number within OP0. |
| (OP0's mode may actually be narrower than MODE.) */ |
| |
| if (BYTES_BIG_ENDIAN) |
| /* BITPOS is the distance between our msb |
| and that of the containing datum. |
| Convert it to the distance from the lsb. */ |
| bitpos = total_bits - bitsize - bitpos; |
| |
| /* Now BITPOS is always the distance between our lsb |
| and that of OP0. */ |
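  /* For example, with total_bits == 32, bitsize == 8 and a big-endian
     BITPOS of 4 (counted from the msb), the field occupies bits 20..27
     from the lsb, so BITPOS becomes 32 - 8 - 4 == 20.  */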
| |
| /* Shift VALUE left by BITPOS bits. If VALUE is not constant, |
| we must first convert its mode to MODE. */ |
| |
| if (GET_CODE (value) == CONST_INT) |
| { |
| HOST_WIDE_INT v = INTVAL (value); |
| |
| if (bitsize < HOST_BITS_PER_WIDE_INT) |
| v &= ((HOST_WIDE_INT) 1 << bitsize) - 1; |
| |
| if (v == 0) |
| all_zero = 1; |
| else if ((bitsize < HOST_BITS_PER_WIDE_INT |
| && v == ((HOST_WIDE_INT) 1 << bitsize) - 1) |
| || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1)) |
| all_one = 1; |
| |
| value = lshift_value (mode, value, bitpos, bitsize); |
| } |
| else |
| { |
| int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize |
| && bitpos + bitsize != GET_MODE_BITSIZE (mode)); |
| |
| if (GET_MODE (value) != mode) |
| value = convert_to_mode (mode, value, 1); |
| |
| if (must_and) |
| value = expand_binop (mode, and_optab, value, |
| mask_rtx (mode, 0, bitsize, 0), |
| NULL_RTX, 1, OPTAB_LIB_WIDEN); |
| if (bitpos > 0) |
| value = expand_shift (LSHIFT_EXPR, mode, value, |
| build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1); |
| } |
| |
| /* Now clear the chosen bits in OP0, |
| except that if VALUE is -1 we need not bother. */ |
| /* We keep the intermediates in registers to allow CSE to combine |
| consecutive bitfield assignments. */ |
| |
| temp = force_reg (mode, op0); |
| |
| if (! all_one) |
| { |
| temp = expand_binop (mode, and_optab, temp, |
| mask_rtx (mode, bitpos, bitsize, 1), |
| NULL_RTX, 1, OPTAB_LIB_WIDEN); |
| temp = force_reg (mode, temp); |
| } |
| |
| /* Now logical-or VALUE into OP0, unless it is zero. */ |
| |
| if (! all_zero) |
| { |
| temp = expand_binop (mode, ior_optab, temp, value, |
| NULL_RTX, 1, OPTAB_LIB_WIDEN); |
| temp = force_reg (mode, temp); |
| } |
| |
| if (op0 != temp) |
| { |
| op0 = copy_rtx (op0); |
| emit_move_insn (op0, temp); |
| } |
| } |
| |
| /* Store a bit field that is split across multiple accessible memory objects. |
| |
| OP0 is the REG, SUBREG or MEM rtx for the first of the objects. |
| BITSIZE is the field width; BITPOS the position of its first bit |
| (within the word). |
| VALUE is the value to store. |
| |
| This does not yet handle fields wider than BITS_PER_WORD. */ |
| |
| static void |
| store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
| unsigned HOST_WIDE_INT bitpos, rtx value) |
| { |
| unsigned int unit; |
| unsigned int bitsdone = 0; |
| |
  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
| if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
| unit = BITS_PER_WORD; |
| else |
| unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
| |
| /* If VALUE is a constant other than a CONST_INT, get it into a register in |
| WORD_MODE. If we can do this using gen_lowpart_common, do so. Note |
| that VALUE might be a floating-point constant. */ |
| if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT) |
| { |
| rtx word = gen_lowpart_common (word_mode, value); |
| |
| if (word && (value != word)) |
| value = word; |
| else |
| value = gen_lowpart_common (word_mode, |
| force_reg (GET_MODE (value) != VOIDmode |
| ? GET_MODE (value) |
| : word_mode, value)); |
| } |
| |
| while (bitsdone < bitsize) |
| { |
| unsigned HOST_WIDE_INT thissize; |
| rtx part, word; |
| unsigned HOST_WIDE_INT thispos; |
| unsigned HOST_WIDE_INT offset; |
| |
| offset = (bitpos + bitsdone) / unit; |
| thispos = (bitpos + bitsdone) % unit; |
| |
| /* THISSIZE must not overrun a word boundary. Otherwise, |
| store_fixed_bit_field will call us again, and we will mutually |
| recurse forever. */ |
| thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); |
| thissize = MIN (thissize, unit - thispos); |
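      /* For example, storing a 13-bit field at BITPOS 6 into a byte-aligned
         MEM (UNIT == 8) takes three iterations: 2 bits at position 6, then
         a full 8-bit unit, then the remaining 3 bits.  */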
| |
| if (BYTES_BIG_ENDIAN) |
| { |
| int total_bits; |
| |
| /* We must do an endian conversion exactly the same way as it is |
| done in extract_bit_field, so that the two calls to |
| extract_fixed_bit_field will have comparable arguments. */ |
| if (!MEM_P (value) || GET_MODE (value) == BLKmode) |
| total_bits = BITS_PER_WORD; |
| else |
| total_bits = GET_MODE_BITSIZE (GET_MODE (value)); |
| |
| /* Fetch successively less significant portions. */ |
| if (GET_CODE (value) == CONST_INT) |
| part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
| >> (bitsize - bitsdone - thissize)) |
| & (((HOST_WIDE_INT) 1 << thissize) - 1)); |
| else |
| /* The args are chosen so that the last part includes the |
| lsb. Give extract_bit_field the value it needs (with |
| endianness compensation) to fetch the piece we want. */ |
| part = extract_fixed_bit_field (word_mode, value, 0, thissize, |
| total_bits - bitsize + bitsdone, |
| NULL_RTX, 1); |
| } |
| else |
| { |
| /* Fetch successively more significant portions. */ |
| if (GET_CODE (value) == CONST_INT) |
| part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value)) |
| >> bitsdone) |
| & (((HOST_WIDE_INT) 1 << thissize) - 1)); |
| else |
| part = extract_fixed_bit_field (word_mode, value, 0, thissize, |
| bitsdone, NULL_RTX, 1); |
| } |
| |
| /* If OP0 is a register, then handle OFFSET here. |
| |
| When handling multiword bitfields, extract_bit_field may pass |
| down a word_mode SUBREG of a larger REG for a bitfield that actually |
| crosses a word boundary. Thus, for a SUBREG, we must find |
| the current word starting from the base register. */ |
| if (GET_CODE (op0) == SUBREG) |
| { |
| int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; |
| word = operand_subword_force (SUBREG_REG (op0), word_offset, |
| GET_MODE (SUBREG_REG (op0))); |
| offset = 0; |
| } |
| else if (REG_P (op0)) |
| { |
| word = operand_subword_force (op0, offset, GET_MODE (op0)); |
| offset = 0; |
| } |
| else |
| word = op0; |
| |
| /* OFFSET is in UNITs, and UNIT is in bits. |
| store_fixed_bit_field wants offset in bytes. */ |
| store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize, |
| thispos, part); |
| bitsdone += thissize; |
| } |
| } |
| |
| /* A subroutine of extract_bit_field_1 that converts return value X |
| to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments |
| to extract_bit_field. */ |
| |
| static rtx |
| convert_extracted_bit_field (rtx x, enum machine_mode mode, |
| enum machine_mode tmode, bool unsignedp) |
| { |
| if (GET_MODE (x) == tmode || GET_MODE (x) == mode) |
| return x; |
| |
  /* If TMODE is not a scalar integral mode, first convert X to an
     integer mode of the same size and then access the result as a
     value of mode TMODE (typically floating-point) via a lowpart SUBREG.  */
| if (!SCALAR_INT_MODE_P (tmode)) |
| { |
| enum machine_mode smode; |
| |
| smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0); |
| x = convert_to_mode (smode, x, unsignedp); |
| x = force_reg (smode, x); |
| return gen_lowpart (tmode, x); |
| } |
| |
| return convert_to_mode (tmode, x, unsignedp); |
| } |
| |
| /* A subroutine of extract_bit_field, with the same arguments. |
| If FALLBACK_P is true, fall back to extract_fixed_bit_field |
| if we can find no other means of implementing the operation. |
   If FALLBACK_P is false, return NULL instead.  */
| |
| static rtx |
| extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, |
| unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |
| enum machine_mode mode, enum machine_mode tmode, |
| bool fallback_p) |
| { |
| unsigned int unit |
| = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD; |
| unsigned HOST_WIDE_INT offset, bitpos; |
| rtx op0 = str_rtx; |
| enum machine_mode int_mode; |
| enum machine_mode ext_mode; |
| enum machine_mode mode1; |
| enum insn_code icode; |
| int byte_offset; |
| |
| if (tmode == VOIDmode) |
| tmode = mode; |
| |
| while (GET_CODE (op0) == SUBREG) |
| { |
| bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT; |
| op0 = SUBREG_REG (op0); |
| } |
| |
| /* If we have an out-of-bounds access to a register, just return an |
| uninitialized register of the required mode. This can occur if the |
| source code contains an out-of-bounds access to a small array. */ |
| if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0))) |
| return gen_reg_rtx (tmode); |
| |
| if (REG_P (op0) |
| && mode == GET_MODE (op0) |
| && bitnum == 0 |
| && bitsize == GET_MODE_BITSIZE (GET_MODE (op0))) |
| { |
| /* We're trying to extract a full register from itself. */ |
| return op0; |
| } |
| |
| /* See if we can get a better vector mode before extracting. */ |
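  /* For example (illustrative only; the modes depend on the target),
     extracting an SFmode element from a value currently held in V4SImode
     will retry the extraction in V4SFmode when the target supports it.  */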
| if (VECTOR_MODE_P (GET_MODE (op0)) |
| && !MEM_P (op0) |
| && GET_MODE_INNER (GET_MODE (op0)) != tmode) |
| { |
| enum machine_mode new_mode; |
| int nunits = GET_MODE_NUNITS (GET_MODE (op0)); |
| |
| if (GET_MODE_CLASS (tmode) == MODE_FLOAT) |
| new_mode = MIN_MODE_VECTOR_FLOAT; |
| else if (GET_MODE_CLASS (tmode) == MODE_FRACT) |
| new_mode = MIN_MODE_VECTOR_FRACT; |
| else if (GET_MODE_CLASS (tmode) == MODE_UFRACT) |
| new_mode = MIN_MODE_VECTOR_UFRACT; |
| else if (GET_MODE_CLASS (tmode) == MODE_ACCUM) |
| new_mode = MIN_MODE_VECTOR_ACCUM; |
| else if (GET_MODE_CLASS (tmode) == MODE_UACCUM) |
| new_mode = MIN_MODE_VECTOR_UACCUM; |
| else |
| new_mode = MIN_MODE_VECTOR_INT; |
| |
| for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode)) |
| if (GET_MODE_NUNITS (new_mode) == nunits |
| && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0)) |
| && targetm.vector_mode_supported_p (new_mode)) |
| break; |
| if (new_mode != VOIDmode) |
| op0 = gen_lowpart (new_mode, op0); |
| } |
| |
| /* Use vec_extract patterns for extracting parts of vectors whenever |
| available. */ |
| if (VECTOR_MODE_P (GET_MODE (op0)) |
| && !MEM_P (op0) |
| && (optab_handler (vec_extract_optab, GET_MODE (op0))->insn_code |
| != CODE_FOR_nothing) |
| && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))) |
| == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0))))) |
| { |
| enum machine_mode outermode = GET_MODE (op0); |
| enum machine_mode innermode = GET_MODE_INNER (outermode); |
| int icode = (int) optab_handler (vec_extract_optab, outermode)->insn_code; |
| unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode); |
| rtx rtxpos = GEN_INT (pos); |
| rtx src = op0; |
| rtx dest = NULL, pat, seq; |
| enum machine_mode mode0 = insn_data[icode].operand[0].mode; |
| enum machine_mode mode1 = insn_data[icode].operand[1].mode; |
| enum machine_mode mode2 = insn_data[icode].operand[2].mode; |
| |
| if (innermode == tmode || innermode == mode) |
| dest = target; |
| |
| if (!dest) |
| dest = gen_reg_rtx (innermode); |
| |
| start_sequence (); |
| |
| if (! (*insn_data[icode].operand[0].predicate) (dest, mode0)) |
| dest = copy_to_mode_reg (mode0, dest); |
| |
| if (! (*insn_data[icode].operand[1].predicate) (src, mode1)) |
| src = copy_to_mode_reg (mode1, src); |
| |
      if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
        rtxpos = copy_to_mode_reg (mode2, rtxpos);
| |
| /* We could handle this, but we should always be called with a pseudo |
| for our targets and all insns should take them as outputs. */ |
| gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0) |
| && (*insn_data[icode].operand[1].predicate) (src, mode1) |
| && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2)); |
| |
| pat = GEN_FCN (icode) (dest, src, rtxpos); |
| seq = get_insns (); |
| end_sequence (); |
| if (pat) |
| { |
| emit_insn (seq); |
| emit_insn (pat); |
| if (mode0 != mode) |
| return gen_lowpart (tmode, dest); |
| return dest; |
| } |
| } |
| |
| /* Make sure we are playing with integral modes. Pun with subregs |
| if we aren't. */ |
| { |
| enum machine_mode imode = int_mode_for_mode (GET_MODE (op0)); |
| if (imode != GET_MODE (op0)) |
| { |
| if (MEM_P (op0)) |
| op0 = adjust_address (op0, imode, 0); |
| else if (imode != BLKmode) |
| { |
| op0 = gen_lowpart (imode, op0); |
| |
| /* If we got a SUBREG, force it into a register since we |
| aren't going to be able to do another SUBREG on it. */ |
| if (GET_CODE (op0) == SUBREG) |
| op0 = force_reg (imode, op0); |
| } |
| else if (REG_P (op0)) |
| { |
| rtx reg, subreg; |
| imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)), |
| MODE_INT); |
| reg = gen_reg_rtx (imode); |
| subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg); |
| emit_move_insn (subreg, op0); |
| op0 = reg; |
| bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT; |
| } |
| else |
| { |
| rtx mem = assign_stack_temp (GET_MODE (op0), |
| GET_MODE_SIZE (GET_MODE (op0)), 0); |
| emit_move_insn (mem, op0); |
| op0 = adjust_address (mem, BLKmode, 0); |
| } |
| } |
| } |
| |
| /* We may be accessing data outside the field, which means |
| we can alias adjacent data. */ |
| if (MEM_P (op0)) |
| { |
| op0 = shallow_copy_rtx (op0); |
| set_mem_alias_set (op0, 0); |
| set_mem_expr (op0, 0); |
| } |
| |
| /* Extraction of a full-word or multi-word value from a structure |
| in a register or aligned memory can be done with just a SUBREG. |
| A subword value in the least significant part of a register |
| can also be extracted with a SUBREG. For this, we need the |
| byte offset of the value in op0. */ |
| |
| bitpos = bitnum % unit; |
| offset = bitnum / unit; |
| byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD; |
| |
| /* If OP0 is a register, BITPOS must count within a word. |
| But as we have it, it counts within whatever size OP0 now has. |
| On a bigendian machine, these are not the same, so convert. */ |
| if (BYTES_BIG_ENDIAN |
| && !MEM_P (op0) |
| && unit > GET_MODE_BITSIZE (GET_MODE (op0))) |
| bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0)); |
| |
| /* ??? We currently assume TARGET is at least as big as BITSIZE. |
| If that's wrong, the solution is to test for it and set TARGET to 0 |
| if needed. */ |
| |
| /* Only scalar integer modes can be converted via subregs. There is an |
| additional problem for FP modes here in that they can have a precision |
| which is different from the size. mode_for_size uses precision, but |
| we want a mode based on the size, so we must avoid calling it for FP |
| modes. */ |
| mode1 = (SCALAR_INT_MODE_P (tmode) |
| ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) |
| : mode); |
| |
| if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode) |
| && bitpos % BITS_PER_WORD == 0) |
| || (mode1 != BLKmode |
| /* ??? The big endian test here is wrong. This is correct |
| if the value is in a register, and if mode_for_size is not |
| the same mode as op0. This causes us to get unnecessarily |
| inefficient code from the Thumb port when -mbig-endian. */ |
| && (BYTES_BIG_ENDIAN |
| ? bitpos + bitsize == BITS_PER_WORD |
| : bitpos == 0))) |
| && ((!MEM_P (op0) |
| && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1), |
| GET_MODE_BITSIZE (GET_MODE (op0))) |
| && GET_MODE_SIZE (mode1) != 0 |
| && byte_offset % GET_MODE_SIZE (mode1) == 0) |
| || (MEM_P (op0) |
| && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0)) |
| || (offset * BITS_PER_UNIT % bitsize == 0 |
| && MEM_ALIGN (op0) % bitsize == 0))))) |
| { |
| if (MEM_P (op0)) |
| op0 = adjust_address (op0, mode1, offset); |
| else if (mode1 != GET_MODE (op0)) |
| { |
| rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0), |
| byte_offset); |
| if (sub == NULL) |
| goto no_subreg_mode_swap; |
| op0 = sub; |
| } |
| if (mode1 != mode) |
| return convert_to_mode (tmode, op0, unsignedp); |
| return op0; |
| } |
| no_subreg_mode_swap: |
| |
| /* Handle fields bigger than a word. */ |
| |
| if (bitsize > BITS_PER_WORD) |
| { |
| /* Here we transfer the words of the field |
| in the order least significant first. |
| This is because the most significant word is the one which may |
| be less than full. */ |
| |
| unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD; |
| unsigned int i; |
| |
| if (target == 0 || !REG_P (target)) |
| target = gen_reg_rtx (mode); |
| |
| /* Indicate for flow that the entire target reg is being set. */ |
| emit_clobber (target); |
| |
| for (i = 0; i < nwords; i++) |
| { |
| /* If I is 0, use the low-order word in both field and target; |
| if I is 1, use the next to lowest word; and so on. */ |
| /* Word number in TARGET to use. */ |
| unsigned int wordnum |
| = (WORDS_BIG_ENDIAN |
| ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1 |
| : i); |
| /* Offset from start of field in OP0. */ |
| unsigned int bit_offset = (WORDS_BIG_ENDIAN |
| ? MAX (0, ((int) bitsize - ((int) i + 1) |
| * (int) BITS_PER_WORD)) |
| : (int) i * BITS_PER_WORD); |
| rtx target_part = operand_subword (target, wordnum, 1, VOIDmode); |
| rtx result_part |
| = extract_bit_field (op0, MIN (BITS_PER_WORD, |
| bitsize - i * BITS_PER_WORD), |
| bitnum + bit_offset, 1, target_part, mode, |
| word_mode); |
| |
| gcc_assert (target_part); |
| |
| if (result_part != target_part) |
| emit_move_insn (target_part, result_part); |
| } |
| |
| if (unsignedp) |
| { |
| /* Unless we've filled TARGET, the upper regs in a multi-reg value |
| need to be zero'd out. */ |
| if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD) |
| { |
| unsigned int i, total_words; |
| |
| total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD; |
| for (i = nwords; i < total_words; i++) |
| emit_move_insn |
| (operand_subword (target, |
| WORDS_BIG_ENDIAN ? total_words - i - 1 : i, |
| 1, VOIDmode), |
| const0_rtx); |
| } |
| return target; |
| } |
| |
| /* Signed bit field: sign-extend with two arithmetic shifts. */ |
| target = expand_shift (LSHIFT_EXPR, mode, target, |
| build_int_cst (NULL_TREE, |
| GET_MODE_BITSIZE (mode) - bitsize), |
| NULL_RTX, 0); |
| return expand_shift (RSHIFT_EXPR, mode, target, |
| build_int_cst (NULL_TREE, |
| GET_MODE_BITSIZE (mode) - bitsize), |
| NULL_RTX, 0); |
| } |
| |
| /* From here on we know the desired field is smaller than a word. */ |
| |
| /* Check if there is a correspondingly-sized integer field, so we can |
| safely extract it as one size of integer, if necessary; then |
| truncate or extend to the size that is wanted; then use SUBREGs or |
| convert_to_mode to get one of the modes we really wanted. */ |
| |
| int_mode = int_mode_for_mode (tmode); |
| if (int_mode == BLKmode) |
| int_mode = int_mode_for_mode (mode); |
| /* Should probably push op0 out to memory and then do a load. */ |
| gcc_assert (int_mode != BLKmode); |
| |
| /* OFFSET is the number of words or bytes (UNIT says which) |
| from STR_RTX to the first word or byte containing part of the field. */ |
| if (!MEM_P (op0)) |
| { |
| if (offset != 0 |
| || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD) |
| { |
| if (!REG_P (op0)) |
| op0 = copy_to_reg (op0); |
| op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0), |
| op0, (offset * UNITS_PER_WORD)); |
| } |
| offset = 0; |
| } |
| |
| /* Now OFFSET is nonzero only for memory operands. */ |
| ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0); |
| icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv; |
| if (ext_mode != MAX_MACHINE_MODE |
| && bitsize > 0 |
| && GET_MODE_BITSIZE (ext_mode) >= bitsize |
| /* If op0 is a register, we need it in EXT_MODE to make it |
| acceptable to the format of ext(z)v. */ |
| && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode) |
| && !((REG_P (op0) || GET_CODE (op0) == SUBREG) |
| && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))) |
| && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0))) |
| { |
| unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset; |
| rtx bitsize_rtx, bitpos_rtx; |
| rtx last = get_last_insn (); |
| rtx xop0 = op0; |
| rtx xtarget = target; |
| rtx xspec_target = target; |
| rtx xspec_target_subreg = 0; |
| rtx pat; |
| |
| /* If op0 is a register, we need it in EXT_MODE to make it |
| acceptable to the format of ext(z)v. */ |
| if (REG_P (xop0) && GET_MODE (xop0) != ext_mode) |
| xop0 = gen_rtx_SUBREG (ext_mode, xop0, 0); |
| if (MEM_P (xop0)) |
| /* Get ref to first byte containing part of the field. */ |
| xop0 = adjust_address (xop0, byte_mode, xoffset); |
| |
| /* On big-endian machines, we count bits from the most significant. |
| If the bit field insn does not, we must invert. */ |
| if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN) |
| xbitpos = unit - bitsize - xbitpos; |
| |
| /* Now convert from counting within UNIT to counting in EXT_MODE. */ |
| if (BITS_BIG_ENDIAN && !MEM_P (xop0)) |
| xbitpos += GET_MODE_BITSIZE (ext_mode) - unit; |
| |
| unit = GET_MODE_BITSIZE (ext_mode); |
| |
| if (xtarget == 0) |
| xtarget = xspec_target = gen_reg_rtx (tmode); |
| |
| if (GET_MODE (xtarget) != ext_mode) |
| { |
| /* Don't use LHS paradoxical subreg if explicit truncation is needed |
             between the mode of the extraction (ext_mode) and the target
| mode. Instead, create a temporary and use convert_move to set |
| the target. */ |
| if (REG_P (xtarget) |
| && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)), |
| GET_MODE_BITSIZE (ext_mode))) |
| { |
| xtarget = gen_lowpart (ext_mode, xtarget); |
| if (GET_MODE_SIZE (ext_mode) |
| > GET_MODE_SIZE (GET_MODE (xspec_target))) |
| xspec_target_subreg = xtarget; |
| } |
| else |
| xtarget = gen_reg_rtx (ext_mode); |
| } |
| |
| /* If this machine's ext(z)v insists on a register target, |
| make sure we have one. */ |
| if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode)) |
| xtarget = gen_reg_rtx (ext_mode); |
| |
| bitsize_rtx = GEN_INT (bitsize); |
| bitpos_rtx = GEN_INT (xbitpos); |
| |
| pat = (unsignedp |
| ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx) |
| : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx)); |
| if (pat) |
| { |
| emit_insn (pat); |
| if (xtarget == xspec_target) |
| return xtarget; |
| if (xtarget == xspec_target_subreg) |
| return xspec_target; |
| return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp); |
| } |
| delete_insns_since (last); |
| } |
| |
| /* If OP0 is a memory, try copying it to a register and seeing if a |
| cheap register alternative is available. */ |
| if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0)) |
| { |
| enum machine_mode bestmode; |
| |
| /* Get the mode to use for inserting into this field. If |
| OP0 is BLKmode, get the smallest mode consistent with the |
| alignment. If OP0 is a non-BLKmode object that is no |
| wider than EXT_MODE, use its mode. Otherwise, use the |
| smallest mode containing the field. */ |
| |
| if (GET_MODE (op0) == BLKmode |
| || (ext_mode != MAX_MACHINE_MODE |
| && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode))) |
| bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), |
| (ext_mode == MAX_MACHINE_MODE |
| ? VOIDmode : ext_mode), |
| MEM_VOLATILE_P (op0)); |
| else |
| bestmode = GET_MODE (op0); |
| |
| if (bestmode != VOIDmode |
| && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0)) |
| && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0))) |
| { |
| unsigned HOST_WIDE_INT xoffset, xbitpos; |
| |
| /* Compute the offset as a multiple of this unit, |
| counting in bytes. */ |
| unit = GET_MODE_BITSIZE (bestmode); |
| xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode); |
| xbitpos = bitnum % unit; |
| |
| /* Make sure the register is big enough for the whole field. */ |
| if (xoffset * BITS_PER_UNIT + unit |
| >= offset * BITS_PER_UNIT + bitsize) |
| { |
| rtx last, result, xop0; |
| |
| last = get_last_insn (); |
| |
| /* Fetch it to a register in that size. */ |
| xop0 = adjust_address (op0, bestmode, xoffset); |
| xop0 = force_reg (bestmode, xop0); |
| result = extract_bit_field_1 (xop0, bitsize, xbitpos, |
| unsignedp, target, |
| mode, tmode, false); |
| if (result) |
| return result; |
| |
| delete_insns_since (last); |
| } |
| } |
| } |
| |
| if (!fallback_p) |
| return NULL; |
| |
| target = extract_fixed_bit_field (int_mode, op0, offset, bitsize, |
| bitpos, target, unsignedp); |
| return convert_extracted_bit_field (target, mode, tmode, unsignedp); |
| } |
| |
/* Generate code to extract a bit-field from STR_RTX
| containing BITSIZE bits, starting at BITNUM, |
| and put it in TARGET if possible (if TARGET is nonzero). |
| Regardless of TARGET, we return the rtx for where the value is placed. |
| |
   STR_RTX is the structure containing the field (a REG or MEM).
| UNSIGNEDP is nonzero if this is an unsigned bit field. |
| MODE is the natural mode of the field value once extracted. |
| TMODE is the mode the caller would like the value to have; |
| but the value may be returned with type MODE instead. |
| |
| If a TARGET is specified and we can store in it at no extra cost, |
| we do so, and return TARGET. |
| Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred |
| if they are equally easy. */ |
| |
| rtx |
| extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize, |
| unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target, |
| enum machine_mode mode, enum machine_mode tmode) |
| { |
| return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, |
| target, mode, tmode, true); |
| } |
| |
/* Extract a bit field using shifts and boolean operations.
| Returns an rtx to represent the value. |
| OP0 addresses a register (word) or memory (byte). |
| BITPOS says which bit within the word or byte the bit field starts in. |
| OFFSET says how many bytes farther the bit field starts; |
| it is 0 if OP0 is a register. |
| BITSIZE says how many bits long the bit field is. |
| (If OP0 is a register, it may be narrower than a full word, |
| but BITPOS still counts within a full word, |
| which is significant on bigendian machines.) |
| |
| UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value). |
| If TARGET is nonzero, attempts to store the value there |
| and return TARGET, but this is not guaranteed. |
| If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */ |
| |
| static rtx |
| extract_fixed_bit_field (enum machine_mode tmode, rtx op0, |
| unsigned HOST_WIDE_INT offset, |
| unsigned HOST_WIDE_INT bitsize, |
| unsigned HOST_WIDE_INT bitpos, rtx target, |
| int unsignedp) |
| { |
| unsigned int total_bits = BITS_PER_WORD; |
| enum machine_mode mode; |
| |
| if (GET_CODE (op0) == SUBREG || REG_P (op0)) |
| { |
| /* Special treatment for a bit field split across two registers. */ |
| if (bitsize + bitpos > BITS_PER_WORD) |
| return extract_split_bit_field (op0, bitsize, bitpos, unsignedp); |
| } |
| else |
| { |
| /* Get the proper mode to use for this field. We want a mode that |
| includes the entire field. If such a mode would be larger than |
| a word, we won't be doing the extraction the normal way. */ |
| |
| mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, |
| MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0)); |
| |
| if (mode == VOIDmode) |
| /* The only way this should occur is if the field spans word |
| boundaries. */ |
| return extract_split_bit_field (op0, bitsize, |
| bitpos + offset * BITS_PER_UNIT, |
| unsignedp); |
| |
| total_bits = GET_MODE_BITSIZE (mode); |
| |
| /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to |
| be in the range 0 to total_bits-1, and put any excess bytes in |
| OFFSET. */ |
| if (bitpos >= total_bits) |
| { |
| offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT); |
| bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT) |
| * BITS_PER_UNIT); |
| } |
| |
| /* Get ref to an aligned byte, halfword, or word containing the field. |
| Adjust BITPOS to be position within a word, |
| and OFFSET to be the offset of that word. |
| Then alter OP0 to refer to that word. */ |
| bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT; |
| offset -= (offset % (total_bits / BITS_PER_UNIT)); |
| op0 = adjust_address (op0, mode, offset); |
| } |
| |
| mode = GET_MODE (op0); |
| |
| if (BYTES_BIG_ENDIAN) |
| /* BITPOS is the distance between our msb and that of OP0. |
| Convert it to the distance from the lsb. */ |
| bitpos = total_bits - bitsize - bitpos; |
| |
| /* Now BITPOS is always the distance between the field's lsb and that of OP0. |
| We have reduced the big-endian case to the little-endian case. */ |
| |
| if (unsignedp) |
| { |
| if (bitpos) |
| { |
| /* If the field does not already start at the lsb, |
| shift it so it does. */ |
| tree amount = build_int_cst (NULL_TREE, bitpos); |
| /* Maybe propagate the target for the shift. */ |
| /* But not if we will return it--could confuse integrate.c. */ |
| rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
| if (tmode != mode) subtarget = 0; |
| op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1); |
| } |
| /* Convert the value to the desired mode. */ |
| if (mode != tmode) |
| op0 = convert_to_mode (tmode, op0, 1); |
| |
| /* Unless the msb of the field used to be the msb when we shifted, |
| mask out the upper bits. */ |
| |
| if (GET_MODE_BITSIZE (mode) != bitpos + bitsize) |
| return expand_binop (GET_MODE (op0), and_optab, op0, |
| mask_rtx (GET_MODE (op0), 0, bitsize, 0), |
| target, 1, OPTAB_LIB_WIDEN); |
| return op0; |
| } |
| |
| /* To extract a signed bit-field, first shift its msb to the msb of the word, |
| then arithmetic-shift its lsb to the lsb of the word. */ |
| op0 = force_reg (mode, op0); |
| if (mode != tmode) |
| target = 0; |
| |
| /* Find the narrowest integer mode that contains the field. */ |
| |
| for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode; |
| mode = GET_MODE_WIDER_MODE (mode)) |
| if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos) |
| { |
| op0 = convert_to_mode (mode, op0, 0); |
| break; |
| } |
| |
| if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos)) |
| { |
| tree amount |
| = build_int_cst (NULL_TREE, |
| GET_MODE_BITSIZE (mode) - (bitsize + bitpos)); |
| /* Maybe propagate the target for the shift. */ |
| rtx subtarget = (target != 0 && REG_P (target) ? target : 0); |
| op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1); |
| } |
| |
| return expand_shift (RSHIFT_EXPR, mode, op0, |
| build_int_cst (NULL_TREE, |
| GET_MODE_BITSIZE (mode) - bitsize), |
| target, 0); |
| } |
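| |
| /* Illustrative sketch (not part of the RTL machinery above): the same |
|    shift-and-mask arithmetic that extract_fixed_bit_field emits, written |
|    out for a 32-bit word with lsb-relative BITPOS.  It assumes the usual |
|    arithmetic right shift for signed types; all names are hypothetical.  */ |
| #if 0 |
| #include <stdint.h> |
| |
| /* Zero-extend BITSIZE bits starting at bit BITPOS of WORD.  */ |
| static uint32_t |
| extract_unsigned (uint32_t word, int bitpos, int bitsize) |
| { |
|   word >>= bitpos;                          /* Shift the field to the lsb.  */ |
|   if (bitsize < 32) |
|     word &= ((uint32_t) 1 << bitsize) - 1;  /* Mask out the upper bits.  */ |
|   return word; |
| } |
| |
| /* Sign-extend the same field: shift its msb up to the word's msb, |
|    then arithmetic-shift back down.  */ |
| static int32_t |
| extract_signed (uint32_t word, int bitpos, int bitsize) |
| { |
|   int32_t t = (int32_t) (word << (32 - bitsize - bitpos)); |
|   return t >> (32 - bitsize); |
| } |
| #endif |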
| |
| /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value |
| of mode MODE with BITSIZE ones followed by BITPOS zeros, or the |
| complement of that if COMPLEMENT. The mask is truncated if |
| necessary to the width of mode MODE. The mask is zero-extended if |
| BITSIZE+BITPOS is too small for MODE. */ |
| |
| static rtx |
| mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement) |
| { |
| HOST_WIDE_INT masklow, maskhigh; |
| |
| if (bitsize == 0) |
| masklow = 0; |
| else if (bitpos < HOST_BITS_PER_WIDE_INT) |
| masklow = (HOST_WIDE_INT) -1 << bitpos; |
| else |
| masklow = 0; |
| |
| if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT) |
| masklow &= ((unsigned HOST_WIDE_INT) -1 |
| >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize)); |
| |
| if (bitpos <= HOST_BITS_PER_WIDE_INT) |
| maskhigh = -1; |
| else |
| maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT); |
| |
| if (bitsize == 0) |
| maskhigh = 0; |
| else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT) |
| maskhigh &= ((unsigned HOST_WIDE_INT) -1 |
| >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize)); |
| else |
| maskhigh = 0; |
| |
| if (complement) |
| { |
| maskhigh = ~maskhigh; |
| masklow = ~masklow; |
| } |
| |
| return immed_double_const (masklow, maskhigh, mode); |
| } |
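| |
| /* Worked example of the mask shape built above: BITSIZE ones followed by |
|    BITPOS zeros, e.g. bitpos == 3, bitsize == 4 gives ...0001111000, i.e. |
|    0x78.  The helper below is a host-side illustration for fields that fit |
|    in a single HOST_WIDE_INT; it is not used by the code above.  */ |
| #if 0 |
| static unsigned HOST_WIDE_INT |
| low_mask (int bitpos, int bitsize) |
| { |
|   unsigned HOST_WIDE_INT ones |
|     = (bitsize >= HOST_BITS_PER_WIDE_INT |
|        ? ~(unsigned HOST_WIDE_INT) 0 |
|        : ((unsigned HOST_WIDE_INT) 1 << bitsize) - 1); |
|   return ones << bitpos;   /* low_mask (3, 4) == 0x78.  */ |
| } |
| #endif |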
| |
| /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value |
| VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */ |
| |
| static rtx |
| lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize) |
| { |
| unsigned HOST_WIDE_INT v = INTVAL (value); |
| HOST_WIDE_INT low, high; |
| |
| if (bitsize < HOST_BITS_PER_WIDE_INT) |
| v &= ~((HOST_WIDE_INT) -1 << bitsize); |
| |
| if (bitpos < HOST_BITS_PER_WIDE_INT) |
| { |
| low = v << bitpos; |
| high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0); |
| } |
| else |
| { |
| low = 0; |
| high = v << (bitpos - HOST_BITS_PER_WIDE_INT); |
| } |
| |
| return immed_double_const (low, high, mode); |
| } |
| |
| /* Extract a bit field that is split across two words |
| and return an RTX for the result. |
| |
| OP0 is the REG, SUBREG or MEM rtx for the first of the two words. |
| BITSIZE is the field width; BITPOS, position of its first bit, in the word. |
| UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */ |
| |
| static rtx |
| extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize, |
| unsigned HOST_WIDE_INT bitpos, int unsignedp) |
| { |
| unsigned int unit; |
| unsigned int bitsdone = 0; |
| rtx result = NULL_RTX; |
| int first = 1; |
| |
| /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that |
| much at a time. */ |
| if (REG_P (op0) || GET_CODE (op0) == SUBREG) |
| unit = BITS_PER_WORD; |
| else |
| unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD); |
| |
| while (bitsdone < bitsize) |
| { |
| unsigned HOST_WIDE_INT thissize; |
| rtx part, word; |
| unsigned HOST_WIDE_INT thispos; |
| unsigned HOST_WIDE_INT offset; |
| |
| offset = (bitpos + bitsdone) / unit; |
| thispos = (bitpos + bitsdone) % unit; |
| |
| /* THISSIZE must not overrun a word boundary. Otherwise, |
| extract_fixed_bit_field will call us again, and we will mutually |
| recurse forever. */ |
| thissize = MIN (bitsize - bitsdone, BITS_PER_WORD); |
| thissize = MIN (thissize, unit - thispos); |
| |
| /* If OP0 is a register, then handle OFFSET here. |
| |
| When handling multiword bitfields, extract_bit_field may pass |
| down a word_mode SUBREG of a larger REG for a bitfield that actually |
| crosses a word boundary. Thus, for a SUBREG, we must find |
| the current word starting from the base register. */ |
| if (GET_CODE (op0) == SUBREG) |
| { |
| int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset; |
| word = operand_subword_force (SUBREG_REG (op0), word_offset, |
| GET_MODE (SUBREG_REG (op0))); |
| offset = 0; |
| } |
| else if (REG_P (op0)) |
| { |
| word = operand_subword_force (op0, offset, GET_MODE (op0)); |
| offset = 0; |
| } |
| else |
| word = op0; |
| |
| /* Extract the parts in bit-counting order, |
| whose meaning is determined by BYTES_PER_UNIT. |
| OFFSET is in UNITs, and UNIT is in bits. |
| extract_fixed_bit_field wants offset in bytes. */ |
| part = extract_fixed_bit_field (word_mode, word, |
| offset * unit / BITS_PER_UNIT, |
| thissize, thispos, 0, 1); |
| bitsdone += thissize; |
| |
| /* Shift this part into place for the result. */ |
| if (BYTES_BIG_ENDIAN) |
| { |
| if (bitsize != bitsdone) |
| part = expand_shift (LSHIFT_EXPR, word_mode, part, |
| build_int_cst (NULL_TREE, bitsize - bitsdone), |
| 0, 1); |
| } |
| else |
| { |
| if (bitsdone != thissize) |
| part = expand_shift (LSHIFT_EXPR, word_mode, part, |
| build_int_cst (NULL_TREE, |
| bitsdone - thissize), 0, 1); |
| } |
| |
| if (first) |
| result = part; |
| else |
| /* Combine the parts with bitwise or. This works |
| because we extracted each part as an unsigned bit field. */ |
| result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1, |
| OPTAB_LIB_WIDEN); |
| |
| first = 0; |
| } |
| |
| /* Unsigned bit field: we are done. */ |
| if (unsignedp) |
| return result; |
| /* Signed bit field: sign-extend with two arithmetic shifts. */ |
| result = expand_shift (LSHIFT_EXPR, word_mode, result, |
| build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize), |
| NULL_RTX, 0); |
| return expand_shift (RSHIFT_EXPR, word_mode, result, |
| build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize), |
| NULL_RTX, 0); |
| } |
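| |
| /* Standalone sketch of the scheme above for a field that straddles two |
|    little-endian 32-bit words: fetch each part zero-extended, combine the |
|    parts with IOR, then sign-extend (if requested) with a left shift |
|    followed by an arithmetic right shift.  Illustrative only; it assumes |
|    arithmetic right shifts for signed types and does not use the RTL |
|    helpers above.  */ |
| #if 0 |
| #include <stdint.h> |
| |
| static int32_t |
| extract_split (const uint32_t *words, int bitpos, int bitsize, int unsignedp) |
| { |
|   uint32_t result = 0; |
|   int bitsdone = 0; |
| |
|   while (bitsdone < bitsize) |
|     { |
|       int offset = (bitpos + bitsdone) / 32;    /* Which word.  */ |
|       int thispos = (bitpos + bitsdone) % 32;   /* Bit within that word.  */ |
|       int thissize = bitsize - bitsdone; |
|       if (thissize > 32 - thispos) |
|         thissize = 32 - thispos;                /* Stop at the word boundary.  */ |
| |
|       uint32_t part = words[offset] >> thispos; |
|       if (thissize < 32) |
|         part &= ((uint32_t) 1 << thissize) - 1; |
| |
|       result |= part << bitsdone;               /* Little-endian placement.  */ |
|       bitsdone += thissize; |
|     } |
| |
|   if (unsignedp) |
|     return (int32_t) result; |
|   return (int32_t) (result << (32 - bitsize)) >> (32 - bitsize); |
| } |
| #endif |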
| |
| /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving |
| the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than |
| MODE, fill the upper bits with zeros. Fail if the layout of either |
| mode is unknown (as for CC modes) or if the extraction would involve |
| unprofitable mode punning. Return the value on success, otherwise |
| return null. |
| |
| This is different from gen_lowpart* in these respects: |
| |
| - the returned value must always be considered an rvalue |
| |
| - when MODE is wider than SRC_MODE, the extraction involves |
| a zero extension |
| |
| - when MODE is smaller than SRC_MODE, the extraction involves |
| a truncation (and is thus subject to TRULY_NOOP_TRUNCATION). |
| |
| In other words, this routine performs a computation, whereas the |
| gen_lowpart* routines are conceptually lvalue or rvalue subreg |
| operations. */ |
| |
| rtx |
| extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src) |
| { |
| enum machine_mode int_mode, src_int_mode; |
| |
| if (mode == src_mode) |
| return src; |
| |
| if (CONSTANT_P (src)) |
| { |
| /* simplify_gen_subreg can't be used here, because if simplify_subreg |
| fails, it will happily create (subreg (symbol_ref)) or similar |
| invalid SUBREGs. */ |
| unsigned int byte = subreg_lowpart_offset (mode, src_mode); |
| rtx ret = simplify_subreg (mode, src, src_mode, byte); |
| if (ret) |
| return ret; |
| |
| if (GET_MODE (src) == VOIDmode |
| || !validate_subreg (mode, src_mode, src, byte)) |
| return NULL_RTX; |
| |
| src = force_reg (GET_MODE (src), src); |
| return gen_rtx_SUBREG (mode, src, byte); |
| } |
| |
| if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC) |
| return NULL_RTX; |
| |
| if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode) |
| && MODES_TIEABLE_P (mode, src_mode)) |
| { |
| rtx x = gen_lowpart_common (mode, src); |
| if (x) |
| return x; |
| } |
| |
| src_int_mode = int_mode_for_mode (src_mode); |
| int_mode = int_mode_for_mode (mode); |
| if (src_int_mode == BLKmode || int_mode == BLKmode) |
| return NULL_RTX; |
| |
| if (!MODES_TIEABLE_P (src_int_mode, src_mode)) |
| return NULL_RTX; |
| if (!MODES_TIEABLE_P (int_mode, mode)) |
| return NULL_RTX; |
| |
| src = gen_lowpart (src_int_mode, src); |
| src = convert_modes (int_mode, src_int_mode, src, true); |
| src = gen_lowpart (mode, src); |
| return src; |
| } |
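| |
| /* The C-level analogue of the bit reinterpretation performed above is a |
|    byte copy between objects of the two types.  A hypothetical example, |
|    assuming a little-endian host so that the low-order bits come first |
|    in memory: read the low 32 bits of a double's representation.  */ |
| #if 0 |
| #include <stdint.h> |
| #include <string.h> |
| |
| static uint32_t |
| low_bits_of_double (double d) |
| { |
|   uint32_t lo; |
|   memcpy (&lo, &d, sizeof lo);   /* Low-order bytes on a little-endian host.  */ |
|   return lo; |
| } |
| #endif |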
| |
| /* Add INC into TARGET. */ |
| |
| void |
| expand_inc (rtx target, rtx inc) |
| { |
| rtx value = expand_binop (GET_MODE (target), add_optab, |
| target, inc, |
| target, 0, OPTAB_LIB_WIDEN); |
| if (value != target) |
| emit_move_insn (target, value); |
| } |
| |
| /* Subtract DEC from TARGET. */ |
| |
| void |
| expand_dec (rtx target, rtx dec) |
| { |
| rtx value = expand_binop (GET_MODE (target), sub_optab, |
| target, dec, |
| target, 0, OPTAB_LIB_WIDEN); |
| if (value != target) |
| emit_move_insn (target, value); |
| } |
| |
| /* Output a shift instruction for expression code CODE, |
| with SHIFTED being the rtx for the value to shift, |
| and AMOUNT the tree for the amount to shift by. |
| Store the result in the rtx TARGET, if that is convenient. |
| If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic. |
| Return the rtx for where the value is. */ |
| |
| rtx |
| expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted, |
| tree amount, rtx target, int unsignedp) |
| { |
| rtx op1, temp = 0; |
| int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR); |
| int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR); |
| optab lshift_optab = ashl_optab; |
| optab rshift_arith_optab = ashr_optab; |
| optab rshift_uns_optab = lshr_optab; |
| optab lrotate_optab = rotl_optab; |
| optab rrotate_optab = rotr_optab; |
| enum machine_mode op1_mode; |
| int attempt; |
| bool speed = optimize_insn_for_speed_p (); |
| |
| op1 = expand_normal (amount); |
| op1_mode = GET_MODE (op1); |
| |
| /* Determine whether the shift/rotate amount is a vector or a scalar. If the |
| shift amount is a vector, use the vector/vector shift patterns. */ |
| if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode)) |
| { |
| lshift_optab = vashl_optab; |
| rshift_arith_optab = vashr_optab; |
| rshift_uns_optab = vlshr_optab; |
| lrotate_optab = vrotl_optab; |
| rrotate_optab = vrotr_optab; |
| } |
| |
| /* We used to detect shift counts computed by NEGATE_EXPR and shift |
| in the other direction, but that does not work on all machines. */ |
| |
| if (SHIFT_COUNT_TRUNCATED) |
| { |
| if (GET_CODE (op1) == CONST_INT |
| && ((unsigned HOST_WIDE_INT) INTVAL (op1) >= |
| (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode))) |
| op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1) |
| % GET_MODE_BITSIZE (mode)); |
| else if (GET_CODE (op1) == SUBREG |
| && subreg_lowpart_p (op1) |
| && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1)))) |
| op1 = SUBREG_REG (op1); |
| } |
| |
| if (op1 == const0_rtx) |
| return shifted; |
| |
| /* Check whether it's cheaper to implement a left shift by a constant |
| bit count by a sequence of additions. */ |
| if (code == LSHIFT_EXPR |
| && GET_CODE (op1) == CONST_INT |
| && INTVAL (op1) > 0 |
| && INTVAL (op1) < GET_MODE_BITSIZE (mode) |
| && INTVAL (op1) < MAX_BITS_PER_WORD |
| && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode] |
| && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST) |
| { |
| int i; |
| for (i = 0; i < INTVAL (op1); i++) |
| { |
| temp = force_reg (mode, shifted); |
| shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX, |
| unsignedp, OPTAB_LIB_WIDEN); |
| } |
| return shifted; |
| } |
| |
| for (attempt = 0; temp == 0 && attempt < 3; attempt++) |
| { |
| enum optab_methods methods; |
| |
| if (attempt == 0) |
| methods = OPTAB_DIRECT; |
| else if (attempt == 1) |
| methods = OPTAB_WIDEN; |
| else |
| methods = OPTAB_LIB_WIDEN; |
| |
| if (rotate) |
| { |
| /* Widening does not work for rotation. */ |
| if (methods == OPTAB_WIDEN) |
| continue; |
| else if (methods == OPTAB_LIB_WIDEN) |
| { |
| /* If we have been unable to open-code this by a rotation, |
| do it as the IOR of two shifts. I.e., to rotate A |
| by N bits, compute (A << N) | ((unsigned) A >> (C - N)) |
| where C is the bitsize of A. |
| |
| It is theoretically possible that the target machine might |
| not be able to perform either shift and hence we would |
| be making two libcalls rather than just the one for the |
| shift (similarly if IOR could not be done). We will allow |
| this extremely unlikely lossage to avoid complicating the |
| code below. */ |
| |
| rtx subtarget = target == shifted ? 0 : target; |
| tree new_amount, other_amount; |
| rtx temp1; |
| tree type = TREE_TYPE (amount); |
| if (GET_MODE (op1) != TYPE_MODE (type) |
| && GET_MODE (op1) != VOIDmode) |
| op1 = convert_to_mode (TYPE_MODE (type), op1, 1); |
| new_amount = make_tree (type, op1); |
| other_amount |
| = fold_build2 (MINUS_EXPR, type, |
| build_int_cst (type, GET_MODE_BITSIZE (mode)), |
| new_amount); |
| |
| shifted = force_reg (mode, shifted); |
| |
| temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR, |
| mode, shifted, new_amount, 0, 1); |
| temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR, |
| mode, shifted, other_amount, subtarget, 1); |
| return expand_binop (mode, ior_optab, temp, temp1, target, |
| unsignedp, methods); |
| } |
| |
| temp = expand_binop (mode, |
| left ? lrotate_optab : rrotate_optab, |
| shifted, op1, target, unsignedp, methods); |
| } |
| else if (unsignedp) |
| temp = expand_binop (mode, |
| left ? lshift_optab : rshift_uns_optab, |
| shifted, op1, target, unsignedp, methods); |
| |
| /* Do arithmetic shifts. |
| Also, if we are going to widen the operand, we can just as well |
| use an arithmetic right-shift instead of a logical one. */ |
| if (temp == 0 && ! rotate |
| && (! unsignedp || (! left && methods == OPTAB_WIDEN))) |
| { |
| enum optab_methods methods1 = methods; |
| |
| /* If trying to widen a log shift to an arithmetic shift, |
| don't accept an arithmetic shift of the same size. */ |
| if (unsignedp) |
| methods1 = OPTAB_MUST_WIDEN; |
| |
| /* Arithmetic shift */ |
| |
| temp = expand_binop (mode, |
| left ? lshift_optab : rshift_arith_optab, |
| shifted, op1, target, unsignedp, methods1); |
| } |
| |
| /* We used to try extzv here for logical right shifts, but that was |
| only useful for one machine, the VAX, and caused poor code |
| generation there for lshrdi3, so the code was deleted and a |
| define_expand for lshrsi3 was added to vax.md. */ |
| } |
| |
| gcc_assert (temp); |
| return temp; |
| } |
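| |
| /* Standalone illustration of the fallback used above when no rotate |
|    pattern is available: rotate A left by N as (A << N) | (A >> (C - N)), |
|    where C is the bitsize of A.  Hypothetical helper, not GCC code.  */ |
| #if 0 |
| #include <stdint.h> |
| |
| static uint32_t |
| rotate_left_32 (uint32_t a, unsigned int n) |
| { |
|   n &= 31;                        /* Keep the count in range.  */ |
|   if (n == 0) |
|     return a; |
|   return (a << n) | (a >> (32 - n)); |
| } |
| #endif |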
| |
| enum alg_code { |
| alg_unknown, |
| alg_zero, |
| alg_m, alg_shift, |
| alg_add_t_m2, |
| alg_sub_t_m2, |
| alg_add_factor, |
| alg_sub_factor, |
| alg_add_t2_m, |
| alg_sub_t2_m, |
| alg_impossible |
| }; |
| |
| /* This structure holds the "cost" of a multiply sequence. The |
| "cost" field holds the total rtx_cost of every operator in the |
| synthetic multiplication sequence, hence cost(a op b) is defined |
| as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero. |
| The "latency" field holds the minimum possible latency of the |
| synthetic multiply, on a hypothetical infinitely parallel CPU. |
| This is the critical path, or the maximum height, of the expression |
| tree which is the sum of rtx_costs on the most expensive path from |
| any leaf to the root. Hence latency(a op b) is defined as zero for |
| leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */ |
| |
| struct mult_cost { |
| short cost; /* Total rtx_cost of the multiplication sequence. */ |
| short latency; /* The latency of the multiplication sequence. */ |
| }; |
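| |
| /* How the two fields compose when one synthetic operation combines two |
|    sub-sequences A and B: cost adds up, latency follows the longer operand |
|    path.  Illustrative helper only, not used by the code below.  */ |
| #if 0 |
| static struct mult_cost |
| combine_mult_cost (int op_rtx_cost, struct mult_cost a, struct mult_cost b) |
| { |
|   struct mult_cost r; |
|   r.cost = op_rtx_cost + a.cost + b.cost; |
|   r.latency = op_rtx_cost + (a.latency > b.latency ? a.latency : b.latency); |
|   return r; |
| } |
| #endif |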
| |
| /* This macro is used to compare a pointer to a mult_cost against a |
| single integer "rtx_cost" value. This is equivalent to the macro |
| CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */ |
| #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \ |
| || ((X)->cost == (Y) && (X)->latency < (Y))) |
| |
| /* This macro is used to compare two pointers to mult_costs against |
| each other. The macro returns true if X is cheaper than Y. |
| Currently, the cheaper of two mult_costs is the one with the |
| lower "cost". If "cost"s are tied, the lower latency is cheaper. */ |
| #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \ |
| || ((X)->cost == (Y)->cost \ |
| && (X)->latency < (Y)->latency)) |
| |
| /* This structure records a sequence of operations. |
| `ops' is the number of operations recorded. |
| `cost' is their total cost. |
| The operations are stored in `op' and the corresponding |
| logarithms of the integer coefficients in `log'. |
| |
| These are the operations: |
| alg_zero total := 0; |
| alg_m total := multiplicand; |
| alg_shift total := total * coeff; |
| alg_add_t_m2 total := total + multiplicand * coeff; |
| alg_sub_t_m2 total := total - multiplicand * coeff; |
| alg_add_factor total := total * coeff + total; |
| alg_sub_factor total := total * coeff - total; |
| alg_add_t2_m total := total * coeff + multiplicand; |
| alg_sub_t2_m total := total * coeff - multiplicand; |
| |
| The first operand must be either alg_zero or alg_m. */ |
| |
| struct algorithm |
| { |
| struct mult_cost cost; |
| short ops; |
| /* The size of the OP and LOG fields are not directly related to the |
| word size, but the worst-case algorithms will be if we have few |
| consecutive ones or zeros, i.e., a multiplicand like 10101010101... |
| In that case we will generate shift-by-2, add, shift-by-2, add,..., |
| in total wordsize operations. */ |
| enum alg_code op[MAX_BITS_PER_WORD]; |
| char log[MAX_BITS_PER_WORD]; |
| }; |
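| |
| /* Worked example: multiplication by 10 can be recorded as |
|      op[0] = alg_m                       total = x |
|      op[1] = alg_add_t2_m, log[1] = 2    total = total*4 + x  (= 5*x) |
|      op[2] = alg_shift,    log[2] = 1    total = total*2      (= 10*x) |
|    The evaluator below applies such a recording to a host integer, |
|    mirroring the operation list above.  It is illustrative only and is |
|    not used by GCC.  */ |
| #if 0 |
| static HOST_WIDE_INT |
| apply_algorithm (const struct algorithm *alg, HOST_WIDE_INT x) |
| { |
|   HOST_WIDE_INT total = (alg->op[0] == alg_zero ? 0 : x); |
|   int i; |
| |
|   for (i = 1; i < alg->ops; i++) |
|     switch (alg->op[i]) |
|       { |
|       case alg_shift:      total <<= alg->log[i]; break; |
|       case alg_add_t_m2:   total += x << alg->log[i]; break; |
|       case alg_sub_t_m2:   total -= x << alg->log[i]; break; |
|       case alg_add_factor: total += total << alg->log[i]; break; |
|       case alg_sub_factor: total = (total << alg->log[i]) - total; break; |
|       case alg_add_t2_m:   total = (total << alg->log[i]) + x; break; |
|       case alg_sub_t2_m:   total = (total << alg->log[i]) - x; break; |
|       default:             break; |
|       } |
|   return total; |
| } |
| #endif |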
| |
| /* The entry for our multiplication cache/hash table. */ |
| struct alg_hash_entry { |
| /* The number we are multiplying by. */ |
| unsigned HOST_WIDE_INT t; |
| |
| /* The mode in which we are multiplying something by T. */ |
| enum machine_mode mode; |
| |
| /* The best multiplication algorithm for t. */ |
| enum alg_code alg; |
| |
| /* The cost of multiplication if ALG_CODE is not alg_impossible. |
| Otherwise, the cost within which multiplication by T is |
| impossible. */ |
| struct mult_cost cost; |
| |
| /* Optimized for speed? */ |
| bool speed; |
| }; |
| |
| /* The number of cache/hash entries. */ |
| #if HOST_BITS_PER_WIDE_INT == 64 |
| #define NUM_ALG_HASH_ENTRIES 1031 |
| #else |
| #define NUM_ALG_HASH_ENTRIES 307 |
| #endif |
| |
| /* Each entry of ALG_HASH caches alg_code for some integer. This is |
| actually a hash table. If we have a collision, the older |
| entry is kicked out. */ |
| static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES]; |
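| |
| /* The table is direct-mapped: a key hashes to exactly one slot, a hit |
|    requires the full <T, MODE, SPEED> key to match, and a new entry simply |
|    overwrites whatever was in its slot.  Illustrative lookup sketch only; |
|    synth_mult below performs the real lookup inline.  */ |
| #if 0 |
| static bool |
| alg_hash_hit_p (unsigned HOST_WIDE_INT t, enum machine_mode mode, bool speed) |
| { |
|   int i = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
|   return (alg_hash[i].t == t |
|           && alg_hash[i].mode == mode |
|           && alg_hash[i].speed == speed |
|           && alg_hash[i].alg != alg_unknown); |
| } |
| #endif |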
| |
| /* Indicates the type of fixup needed after a constant multiplication. |
| BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that |
| the result should be negated, and ADD_VARIANT means that the |
| multiplicand should be added to the result. */ |
| enum mult_variant {basic_variant, negate_variant, add_variant}; |
| |
| static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, |
| const struct mult_cost *, enum machine_mode mode); |
| static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT, |
| struct algorithm *, enum mult_variant *, int); |
| static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, |
| const struct algorithm *, enum mult_variant); |
| static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int, |
| int, rtx *, int *, int *); |
| static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); |
| static rtx extract_high_half (enum machine_mode, rtx); |
| static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); |
| static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, |
| int, int); |
| /* Compute and return the best algorithm for multiplying by T. |
| The algorithm must cost less than COST_LIMIT. |
| If retval.cost >= COST_LIMIT, no algorithm was found and all |
| other fields of the returned struct are undefined. |
| MODE is the machine mode of the multiplication. */ |
| |
| static void |
| synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t, |
| const struct mult_cost *cost_limit, enum machine_mode mode) |
| { |
| int m; |
| struct algorithm *alg_in, *best_alg; |
| struct mult_cost best_cost; |
| struct mult_cost new_limit; |
| int op_cost, op_latency; |
| unsigned HOST_WIDE_INT q; |
| int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode)); |
| int hash_index; |
| bool cache_hit = false; |
| enum alg_code cache_alg = alg_zero; |
| bool speed = optimize_insn_for_speed_p (); |
| |
| /* Indicate that no algorithm is yet found. If no algorithm |
| is found, this value will be returned and indicate failure. */ |
| alg_out->cost.cost = cost_limit->cost + 1; |
| alg_out->cost.latency = cost_limit->latency + 1; |
| |
| if (cost_limit->cost < 0 |
| || (cost_limit->cost == 0 && cost_limit->latency <= 0)) |
| return; |
| |
| /* Restrict the bits of "t" to the multiplication's mode. */ |
| t &= GET_MODE_MASK (mode); |
| |
| /* t == 1 can be done at zero cost. */ |
| if (t == 1) |
| { |
| alg_out->ops = 1; |
| alg_out->cost.cost = 0; |
| alg_out->cost.latency = 0; |
| alg_out->op[0] = alg_m; |
| return; |
| } |
| |
| /* t == 0 sometimes has a cost. If it does and it exceeds our limit, |
| fail now. */ |
| if (t == 0) |
| { |
| if (MULT_COST_LESS (cost_limit, zero_cost[speed])) |
| return; |
| else |
| { |
| alg_out->ops = 1; |
| alg_out->cost.cost = zero_cost[speed]; |
| alg_out->cost.latency = zero_cost[speed]; |
| alg_out->op[0] = alg_zero; |
| return; |
| } |
| } |
| |
| /* We'll be needing a couple extra algorithm structures now. */ |
| |
| alg_in = XALLOCA (struct algorithm); |
| best_alg = XALLOCA (struct algorithm); |
| best_cost = *cost_limit; |
| |
| /* Compute the hash index. */ |
| hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES; |
| |
| /* See if we already know what to do for T. */ |
| if (alg_hash[hash_index].t == t |
| && alg_hash[hash_index].mode == mode |
| && alg_hash[hash_index].speed == speed |
| && alg_hash[hash_index].alg != alg_unknown) |
| { |
| cache_alg = alg_hash[hash_index].alg; |
| |
| if (cache_alg == alg_impossible) |
| { |
| /* The cache tells us that it's impossible to synthesize |
| multiplication by T within alg_hash[hash_index].cost. */ |
| if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit)) |
| /* COST_LIMIT is at least as restrictive as the one |
| recorded in the hash table, in which case we have no |
| hope of synthesizing a multiplication. Just |
| return. */ |
| return; |
| |
| /* If we get here, COST_LIMIT is less restrictive than the |
| one recorded in the hash table, so we may be able to |
| synthesize a multiplication. Proceed as if we didn't |
| have the cache entry. */ |
| } |
| else |
| { |
| if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost)) |
| /* The cached algorithm shows that this multiplication |
| requires more cost than COST_LIMIT. Just return. This |
| way, we don't clobber this cache entry with |
| alg_impossible but retain useful information. */ |
| return; |
| |
| cache_hit = true; |
| |
| switch (cache_alg) |
| { |
| case alg_shift: |
| goto do_alg_shift; |
| |
| case alg_add_t_m2: |
| case alg_sub_t_m2: |
| goto do_alg_addsub_t_m2; |
| |
| case alg_add_factor: |
| case alg_sub_factor: |
| goto do_alg_addsub_factor; |
| |
| case alg_add_t2_m: |
| goto do_alg_add_t2_m; |
| |
| case alg_sub_t2_m: |
| goto do_alg_sub_t2_m; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| } |
| |
| /* If we have a group of zero bits at the low-order part of T, try |
| multiplying by the remaining bits and then doing a shift. */ |
| |
| if ((t & 1) == 0) |
| { |
| do_alg_shift: |
| m = floor_log2 (t & -t); /* m = number of low zero bits */ |
| if (m < maxm) |
| { |
| q = t >> m; |
| /* The function expand_shift will choose between a shift and |
| a sequence of additions, so the observed cost is given as |
| MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */ |
| op_cost = m * add_cost[speed][mode]; |
| if (shift_cost[speed][mode][m] < op_cost) |
| op_cost = shift_cost[speed][mode][m]; |
| new_limit.cost = best_cost.cost - op_cost; |
| new_limit.latency = best_cost.latency - op_cost; |
| synth_mult (alg_in, q, &new_limit, mode); |
| |
| alg_in->cost.cost += op_cost; |
| alg_in->cost.latency += op_cost; |
| if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
| { |
| struct algorithm *x; |
| best_cost = alg_in->cost; |
| x = alg_in, alg_in = best_alg, best_alg = x; |
| best_alg->log[best_alg->ops] = m; |
| best_alg->op[best_alg->ops] = alg_shift; |
| } |
| } |
| if (cache_hit) |
| goto done; |
| } |
| |
| /* If we have an odd number, add or subtract one. */ |
| if ((t & 1) != 0) |
| { |
| unsigned HOST_WIDE_INT w; |
| |
| do_alg_addsub_t_m2: |
| for (w = 1; (w & t) != 0; w <<= 1) |
| ; |
| /* If T was -1, then W will be zero after the loop. This is another |
| case where T ends with ...111. Handling it by multiplying by (T + 1) |
| and subtracting 1 produces slightly better code and makes algorithm |
| selection much faster than treating it like the ...0111 case |
| below. */ |
| if (w == 0 |
| || (w > 2 |
| /* Reject the case where t is 3. |
| Thus we prefer addition in that case. */ |
| && t != 3)) |
| { |
| /* T ends with ...111. Multiply by (T + 1) and subtract 1. */ |
| |
| op_cost = add_cost[speed][mode]; |
| new_limit.cost = best_cost.cost - op_cost; |
| new_limit.latency = best_cost.latency - op_cost; |
| synth_mult (alg_in, t + 1, &new_limit, mode); |
| |
| alg_in->cost.cost += op_cost; |
| alg_in->cost.latency += op_cost; |
| if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
| { |
| struct algorithm *x; |
| best_cost = alg_in->cost; |
| x = alg_in, alg_in = best_alg, best_alg = x; |
| best_alg->log[best_alg->ops] = 0; |
| best_alg->op[best_alg->ops] = alg_sub_t_m2; |
| } |
| } |
| else |
| { |
| /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */ |
| |
| op_cost = add_cost[speed][mode]; |
| new_limit.cost = best_cost.cost - op_cost; |
| new_limit.latency = best_cost.latency - op_cost; |
| synth_mult (alg_in, t - 1, &new_limit, mode); |
| |
| alg_in->cost.cost += op_cost; |
| alg_in->cost.latency += op_cost; |
| if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
| { |
| struct algorithm *x; |
| best_cost = alg_in->cost; |
| x = alg_in, alg_in = best_alg, best_alg = x; |
| best_alg->log[best_alg->ops] = 0; |
| best_alg->op[best_alg->ops] = alg_add_t_m2; |
| } |
| } |
| if (cache_hit) |
| goto done; |
| } |
| |
| /* Look for factors of t of the form |
| t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)). |
| If we find such a factor, we can multiply by t using an algorithm that |
| multiplies by q, shift the result by m and add/subtract it to itself. |
| |
| We search for large factors first and loop down, even if large factors |
| are less probable than small; if we find a large factor we will find a |
| good sequence quickly, and therefore be able to prune (by decreasing |
| COST_LIMIT) the search. */ |
| |
| do_alg_addsub_factor: |
| for (m = floor_log2 (t - 1); m >= 2; m--) |
| { |
| unsigned HOST_WIDE_INT d; |
| |
| d = ((unsigned HOST_WIDE_INT) 1 << m) + 1; |
| if (t % d == 0 && t > d && m < maxm |
| && (!cache_hit || cache_alg == alg_add_factor)) |
| { |
| /* If the target has a cheap shift-and-add instruction use |
| that in preference to a shift insn followed by an add insn. |
| Assume that the shift-and-add is "atomic" with a latency |
| equal to its cost, otherwise assume that on superscalar |
| hardware the shift may be executed concurrently with the |
| earlier steps in the algorithm. */ |
| op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; |
| if (shiftadd_cost[speed][mode][m] < op_cost) |
| { |
| op_cost = shiftadd_cost[speed][mode][m]; |
| op_latency = op_cost; |
| } |
| else |
| op_latency = add_cost[speed][mode]; |
| |
| new_limit.cost = best_cost.cost - op_cost; |
| new_limit.latency = best_cost.latency - op_latency; |
| synth_mult (alg_in, t / d, &new_limit, mode); |
| |
| alg_in->cost.cost += op_cost; |
| alg_in->cost.latency += op_latency; |
| if (alg_in->cost.latency < op_cost) |
| alg_in->cost.latency = op_cost; |
| if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
| { |
| struct algorithm *x; |
| best_cost = alg_in->cost; |
| x = alg_in, alg_in = best_alg, best_alg = x; |
| best_alg->log[best_alg->ops] = m; |
| best_alg->op[best_alg->ops] = alg_add_factor; |
| } |
| /* Other factors will have been taken care of in the recursion. */ |
| break; |
| } |
| |
| d = ((unsigned HOST_WIDE_INT) 1 << m) - 1; |
| if (t % d == 0 && t > d && m < maxm |
| && (!cache_hit || cache_alg == alg_sub_factor)) |
| { |
| /* If the target has a cheap shift-and-subtract insn use |
| that in preference to a shift insn followed by a sub insn. |
| Assume that the shift-and-sub is "atomic" with a latency |
| equal to its cost, otherwise assume that on superscalar |
| hardware the shift may be executed concurrently with the |
| earlier steps in the algorithm. */ |
| op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m]; |
| if (shiftsub_cost[speed][mode][m] < op_cost) |
| { |
| op_cost = shiftsub_cost[speed][mode][m]; |
| op_latency = op_cost; |
| } |
| else |
| op_latency = add_cost[speed][mode]; |
| |
| new_limit.cost = best_cost.cost - op_cost; |
| new_limit.latency = best_cost.latency - op_latency; |
| synth_mult (alg_in, t / d, &new_limit, mode); |
| |
| alg_in->cost.cost += op_cost; |
| alg_in->cost.latency += op_latency; |
| if (alg_in->cost.latency < op_cost) |
| alg_in->cost.latency = op_cost; |
| if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
| { |
| struct algorithm *x; |
| best_cost = alg_in->cost; |
| x = alg_in, alg_in = best_alg, best_alg = x; |
| best_alg->log[best_alg->ops] = m; |
| best_alg->op[best_alg->ops] = alg_sub_factor; |
| } |
| break; |
| } |
| } |
| if (cache_hit) |
| goto done; |
| |
| /* Try shift-and-add (load effective address) instructions, |
| i.e. do a*3, a*5, a*9. */ |
| if ((t & 1) != 0) |
| { |
| do_alg_add_t2_m: |
| q = t - 1; |
| q = q & -q; |
| m = exact_log2 (q); |
| if (m >= 0 && m < maxm) |
| { |
| op_cost = shiftadd_cost[speed][mode][m]; |
| new_limit.cost = best_cost.cost - op_cost; |
| new_limit.latency = best_cost.latency - op_cost; |
| synth_mult (alg_in, (t - 1) >> m, &new_limit, mode); |
| |
| alg_in->cost.cost += op_cost; |
| alg_in->cost.latency += op_cost; |
| if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
| { |
| struct algorithm *x; |
| best_cost = alg_in->cost; |
| x = alg_in, alg_in = best_alg, best_alg = x; |
| best_alg->log[best_alg->ops] = m; |
| best_alg->op[best_alg->ops] = alg_add_t2_m; |
| } |
| } |
| if (cache_hit) |
| goto done; |
| |
| do_alg_sub_t2_m: |
| q = t + 1; |
| q = q & -q; |
| m = exact_log2 (q); |
| if (m >= 0 && m < maxm) |
| { |
| op_cost = shiftsub_cost[speed][mode][m]; |
| new_limit.cost = best_cost.cost - op_cost; |
| new_limit.latency = best_cost.latency - op_cost; |
| synth_mult (alg_in, (t + 1) >> m, &new_limit, mode); |
| |
| alg_in->cost.cost += op_cost; |
| alg_in->cost.latency += op_cost; |
| if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost)) |
| { |
| struct algorithm *x; |
| best_cost = alg_in->cost; |
| x = alg_in, alg_in = best_alg, best_alg = x; |
| best_alg->log[best_alg->ops] = m; |
| best_alg->op[best_alg->ops] = alg_sub_t2_m; |
| } |
| } |
| if (cache_hit) |
| goto done; |
| } |
| |
| done: |
| /* If best_cost has not decreased, we have not found any algorithm. */ |
| if (!CHEAPER_MULT_COST (&best_cost, cost_limit)) |
| { |
| /* We failed to find an algorithm. Record alg_impossible for |
| this case (that is, <T, MODE, COST_LIMIT>) so that next time |
| we are asked to find an algorithm for T within the same or |
| lower COST_LIMIT, we can immediately return to the |
| caller. */ |
| alg_hash[hash_index].t = t; |
| alg_hash[hash_index].mode = mode; |
| alg_hash[hash_index].speed = speed; |
| alg_hash[hash_index].alg = alg_impossible; |
| alg_hash[hash_index].cost = *cost_limit; |
| return; |
| } |
| |
| /* Cache the result. */ |
| if (!cache_hit) |
| { |
| alg_hash[hash_index].t = t; |
| alg_hash[hash_index].mode = mode; |
| alg_hash[hash_index].speed = speed; |
| alg_hash[hash_index].alg = best_alg->op[best_alg->ops]; |
| alg_hash[hash_index].cost.cost = best_cost.cost; |
| alg_hash[hash_index].cost.latency = best_cost.latency; |
| } |
| |
| /* If the sequence is too long for `struct algorithm' to record, |
| make this search fail. */ |
| if (best_alg->ops == MAX_BITS_PER_WORD) |
| return; |
| |
| /* Copy the algorithm from temporary space to the space at alg_out. |
| We avoid using structure assignment because the majority of |
| best_alg is normally undefined, and this is a critical function. */ |
| alg_out->ops = best_alg->ops + 1; |
| alg_out->cost = best_cost; |
| memcpy (alg_out->op, best_alg->op, |
| alg_out->ops * sizeof *alg_out->op); |
| memcpy (alg_out->log, best_alg->log, |
| alg_out->ops * sizeof *alg_out->log); |
| } |
| |
| /* Find the cheapest way of multiplying a value of mode MODE by VAL. |
| Try three variations: |
| |
| - a shift/add sequence based on VAL itself |
| - a shift/add sequence based on -VAL, followed by a negation |
| - a shift/add sequence based on VAL - 1, followed by an addition. |
| |
| Return true if the cheapest of these costs less than MULT_COST, |
| describing the algorithm in *ALG and final fixup in *VARIANT. */ |
| |
| static bool |
| choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val, |
| struct algorithm *alg, enum mult_variant *variant, |
| int mult_cost) |
| { |
| struct algorithm alg2; |
| struct mult_cost limit; |
| int op_cost; |
| bool speed = optimize_insn_for_speed_p (); |
| |
| /* Fail quickly for impossible bounds. */ |
| if (mult_cost < 0) |
| return false; |
| |
| /* Ensure that mult_cost provides a reasonable upper bound. |
| Any constant multiplication can be performed with fewer |
| than 2 * bits additions. */ |
| op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode]; |
| if (mult_cost > op_cost) |
| mult_cost = op_cost; |
| |
| *variant = basic_variant; |
| limit.cost = mult_cost; |
| limit.latency = mult_cost; |
| synth_mult (alg, val, &limit, mode); |
| |
| /* This works only if the inverted value actually fits in an |
| `unsigned int'. */ |
| if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode)) |
| { |
| op_cost = neg_cost[speed][mode]; |
| if (MULT_COST_LESS (&alg->cost, mult_cost)) |
| { |
| limit.cost = alg->cost.cost - op_cost; |
| limit.latency = alg->cost.latency - op_cost; |
| } |
| else |
| { |
| limit.cost = mult_cost - op_cost; |
| limit.latency = mult_cost - op_cost; |
| } |
| |
| synth_mult (&alg2, -val, &limit, mode); |
| alg2.cost.cost += op_cost; |
| alg2.cost.latency += op_cost; |
| if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) |
| *alg = alg2, *variant = negate_variant; |
| } |
| |
| /* This proves very useful for division-by-constant. */ |
| op_cost = add_cost[speed][mode]; |
| if (MULT_COST_LESS (&alg->cost, mult_cost)) |
| { |
| limit.cost = alg->cost.cost - op_cost; |
| limit.latency = alg->cost.latency - op_cost; |
| } |
| else |
| { |
| limit.cost = mult_cost - op_cost; |
| limit.latency = mult_cost - op_cost; |
| } |
| |
| synth_mult (&alg2, val - 1, &limit, mode); |
| alg2.cost.cost += op_cost; |
| alg2.cost.latency += op_cost; |
| if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost)) |
| *alg = alg2, *variant = add_variant; |
| |
| return MULT_COST_LESS (&alg->cost, mult_cost); |
| } |
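| |
| /* What the three variants mean arithmetically for a constant VAL |
|    (which one wins depends entirely on the target's costs): |
| |
|      basic_variant     x * VAL          directly from the shift/add sequence |
|      negate_variant    -(x * -VAL)      synthesize -VAL, then negate |
|      add_variant       x * (VAL-1) + x  synthesize VAL - 1, then add x once |
| |
|    For example, for VAL == -7 the negate variant gives -((x << 3) - x). |
|    The helper below is hypothetical and only spells out that identity.  */ |
| #if 0 |
| static long |
| times_minus_7 (long x) |
| { |
|   return -((x << 3) - x);   /* -(8*x - x) == -7*x.  */ |
| } |
| #endif |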
| |
| /* A subroutine of expand_mult, used for constant multiplications. |
| Multiply OP0 by VAL in mode MODE, storing the result in TARGET if |
| convenient. Use the shift/add sequence described by ALG and apply |
| the final fixup specified by VARIANT. */ |
| |
| static rtx |
| expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val, |
| rtx target, const struct algorithm *alg, |
| enum mult_variant variant) |
| { |
| HOST_WIDE_INT val_so_far; |
| rtx insn, accum, tem; |
| int opno; |
| enum machine_mode nmode; |
| |
| /* Avoid referencing memory over and over, and avoid invalid |
| rtl sharing on SUBREGs. */ |
| op0 = force_reg (mode, op0); |
| |
| /* ACCUM starts out either as OP0 or as a zero, depending on |
| the first operation. */ |
| |
| if (alg->op[0] == alg_zero) |
| { |
| accum = copy_to_mode_reg (mode, const0_rtx); |
| val_so_far = 0; |
| } |
| else if (alg->op[0] == alg_m) |
| { |
| accum = copy_to_mode_reg (mode, op0); |
| val_so_far = 1; |
| } |
| else |
| gcc_unreachable (); |
| |
| for (opno = 1; opno < alg->ops; opno++) |
| { |
| int log = alg->log[opno]; |
| rtx shift_subtarget = optimize ? 0 : accum; |
| rtx add_target |
| = (opno == alg->ops - 1 && target != 0 && variant != add_variant |
| && !optimize) |
| ? target : 0; |
| rtx accum_target = optimize ? 0 : accum; |
| |
| switch (alg->op[opno]) |
| { |
| case alg_shift: |
| accum = expand_shift (LSHIFT_EXPR, mode, accum, |
| build_int_cst (NULL_TREE, log), |
| NULL_RTX, 0); |
| val_so_far <<= log; |
| break; |
| |
| case alg_add_t_m2: |
| tem = expand_shift (LSHIFT_EXPR, mode, op0, |
| build_int_cst (NULL_TREE, log), |
| NULL_RTX, 0); |
| accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
| add_target ? add_target : accum_target); |
| val_so_far += (HOST_WIDE_INT) 1 << log; |
| break; |
| |
| case alg_sub_t_m2: |
| tem = expand_shift (LSHIFT_EXPR, mode, op0, |
| build_int_cst (NULL_TREE, log), |
| NULL_RTX, 0); |
| accum = force_operand (gen_rtx_MINUS (mode, accum, tem), |
| add_target ? add_target : accum_target); |
| val_so_far -= (HOST_WIDE_INT) 1 << log; |
| break; |
| |
| case alg_add_t2_m: |
| accum = expand_shift (LSHIFT_EXPR, mode, accum, |
| build_int_cst (NULL_TREE, log), |
| shift_subtarget, |
| 0); |
| accum = force_operand (gen_rtx_PLUS (mode, accum, op0), |
| add_target ? add_target : accum_target); |
| val_so_far = (val_so_far << log) + 1; |
| break; |
| |
| case alg_sub_t2_m: |
| accum = expand_shift (LSHIFT_EXPR, mode, accum, |
| build_int_cst (NULL_TREE, log), |
| shift_subtarget, 0); |
| accum = force_operand (gen_rtx_MINUS (mode, accum, op0), |
| add_target ? add_target : accum_target); |
| val_so_far = (val_so_far << log) - 1; |
| break; |
| |
| case alg_add_factor: |
| tem = expand_shift (LSHIFT_EXPR, mode, accum, |
| build_int_cst (NULL_TREE, log), |
| NULL_RTX, 0); |
| accum = force_operand (gen_rtx_PLUS (mode, accum, tem), |
| add_target ? add_target : accum_target); |
| val_so_far += val_so_far << log; |
| break; |
| |
| case alg_sub_factor: |
| tem = expand_shift (LSHIFT_EXPR, mode, accum, |
| build_int_cst (NULL_TREE, log), |
| NULL_RTX, 0); |
| accum = force_operand (gen_rtx_MINUS (mode, tem, accum), |
| (add_target |
| ? add_target : (optimize ? 0 : tem))); |
| val_so_far = (val_so_far << log) - val_so_far; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Write a REG_EQUAL note on the last insn so that we can cse |
| multiplication sequences. Note that if ACCUM is a SUBREG, |
| we've set the inner register and must properly indicate |
| that. */ |
| |
| tem = op0, nmode = mode; |
| if (GET_CODE (accum) == SUBREG) |
| { |
| nmode = GET_MODE (SUBREG_REG (accum)); |
| tem = gen_lowpart (nmode, op0); |
| } |
| |
| insn = get_last_insn (); |
| set_unique_reg_note (insn, REG_EQUAL, |
|