gcc/implicit-zee.c - gcc - Git at Google

 /* Redundant Zero-extension elimination for targets that implicitly
    zero-extend writes to the lower 32-bit portion of 64-bit registers.
    Copyright (C) 2010 Free Software Foundation, Inc.
    Contributed by Sriraman Tallam (tmsriram@google.com) and
                   Silvius Rus     (rus@google.com)

 This file is part of GCC.

 GCC is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
 Software Foundation; either version 3, or (at your option) any later
 version.

 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 WARRANTY; without even the implied warranty of MERCHANTABILITY or
 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.

 You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */


 /* Problem Description :
    --------------------
    This pass is intended to be applicable only to targets that implicitly
    zero-extend 64-bit registers after writing to their lower 32-bit half.
    For instance, x86_64 zero-extends the upper bits of a register
    implicitly whenever an instruction writes to its lower 32-bit half.
    For example, the instruction *add edi,eax* also zero-extends the upper
    32-bits of rax after doing the addition.  These zero extensions come
    for free and GCC does not always exploit this well.  That is, it has
    been observed that there are plenty of cases where GCC explicitly
    zero-extends registers for x86_64 that are actually useless because
    these registers were already implicitly zero-extended in a prior
    instruction.  This pass tries to eliminate such useless zero extension
    instructions.

    How does this pass work  ?
    --------------------------

    This pass is run after register allocation.  Hence, all registers that
    this pass deals with are hard registers.  This pass first looks for a
    zero-extension instruction that could possibly be redundant. Such zero
    extension instructions show up in RTL with the pattern :
    (set (reg:DI x) (zero_extend:DI (reg:SI x))).
    where x can be any one of the 64-bit hard registers.
    Now, this pass tries to eliminate this instruction by merging the
    zero-extension with the definitions of register x. For instance, if
    one of the definitions of register x was  :
    (set (reg:SI x) (plus:SI (reg:SI z1) (reg:SI z2))),
    then the combination converts this into :
    (set (reg:DI x) (zero_extend:DI (plus:SI (reg:SI z1) (reg:SI z2)))).
    If all the merged definitions are recognizable assembly instructions,
    the zero-extension is effectively eliminated.  For example, in x86_64,
    implicit zero-extensions are captured with appropriate patterns in the
    i386.md file.  Hence, these merged definition can be matched to a single
    assembly instruction.  The original zero-extension instruction is then
    deleted if all the definitions can be merged.

    However, there are cases where the definition instruction cannot be
    merged with a zero-extend.  Examples are CALL instructions.  In such
    cases, the original zero extension is not redundant and this pass does
    not delete it.

    Handling conditional moves :
    ----------------------------

    Architectures like x86_64 support conditional moves whose semantics for
    zero-extension differ from the other instructions.  For instance, the
    instruction *cmov ebx, eax*
    zero-extends eax onto rax only when the move from ebx to eax happens.
    Otherwise, eax may not be zero-extended.  Conditional moves appear as
    RTL instructions of the form
    (set (reg:SI x) (if_then_else (cond) (reg:SI y) (reg:SI z))).
    This pass tries to merge a zero-extension with a conditional move by
    actually merging the defintions of y and z with a zero-extend and then
    converting the conditional move into :
    (set (reg:DI x) (if_then_else (cond) (reg:DI y) (reg:DI z))).
    Since registers y and z are zero-extended, register x will also be
    zero-extended after the conditional move.  Note that this step has to
    be done transitively since the definition of a conditional copy can be
    another conditional copy.

    Motivating Example I :
    ---------------------
    For this program :
    **********************************************
    bad_code.c

    int mask[1000];

    int foo(unsigned x)
    {
      if (x < 10)
        x = x * 45;
      else
        x = x * 78;
      return mask[x];
    }
    **********************************************

    $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
      ........
      400315:       b8 4e 00 00 00          mov    $0x4e,%eax
      40031a:       0f af f8                imul   %eax,%edi
      40031d:       89 ff                   mov    %edi,%edi  --> Useless extend
      40031f:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
      400326:       c3                      retq
      ......
      400330:       ba 2d 00 00 00          mov    $0x2d,%edx
      400335:       0f af fa                imul   %edx,%edi
      400338:       89 ff                   mov    %edi,%edi  --> Useless extend
      40033a:       8b 04 bd 60 19 40 00    mov    0x401960(,%rdi,4),%eax
      400341:       c3                      retq

    $ gcc -O2 -fzee bad_code.c
      ......
      400315:       6b ff 4e                imul   $0x4e,%edi,%edi
      400318:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
      40031f:       c3                      retq
      400320:       6b ff 2d                imul   $0x2d,%edi,%edi
      400323:       8b 04 bd 40 19 40 00    mov    0x401940(,%rdi,4),%eax
      40032a:       c3                      retq

    Motivating Example II :
    ---------------------

    Here is an example with a conditional move.

    For this program :
    **********************************************

    unsigned long long foo(unsigned x , unsigned y)
    {
      unsigned z;
      if (x > 100)
        z = x + y;
      else
        z = x - y;
      return (unsigned long long)(z);
    }

    $ gcc -O2 -fsee bad_code.c (Turned on existing sign-extension elimination)
      ............
      400360:       8d 14 3e                lea    (%rsi,%rdi,1),%edx
      400363:       89 f8                   mov    %edi,%eax
      400365:       29 f0                   sub    %esi,%eax
      400367:       83 ff 65                cmp    $0x65,%edi
      40036a:       0f 43 c2                cmovae %edx,%eax
      40036d:       89 c0                   mov    %eax,%eax  --> Useless extend
      40036f:       c3                      retq

    $ gcc -O2 -fzee bad_code.c
      .............
      400360:       89 fa                   mov    %edi,%edx
      400362:       8d 04 3e                lea    (%rsi,%rdi,1),%eax
      400365:       29 f2                   sub    %esi,%edx
      400367:       83 ff 65                cmp    $0x65,%edi
      40036a:       89 d6                   mov    %edx,%esi
      40036c:       48 0f 42 c6             cmovb  %rsi,%rax
      400370:       c3                      retq


    Usefulness :
    ----------

    This pass reduces the dynamic instruction count of a compression benchmark
    by 2.8% and improves its run time by about 1%.  The compression benchmark
    had the following code sequence in a very hot region of code before ZEE
    optimized it :

    shr $0x5, %edx
    mov %edx, %edx --> Useless zero-extend  */


 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
 #include "rtl.h"
 #include "tree.h"
 #include "tm_p.h"
 #include "flags.h"
 #include "regs.h"
 #include "hard-reg-set.h"
 #include "basic-block.h"
 #include "insn-config.h"
 #include "function.h"
 #include "expr.h"
 #include "insn-attr.h"
 #include "recog.h"
 #include "diagnostic-core.h"
 #include "target.h"
 #include "timevar.h"
 #include "optabs.h"
 #include "insn-codes.h"
 #include "rtlhooks-def.h"
 /* Include output.h for dump_file.  */
 #include "output.h"
 #include "params.h"
 #include "timevar.h"
 #include "tree-pass.h"
 #include "df.h"
 #include "cgraph.h"

 /* This says if a register is newly created for the purpose of
    zero-extension.  */

 enum insn_merge_code
 {
   MERGE_NOT_ATTEMPTED = 0,
   MERGE_SUCCESS
 };

 /* This says if a INSN UID or its definition has already been merged
    with a zero-extend or not.  */

 static enum insn_merge_code *is_insn_merge_attempted;
 static int max_insn_uid;

 /* Returns the merge code status for INSN.  */

 static enum insn_merge_code
 get_insn_status (rtx insn)
 {
   gcc_assert (INSN_UID (insn) < max_insn_uid);
   return is_insn_merge_attempted[INSN_UID (insn)];
 }

 /* Sets the merge code status of INSN to CODE.  */

 static void
 set_insn_status (rtx insn, enum insn_merge_code code)
 {
   gcc_assert (INSN_UID (insn) < max_insn_uid);
   is_insn_merge_attempted[INSN_UID (insn)] = code;
 }

 /* Given a insn (CURR_INSN) and a pointer to the SET rtx (ORIG_SET)
    that needs to be modified, this code modifies the SET rtx to a
    new SET rtx that zero_extends the right hand expression into a DImode
    register (NEWREG) on the left hand side.  Note that multiple
    assumptions are made about the nature of the set that needs
    to be true for this to work and is called from merge_def_and_ze.

    Original :
    (set (reg:SI a) (expression))

    Transform :
    (set (reg:DI a) (zero_extend (expression)))

    Special Cases :
    If the expression is a constant or another zero_extend directly
    assign it to the DI mode register.  */

 static bool
 combine_set_zero_extend (rtx curr_insn, rtx *orig_set, rtx newreg)
 {
   rtx temp_extension, simplified_temp_extension, new_set, new_const_int;
   rtx orig_src;
   HOST_WIDE_INT val;
   unsigned int mask, delta_width;

   /* Change the SET rtx and validate it.  */
   orig_src = SET_SRC (*orig_set);
   new_set = NULL_RTX;

   /* The right hand side can also be VOIDmode.  These cases have to be
      handled differently.  */

   if (GET_MODE (orig_src) != SImode)
     {
       /* Merge constants by directly moving the constant into the
          DImode register under some conditions.  */

       if (GET_CODE (orig_src) == CONST_INT
 	  && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (SImode))
         {
           if (INTVAL (orig_src) >= 0)
             new_set = gen_rtx_SET (VOIDmode, newreg, orig_src);
           else if (INTVAL (orig_src) < 0)
             {
               /* Zero-extending a negative SImode integer into DImode
                  makes it a positive integer.  Convert the given negative
                  integer into the appropriate integer when zero-extended.  */

               delta_width = HOST_BITS_PER_WIDE_INT - GET_MODE_BITSIZE (SImode);
               mask = (~(unsigned HOST_WIDE_INT) 0) >> delta_width;
               val = INTVAL (orig_src);
               val = val & mask;
               new_const_int = gen_rtx_CONST_INT (VOIDmode, val);
               new_set = gen_rtx_SET (VOIDmode, newreg, new_const_int);
             }
           else
             return false;
         }
       else
         {
           /* This is mostly due to a call insn that should not be
              optimized.  */

           return false;
         }
     }
   else if (GET_CODE (orig_src) == ZERO_EXTEND)
     {
       /* Here a zero-extend is used to get to SI. Why not make it
          all the  way till DI.  */

       temp_extension = gen_rtx_ZERO_EXTEND (DImode, XEXP (orig_src, 0));
       simplified_temp_extension = simplify_rtx (temp_extension);
       if (simplified_temp_extension)
         temp_extension = simplified_temp_extension;
       new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
     }
   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
     {
       /* Only IF_THEN_ELSE of phi-type copies are combined. Otherwise,
          in general, IF_THEN_ELSE should not be combined.  */

       return false;
     }
   else
     {
       /* This is the normal case we expect.  */

       temp_extension = gen_rtx_ZERO_EXTEND (DImode, orig_src);
       simplified_temp_extension = simplify_rtx (temp_extension);
       if (simplified_temp_extension)
         temp_extension = simplified_temp_extension;
       new_set = gen_rtx_SET (VOIDmode, newreg, temp_extension);
     }

   gcc_assert (new_set != NULL_RTX);

   /* This change is a part of a group of changes. Hence,
      validate_change will not try to commit the change.  */

   if (validate_change (curr_insn, orig_set, new_set, true))
     {
       if (dump_file)
         {
           fprintf (dump_file, "Merged Instruction with ZERO_EXTEND:\n");
           print_rtl_single (dump_file, curr_insn);
         }
       return true;
     }
   return false;
 }

 /* This returns the DI mode for the SI register REG_SI.  */

 static rtx
 get_reg_di (rtx reg_si)
 {
   rtx newreg;

   newreg = gen_rtx_REG (DImode, REGNO (reg_si));
   gcc_assert (newreg);
   return newreg;
 }

 /* Treat if_then_else insns, where the operands of both branches
    are registers, as copies. For instance,
    Original :
    (set (reg:SI a) (if_then_else (cond) (reg:SI b) (reg:SI c)))
    Transformed :
    (set (reg:DI a) (if_then_else (cond) (reg:DI b) (reg:DI c)))
    DEF_INSN is the if_then_else insn.  */

 static bool
 transform_ifelse (rtx def_insn)
 {
   rtx set_insn = PATTERN (def_insn);
   rtx srcreg, dstreg, srcreg2;
   rtx map_srcreg, map_dstreg, map_srcreg2;
   rtx ifexpr;
   rtx cond;
   rtx new_set;

   gcc_assert (GET_CODE (set_insn) == SET);
   cond = XEXP (SET_SRC (set_insn), 0);
   dstreg = SET_DEST (set_insn);
   srcreg = XEXP (SET_SRC (set_insn), 1);
   srcreg2 = XEXP (SET_SRC (set_insn), 2);
   map_srcreg = get_reg_di (srcreg);
   map_srcreg2 = get_reg_di (srcreg2);
   map_dstreg = get_reg_di (dstreg);
   ifexpr = gen_rtx_IF_THEN_ELSE (DImode, cond, map_srcreg, map_srcreg2);
   new_set = gen_rtx_SET (VOIDmode, map_dstreg, ifexpr);

   if (validate_change (def_insn, &PATTERN (def_insn), new_set, true))
     {
       if (dump_file)
         {
           fprintf (dump_file, "Cond_Move Instruction's mode extended :\n");
           print_rtl_single (dump_file, def_insn);
         }
       return true;
     }
   else
     return false;
 }

 /* Function to get all the immediate definitions of an instruction.
    The reaching definitions are desired for WHICH_REG used in
    CURR_INSN.  This function returns 0 if there was an error getting
    a definition.  Upon success, this function returns the number of
    definitions and stores the definitions in DEST.  */

 static int
 get_defs (rtx curr_insn, rtx which_reg, VEC (rtx,heap) **dest)
 {
   df_ref reg_info, *defs;
   struct df_link *def_chain;
   int n_refs = 0;

   defs = DF_INSN_USES (curr_insn);
   reg_info = NULL;

   while (*defs)
     {
       reg_info = *defs;
       if (GET_CODE (DF_REF_REG (reg_info)) == SUBREG)
         return 0;
       if (REGNO (DF_REF_REG (reg_info)) == REGNO (which_reg))
         break;
       defs++;
     }

   gcc_assert (reg_info != NULL && defs != NULL);
   def_chain = DF_REF_CHAIN (reg_info);

   while (def_chain)
     {
       /* Problem getting some definition for this instruction.  */

       if (def_chain->ref == NULL)
         return 0;
       if (DF_REF_INSN_INFO (def_chain->ref) == NULL)
         return 0;
       def_chain = def_chain->next;
     }

   def_chain = DF_REF_CHAIN (reg_info);

   if (dest == NULL)
     return 1;

   while (def_chain)
     {
       VEC_safe_push (rtx, heap, *dest, DF_REF_INSN (def_chain->ref));
       def_chain = def_chain->next;
       n_refs++;
     }
   return n_refs;
 }

 /* rtx function to check if this SET insn, EXPR, is a conditional copy insn :
    (set (reg:SI a ) (IF_THEN_ELSE (cond) (reg:SI b) (reg:SI c)))
    Called from is_insn_cond_copy.  DATA stores the two registers on each
    side of the condition.  */

 static int
 is_this_a_cmove (rtx expr, void *data)
 {
   /* Check for conditional (if-then-else) copy.  */

   if (GET_CODE (expr) == SET
       && GET_CODE (SET_DEST (expr)) == REG
       && GET_MODE (SET_DEST (expr)) == SImode
       && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
       && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
       && GET_MODE (XEXP (SET_SRC (expr), 1)) == SImode
       && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG
       && GET_MODE (XEXP (SET_SRC (expr), 2)) == SImode)
     {
       ((rtx *)data)[0] = XEXP (SET_SRC (expr), 1);
       ((rtx *)data)[1] = XEXP (SET_SRC (expr), 2);
       return 1;
     }
   return 0;
 }

 /* This returns 1 if it found
    (SET (reg:SI REGNO (def_reg)) (if_then_else (cond) (REG:SI x1) (REG:SI x2)))
    in the DEF_INSN pattern.  It stores the x1 and x2 in COPY_REG_1
    and COPY_REG_2.  */

 static int
 is_insn_cond_copy (rtx def_insn, rtx *copy_reg_1, rtx *copy_reg_2)
 {
   int type;
   rtx set_expr;
   rtx srcreg[2];

   srcreg[0] = NULL_RTX;
   srcreg[1] = NULL_RTX;

   set_expr = single_set (def_insn);

   if (set_expr == NULL_RTX)
     return 0;

   type = is_this_a_cmove (set_expr, (void *) srcreg);

   if (type)
     {
       *copy_reg_1 = srcreg[0];
       *copy_reg_2 = srcreg[1];
       return type;
     }

   return 0;
 }

 /* Reaching Definitions of the zero-extended register could be conditional
    copies or regular definitions.  This function separates the two types into
    two lists, DEFS_LIST and COPIES_LIST.  This is necessary because, if a
    reaching definition is a conditional copy, combining the zero_extend with
    this definition is wrong.  Conditional copies are merged by transitively
    merging its definitions.  The defs_list is populated with all the reaching
    definitions of the zero-extension instruction (ZERO_EXTEND_INSN) which must
    be merged with a zero_extend.  The copies_list contains all the conditional
    moves that will later be extended into a DI mode conditonal move if all the
    merges are successful.  The function returns false when there is a failure
    in getting some definitions, like that of parameters.  It returns 1 upon
    success, 0 upon failure and 2 when all definitions of the ZERO_EXTEND_INSN
    were merged previously.  */

 static int
 make_defs_and_copies_lists (rtx zero_extend_insn, rtx set_pat,
                             VEC (rtx,heap) **defs_list,
                             VEC (rtx,heap) **copies_list)
 {
   bool *is_insn_visited;
   VEC (rtx,heap) *work_list;
   rtx srcreg, copy_reg_1, copy_reg_2;
   rtx def_insn;
   int n_defs = 0;
   int vec_index = 0;
   int n_worklist = 0;
   int i, is_copy;

   srcreg = XEXP (SET_SRC (set_pat), 0);
   work_list = VEC_alloc (rtx, heap, 8);

   /* Initialize the Work List */
   n_worklist = get_defs (zero_extend_insn, srcreg, &work_list);

   if (n_worklist == 0)
     {
       VEC_free (rtx, heap, work_list);
       /* The number of defs being equal to zero can only imply that all of its
          definitions have been previously merged.  */
       return 2;
     }

   is_insn_visited = XNEWVEC (bool, max_insn_uid);

   for (i = 0; i < max_insn_uid; i++)
     is_insn_visited[i] = false;


   /* Perform transitive closure for conditional copies.  */
   while (n_worklist > vec_index)
     {
       def_insn = VEC_index (rtx, work_list, vec_index);
       gcc_assert (INSN_UID (def_insn) < max_insn_uid);

       if (is_insn_visited[INSN_UID (def_insn)])
         {
           vec_index++;
           continue;
         }

       is_insn_visited[INSN_UID (def_insn)] = true;
       copy_reg_1 = copy_reg_2 = NULL_RTX;
       is_copy = is_insn_cond_copy (def_insn, &copy_reg_1, &copy_reg_2);
       if (is_copy)
         {
           gcc_assert (copy_reg_1 && copy_reg_2);

           /* Push it into the copy list first.  */

           VEC_safe_push (rtx, heap, *copies_list, def_insn);

           /* Perform transitive closure here */

           n_defs = get_defs (def_insn, copy_reg_1, &work_list);

           if (n_defs == 0)
             {
               VEC_free (rtx, heap, work_list);
               XDELETEVEC (is_insn_visited);
               return 0;
             }
           n_worklist += n_defs;

           n_defs = get_defs (def_insn, copy_reg_2, &work_list);
           if (n_defs == 0)
             {
               VEC_free (rtx, heap, work_list);
               XDELETEVEC (is_insn_visited);
               return 0;
             }
           n_worklist += n_defs;
         }
       else
         {
           VEC_safe_push (rtx, heap, *defs_list, def_insn);
         }
       vec_index++;
     }

   VEC_free (rtx, heap, work_list);
   XDELETEVEC (is_insn_visited);
   return 1;
 }

 /* Merge the DEF_INSN with a zero-extend.  Calls combine_set_zero_extend
    on the SET pattern.  */

 static bool
 merge_def_and_ze (rtx def_insn)
 {
   enum rtx_code code;
   rtx setreg;
   rtx *sub_rtx;
   rtx s_expr;
   int i;

   code = GET_CODE (PATTERN (def_insn));
   sub_rtx = NULL;

   if (code == PARALLEL)
     {
       for (i = 0; i < XVECLEN (PATTERN (def_insn), 0); i++)
         {
           s_expr = XVECEXP (PATTERN (def_insn), 0, i);
           if (GET_CODE (s_expr) != SET)
             continue;

           if (sub_rtx == NULL)
             sub_rtx = &XVECEXP (PATTERN (def_insn), 0, i);
           else
             {
               /* PARALLEL with multiple SETs.  */
               return false;
             }
         }
     }
   else if (code == SET)
     sub_rtx = &PATTERN (def_insn);
   else
     {
       /* It is not a PARALLEL or a SET, what could it be ? */
       return false;
     }

   gcc_assert (sub_rtx != NULL);

   if (GET_CODE (SET_DEST (*sub_rtx)) == REG
       && GET_MODE (SET_DEST (*sub_rtx)) == SImode)
     {
       setreg = get_reg_di (SET_DEST (*sub_rtx));
       return combine_set_zero_extend (def_insn, sub_rtx, setreg);
     }
   else
     return false;
   return true;
 }

 /* This function goes through all reaching defs of the source
    of the zero extension instruction (ZERO_EXTEND_INSN) and
    tries to combine the zero extension with the definition
    instruction.  The changes are made as a group so that even
    if one definition cannot be merged, all reaching definitions
    end up not being merged. When a conditional copy is encountered,
    merging is attempted transitively on its definitions.  It returns
    true upon success and false upon failure.  */

 static bool
 combine_reaching_defs (rtx zero_extend_insn, rtx set_pat)
 {
   rtx def_insn;
   bool merge_successful = true;
   int i;
   int defs_ix;
   int outcome;

   /* To store the definitions that have been merged.  */

   VEC (rtx, heap) *defs_list, *copies_list, *vec;
   enum insn_merge_code merge_code;

   defs_list = VEC_alloc (rtx, heap, 8);
   copies_list = VEC_alloc (rtx, heap, 8);

   outcome = make_defs_and_copies_lists (zero_extend_insn,
                                         set_pat, &defs_list, &copies_list);

   /* outcome == 2 implies that all the definitions for this zero_extend were
      merged while previously when handling other zero_extends.  */

   if (outcome == 2)
     {
       VEC_free (rtx, heap, defs_list);
       VEC_free (rtx, heap, copies_list);
       if (dump_file)
         fprintf (dump_file, "All definitions have been merged previously.\n");
       return true;
     }

   if (outcome == 0)
     {
       VEC_free (rtx, heap, defs_list);
       VEC_free (rtx, heap, copies_list);
       return false;
     }

   merge_successful = true;

   /* Go through the defs vector and try to merge all the definitions
      in this vector.  */

   vec = VEC_alloc (rtx, heap, 8);
   FOR_EACH_VEC_ELT (rtx, defs_list, defs_ix, def_insn)
     {
       merge_code = get_insn_status (def_insn);
       gcc_assert (merge_code == MERGE_NOT_ATTEMPTED);

       if (merge_def_and_ze (def_insn))
         VEC_safe_push (rtx, heap, vec, def_insn);
       else
         {
           merge_successful = false;
           break;
         }
     }

   /* Now go through the conditional copies vector and try to merge all
      the copies in this vector.  */

   if (merge_successful)
     {
       FOR_EACH_VEC_ELT (rtx, copies_list, i, def_insn)
         {
           if (transform_ifelse (def_insn))
             {
               VEC_safe_push (rtx, heap, vec, def_insn);
             }
           else
             {
               merge_successful = false;
               break;
             }
         }
     }

   if (merge_successful)
     {
       /* Commit the changes here if possible */
       /* XXX : Now, it is an all or nothing scenario.  Even if one definition
          cannot be merged we totally bail.  In future, allow zero-extensions to
          be partially eliminated along those paths where the definitions could
          be merged.  */

       if (apply_change_group ())
         {
           if (dump_file)
             fprintf (dump_file, "All merges were successful ....\n");

           FOR_EACH_VEC_ELT (rtx, vec, i, def_insn)
             {
               set_insn_status (def_insn, MERGE_SUCCESS);
             }

           VEC_free (rtx, heap, vec);
           VEC_free (rtx, heap, defs_list);
           VEC_free (rtx, heap, copies_list);
           return true;
         }
       else
         {
           /* Changes need not be cancelled explicitly as apply_change_group
              does it.  Print list of definitions in the dump_file for debug
              purposes.  This zero-extension cannot be deleted.  */

           if (dump_file)
             {
               FOR_EACH_VEC_ELT (rtx, vec, i, def_insn)
                 {
                   fprintf (dump_file, " Ummergable definitions : \n");
                   print_rtl_single (dump_file, def_insn);
                 }
             }
         }
     }
   else
     {
       /* Cancel any changes that have been made so far.  */
       cancel_changes (0);
     }

   VEC_free (rtx, heap, vec);
   VEC_free (rtx, heap, defs_list);
   VEC_free (rtx, heap, copies_list);
   return false;
 }

 /* Carry information about zero-extensions while walking the RTL.  */

 struct zero_extend_info
 {
   /* The insn where the zero-extension is.  */
   rtx insn;

   /* The list of candidates.  */
   VEC (rtx, heap) *insn_list;
 };

 /* Add a zero-extend pattern that could be eliminated.  This is called via
    note_stores from find_removable_zero_extends.  */

 static void
 add_removable_zero_extend (rtx x ATTRIBUTE_UNUSED, const_rtx expr, void *data)
 {
   struct zero_extend_info *zei = (struct zero_extend_info *)data;
   rtx src, dest;

   /* We are looking for SET (REG:DI N) (ZERO_EXTEND (REG:SI N)).  */
   if (GET_CODE (expr) != SET)
     return;

   src = SET_SRC (expr);
   dest = SET_DEST (expr);

   if (REG_P (dest)
       && GET_MODE (dest) == DImode
       && GET_CODE (src) == ZERO_EXTEND
       && REG_P (XEXP (src, 0))
       && GET_MODE (XEXP (src, 0)) == SImode
       && REGNO (dest) == REGNO (XEXP (src, 0)))
     {
       if (get_defs (zei->insn, XEXP (src, 0), NULL))
 	VEC_safe_push (rtx, heap, zei->insn_list, zei->insn);
       else if (dump_file)
 	{
 	  fprintf (dump_file, "Cannot eliminate zero-extension: \n");
 	  print_rtl_single (dump_file, zei->insn);
 	  fprintf (dump_file, "No defs. Could be extending parameters.\n");
 	}
     }
 }

 /* Traverse the instruction stream looking for zero-extends and return the
    list of candidates.  */

 static VEC (rtx,heap)*
 find_removable_zero_extends (void)
 {
   struct zero_extend_info zei;
   basic_block bb;
   rtx insn;

   zei.insn_list = VEC_alloc (rtx, heap, 8);

   FOR_EACH_BB (bb)
     FOR_BB_INSNS (bb, insn)
       {
 	if (!NONDEBUG_INSN_P (insn))
 	  continue;

 	zei.insn = insn;
 	note_stores (PATTERN (insn), add_removable_zero_extend, &zei);
       }

   return zei.insn_list;
 }

 /* This is the main function that checks the insn stream for redundant
    zero extensions and tries to remove them if possible.  */

 static unsigned int
 find_and_remove_ze (void)
 {
   rtx curr_insn = NULL_RTX;
   int i;
   int ix;
   long long num_realized = 0;
   long long num_ze_opportunities = 0;
   VEC (rtx, heap) *zeinsn_list;
   VEC (rtx, heap) *zeinsn_del_list;

   /* Construct DU chain to get all reaching definitions of each
      zero-extension instruction.  */

   df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
   df_analyze ();

   max_insn_uid = get_max_uid ();

   is_insn_merge_attempted
     = XNEWVEC (enum insn_merge_code,
 	       sizeof (enum insn_merge_code) * max_insn_uid);

   for (i = 0; i < max_insn_uid; i++)
     is_insn_merge_attempted[i] = MERGE_NOT_ATTEMPTED;

   num_ze_opportunities = num_realized = 0;

   zeinsn_del_list = VEC_alloc (rtx, heap, 4);

   zeinsn_list = find_removable_zero_extends ();

   FOR_EACH_VEC_ELT (rtx, zeinsn_list, ix, curr_insn)
     {
       num_ze_opportunities++;
       /* Try to combine the zero-extends with the definition here.  */

       if (dump_file)
         {
           fprintf (dump_file, "Trying to eliminate zero extension : \n");
           print_rtl_single (dump_file, curr_insn);
         }

       if (combine_reaching_defs (curr_insn, PATTERN (curr_insn)))
         {
           if (dump_file)
             fprintf (dump_file, "Eliminated the zero extension...\n");
           num_realized++;
           VEC_safe_push (rtx, heap, zeinsn_del_list, curr_insn);
         }
     }

   /* Delete all useless zero extensions here in one sweep.  */
   FOR_EACH_VEC_ELT (rtx, zeinsn_del_list, ix, curr_insn)
     delete_insn (curr_insn);

   free (is_insn_merge_attempted);
   VEC_free (rtx, heap, zeinsn_list);
   VEC_free (rtx, heap, zeinsn_del_list);

   if (dump_file && num_ze_opportunities > 0)
     fprintf (dump_file, "\n %s : num_zee_opportunities = %lld "
                         "num_realized = %lld \n",
                         current_function_name (),
                         num_ze_opportunities, num_realized);

   df_finish_pass (false);
   return 0;
 }

 /* Find and remove redundant zero extensions.  */

 static unsigned int
 rest_of_handle_zee (void)
 {
   timevar_push (TV_ZEE);
   find_and_remove_ze ();
   timevar_pop (TV_ZEE);
   return 0;
 }

 /* Run zee pass when flag_zee is set at optimization level > 0.  */

 static bool
 gate_handle_zee (void)
 {
   return (optimize > 0 && flag_zee);
 }

 struct rtl_opt_pass pass_implicit_zee =
 {
  {
   RTL_PASS,
   "zee",                                /* name */
   gate_handle_zee,                      /* gate */
   rest_of_handle_zee,                   /* execute */
   NULL,                                 /* sub */
   NULL,                                 /* next */
   0,                                    /* static_pass_number */
   TV_ZEE,                               /* tv_id */
   0,                                    /* properties_required */
   0,                                    /* properties_provided */
   0,                                    /* properties_destroyed */
   0,                                    /* todo_flags_start */
   TODO_ggc_collect |
   TODO_dump_func |
   TODO_verify_rtl_sharing,              /* todo_flags_finish */
  }
 };