| /* Matrix layout transformations. |
| Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc. |
| Contributed by Razya Ladelsky <razya@il.ibm.com> |
| Originally written by Revital Eres and Mustafa Hagog. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* |
| Matrix flattening optimization tries to replace an N-dimensional |
| matrix with its equivalent M-dimensional matrix, where M < N. |
| This first implementation focuses on global matrices defined dynamically. |
| |
| When M==1, we actually flatten the whole matrix. |
| For instance consider a two-dimensional array a [dim1] [dim2]. |
| The code for allocating space for it usually looks like: |
| |
| a = (int **) malloc(dim1 * sizeof(int *)); |
| for (i=0; i<dim1; i++) |
| a[i] = (int *) malloc (dim2 * sizeof(int)); |
| |
| If the array "a" is found suitable for this optimization, |
| its allocation is replaced by: |
| |
| a = (int *) malloc (dim1 * dim2 * sizeof(int)); |
| |
| and all the references to a[i][j] are replaced by a[i * dim2 + j]. |
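| |
| As a sketch of how this generalizes (not taken from a particular |
| program), a three-dimensional matrix a [dim1] [dim2] [dim3] whose |
| levels are all allocated dynamically and do not escape would have |
| its allocation collapsed to |
| |
| a = (int *) malloc (dim1 * dim2 * dim3 * sizeof(int)); |
| |
| and a reference a[i][j][k] would be replaced by |
| a[i * dim2 * dim3 + j * dim3 + k]. |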
| |
| The two main phases of the optimization are the analysis |
| and transformation. |
| The driver of the optimization is matrix_reorg (). |
| |
| |
| |
| Analysis phase: |
| =============== |
| |
| We'll number the dimensions outside-in, meaning the most external |
| is 0, then 1, and so on. |
| The analysis part of the optimization determines K, the escape |
| level of an N-dimensional matrix (K <= N), which allows flattening of |
| the external dimensions 0,1,..., K-1. Escape level 0 means that the |
| whole matrix escapes and no flattening is possible. |
| |
| The analysis part is implemented in analyze_matrix_allocation_site() |
| and analyze_matrix_accesses(). |
| |
| Transformation phase: |
| ===================== |
| In this phase we define the new flattened matrices that replace the |
| original matrices in the code. |
| Implemented in transform_allocation_sites(), |
| transform_access_sites(). |
| |
| Matrix Transposing |
| ================== |
| The idea of Matrix Transposing is organizing the matrix in a different |
| layout such that the dimensions are reordered. |
| This could produce better cache behavior in some cases. |
| |
| For example, lets look at the matrix accesses in the following loop: |
| |
| for (i=0; i<N; i++) |
| for (j=0; j<M; j++) |
| access to a[i][j] |
| |
| This loop can produce good cache behavior because the elements of |
| the inner dimension are accessed sequentially. |
| |
| However, if the accesses of the matrix were of the following form: |
| |
| for (i=0; i<N; i++) |
| for (j=0; j<M; j++) |
| access to a[j][i] |
| |
| In this loop we iterate over the columns and not the rows. |
| Therefore, swapping the rows and columns |
| would give an organization with better (cache) locality. |
| Swapping the dimensions of the matrix is called matrix transposing. |
| |
| This example, of course, could be extended to matrices with more |
| dimensions as well. |
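| |
| As an illustration (a sketch, not lifted from a real program), if the |
| matrix "a" accessed as a[j][i] in the loop above is transposed and |
| then flattened, the accesses become sequential again: |
| |
| for (i=0; i<N; i++) |
| for (j=0; j<M; j++) |
| access to a[i * M + j] |
| |
| so the innermost loop walks consecutive elements of the new layout. |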
| |
| Since a program could include all kinds of accesses, there is a |
| decision mechanism, implemented in analyze_transpose(), which uses a |
| heuristic to determine whether to transpose the matrix or not, |
| according to the form of the most dominant accesses. |
| This decision is passed on to the flattening mechanism; whether or |
| not the matrix was transposed, the matrix is flattened (if possible). |
| |
| The decision making is based on profiling information and loop |
| information. If profiling information is available, the |
| decision-making mechanism is used; otherwise the matrix is only |
| flattened (if possible). |
| |
| Both optimizations are described in the paper "Matrix flattening and |
| transposing in GCC", which was presented at the GCC Summit 2006. |
| http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "tree.h" |
| #include "rtl.h" |
| #include "c-tree.h" |
| #include "tree-inline.h" |
| #include "tree-flow.h" |
| #include "tree-flow-inline.h" |
| #include "langhooks.h" |
| #include "hashtab.h" |
| #include "toplev.h" |
| #include "flags.h" |
| #include "ggc.h" |
| #include "debug.h" |
| #include "target.h" |
| #include "cgraph.h" |
| #include "diagnostic.h" |
| #include "timevar.h" |
| #include "params.h" |
| #include "fibheap.h" |
| #include "c-common.h" |
| #include "intl.h" |
| #include "function.h" |
| #include "basic-block.h" |
| #include "cfgloop.h" |
| #include "tree-iterator.h" |
| #include "tree-pass.h" |
| #include "opts.h" |
| #include "tree-data-ref.h" |
| #include "tree-chrec.h" |
| #include "tree-scalar-evolution.h" |
| |
| /* We need to collect a lot of data from the original malloc, |
| particularly as the gimplifier has converted: |
| |
| orig_var = (struct_type *) malloc (x * sizeof (struct_type *)); |
| |
| into |
| |
| T3 = <constant> ; ** <constant> is amount to malloc; precomputed ** |
| T4 = malloc (T3); |
| T5 = (struct_type *) T4; |
| orig_var = T5; |
| |
| The following struct fields allow us to collect all the necessary data from |
| the gimplified program. The comments in the struct below are all based |
| on the gimple example above. */ |
| |
| struct malloc_call_data |
| { |
| gimple call_stmt; /* Tree for "T4 = malloc (T3);" */ |
| tree size_var; /* Var decl for T3. */ |
| tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */ |
| }; |
| |
| static tree can_calculate_expr_before_stmt (tree, sbitmap); |
| static tree can_calculate_stmt_before_stmt (gimple, sbitmap); |
| |
| /* The front end of the compiler, when parsing statements of the form: |
| |
| var = (type_cast) malloc (sizeof (type)); |
| |
| always converts this single statement into the following statements |
| (GIMPLE form): |
| |
| T.1 = sizeof (type); |
| T.2 = malloc (T.1); |
| T.3 = (type_cast) T.2; |
| var = T.3; |
| |
| Since we need to create new malloc statements and modify the original |
| statements somewhat, we need to find all four of the above statements. |
| Currently record_call_1 (called for building cgraph edges) finds and |
| records the statements containing the actual call to malloc, but we |
| need to find the rest of the variables/statements on our own. That |
| is what the following function does. */ |
| static void |
| collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data) |
| { |
| tree size_var = NULL; |
| tree malloc_fn_decl; |
| tree arg1; |
| |
| gcc_assert (is_gimple_call (stmt)); |
| |
| malloc_fn_decl = gimple_call_fndecl (stmt); |
| if (malloc_fn_decl == NULL |
| || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC) |
| return; |
| |
| arg1 = gimple_call_arg (stmt, 0); |
| size_var = arg1; |
| |
| m_data->call_stmt = stmt; |
| m_data->size_var = size_var; |
| if (TREE_CODE (size_var) != VAR_DECL) |
| m_data->malloc_size = size_var; |
| else |
| m_data->malloc_size = NULL_TREE; |
| } |
| |
| /* Information about a matrix access site. |
| For example: if an access site of matrix arr is arr[i][j], |
| the ACCESS_SITE_INFO structure will have the address |
| of arr as its stmt. The INDEX_INFO will hold information about the |
| initial address and index of each dimension. */ |
| struct access_site_info |
| { |
| /* The statement (INDIRECT_REF or POINTER_PLUS_EXPR). */ |
| gimple stmt; |
| |
| /* In case of POINTER_PLUS_EXPR, what is the offset. */ |
| tree offset; |
| |
| /* The index which created the offset. */ |
| tree index; |
| |
| /* The indirection level of this statement. */ |
| int level; |
| |
| /* TRUE for an allocation site, FALSE for an access site. */ |
| bool is_alloc; |
| |
| /* The function containing the access site. */ |
| tree function_decl; |
| |
| /* TRUE if this access is iterated by the innermost loop. */ |
| bool iterated_by_inner_most_loop_p; |
| }; |
| |
| typedef struct access_site_info *access_site_info_p; |
| DEF_VEC_P (access_site_info_p); |
| DEF_VEC_ALLOC_P (access_site_info_p, heap); |
| |
| /* Information about matrix to flatten. */ |
| struct matrix_info |
| { |
| /* Decl tree of this matrix. */ |
| tree decl; |
| /* Number of dimensions; number |
| of "*" in the type declaration. */ |
| int num_dims; |
| |
| /* Minimum indirection level that escapes: 0 means that |
| the whole matrix escapes; k means that the matrix escapes at |
| indirection level k, so dimensions 0 to k-1 can be flattened. */ |
| int min_indirect_level_escape; |
| |
| gimple min_indirect_level_escape_stmt; |
| |
| /* Holds the allocation site for each level (dimension). |
| We could use NUM_DIMS as the upper bound and allocate the array |
| once with this number of elements, with no need to use realloc and |
| MAX_MALLOCED_LEVEL. */ |
| gimple *malloc_for_level; |
| |
| int max_malloced_level; |
| |
| /* Whether the matrix is transposed. */ |
| bool is_transposed_p; |
| |
| /* The location of the allocation sites (they must be in one |
| function). */ |
| tree allocation_function_decl; |
| |
| /* The calls to free for each level of indirection. */ |
| struct free_info |
| { |
| gimple stmt; |
| tree func; |
| } *free_stmts; |
| |
| /* An array which holds the size of each dimension, where |
| dimension 0 is the outermost (the one that contains all the |
| others). */ |
| tree *dimension_size; |
| |
| /* An array which holds for each dimension its original size |
| (before transposing and flattening take place). */ |
| tree *dimension_size_orig; |
| |
| /* An array which holds for each dimension the size of the type |
| of elements accessed at that level (in bytes). */ |
| HOST_WIDE_INT *dimension_type_size; |
| |
| int dimension_type_size_len; |
| |
| /* An array collecting the count of accesses for each dimension. */ |
| gcov_type *dim_hot_level; |
| |
| /* An array of the accesses to be flattened; |
| elements are of type "struct access_site_info *". */ |
| VEC (access_site_info_p, heap) * access_l; |
| |
| /* A map of how the dimensions will be organized at the end of |
| the analyses. */ |
| int *dim_map; |
| }; |
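| |
| /* A worked illustration of the fields above (a sketch only): for the |
| two-dimensional matrix from the head of this file, declared as |
| "int **a;" and allocated with one malloc per dimension, NUM_DIMS is 2 |
| (two "*" in the type). If neither level escapes, |
| MIN_INDIRECT_LEVEL_ESCAPE ends up as 2, MALLOC_FOR_LEVEL[0] and |
| MALLOC_FOR_LEVEL[1] hold the two malloc call statements, and |
| DIMENSION_SIZE[1] records the size expression passed to the level-1 |
| malloc, i.e. "dim2 * sizeof(int)". */ |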
| |
| /* In each phi node we want to record the indirection level we have when we |
| get to the phi node. Usually we will have phi nodes with more than two |
| arguments, and then we must ensure that all of them reach the phi node |
| with the same indirection level; otherwise it is not safe to do the |
| flattening. So each time we get to the phi node we record the |
| indirection level in this hash table. */ |
| |
| struct matrix_access_phi_node |
| { |
| gimple phi; |
| int indirection_level; |
| }; |
| |
| /* We use this structure to find if the SSA variable is accessed inside the |
| tree and record the tree containing it. */ |
| |
| struct ssa_acc_in_tree |
| { |
| /* The variable whose accesses in the tree we are looking for. */ |
| tree ssa_var; |
| /* The tree (and its code) in which the ssa_var is accessed; currently |
| it can be an INDIRECT_REF or CALL_EXPR. */ |
| enum tree_code t_code; |
| tree t_tree; |
| /* The place in the containing tree. */ |
| tree *tp; |
| tree second_op; |
| bool var_found; |
| }; |
| |
| static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool, |
| sbitmap, bool); |
| static int transform_allocation_sites (void **, void *); |
| static int transform_access_sites (void **, void *); |
| static int analyze_transpose (void **, void *); |
| static int dump_matrix_reorg_analysis (void **, void *); |
| |
| static bool check_transpose_p; |
| |
| /* Hash function used for the phi nodes. */ |
| |
| static hashval_t |
| mat_acc_phi_hash (const void *p) |
| { |
| const struct matrix_access_phi_node *const ma_phi = |
| (const struct matrix_access_phi_node *) p; |
| |
| return htab_hash_pointer (ma_phi->phi); |
| } |
| |
| /* Equality means phi node pointers are the same. */ |
| |
| static int |
| mat_acc_phi_eq (const void *p1, const void *p2) |
| { |
| const struct matrix_access_phi_node *const phi1 = |
| (const struct matrix_access_phi_node *) p1; |
| const struct matrix_access_phi_node *const phi2 = |
| (const struct matrix_access_phi_node *) p2; |
| |
| if (phi1->phi == phi2->phi) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* Hold the PHI nodes we visit during the traversal for escaping |
| analysis. */ |
| static htab_t htab_mat_acc_phi_nodes = NULL; |
| |
| /* This hash-table holds the information about the matrices we are |
| going to handle. */ |
| static htab_t matrices_to_reorg = NULL; |
| |
| /* Return a hash for MTT, which is really a "matrix_info *". */ |
| static hashval_t |
| mtt_info_hash (const void *mtt) |
| { |
| return htab_hash_pointer (((const struct matrix_info *) mtt)->decl); |
| } |
| |
| /* Return true if MTT1 and MTT2 (which are really both of type |
| "matrix_info *") refer to the same decl. */ |
| static int |
| mtt_info_eq (const void *mtt1, const void *mtt2) |
| { |
| const struct matrix_info *const i1 = (const struct matrix_info *) mtt1; |
| const struct matrix_info *const i2 = (const struct matrix_info *) mtt2; |
| |
| if (i1->decl == i2->decl) |
| return true; |
| |
| return false; |
| } |
| |
| /* Return false if STMT may contain a vector expression. |
| In that situation, no matrices should be flattened. */ |
| static bool |
| may_flatten_matrices_1 (gimple stmt) |
| { |
| tree t; |
| |
| switch (gimple_code (stmt)) |
| { |
| case GIMPLE_ASSIGN: |
| if (!gimple_assign_cast_p (stmt)) |
| return true; |
| |
| t = gimple_assign_rhs1 (stmt); |
| while (CONVERT_EXPR_P (t)) |
| { |
| if (TREE_TYPE (t) && POINTER_TYPE_P (TREE_TYPE (t))) |
| { |
| tree pointee; |
| |
| pointee = TREE_TYPE (t); |
| while (POINTER_TYPE_P (pointee)) |
| pointee = TREE_TYPE (pointee); |
| if (TREE_CODE (pointee) == VECTOR_TYPE) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| "Found vector type, don't flatten matrix\n"); |
| return false; |
| } |
| } |
| t = TREE_OPERAND (t, 0); |
| } |
| break; |
| case GIMPLE_ASM: |
| /* Asm code could contain vector operations. */ |
| return false; |
| break; |
| default: |
| break; |
| } |
| return true; |
| } |
| |
| /* Return false if there are hand-written vectors in the program. |
| We disable the flattening in such a case. */ |
| static bool |
| may_flatten_matrices (struct cgraph_node *node) |
| { |
| tree decl; |
| struct function *func; |
| basic_block bb; |
| gimple_stmt_iterator gsi; |
| |
| decl = node->decl; |
| if (node->analyzed) |
| { |
| func = DECL_STRUCT_FUNCTION (decl); |
| FOR_EACH_BB_FN (bb, func) |
| for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| if (!may_flatten_matrices_1 (gsi_stmt (gsi))) |
| return false; |
| } |
| return true; |
| } |
| |
| /* Given a VAR_DECL, check its type to determine whether it is |
| a definition of a dynamically allocated matrix and therefore is |
| a suitable candidate for the matrix flattening optimization. |
| Return NULL if VAR_DECL is not such a decl. Otherwise, allocate |
| a MATRIX_INFO structure, fill it with the relevant information |
| and return a pointer to it. |
| TODO: also handle statically defined arrays. */ |
| static struct matrix_info * |
| analyze_matrix_decl (tree var_decl) |
| { |
| struct matrix_info *m_node, tmpmi, *mi; |
| tree var_type; |
| int dim_num = 0; |
| |
| gcc_assert (matrices_to_reorg); |
| |
| if (TREE_CODE (var_decl) == PARM_DECL) |
| var_type = DECL_ARG_TYPE (var_decl); |
| else if (TREE_CODE (var_decl) == VAR_DECL) |
| var_type = TREE_TYPE (var_decl); |
| else |
| return NULL; |
| |
| if (!POINTER_TYPE_P (var_type)) |
| return NULL; |
| |
| while (POINTER_TYPE_P (var_type)) |
| { |
| var_type = TREE_TYPE (var_type); |
| dim_num++; |
| } |
| |
| if (dim_num <= 1) |
| return NULL; |
| |
| if (!COMPLETE_TYPE_P (var_type) |
| || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST) |
| return NULL; |
| |
| /* Check to see if this pointer is already in there. */ |
| tmpmi.decl = var_decl; |
| mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi); |
| |
| if (mi) |
| return NULL; |
| |
| /* Record the matrix. */ |
| |
| m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info)); |
| m_node->decl = var_decl; |
| m_node->num_dims = dim_num; |
| m_node->free_stmts |
| = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info)); |
| |
| /* Init min_indirect_level_escape to -1 to indicate that no escape |
| analysis has been done yet. */ |
| m_node->min_indirect_level_escape = -1; |
| m_node->is_transposed_p = false; |
| |
| return m_node; |
| } |
| |
| /* Free matrix E. */ |
| static void |
| mat_free (void *e) |
| { |
| struct matrix_info *mat = (struct matrix_info *) e; |
| |
| if (!mat) |
| return; |
| |
| if (mat->free_stmts) |
| free (mat->free_stmts); |
| if (mat->dim_hot_level) |
| free (mat->dim_hot_level); |
| if (mat->malloc_for_level) |
| free (mat->malloc_for_level); |
| } |
| |
| /* Find all potential matrices. |
| TODO: currently we handle only multidimensional |
| dynamically allocated arrays. */ |
| static void |
| find_matrices_decl (void) |
| { |
| struct matrix_info *tmp; |
| PTR *slot; |
| struct varpool_node *vnode; |
| |
| gcc_assert (matrices_to_reorg); |
| |
| /* For every global variable in the program: |
| Check to see if it's of a candidate type and record it. */ |
| for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed) |
| { |
| tree var_decl = vnode->decl; |
| |
| if (!var_decl || TREE_CODE (var_decl) != VAR_DECL) |
| continue; |
| |
| if (matrices_to_reorg) |
| if ((tmp = analyze_matrix_decl (var_decl))) |
| { |
| if (!TREE_ADDRESSABLE (var_decl)) |
| { |
| slot = htab_find_slot (matrices_to_reorg, tmp, INSERT); |
| *slot = tmp; |
| } |
| } |
| } |
| return; |
| } |
| |
| /* Mark that the matrix MI escapes at level L. */ |
| static void |
| mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s) |
| { |
| if (mi->min_indirect_level_escape == -1 |
| || (mi->min_indirect_level_escape > l)) |
| { |
| mi->min_indirect_level_escape = l; |
| mi->min_indirect_level_escape_stmt = s; |
| } |
| } |
| |
| /* Find if the SSA variable is accessed inside the |
| tree and record the tree containing it. |
| The only relevant uses are the case of SSA_NAME, or SSA inside |
| INDIRECT_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR. */ |
| static void |
| ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a) |
| { |
| a->t_code = TREE_CODE (t); |
| switch (a->t_code) |
| { |
| case SSA_NAME: |
| if (t == a->ssa_var) |
| a->var_found = true; |
| break; |
| case INDIRECT_REF: |
| if (SSA_VAR_P (TREE_OPERAND (t, 0)) |
| && TREE_OPERAND (t, 0) == a->ssa_var) |
| a->var_found = true; |
| break; |
| default: |
| break; |
| } |
| } |
| |
| /* Find if the SSA variable is accessed on the right hand side of |
| gimple call STMT. */ |
| |
| static void |
| ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a) |
| { |
| tree decl; |
| tree arg; |
| size_t i; |
| |
| a->t_code = CALL_EXPR; |
| for (i = 0; i < gimple_call_num_args (stmt); i++) |
| { |
| arg = gimple_call_arg (stmt, i); |
| if (arg == a->ssa_var) |
| { |
| a->var_found = true; |
| decl = gimple_call_fndecl (stmt); |
| a->t_tree = decl; |
| break; |
| } |
| } |
| } |
| |
| /* Find if the SSA variable is accessed on the right hand side of |
| gimple assign STMT. */ |
| |
| static void |
| ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a) |
| { |
| |
| a->t_code = gimple_assign_rhs_code (stmt); |
| switch (a->t_code) |
| { |
| tree op1, op2; |
| |
| case SSA_NAME: |
| case INDIRECT_REF: |
| CASE_CONVERT: |
| case VIEW_CONVERT_EXPR: |
| ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a); |
| break; |
| case POINTER_PLUS_EXPR: |
| case PLUS_EXPR: |
| case MULT_EXPR: |
| op1 = gimple_assign_rhs1 (stmt); |
| op2 = gimple_assign_rhs2 (stmt); |
| |
| if (op1 == a->ssa_var) |
| { |
| a->var_found = true; |
| a->second_op = op2; |
| } |
| else if (op2 == a->ssa_var) |
| { |
| a->var_found = true; |
| a->second_op = op1; |
| } |
| break; |
| default: |
| break; |
| } |
| } |
| |
| /* Record the access/allocation site information for matrix MI so we can |
| handle it later in transformation. */ |
| static void |
| record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset, |
| tree index, int level, bool is_alloc) |
| { |
| struct access_site_info *acc_info; |
| |
| if (!mi->access_l) |
| mi->access_l = VEC_alloc (access_site_info_p, heap, 100); |
| |
| acc_info |
| = (struct access_site_info *) |
| xcalloc (1, sizeof (struct access_site_info)); |
| acc_info->stmt = stmt; |
| acc_info->offset = offset; |
| acc_info->index = index; |
| acc_info->function_decl = current_function_decl; |
| acc_info->level = level; |
| acc_info->is_alloc = is_alloc; |
| |
| VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info); |
| |
| } |
| |
| /* Record the malloc as the allocation site of the given LEVEL. But |
| first we make sure that all the size parameters passed to malloc in |
| all the allocation sites could be pre-calculated before the call to |
| the malloc of level 0 (the main malloc call). */ |
| static void |
| add_allocation_site (struct matrix_info *mi, gimple stmt, int level) |
| { |
| struct malloc_call_data mcd; |
| |
| /* Make sure that the allocation sites are in the same function. */ |
| if (!mi->allocation_function_decl) |
| mi->allocation_function_decl = current_function_decl; |
| else if (mi->allocation_function_decl != current_function_decl) |
| { |
| int min_malloc_level; |
| |
| gcc_assert (mi->malloc_for_level); |
| |
| /* Find the minimum malloc level that has already been seen; |
| we know its allocation function must be |
| MI->allocation_function_decl. Since it differs from |
| CURRENT_FUNCTION_DECL, the escaping level should be |
| MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation function |
| must be set accordingly. */ |
| for (min_malloc_level = 0; |
| min_malloc_level < mi->max_malloced_level |
| && mi->malloc_for_level[min_malloc_level]; min_malloc_level++); |
| if (level < min_malloc_level) |
| { |
| mi->allocation_function_decl = current_function_decl; |
| mark_min_matrix_escape_level (mi, min_malloc_level, stmt); |
| } |
| else |
| { |
| mark_min_matrix_escape_level (mi, level, stmt); |
| /* It cannot be that level == min_malloc_level; |
| we would have returned earlier. */ |
| return; |
| } |
| } |
| |
| /* Find the correct malloc information. */ |
| collect_data_for_malloc_call (stmt, &mcd); |
| |
| /* We accept only calls to malloc function; we do not accept |
| calls like calloc and realloc. */ |
| if (!mi->malloc_for_level) |
| { |
| mi->malloc_for_level = XCNEWVEC (gimple, level + 1); |
| mi->max_malloced_level = level + 1; |
| } |
| else if (mi->max_malloced_level <= level) |
| { |
| mi->malloc_for_level |
| = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1); |
| |
| /* Zero the newly allocated items. */ |
| memset (&(mi->malloc_for_level[mi->max_malloced_level]), |
| 0, (level + 1 - mi->max_malloced_level) * sizeof (gimple)); |
| |
| mi->max_malloced_level = level + 1; |
| } |
| mi->malloc_for_level[level] = stmt; |
| } |
| |
| /* Given an assignment statement STMT whose left-hand side we know |
| to be the matrix MI variable, we traverse the use-def chain |
| backwards until we get to a malloc site. We make sure that |
| there is one and only one malloc site that sets this variable. When |
| we are performing the flattening we generate a new variable that |
| will hold the size for each dimension; each malloc that allocates a |
| dimension has the size parameter; we use that parameter to |
| initialize the dimension size variable so we can use it later in |
| the address calculations. LEVEL is the dimension we're inspecting. |
| If STMT turns out not to be related to an allocation site, the |
| matrix is marked as escaping at LEVEL. */ |
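| |
| /* For instance, a sketch based on the gimplified malloc pattern shown |
| near the top of this file (T3, T4 and T5 are the temporaries from |
| that example, not names from a real program): |
| |
| T4 = malloc (T3); |
| T5 = (struct_type *) T4; |
| orig_var = T5; |
| |
| Starting from "orig_var = T5;" we walk the definitions backwards: T5 |
| leads to the cast, the cast leads to "T4 = malloc (T3);", which is |
| then recorded as the allocation site for the current LEVEL. */ |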
| |
| static void |
| analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt, |
| int level, sbitmap visited) |
| { |
| if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt)) |
| { |
| tree rhs = gimple_assign_rhs1 (stmt); |
| |
| if (TREE_CODE (rhs) == SSA_NAME) |
| { |
| gimple def = SSA_NAME_DEF_STMT (rhs); |
| |
| analyze_matrix_allocation_site (mi, def, level, visited); |
| return; |
| } |
| /* If we are back to the original matrix variable then we |
| are sure that this is analyzed as an access site. */ |
| else if (rhs == mi->decl) |
| return; |
| } |
| /* A result of call to malloc. */ |
| else if (is_gimple_call (stmt)) |
| { |
| int call_flags = gimple_call_flags (stmt); |
| |
| if (!(call_flags & ECF_MALLOC)) |
| { |
| mark_min_matrix_escape_level (mi, level, stmt); |
| return; |
| } |
| else |
| { |
| tree malloc_fn_decl; |
| const char *malloc_fname; |
| |
| malloc_fn_decl = gimple_call_fndecl (stmt); |
| if (malloc_fn_decl == NULL_TREE) |
| { |
| mark_min_matrix_escape_level (mi, level, stmt); |
| return; |
| } |
| malloc_fname = IDENTIFIER_POINTER (DECL_NAME (malloc_fn_decl)); |
| if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| "Matrix %s is an argument to function %s\n", |
| get_name (mi->decl), get_name (malloc_fn_decl)); |
| mark_min_matrix_escape_level (mi, level, stmt); |
| return; |
| } |
| } |
| /* This is a call to malloc of level 'level'. |
| mi->max_malloced_level-1 == level means that we've |
| seen a malloc statement of level 'level' before. |
| If the statement is not the same one that we've |
| seen before, then there's another malloc statement |
| for the same level, which means that we need to mark |
| it escaping. */ |
| if (mi->malloc_for_level |
| && mi->max_malloced_level-1 == level |
| && mi->malloc_for_level[level] != stmt) |
| { |
| mark_min_matrix_escape_level (mi, level, stmt); |
| return; |
| } |
| else |
| add_allocation_site (mi, stmt, level); |
| return; |
| } |
| /* Looks like we don't know what is happening in this |
| statement, so be on the safe side and mark it as escaping. */ |
| mark_min_matrix_escape_level (mi, level, stmt); |
| } |
| |
| /* The transposing decision making. |
| In order to calculate the profitability of transposing, we collect two |
| types of information regarding the accesses: |
| 1. profiling information used to express the hotness of an access, that |
| is how often the matrix is accessed by this access site (count of the |
| access site). |
| 2. which dimension in the access site is iterated by the inner |
| most loop containing this access. |
| |
| The matrix will have a calculated value of weighted hotness for each |
| dimension. |
| Intuitively the hotness level of a dimension is a function of how |
| many times it was the most frequently accessed dimension in the |
| highly executed access sites of this matrix. |
| |
| As computed by the following equation: for each dimension i, |
| |
| dim_hot_level[i] = SUM over all access sites j of |
| acc[j]->dim[i]->iter_by_inner_loop * count(j) |
| |
| where n is the number of dims and m is the number of matrix access |
| sites (0 <= i < n, 0 <= j < m). acc[j]->dim[i]->iter_by_inner_loop |
| is 1 if acc[j] iterates over dim[i] in its innermost loop, and is 0 |
| otherwise; count(j) is the execution count of access site j. |
| |
| The organization of the new matrix should be according to the |
| hotness of each dimension. The hotness of the dimension implies |
| the locality of the elements. */ |
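| /* A small numeric sketch (invented counts, not from a real profile): |
| if a matrix has two recorded access sites, one whose offset is |
| iterated by its innermost loop at level 1 in a block with count 1000, |
| and one iterated at level 0 in a block with count 10, then |
| dim_hot_level[1] == 1000 and dim_hot_level[0] == 10, so dimension 1 |
| is considered the hottest and the new dimension ordering is chosen |
| according to these values. */ |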
| static int |
| analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED) |
| { |
| struct matrix_info *mi = (struct matrix_info *) *slot; |
| int min_escape_l = mi->min_indirect_level_escape; |
| struct loop *loop; |
| affine_iv iv; |
| struct access_site_info *acc_info; |
| int i; |
| |
| if (min_escape_l < 2 || !mi->access_l) |
| { |
| if (mi->access_l) |
| { |
| for (i = 0; |
| VEC_iterate (access_site_info_p, mi->access_l, i, acc_info); |
| i++) |
| free (acc_info); |
| VEC_free (access_site_info_p, heap, mi->access_l); |
| |
| } |
| return 1; |
| } |
| if (!mi->dim_hot_level) |
| mi->dim_hot_level = |
| (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type)); |
| |
| |
| for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info); |
| i++) |
| { |
| if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR |
| && acc_info->level < min_escape_l) |
| { |
| loop = loop_containing_stmt (acc_info->stmt); |
| if (!loop || loop->inner) |
| { |
| free (acc_info); |
| continue; |
| } |
| if (simple_iv (loop, loop, acc_info->offset, &iv, true)) |
| { |
| if (iv.step != NULL) |
| { |
| HOST_WIDE_INT istep; |
| |
| istep = int_cst_value (iv.step); |
| if (istep != 0) |
| { |
| acc_info->iterated_by_inner_most_loop_p = 1; |
| mi->dim_hot_level[acc_info->level] += |
| gimple_bb (acc_info->stmt)->count; |
| } |
| |
| } |
| } |
| } |
| free (acc_info); |
| } |
| VEC_free (access_site_info_p, heap, mi->access_l); |
| |
| return 1; |
| } |
| |
| /* Find the index which defines the OFFSET from base. |
| We walk from use to def until we find how the offset was defined. */ |
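| /* For example (a sketch with invented SSA names): if the offset was |
| computed as |
| |
| D.100_5 = i_3 * 4; |
| p_7 = q_6 + D.100_5; |
| |
| then, starting from the definition of D.100_5, the walk below finds |
| the MULT_EXPR and returns i_3, the index that produced the byte |
| offset. */ |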
| static tree |
| get_index_from_offset (tree offset, gimple def_stmt) |
| { |
| tree op1, op2, index; |
| |
| if (gimple_code (def_stmt) == GIMPLE_PHI) |
| return NULL; |
| if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt)) |
| && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) |
| return get_index_from_offset (offset, |
| SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt))); |
| else if (is_gimple_assign (def_stmt) |
| && gimple_assign_rhs_code (def_stmt) == MULT_EXPR) |
| { |
| op1 = gimple_assign_rhs1 (def_stmt); |
| op2 = gimple_assign_rhs2 (def_stmt); |
| if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST) |
| return NULL; |
| index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1; |
| return index; |
| } |
| else |
| return NULL_TREE; |
| } |
| |
| /* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size |
| of the type related to SSA_VAR, or of the type related to the |
| lhs of STMT in the case that it is an INDIRECT_REF. */ |
| static void |
| update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var, |
| int current_indirect_level) |
| { |
| tree lhs; |
| HOST_WIDE_INT type_size; |
| |
| /* Update type according to the type of the INDIRECT_REF expr. */ |
| if (is_gimple_assign (stmt) |
| && TREE_CODE (gimple_assign_lhs (stmt)) == INDIRECT_REF) |
| { |
| lhs = gimple_assign_lhs (stmt); |
| gcc_assert (POINTER_TYPE_P |
| (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0))))); |
| type_size = |
| int_size_in_bytes (TREE_TYPE |
| (TREE_TYPE |
| (SSA_NAME_VAR (TREE_OPERAND (lhs, 0))))); |
| } |
| else |
| type_size = int_size_in_bytes (TREE_TYPE (ssa_var)); |
| |
| /* Record the size of elements accessed (as a whole) |
| in the current indirection level (dimension). If the size of |
| elements is not known at compile time, mark it as escaping. */ |
| if (type_size <= 0) |
| mark_min_matrix_escape_level (mi, current_indirect_level, stmt); |
| else |
| { |
| int l = current_indirect_level; |
| |
| if (!mi->dimension_type_size) |
| { |
| mi->dimension_type_size |
| = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT)); |
| mi->dimension_type_size_len = l + 1; |
| } |
| else if (mi->dimension_type_size_len < l + 1) |
| { |
| mi->dimension_type_size |
| = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size, |
| (l + 1) * sizeof (HOST_WIDE_INT)); |
| memset (&mi->dimension_type_size[mi->dimension_type_size_len], |
| 0, (l + 1 - mi->dimension_type_size_len) |
| * sizeof (HOST_WIDE_INT)); |
| mi->dimension_type_size_len = l + 1; |
| } |
| /* Make sure all the accesses in the same level have the same size |
| of the type. */ |
| if (!mi->dimension_type_size[l]) |
| mi->dimension_type_size[l] = type_size; |
| else if (mi->dimension_type_size[l] != type_size) |
| mark_min_matrix_escape_level (mi, l, stmt); |
| } |
| } |
| |
| /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the |
| ssa var that we want to check because it came from some use of matrix |
| MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so |
| far. */ |
| |
| static int |
| analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var, |
| gimple use_stmt, int current_indirect_level) |
| { |
| tree fndecl = gimple_call_fndecl (use_stmt); |
| |
| if (gimple_call_lhs (use_stmt)) |
| { |
| tree lhs = gimple_call_lhs (use_stmt); |
| struct ssa_acc_in_tree lhs_acc, rhs_acc; |
| |
| memset (&lhs_acc, 0, sizeof (lhs_acc)); |
| memset (&rhs_acc, 0, sizeof (rhs_acc)); |
| |
| lhs_acc.ssa_var = ssa_var; |
| lhs_acc.t_code = ERROR_MARK; |
| ssa_accessed_in_tree (lhs, &lhs_acc); |
| rhs_acc.ssa_var = ssa_var; |
| rhs_acc.t_code = ERROR_MARK; |
| ssa_accessed_in_call_rhs (use_stmt, &rhs_acc); |
| |
| /* For us to understand what is happening, the SSA_NAME must appear |
| on exactly one side (left or right). If it is found on both |
| sides, we escape at this level because we cannot calculate the |
| address correctly. */ |
| if ((lhs_acc.var_found && rhs_acc.var_found |
| && lhs_acc.t_code == INDIRECT_REF) |
| || (!rhs_acc.var_found && !lhs_acc.var_found)) |
| { |
| mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); |
| return current_indirect_level; |
| } |
| gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found); |
| |
| /* If we are storing to the matrix at some level, then mark it as |
| escaping at that level. */ |
| if (lhs_acc.var_found) |
| { |
| int l = current_indirect_level + 1; |
| |
| gcc_assert (lhs_acc.t_code == INDIRECT_REF); |
| mark_min_matrix_escape_level (mi, l, use_stmt); |
| return current_indirect_level; |
| } |
| } |
| |
| if (fndecl) |
| { |
| if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| "Matrix %s: Function call %s, level %d escapes.\n", |
| get_name (mi->decl), get_name (fndecl), |
| current_indirect_level); |
| mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); |
| } |
| else if (mi->free_stmts[current_indirect_level].stmt != NULL |
| && mi->free_stmts[current_indirect_level].stmt != use_stmt) |
| mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); |
| else |
| { |
| /* Record the free statements so we can delete them |
| later. */ |
| int l = current_indirect_level; |
| |
| mi->free_stmts[l].stmt = use_stmt; |
| mi->free_stmts[l].func = current_function_decl; |
| } |
| } |
| return current_indirect_level; |
| } |
| |
| /* USE_STMT represents a phi node of the ssa var that we want to |
| check because it came from some use of matrix MI. |
| We check all the indirection levels that reach the PHI node and |
| make sure they are all the same; if not (which is rare) we let |
| the escaping level be the minimum level that reaches that PHI, |
| because starting from that level we cannot predict the behavior |
| of the indirections. |
| CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */ |
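| |
| /* A sketch of the situation handled here (SSA names invented for |
| illustration): |
| |
| # p_4 = PHI <p_2(3), p_3(4)> |
| |
| If p_2 reached the PHI with indirection level 1 and p_3 with level 2, |
| the recorded and current levels disagree, so the level stored for the |
| PHI becomes MIN (1, 2) == 1 and the matrix is marked as escaping at |
| level 1. */ |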
| |
| static void |
| analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt, |
| int current_indirect_level, sbitmap visited, |
| bool record_accesses) |
| { |
| |
| struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi; |
| |
| tmp_maphi.phi = use_stmt; |
| if ((maphi = (struct matrix_access_phi_node *) |
| htab_find (htab_mat_acc_phi_nodes, &tmp_maphi))) |
| { |
| if (maphi->indirection_level == current_indirect_level) |
| return; |
| else |
| { |
| int level = MIN (maphi->indirection_level, |
| current_indirect_level); |
| size_t j; |
| gimple stmt = NULL; |
| |
| maphi->indirection_level = level; |
| for (j = 0; j < gimple_phi_num_args (use_stmt); j++) |
| { |
| tree def = PHI_ARG_DEF (use_stmt, j); |
| |
| if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI) |
| stmt = SSA_NAME_DEF_STMT (def); |
| } |
| mark_min_matrix_escape_level (mi, level, stmt); |
| } |
| return; |
| } |
| maphi = (struct matrix_access_phi_node *) |
| xcalloc (1, sizeof (struct matrix_access_phi_node)); |
| maphi->phi = use_stmt; |
| maphi->indirection_level = current_indirect_level; |
| |
| /* Insert to hash table. */ |
| pmaphi = (struct matrix_access_phi_node **) |
| htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT); |
| gcc_assert (pmaphi); |
| *pmaphi = maphi; |
| |
| if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)))) |
| { |
| SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))); |
| analyze_matrix_accesses (mi, PHI_RESULT (use_stmt), |
| current_indirect_level, false, visited, |
| record_accesses); |
| RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))); |
| } |
| } |
| |
| /* USE_STMT represents an assign statement (its rhs or lhs includes |
| the ssa var that we want to check because it came from some use of |
| matrix MI). CURRENT_INDIRECT_LEVEL is the indirection level we |
| reached so far. */ |
| |
| static int |
| analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var, |
| gimple use_stmt, int current_indirect_level, |
| bool last_op, sbitmap visited, |
| bool record_accesses) |
| { |
| tree lhs = gimple_get_lhs (use_stmt); |
| struct ssa_acc_in_tree lhs_acc, rhs_acc; |
| |
| memset (&lhs_acc, 0, sizeof (lhs_acc)); |
| memset (&rhs_acc, 0, sizeof (rhs_acc)); |
| |
| lhs_acc.ssa_var = ssa_var; |
| lhs_acc.t_code = ERROR_MARK; |
| ssa_accessed_in_tree (lhs, &lhs_acc); |
| rhs_acc.ssa_var = ssa_var; |
| rhs_acc.t_code = ERROR_MARK; |
| ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc); |
| |
| /* For us to understand what is happening, the SSA_NAME must appear |
| on exactly one side (left or right). If it is found on both |
| sides, we escape at this level because we cannot calculate the |
| address correctly. */ |
| if ((lhs_acc.var_found && rhs_acc.var_found |
| && lhs_acc.t_code == INDIRECT_REF) |
| || (!rhs_acc.var_found && !lhs_acc.var_found)) |
| { |
| mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); |
| return current_indirect_level; |
| } |
| gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found); |
| |
| /* If we are storing to the matrix at some level, then mark it as |
| escaping at that level. */ |
| if (lhs_acc.var_found) |
| { |
| int l = current_indirect_level + 1; |
| |
| gcc_assert (lhs_acc.t_code == INDIRECT_REF); |
| |
| if (!(gimple_assign_copy_p (use_stmt) |
| || gimple_assign_cast_p (use_stmt)) |
| || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME)) |
| mark_min_matrix_escape_level (mi, l, use_stmt); |
| else |
| { |
| gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt)); |
| analyze_matrix_allocation_site (mi, def_stmt, l, visited); |
| if (record_accesses) |
| record_access_alloc_site_info (mi, use_stmt, NULL_TREE, |
| NULL_TREE, l, true); |
| update_type_size (mi, use_stmt, NULL, l); |
| } |
| return current_indirect_level; |
| } |
| /* Now, check the right-hand-side, to see how the SSA variable |
| is used. */ |
| if (rhs_acc.var_found) |
| { |
| if (rhs_acc.t_code != INDIRECT_REF |
| && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME) |
| { |
| mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt); |
| return current_indirect_level; |
| } |
| /* If the access in the RHS has an indirection increase the |
| indirection level. */ |
| if (rhs_acc.t_code == INDIRECT_REF) |
| { |
| if (record_accesses) |
| record_access_alloc_site_info (mi, use_stmt, NULL_TREE, |
| NULL_TREE, |
| current_indirect_level, true); |
| current_indirect_level += 1; |
| } |
| else if (rhs_acc.t_code == POINTER_PLUS_EXPR) |
| { |
| gcc_assert (rhs_acc.second_op); |
| if (last_op) |
| /* Currently we support only one PLUS expression on the |
| SSA_NAME that holds the base address of the current |
| indirection level; to support more general case there |
| is a need to hold a stack of expressions and regenerate |
| the calculation later. */ |
| mark_min_matrix_escape_level (mi, current_indirect_level, |
| use_stmt); |
| else |
| { |
| tree index; |
| tree op1, op2; |
| |
| op1 = gimple_assign_rhs1 (use_stmt); |
| op2 = gimple_assign_rhs2 (use_stmt); |
| |
| op2 = (op1 == ssa_var) ? op2 : op1; |
| if (TREE_CODE (op2) == INTEGER_CST) |
| index = |
| build_int_cst (TREE_TYPE (op1), |
| TREE_INT_CST_LOW (op2) / |
| int_size_in_bytes (TREE_TYPE (op1))); |
| else |
| { |
| index = |
| get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2)); |
| if (index == NULL_TREE) |
| { |
| mark_min_matrix_escape_level (mi, |
| current_indirect_level, |
| use_stmt); |
| return current_indirect_level; |
| } |
| } |
| if (record_accesses) |
| record_access_alloc_site_info (mi, use_stmt, op2, |
| index, |
| current_indirect_level, false); |
| } |
| } |
| /* If we are storing this level of indirection mark it as |
| escaping. */ |
| if (lhs_acc.t_code == INDIRECT_REF || TREE_CODE (lhs) != SSA_NAME) |
| { |
| int l = current_indirect_level; |
| |
| /* One exception is when we are storing to the matrix |
| variable itself; this is the malloc case, and we must make |
| sure that it is the one and only call to malloc, so we call |
| analyze_matrix_allocation_site to check this. */ |
| if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl) |
| mark_min_matrix_escape_level (mi, current_indirect_level, |
| use_stmt); |
| else |
| { |
| /* Also update the escaping level. */ |
| analyze_matrix_allocation_site (mi, use_stmt, l, visited); |
| if (record_accesses) |
| record_access_alloc_site_info (mi, use_stmt, NULL_TREE, |
| NULL_TREE, l, true); |
| } |
| } |
| else |
| { |
| /* We are placing it in an SSA, follow that SSA. */ |
| analyze_matrix_accesses (mi, lhs, |
| current_indirect_level, |
| rhs_acc.t_code == POINTER_PLUS_EXPR, |
| visited, record_accesses); |
| } |
| } |
| return current_indirect_level; |
| } |
| |
| /* Given a SSA_VAR (coming from a use statement of the matrix MI), |
| follow its uses and level of indirection and find out the minimum |
| indirection level it escapes in (the highest dimension) and the maximum |
| level it is accessed in (this will be the actual dimension of the |
| matrix). The information is accumulated in MI. |
| We look at the immediate uses, if one escapes we finish; if not, |
| we make a recursive call for each one of the immediate uses of the |
| resulting SSA name. */ |
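| /* As a sketch of the chain that is followed (SSA names and |
| temporaries invented), an access a[i][j] to a matrix declared as |
| "int **a;" typically gimplifies to something like: |
| |
| t1_1 = a; |
| t2_2 = t1_1 + D.10_3; <-- POINTER_PLUS_EXPR, offset of row i |
| t3_4 = *t2_2; <-- INDIRECT_REF, indirection level becomes 1 |
| t4_5 = t3_4 + D.11_6; <-- POINTER_PLUS_EXPR, offset of column j |
| t5_7 = *t4_5; <-- INDIRECT_REF, indirection level becomes 2 |
| |
| with possibly more copies and casts interleaved between the steps. */ |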
| static void |
| analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var, |
| int current_indirect_level, bool last_op, |
| sbitmap visited, bool record_accesses) |
| { |
| imm_use_iterator imm_iter; |
| use_operand_p use_p; |
| |
| update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var, |
| current_indirect_level); |
| |
| /* We don't go beyond the escaping level when we are performing the |
| flattening. NOTE: we keep the last indirection level that doesn't |
| escape. */ |
| if (mi->min_indirect_level_escape > -1 |
| && mi->min_indirect_level_escape <= current_indirect_level) |
| return; |
| |
| /* Now go over the uses of the SSA_NAME and check how it is used in |
| each one of them. We are mainly looking for the pattern INDIRECT_REF, |
| then a POINTER_PLUS_EXPR, then INDIRECT_REF etc. while in between there could |
| be any number of copies and casts. */ |
| gcc_assert (TREE_CODE (ssa_var) == SSA_NAME); |
| |
| FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var) |
| { |
| gimple use_stmt = USE_STMT (use_p); |
| if (gimple_code (use_stmt) == GIMPLE_PHI) |
| /* We check all the indirection levels that reach the PHI node |
| and make sure they are all the same; if not (which is rare) we |
| let the escaping level be the minimum level that reaches that |
| PHI, because starting from that level we cannot predict the |
| behavior of the indirections. */ |
| |
| analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level, |
| visited, record_accesses); |
| |
| else if (is_gimple_call (use_stmt)) |
| analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt, |
| current_indirect_level); |
| else if (is_gimple_assign (use_stmt)) |
| current_indirect_level = |
| analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt, |
| current_indirect_level, last_op, |
| visited, record_accesses); |
| } |
| } |
| |
| typedef struct |
| { |
| tree fn; |
| gimple stmt; |
| } check_var_data; |
| |
| /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of |
| the malloc size expression and check that those aren't changed |
| over the function. */ |
| static tree |
| check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data) |
| { |
| basic_block bb; |
| tree t = *tp; |
| check_var_data *callback_data = (check_var_data*) data; |
| tree fn = callback_data->fn; |
| gimple_stmt_iterator gsi; |
| gimple stmt; |
| |
| if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL) |
| return NULL_TREE; |
| |
| FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn)) |
| { |
| for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| { |
| stmt = gsi_stmt (gsi); |
| if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) |
| continue; |
| if (gimple_get_lhs (stmt) == t) |
| { |
| callback_data->stmt = stmt; |
| return t; |
| } |
| } |
| } |
| *walk_subtrees = 1; |
| return NULL_TREE; |
| } |
| |
| /* Go backwards in the use-def chains and find out the expression |
| represented by the possible SSA name in STMT, until it is composed |
| of only VAR_DECL, PARM_DECL and INTEGER_CST nodes. In case of phi nodes |
| we make sure that all the arguments represent the same subexpression, |
| otherwise we fail. */ |
| |
| static tree |
| can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited) |
| { |
| tree op1, op2, res; |
| enum tree_code code; |
| |
| switch (gimple_code (stmt)) |
| { |
| case GIMPLE_ASSIGN: |
| code = gimple_assign_rhs_code (stmt); |
| op1 = gimple_assign_rhs1 (stmt); |
| |
| switch (code) |
| { |
| case POINTER_PLUS_EXPR: |
| case PLUS_EXPR: |
| case MINUS_EXPR: |
| case MULT_EXPR: |
| |
| op2 = gimple_assign_rhs2 (stmt); |
| op1 = can_calculate_expr_before_stmt (op1, visited); |
| if (!op1) |
| return NULL_TREE; |
| op2 = can_calculate_expr_before_stmt (op2, visited); |
| if (op2) |
| return fold_build2 (code, gimple_expr_type (stmt), op1, op2); |
| return NULL_TREE; |
| |
| CASE_CONVERT: |
| res = can_calculate_expr_before_stmt (op1, visited); |
| if (res != NULL_TREE) |
| return build1 (code, gimple_expr_type (stmt), res); |
| else |
| return NULL_TREE; |
| |
| default: |
| if (gimple_assign_single_p (stmt)) |
| return can_calculate_expr_before_stmt (op1, visited); |
| else |
| return NULL_TREE; |
| } |
| |
| case GIMPLE_PHI: |
| { |
| size_t j; |
| |
| res = NULL_TREE; |
| /* Make sure all the arguments represent the same value. */ |
| for (j = 0; j < gimple_phi_num_args (stmt); j++) |
| { |
| tree new_res; |
| tree def = PHI_ARG_DEF (stmt, j); |
| |
| new_res = can_calculate_expr_before_stmt (def, visited); |
| if (res == NULL_TREE) |
| res = new_res; |
| else if (!new_res || !expressions_equal_p (res, new_res)) |
| return NULL_TREE; |
| } |
| return res; |
| } |
| |
| default: |
| return NULL_TREE; |
| } |
| } |
| |
| /* Go backwards in the use-def chains and find out the expression |
| represented by the possible SSA name in EXPR, until it is composed |
| of only VAR_DECL, PARM_DECL and INTEGER_CST nodes. In case of phi nodes |
| we make sure that all the arguments represent the same subexpression, |
| otherwise we fail. */ |
| static tree |
| can_calculate_expr_before_stmt (tree expr, sbitmap visited) |
| { |
| gimple def_stmt; |
| tree res; |
| |
| switch (TREE_CODE (expr)) |
| { |
| case SSA_NAME: |
| /* In case of a loop, we don't know how to represent this expression. */ |
| if (TEST_BIT (visited, SSA_NAME_VERSION (expr))) |
| return NULL_TREE; |
| |
| SET_BIT (visited, SSA_NAME_VERSION (expr)); |
| def_stmt = SSA_NAME_DEF_STMT (expr); |
| res = can_calculate_stmt_before_stmt (def_stmt, visited); |
| RESET_BIT (visited, SSA_NAME_VERSION (expr)); |
| return res; |
| case VAR_DECL: |
| case PARM_DECL: |
| case INTEGER_CST: |
| return expr; |
| |
| default: |
| return NULL_TREE; |
| } |
| } |
| |
| /* There should be only one allocation function for the dimensions |
| that don't escape. Here we check the allocation sites in this |
| function. We must make sure that all the dimensions are allocated |
| using malloc and that the malloc size parameter expression could be |
| pre-calculated before the call to the malloc of dimension 0. |
| |
| Given a candidate matrix for flattening -- MI -- check if it's |
| appropriate for flattening -- we analyze the allocation |
| sites that we recorded in the previous analysis. The result of the |
| analysis is a level of indirection (matrix dimension) in which the |
| flattening is safe. We check the following conditions: |
| 1. There is only one allocation site for each dimension. |
| 2. The allocation sites of all the dimensions are in the same |
| function. |
| (The above two are being taken care of during the analysis when |
| we check the allocation site). |
| 3. All the dimensions that we flatten are allocated at once; thus |
| the total size must be known before the allocation of the |
| dimension 0 (top level) -- we must make sure we represent the |
| size of the allocation as an expression of global parameters or |
| constants and that those don't change over the function. */ |
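| |
| /* For example (a sketch): the allocation pattern from the head of |
| this file, |
| |
| a = (int **) malloc (dim1 * sizeof (int *)); |
| for (i=0; i<dim1; i++) |
| a[i] = (int *) malloc (dim2 * sizeof (int)); |
| |
| satisfies condition 3 when dim1 and dim2 are globals (or parameters) |
| that are not modified in the allocating function: the level-1 size |
| "dim2 * sizeof (int)" can be computed before the level-0 malloc. |
| The actual check is done by can_calculate_expr_before_stmt above. */ |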
| |
| static int |
| check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED) |
| { |
| int level; |
| gimple_stmt_iterator gsi; |
| basic_block bb_level_0; |
| struct matrix_info *mi = (struct matrix_info *) *slot; |
| sbitmap visited; |
| |
| if (!mi->malloc_for_level) |
| return 1; |
| |
| /* Do nothing if the current function is not the allocation |
| function of MI. */ |
| if (mi->allocation_function_decl != current_function_decl |
| /* We aren't in the main allocation function yet. */ |
| || !mi->malloc_for_level[0]) |
| return 1; |
| |
| visited = sbitmap_alloc (num_ssa_names); |
| |
| for (level = 1; level < mi->max_malloced_level; level++) |
| if (!mi->malloc_for_level[level]) |
| break; |
| |
| mark_min_matrix_escape_level (mi, level, NULL); |
| |
| gsi = gsi_for_stmt (mi->malloc_for_level[0]); |
| bb_level_0 = gsi.bb; |
| |
| /* Check if the expression of the size passed to malloc could be |
| pre-calculated before the malloc of level 0. */ |
| for (level = 1; level < mi->min_indirect_level_escape; level++) |
| { |
| gimple call_stmt; |
| tree size; |
| struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE}; |
| |
| call_stmt = mi->malloc_for_level[level]; |
| |
| /* Find the correct malloc information. */ |
| collect_data_for_malloc_call (call_stmt, &mcd); |
| |
| /* No need to check anticipation for constants. */ |
| if (TREE_CODE (mcd.size_var) == INTEGER_CST) |
| { |
| if (!mi->dimension_size) |
| { |
| mi->dimension_size = |
| (tree *) xcalloc (mi->min_indirect_level_escape, |
| sizeof (tree)); |
| mi->dimension_size_orig = |
| (tree *) xcalloc (mi->min_indirect_level_escape, |
| sizeof (tree)); |
| } |
| mi->dimension_size[level] = mcd.size_var; |
| mi->dimension_size_orig[level] = mcd.size_var; |
| continue; |
| } |
| /* ??? Here we should also record how to calculate the size |
| expression, not only know that it can be anticipated. */ |
| sbitmap_zero (visited); |
| size = can_calculate_expr_before_stmt (mcd.size_var, visited); |
| if (size == NULL_TREE) |
| { |
| mark_min_matrix_escape_level (mi, level, call_stmt); |
| if (dump_file) |
| fprintf (dump_file, |
| "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n", |
| get_name (mi->decl), level); |
| break; |
| } |
| if (!mi->dimension_size) |
| { |
| mi->dimension_size = |
| (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree)); |
| mi->dimension_size_orig = |
| (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree)); |
| } |
| mi->dimension_size[level] = size; |
| mi->dimension_size_orig[level] = size; |
| } |
| |
| /* We don't need those anymore. */ |
| for (level = mi->min_indirect_level_escape; |
| level < mi->max_malloced_level; level++) |
| mi->malloc_for_level[level] = NULL; |
| |
| sbitmap_free (visited); |
| return 1; |
| } |
| |
| /* Track all access and allocation sites. */ |
| static void |
| find_sites_in_func (bool record) |
| { |
| sbitmap visited_stmts_1; |
| |
| gimple_stmt_iterator gsi; |
| gimple stmt; |
| basic_block bb; |
| struct matrix_info tmpmi, *mi; |
| |
| visited_stmts_1 = sbitmap_alloc (num_ssa_names); |
| |
| FOR_EACH_BB (bb) |
| { |
| for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| { |
| tree lhs; |
| |
| stmt = gsi_stmt (gsi); |
| lhs = gimple_get_lhs (stmt); |
| if (lhs != NULL_TREE |
| && TREE_CODE (lhs) == VAR_DECL) |
| { |
| tmpmi.decl = lhs; |
| if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, |
| &tmpmi))) |
| { |
| sbitmap_zero (visited_stmts_1); |
| analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1); |
| } |
| } |
| if (is_gimple_assign (stmt) |
| && gimple_assign_single_p (stmt) |
| && TREE_CODE (lhs) == SSA_NAME |
| && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL) |
| { |
| tmpmi.decl = gimple_assign_rhs1 (stmt); |
| if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, |
| &tmpmi))) |
| { |
| sbitmap_zero (visited_stmts_1); |
| analyze_matrix_accesses (mi, lhs, 0, |
| false, visited_stmts_1, record); |
| } |
| } |
| } |
| } |
| sbitmap_free (visited_stmts_1); |
| } |
| |
| /* Traverse the use-def chains to see if there are matrices that |
| are passed through pointers and we cannot know how they are accessed. |
| For each SSA-name defined by a global variable of our interest, |
| we traverse the use-def chains of the SSA and follow the indirections, |
| and record in what level of indirection the use of the variable |
| escapes. A use of a pointer escapes when it is passed to a function, |
| stored into memory or assigned (except in malloc and free calls). */ |
| |
| static void |
| record_all_accesses_in_func (void) |
| { |
| unsigned i; |
| sbitmap visited_stmts_1; |
| |
| visited_stmts_1 = sbitmap_alloc (num_ssa_names); |
| |
| for (i = 0; i < num_ssa_names; i++) |
| { |
| struct matrix_info tmpmi, *mi; |
| tree ssa_var = ssa_name (i); |
| tree rhs, lhs; |
| |
| if (!ssa_var |
| || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var)) |
| || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var))) |
| continue; |
| rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var)); |
| lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var)); |
| if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL) |
| continue; |
| |
| /* If the RHS is a matrix that we want to analyze, follow the def-use |
| chain for this SSA_VAR and check for escapes or apply the |
| flattening. */ |
| tmpmi.decl = rhs; |
| if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi))) |
| { |
| /* This variable will track the visited PHI nodes, so we can limit |
| its size to the maximum number of SSA names. */ |
| sbitmap_zero (visited_stmts_1); |
| analyze_matrix_accesses (mi, ssa_var, |
| 0, false, visited_stmts_1, true); |
| |
| } |
| } |
| sbitmap_free (visited_stmts_1); |
| } |
| |
| /* Used when we want to convert the expression RESULT = something * ORIG |
| into RESULT = something * NEW_VAL. If ORIG and NEW_VAL are powers of 2, |
| shift operations can be used; otherwise division and multiplication |
| are used. */ |
| |
| static tree |
| compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result) |
| { |
| |
| int x, y; |
| tree result1, ratio, log, orig_tree, new_tree; |
| |
| x = exact_log2 (orig); |
| y = exact_log2 (new_val); |
| |
| if (x != -1 && y != -1) |
| { |
| if (x == y) |
| return result; |
| else if (x > y) |
| { |
| log = build_int_cst (TREE_TYPE (result), x - y); |
| result1 = |
| fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log); |
| return result1; |
| } |
| log = build_int_cst (TREE_TYPE (result), y - x); |
| result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log); |
| |
| return result1; |
| } |
| orig_tree = build_int_cst (TREE_TYPE (result), orig); |
| new_tree = build_int_cst (TREE_TYPE (result), new_val); |
| ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree); |
| result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree); |
| |
| return result1; |
| } |
| |
| |
| /* We know that we are allowed to perform matrix flattening (according to the |
|    escape analysis), so we traverse the use-def chains of the SSA vars |
|    defined by the global variables pointing to the matrices of our interest. |
|    For each use of such an SSA name we calculate the offset from the base |
|    address according to the following equation: |
| |
|    an access a[I1][I2]...[Ik], where D1..Dk are the lengths of the |
|    dimensions, the escaping level is m <= k, and a' is the newly |
|    allocated matrix, is translated into |
| |
|      b[I(m+1)]...[Ik] |
| |
|    where |
|      b = a' + I1*D2*...*Dm + I2*D3*...*Dm + ... + Im |
| */ |
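| /* A concrete (hypothetical) instance of the equation above: for a fully |
|    flattened three-dimensional matrix (k == m == 3) with dimension |
|    lengths D1, D2 and D3, the access a[i][j][l] becomes a single access |
|    at offset i*D2*D3 + j*D3 + l from the newly allocated base a'.  */ |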
| |
| static int |
| transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED) |
| { |
| gimple_stmt_iterator gsi; |
| struct matrix_info *mi = (struct matrix_info *) *slot; |
| int min_escape_l = mi->min_indirect_level_escape; |
| struct access_site_info *acc_info; |
| enum tree_code code; |
| int i; |
| |
| if (min_escape_l < 2 || !mi->access_l) |
| return 1; |
| for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info); |
| i++) |
| { |
| /* This is possible because we collect the access sites before |
| we determine the final minimum indirection level. */ |
| if (acc_info->level >= min_escape_l) |
| { |
| free (acc_info); |
| continue; |
| } |
| if (acc_info->is_alloc) |
| { |
| if (acc_info->level >= 0 && gimple_bb (acc_info->stmt)) |
| { |
| ssa_op_iter iter; |
| tree def; |
| gimple stmt = acc_info->stmt; |
| tree lhs; |
| |
| FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF) |
| mark_sym_for_renaming (SSA_NAME_VAR (def)); |
| gsi = gsi_for_stmt (stmt); |
| gcc_assert (is_gimple_assign (acc_info->stmt)); |
| lhs = gimple_assign_lhs (acc_info->stmt); |
| if (TREE_CODE (lhs) == SSA_NAME |
| && acc_info->level < min_escape_l - 1) |
| { |
| imm_use_iterator imm_iter; |
| use_operand_p use_p; |
| gimple use_stmt; |
| |
| FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs) |
| FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
| { |
| tree rhs, tmp; |
| gimple new_stmt; |
| |
| gcc_assert (gimple_assign_rhs_code (acc_info->stmt) |
| == INDIRECT_REF); |
| /* Emit convert statement to convert to type of use. */ |
| tmp = create_tmp_var (TREE_TYPE (lhs), "new"); |
| add_referenced_var (tmp); |
| rhs = gimple_assign_rhs1 (acc_info->stmt); |
| new_stmt = gimple_build_assign (tmp, |
| TREE_OPERAND (rhs, 0)); |
| tmp = make_ssa_name (tmp, new_stmt); |
| gimple_assign_set_lhs (new_stmt, tmp); |
| gsi = gsi_for_stmt (acc_info->stmt); |
| gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT); |
| SET_USE (use_p, tmp); |
| } |
| } |
| if (acc_info->level < min_escape_l - 1) |
| gsi_remove (&gsi, true); |
| } |
| free (acc_info); |
| continue; |
| } |
| code = gimple_assign_rhs_code (acc_info->stmt); |
| if (code == INDIRECT_REF |
| && acc_info->level < min_escape_l - 1) |
| { |
| /* Replace the INDIRECT_REF with a NOP (cast); usually we are casting |
|    from "pointer to type" to "type".  */ |
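| /* For example (hypothetical SSA names): a load of a row pointer such as |
|    "row_3 = *ptr_2;" becomes "row_3 = (int *) ptr_2;", since after |
|    flattening the row data lives inside the same allocation that ptr_2 |
|    already points into.  */ |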
| tree t = |
| build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)), |
| TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0)); |
| gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR); |
| gimple_assign_set_rhs1 (acc_info->stmt, t); |
| } |
| else if (code == POINTER_PLUS_EXPR |
| && acc_info->level < (min_escape_l)) |
| { |
| imm_use_iterator imm_iter; |
| use_operand_p use_p; |
| |
| tree offset; |
| int k = acc_info->level; |
| tree num_elements, total_elements; |
| tree tmp1; |
| tree d_size = mi->dimension_size[k]; |
| |
| /* The analysis already made sure that the first operand is the |
|    base and the second is the offset.  */ |
| offset = acc_info->offset; |
| if (mi->dim_map[k] == min_escape_l - 1) |
| { |
| if (!check_transpose_p || mi->is_transposed_p == false) |
| tmp1 = offset; |
| else |
| { |
| tree new_offset; |
| tree d_type_size, d_type_size_k; |
| |
| d_type_size = size_int (mi->dimension_type_size[min_escape_l]); |
| d_type_size_k = size_int (mi->dimension_type_size[k + 1]); |
| |
| new_offset = |
| compute_offset (mi->dimension_type_size[min_escape_l], |
| mi->dimension_type_size[k + 1], offset); |
| |
| total_elements = new_offset; |
| if (new_offset != offset) |
| { |
| gsi = gsi_for_stmt (acc_info->stmt); |
| tmp1 = force_gimple_operand_gsi (&gsi, total_elements, |
| true, NULL, |
| true, GSI_SAME_STMT); |
| } |
| else |
| tmp1 = offset; |
| } |
| } |
| else |
| { |
| d_size = mi->dimension_size[mi->dim_map[k] + 1]; |
| num_elements = |
| fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index), |
| fold_convert (sizetype, d_size)); |
| add_referenced_var (d_size); |
| gsi = gsi_for_stmt (acc_info->stmt); |
| tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true, |
| NULL, true, GSI_SAME_STMT); |
| } |
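| /* For illustration (hypothetical, non-transposed case): for the |
|    outermost level of an "int **" matrix the offset operand, originally |
|    I * sizeof (int *) for an access a[I], is recomputed here as |
|    I * DIM_SIZE[1], the byte size of one flattened row.  */ |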
| /* Replace the offset if needed. */ |
| if (tmp1 != offset) |
| { |
| if (TREE_CODE (offset) == SSA_NAME) |
| { |
| gimple use_stmt; |
| |
| FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset) |
| FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
| if (use_stmt == acc_info->stmt) |
| SET_USE (use_p, tmp1); |
| } |
| else |
| { |
| gcc_assert (TREE_CODE (offset) == INTEGER_CST); |
| gimple_assign_set_rhs2 (acc_info->stmt, tmp1); |
| update_stmt (acc_info->stmt); |
| } |
| } |
| } |
| /* ??? This currently happens because we record the same access |
|    site more than once; we should use a hash table to avoid this |
|    and insert the STMT of each access site only once. |
|    else |
|      gcc_unreachable (); */ |
| free (acc_info); |
| } |
| VEC_free (access_site_info_p, heap, mi->access_l); |
| |
| update_ssa (TODO_update_ssa); |
| #ifdef ENABLE_CHECKING |
| verify_ssa (true); |
| #endif |
| return 1; |
| } |
| |
| /* Sort the array A of N counts in ascending order and permute DIM_MAP |
|    to reflect the new order.  */ |
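| /* For example (hypothetical counts): with N == 3, A == {5, 20, 3} and |
|    DIM_MAP == {0, 1, 2}, the bubble sort below yields A == {3, 5, 20} |
|    and DIM_MAP == {2, 0, 1}; entry I of DIM_MAP then names the original |
|    dimension that occupies position I after sorting, so the dimension |
|    with the largest count ends up last (innermost).  */ |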
| |
| static void |
| sort_dim_hot_level (gcov_type * a, int *dim_map, int n) |
| { |
| int i, j, tmp1; |
| gcov_type tmp; |
| |
| for (i = 0; i < n - 1; i++) |
| { |
| for (j = 0; j < n - 1 - i; j++) |
| { |
| if (a[j + 1] < a[j]) |
| { |
| tmp = a[j]; /* swap a[j] and a[j+1] */ |
| a[j] = a[j + 1]; |
| a[j + 1] = tmp; |
| tmp1 = dim_map[j]; |
| dim_map[j] = dim_map[j + 1]; |
| dim_map[j + 1] = tmp1; |
| } |
| } |
| } |
| } |
| |
| /* Replace the multiple mallocs (one per dimension) with a single malloc |
|    of size DIM1 * DIM2 * ... * DIMN * size_of_element. |
|    Make sure that we hold the size used at the malloc site in a |
|    new global variable; this way we ensure that the size does not |
|    change and that it is accessible from all the other functions that |
|    use the matrix.  Also, the calls to free for the inner levels are |
|    deleted, so that the remaining free releases the whole flattened |
|    matrix.  */ |
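| /* A sketch of the intended result (hypothetical names): the total size is |
|    computed once and stored in a new global, say "size.1", the level 0 |
|    call becomes "a = malloc (size.1);", the mallocs of the deeper levels |
|    disappear, and the frees of the deeper levels are deleted so that the |
|    remaining "free (a);" releases the whole flattened matrix.  */ |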
| |
| static int |
| transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED) |
| { |
| int i; |
| struct matrix_info *mi; |
| tree type, oldfn, prev_dim_size; |
| gimple call_stmt_0, use_stmt; |
| struct cgraph_node *c_node; |
| struct cgraph_edge *e; |
| gimple_stmt_iterator gsi; |
| struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE}; |
| HOST_WIDE_INT element_size; |
| |
| imm_use_iterator imm_iter; |
| use_operand_p use_p; |
| tree old_size_0, tmp; |
| int min_escape_l; |
| int id; |
| |
| mi = (struct matrix_info *) *slot; |
| |
| min_escape_l = mi->min_indirect_level_escape; |
| |
| if (!mi->malloc_for_level) |
| mi->min_indirect_level_escape = 0; |
| |
| if (mi->min_indirect_level_escape < 2) |
| return 1; |
| |
| mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int)); |
| for (i = 0; i < mi->min_indirect_level_escape; i++) |
| mi->dim_map[i] = i; |
| if (check_transpose_p) |
| { |
| int i; |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl)); |
| for (i = 0; i < min_escape_l; i++) |
| { |
| fprintf (dump_file, "dim %d before sort ", i); |
| if (mi->dim_hot_level) |
| fprintf (dump_file, |
| "count is " HOST_WIDEST_INT_PRINT_DEC " \n", |
| mi->dim_hot_level[i]); |
| } |
| } |
| sort_dim_hot_level (mi->dim_hot_level, mi->dim_map, |
| mi->min_indirect_level_escape); |
| if (dump_file) |
| for (i = 0; i < min_escape_l; i++) |
| { |
| fprintf (dump_file, "dim %d after sort\n", i); |
| if (mi->dim_hot_level) |
| fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC |
| " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]); |
| } |
| for (i = 0; i < mi->min_indirect_level_escape; i++) |
| { |
| if (dump_file) |
| fprintf (dump_file, "dim_map[%d] after sort %d\n", i, |
| mi->dim_map[i]); |
| if (mi->dim_map[i] != i) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| "Transposed dimensions: dim %d is now dim %d\n", |
| mi->dim_map[i], i); |
| mi->is_transposed_p = true; |
| } |
| } |
| } |
| else |
| { |
| for (i = 0; i < mi->min_indirect_level_escape; i++) |
| mi->dim_map[i] = i; |
| } |
| /* Call statement of allocation site of level 0. */ |
| call_stmt_0 = mi->malloc_for_level[0]; |
| |
| /* Collect the relevant data of this malloc call, in particular its |
|    size argument.  */ |
| collect_data_for_malloc_call (call_stmt_0, &mcd); |
| |
| mi->dimension_size[0] = mcd.size_var; |
| mi->dimension_size_orig[0] = mcd.size_var; |
| /* Make sure that the variables in the size expression for |
| all the dimensions (above level 0) aren't modified in |
| the allocation function. */ |
| for (i = 1; i < mi->min_indirect_level_escape; i++) |
| { |
| tree t; |
| check_var_data data; |
| |
| /* mi->dimension_size must contain the expression of the size calculated |
| in check_allocation_function. */ |
| gcc_assert (mi->dimension_size[i]); |
| |
| data.fn = mi->allocation_function_decl; |
| data.stmt = NULL; |
| t = walk_tree_without_duplicates (&(mi->dimension_size[i]), |
| check_var_notmodified_p, |
| &data); |
| if (t != NULL_TREE) |
| { |
| mark_min_matrix_escape_level (mi, i, data.stmt); |
| break; |
| } |
| } |
| |
| if (mi->min_indirect_level_escape < 2) |
| return 1; |
| |
| /* The size expressions must be available before the call to the |
|    malloc of level 0, so insert them right before it.  */ |
| gsi = gsi_for_stmt (call_stmt_0); |
| |
| /* Find out the size of each dimension by looking at the malloc |
| sites and create a global variable to hold it. |
| We add the assignment to the global before the malloc of level 0. */ |
| |
| /* To be able to produce gimple temporaries. */ |
| oldfn = current_function_decl; |
| current_function_decl = mi->allocation_function_decl; |
| push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl)); |
| |
| /* Set the dimension sizes as follows: |
|    DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i] |
|    where n is the maximum non-escaping level.  */ |
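| /* For example (hypothetical, non-transposed case): for a two-level |
|    "int **" matrix whose original mallocs requested D1 * sizeof (int *) |
|    and D2 * sizeof (int), the loop below produces |
|      DIM_SIZE[1] = D2 * sizeof (int) |
|      DIM_SIZE[0] = (D1 * sizeof (int *)) / sizeof (int *) * DIM_SIZE[1] |
|                  = D1 * D2 * sizeof (int) |
|    and DIM_SIZE[0] later replaces the size argument of the level 0 |
|    malloc.  */ |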
| element_size = mi->dimension_type_size[mi->min_indirect_level_escape]; |
| prev_dim_size = NULL_TREE; |
| |
| for (i = mi->min_indirect_level_escape - 1; i >= 0; i--) |
| { |
| tree dim_size, dim_var; |
| gimple stmt; |
| tree d_type_size; |
| |
| /* Now put the size expression into a new global variable and initialize |
|    it right before the malloc of level 0.  */ |
| dim_var = |
| add_new_static_var (TREE_TYPE |
| (mi->dimension_size_orig[mi->dim_map[i]])); |
| type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]); |
| |
| /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */ |
| /* Find which dim ID becomes dim I. */ |
| for (id = 0; id < mi->min_indirect_level_escape; id++) |
| if (mi->dim_map[id] == i) |
| break; |
| d_type_size = |
| build_int_cst (type, mi->dimension_type_size[id + 1]); |
| if (!prev_dim_size) |
| prev_dim_size = build_int_cst (type, element_size); |
| if (!check_transpose_p && i == mi->min_indirect_level_escape - 1) |
| { |
| dim_size = mi->dimension_size_orig[id]; |
| } |
| else |
| { |
| dim_size = |
| fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id], |
| d_type_size); |
| |
| dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size); |
| } |
| dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL, |
| true, GSI_SAME_STMT); |
| /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */ |
| stmt = gimple_build_assign (dim_var, dim_size); |
| mark_symbols_for_renaming (stmt); |
| gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
| |
| prev_dim_size = mi->dimension_size[i] = dim_var; |
| } |
| update_ssa (TODO_update_ssa); |
| /* Replace the size argument of the level 0 malloc with the total |
|    size of all the dimensions.  */ |
| c_node = cgraph_node (mi->allocation_function_decl); |
| old_size_0 = gimple_call_arg (call_stmt_0, 0); |
| tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true, |
| NULL, true, GSI_SAME_STMT); |
| if (TREE_CODE (old_size_0) == SSA_NAME) |
| { |
| FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0) |
| FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
| if (use_stmt == call_stmt_0) |
| SET_USE (use_p, tmp); |
| } |
| /* When deleting the calls to malloc we also need to remove the edges from |
|    the call graph to keep it consistent.  Notice that cgraph_edge may |
|    create a new node in the call graph if there is no node for the given |
|    declaration; this shouldn't be the case but currently there is no way to |
|    check this outside of "cgraph.c".  */ |
| for (i = 1; i < mi->min_indirect_level_escape; i++) |
| { |
| gimple_stmt_iterator gsi; |
| gimple use_stmt1 = NULL; |
| |
| gimple call_stmt = mi->malloc_for_level[i]; |
| gcc_assert (is_gimple_call (call_stmt)); |
| e = cgraph_edge (c_node, call_stmt); |
| gcc_assert (e); |
| cgraph_remove_edge (e); |
| gsi = gsi_for_stmt (call_stmt); |
| /* Remove the call stmt. */ |
| gsi_remove (&gsi, true); |
| /* Remove the type cast stmt.  */ |
| FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, |
| gimple_call_lhs (call_stmt)) |
| { |
| use_stmt1 = use_stmt; |
| gsi = gsi_for_stmt (use_stmt); |
| gsi_remove (&gsi, true); |
| } |
| /* Remove the assignment of the allocated area. */ |
| FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, |
| gimple_get_lhs (use_stmt1)) |
| { |
| gsi = gsi_for_stmt (use_stmt); |
| gsi_remove (&gsi, true); |
| } |
| } |
| update_ssa (TODO_update_ssa); |
| #ifdef ENABLE_CHECKING |
| verify_ssa (true); |
| #endif |
| /* Delete the calls to free. */ |
| for (i = 1; i < mi->min_indirect_level_escape; i++) |
| { |
| gimple_stmt_iterator gsi; |
| |
| /* ??? It is not clear why this case is possible, but we failed on it once.  */ |
| if (!mi->free_stmts[i].stmt) |
| continue; |
| |
| c_node = cgraph_node (mi->free_stmts[i].func); |
| gcc_assert (is_gimple_call (mi->free_stmts[i].stmt)); |
| e = cgraph_edge (c_node, mi->free_stmts[i].stmt); |
| gcc_assert (e); |
| cgraph_remove_edge (e); |
| current_function_decl = mi->free_stmts[i].func; |
| set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func)); |
| gsi = gsi_for_stmt (mi->free_stmts[i].stmt); |
| gsi_remove (&gsi, true); |
| } |
| /* Return to the previous situation. */ |
| current_function_decl = oldfn; |
| pop_cfun (); |
| return 1; |
| |
| } |
| |
| |
| /* Print out the results of the escape analysis. */ |
| static int |
| dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED) |
| { |
| struct matrix_info *mi = (struct matrix_info *) *slot; |
| |
| if (!dump_file) |
| return 1; |
| fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,", |
| get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims); |
| fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level); |
| fprintf (dump_file, "\n"); |
| if (mi->min_indirect_level_escape >= 2) |
| fprintf (dump_file, "Flattened %d dimensions \n", |
| mi->min_indirect_level_escape); |
| return 1; |
| } |
| |
| /* Perform matrix flattening. */ |
| |
| static unsigned int |
| matrix_reorg (void) |
| { |
| struct cgraph_node *node; |
| |
| if (profile_info) |
| check_transpose_p = true; |
| else |
| check_transpose_p = false; |
| /* If hand-written vector code is used anywhere, we skip this optimization.  */ |
| for (node = cgraph_nodes; node; node = node->next) |
| if (!may_flatten_matrices (node)) |
| return 0; |
| matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free); |
| /* Find and record all potential matrices in the program. */ |
| find_matrices_decl (); |
| /* Analyze the accesses of the matrices (escaping analysis). */ |
| for (node = cgraph_nodes; node; node = node->next) |
| if (node->analyzed) |
| { |
| tree temp_fn; |
| |
| temp_fn = current_function_decl; |
| current_function_decl = node->decl; |
| push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
| bitmap_obstack_initialize (NULL); |
| gimple_register_cfg_hooks (); |
| |
| if (!gimple_in_ssa_p (cfun)) |
| { |
| free_dominance_info (CDI_DOMINATORS); |
| free_dominance_info (CDI_POST_DOMINATORS); |
| pop_cfun (); |
| current_function_decl = temp_fn; |
| bitmap_obstack_release (NULL); |
| |
| return 0; |
| } |
| |
| #ifdef ENABLE_CHECKING |
| verify_flow_info (); |
| #endif |
| |
| if (!matrices_to_reorg) |
| { |
| free_dominance_info (CDI_DOMINATORS); |
| free_dominance_info (CDI_POST_DOMINATORS); |
| pop_cfun (); |
| current_function_decl = temp_fn; |
| bitmap_obstack_release (NULL); |
| |
| return 0; |
| } |
| |
| /* Create the hash table for the PHI nodes of the matrix accesses.  */ |
| htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash, |
| mat_acc_phi_eq, free); |
| if (!check_transpose_p) |
| find_sites_in_func (false); |
| else |
| { |
| find_sites_in_func (true); |
| loop_optimizer_init (LOOPS_NORMAL); |
| if (current_loops) |
| scev_initialize (); |
| htab_traverse (matrices_to_reorg, analyze_transpose, NULL); |
| if (current_loops) |
| { |
| scev_finalize (); |
| loop_optimizer_finalize (); |
| current_loops = NULL; |
| } |
| } |
| /* If the current function is the allocation function for any of |
| the matrices we check its allocation and the escaping level. */ |
| htab_traverse (matrices_to_reorg, check_allocation_function, NULL); |
| free_dominance_info (CDI_DOMINATORS); |
| free_dominance_info (CDI_POST_DOMINATORS); |
| pop_cfun (); |
| current_function_decl = temp_fn; |
| bitmap_obstack_release (NULL); |
| } |
| htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL); |
| /* Now transform the accesses. */ |
| for (node = cgraph_nodes; node; node = node->next) |
| if (node->analyzed) |
| { |
| /* Remember that allocation sites have been handled. */ |
| tree temp_fn; |
| |
| temp_fn = current_function_decl; |
| current_function_decl = node->decl; |
| push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
| bitmap_obstack_initialize (NULL); |
| gimple_register_cfg_hooks (); |
| record_all_accesses_in_func (); |
| htab_traverse (matrices_to_reorg, transform_access_sites, NULL); |
| free_dominance_info (CDI_DOMINATORS); |
| free_dominance_info (CDI_POST_DOMINATORS); |
| pop_cfun (); |
| current_function_decl = temp_fn; |
| bitmap_obstack_release (NULL); |
| } |
| htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL); |
| |
| current_function_decl = NULL; |
| set_cfun (NULL); |
| matrices_to_reorg = NULL; |
| return 0; |
| } |
| |
| |
| /* The condition for matrix flattening to be performed. */ |
| static bool |
| gate_matrix_reorg (void) |
| { |
| return flag_ipa_matrix_reorg && flag_whole_program; |
| } |
| |
| struct simple_ipa_opt_pass pass_ipa_matrix_reorg = |
| { |
| { |
| SIMPLE_IPA_PASS, |
| "matrix-reorg", /* name */ |
| gate_matrix_reorg, /* gate */ |
| matrix_reorg, /* execute */ |
| NULL, /* sub */ |
| NULL, /* next */ |
| 0, /* static_pass_number */ |
| 0, /* tv_id */ |
| 0, /* properties_required */ |
| PROP_trees, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| TODO_dump_cgraph | TODO_dump_func /* todo_flags_finish */ |
| } |
| }; |
| |