Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1 | /* Loop autoparallelization. |
Nick Clifton | 6da7fc8 | 2009-02-10 17:59:08 +0000 | [diff] [blame] | 2 | Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc. |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 3 | Contributed by Sebastian Pop <pop@cri.ensmp.fr> and |
| 4 | Zdenek Dvorak <dvorakz@suse.cz>. |
| 5 | |
| 6 | This file is part of GCC. |
| 7 | |
| 8 | GCC is free software; you can redistribute it and/or modify it under |
| 9 | the terms of the GNU General Public License as published by the Free |
Nick Clifton | 6da7fc8 | 2009-02-10 17:59:08 +0000 | [diff] [blame] | 10 | Software Foundation; either version 3, or (at your option) any later |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 11 | version. |
| 12 | |
| 13 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| 14 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 16 | for more details. |
| 17 | |
| 18 | You should have received a copy of the GNU General Public License |
Nick Clifton | 6da7fc8 | 2009-02-10 17:59:08 +0000 | [diff] [blame] | 19 | along with GCC; see the file COPYING3. If not see |
| 20 | <http://www.gnu.org/licenses/>. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 21 | |
| 22 | #include "config.h" |
| 23 | #include "system.h" |
| 24 | #include "coretypes.h" |
| 25 | #include "tm.h" |
| 26 | #include "tree.h" |
| 27 | #include "rtl.h" |
| 28 | #include "tree-flow.h" |
| 29 | #include "cfgloop.h" |
| 30 | #include "ggc.h" |
| 31 | #include "tree-data-ref.h" |
| 32 | #include "diagnostic.h" |
| 33 | #include "tree-pass.h" |
| 34 | #include "tree-scalar-evolution.h" |
| 35 | #include "hashtab.h" |
| 36 | #include "langhooks.h" |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 37 | #include "tree-vectorizer.h" |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 38 | |
| 39 | /* This pass tries to distribute iterations of loops into several threads. |
| 40 | The implementation is straightforward -- for each loop we test whether its |
| 41 | iterations are independent, and if it is the case (and some additional |
| 42 | conditions regarding profitability and correctness are satisfied), we |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 43 | add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion |
| 44 | machinery do its job. |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 45 | |
| 46 | The most of the complexity is in bringing the code into shape expected |
| 47 | by the omp expanders: |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 48 | -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction |
| 49 | variable and that the exit test is at the start of the loop body |
| 50 | -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 51 | variables by accesses through pointers, and breaking up ssa chains |
| 52 | by storing the values incoming to the parallelized loop to a structure |
| 53 | passed to the new function as an argument (something similar is done |
| 54 | in omp gimplification, unfortunately only a small part of the code |
| 55 | can be shared). |
| 56 | |
| 57 | TODO: |
| 58 | -- if there are several parallelizable loops in a function, it may be |
| 59 | possible to generate the threads just once (using synchronization to |
| 60 | ensure that cross-loop dependences are obeyed). |
| 61 | -- handling of common scalar dependence patterns (accumulation, ...) |
| 62 | -- handling of non-innermost loops */ |
| 63 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 64 | /* |
| 65 | Reduction handling: |
| 66 | currently we use vect_is_simple_reduction() to detect reduction patterns. |
| 67 | The code transformation will be introduced by an example. |
| 68 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 69 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 70 | parloop |
| 71 | { |
| 72 | int sum=1; |
| 73 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 74 | for (i = 0; i < N; i++) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 75 | { |
| 76 | x[i] = i + 3; |
| 77 | sum+=x[i]; |
| 78 | } |
| 79 | } |
| 80 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 81 | gimple-like code: |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 82 | header_bb: |
| 83 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 84 | # sum_29 = PHI <sum_11(5), 1(3)> |
| 85 | # i_28 = PHI <i_12(5), 0(3)> |
| 86 | D.1795_8 = i_28 + 3; |
| 87 | x[i_28] = D.1795_8; |
| 88 | sum_11 = D.1795_8 + sum_29; |
| 89 | i_12 = i_28 + 1; |
| 90 | if (N_6(D) > i_12) |
| 91 | goto header_bb; |
| 92 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 93 | |
| 94 | exit_bb: |
| 95 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 96 | # sum_21 = PHI <sum_11(4)> |
| 97 | printf (&"%d"[0], sum_21); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 98 | |
| 99 | |
| 100 | after reduction transformation (only relevant parts): |
| 101 | |
| 102 | parloop |
| 103 | { |
| 104 | |
| 105 | .... |
| 106 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 107 | |
Ralf Wildenhues | fa10bee | 2008-06-06 05:42:00 +0000 | [diff] [blame] | 108 | # Storing the initial value given by the user. # |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 109 | |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 110 | .paral_data_store.32.sum.27 = 1; |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 111 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 112 | #pragma omp parallel num_threads(4) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 113 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 114 | #pragma omp for schedule(static) |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 115 | |
| 116 | # The neutral element corresponding to the particular |
| 117 | reduction's operation, e.g. 0 for PLUS_EXPR, |
| 118 | 1 for MULT_EXPR, etc. replaces the user's initial value. # |
| 119 | |
| 120 | # sum.27_29 = PHI <sum.27_11, 0> |
| 121 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 122 | sum.27_11 = D.1827_8 + sum.27_29; |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 123 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 124 | GIMPLE_OMP_CONTINUE |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 125 | |
| 126 | # Adding this reduction phi is done at create_phi_for_local_result() # |
| 127 | # sum.27_56 = PHI <sum.27_11, 0> |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 128 | GIMPLE_OMP_RETURN |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 129 | |
| 130 | # Creating the atomic operation is done at |
| 131 | create_call_for_reduction_1() # |
| 132 | |
| 133 | #pragma omp atomic_load |
| 134 | D.1839_59 = *&.paral_data_load.33_51->reduction.23; |
| 135 | D.1840_60 = sum.27_56 + D.1839_59; |
| 136 | #pragma omp atomic_store (D.1840_60); |
| 137 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 138 | GIMPLE_OMP_RETURN |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 139 | |
| 140 | # collecting the result after the join of the threads is done at |
| 141 | create_loads_for_reductions(). |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 142 | The value computed by the threads is loaded from the |
| 143 | shared struct. # |
| 144 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 145 | |
| 146 | .paral_data_load.33_52 = &.paral_data_store.32; |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 147 | sum_37 = .paral_data_load.33_52->sum.27; |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 148 | sum_43 = D.1795_41 + sum_37; |
| 149 | |
| 150 | exit bb: |
| 151 | # sum_21 = PHI <sum_43, sum_26> |
| 152 | printf (&"%d"[0], sum_21); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 153 | |
| 154 | ... |
| 155 | |
| 156 | } |
| 157 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 158 | */ |
| 159 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 160 | /* Minimal number of iterations of a loop that should be executed in each |
| 161 | thread. */ |
| 162 | #define MIN_PER_THREAD 100 |
| 163 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 164 | /* Element of the hashtable, representing a |
| 165 | reduction in the current loop. */ |
| 166 | struct reduction_info |
| 167 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 168 | gimple reduc_stmt; /* reduction statement. */ |
| 169 | gimple reduc_phi; /* The phi node defining the reduction. */ |
| 170 | enum tree_code reduction_code;/* code for the reduction operation. */ |
| 171 | gimple keep_res; /* The PHI_RESULT of this phi is the resulting value |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 172 | of the reduction variable when existing the loop. */ |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 173 | tree initial_value; /* The initial value of the reduction var before entering the loop. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 174 | tree field; /* the name of the field in the parloop data structure intended for reduction. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 175 | tree init; /* reduction initialization value. */ |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 176 | gimple new_phi; /* (helper field) Newly created phi node whose result |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 177 | will be passed to the atomic operation. Represents |
| 178 | the local result each thread computed for the reduction |
| 179 | operation. */ |
| 180 | }; |
| 181 | |
| 182 | /* Equality and hash functions for hashtab code. */ |
| 183 | |
| 184 | static int |
| 185 | reduction_info_eq (const void *aa, const void *bb) |
| 186 | { |
| 187 | const struct reduction_info *a = (const struct reduction_info *) aa; |
| 188 | const struct reduction_info *b = (const struct reduction_info *) bb; |
| 189 | |
| 190 | return (a->reduc_phi == b->reduc_phi); |
| 191 | } |
| 192 | |
| 193 | static hashval_t |
| 194 | reduction_info_hash (const void *aa) |
| 195 | { |
| 196 | const struct reduction_info *a = (const struct reduction_info *) aa; |
| 197 | |
| 198 | return htab_hash_pointer (a->reduc_phi); |
| 199 | } |
| 200 | |
| 201 | static struct reduction_info * |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 202 | reduction_phi (htab_t reduction_list, gimple phi) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 203 | { |
| 204 | struct reduction_info tmpred, *red; |
| 205 | |
| 206 | if (htab_elements (reduction_list) == 0) |
| 207 | return NULL; |
| 208 | |
| 209 | tmpred.reduc_phi = phi; |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 210 | red = (struct reduction_info *) htab_find (reduction_list, &tmpred); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 211 | |
| 212 | return red; |
| 213 | } |
| 214 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 215 | /* Element of hashtable of names to copy. */ |
| 216 | |
| 217 | struct name_to_copy_elt |
| 218 | { |
| 219 | unsigned version; /* The version of the name to copy. */ |
| 220 | tree new_name; /* The new name used in the copy. */ |
| 221 | tree field; /* The field of the structure used to pass the |
| 222 | value. */ |
| 223 | }; |
| 224 | |
| 225 | /* Equality and hash functions for hashtab code. */ |
| 226 | |
| 227 | static int |
| 228 | name_to_copy_elt_eq (const void *aa, const void *bb) |
| 229 | { |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 230 | const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa; |
| 231 | const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 232 | |
| 233 | return a->version == b->version; |
| 234 | } |
| 235 | |
| 236 | static hashval_t |
| 237 | name_to_copy_elt_hash (const void *aa) |
| 238 | { |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 239 | const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 240 | |
| 241 | return (hashval_t) a->version; |
| 242 | } |
| 243 | |
| 244 | /* Returns true if the iterations of LOOP are independent on each other (that |
| 245 | is, if we can execute them in parallel), and if LOOP satisfies other |
| 246 | conditions that we need to be able to parallelize it. Description of number |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 247 | of iterations is stored to NITER. Reduction analysis is done, if |
| 248 | reductions are found, they are inserted to the REDUCTION_LIST. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 249 | |
| 250 | static bool |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 251 | loop_parallel_p (struct loop *loop, htab_t reduction_list, |
| 252 | struct tree_niter_desc *niter) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 253 | { |
| 254 | edge exit = single_dom_exit (loop); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 255 | VEC (ddr_p, heap) * dependence_relations; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 256 | VEC (data_reference_p, heap) *datarefs; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 257 | lambda_trans_matrix trans; |
| 258 | bool ret = false; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 259 | gimple_stmt_iterator gsi; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 260 | loop_vec_info simple_loop_info; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 261 | |
| 262 | /* Only consider innermost loops with just one exit. The innermost-loop |
| 263 | restriction is not necessary, but it makes things simpler. */ |
| 264 | if (loop->inner || !exit) |
| 265 | return false; |
| 266 | |
| 267 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 268 | fprintf (dump_file, "\nConsidering loop %d\n", loop->num); |
| 269 | |
| 270 | /* We need to know # of iterations, and there should be no uses of values |
| 271 | defined inside loop outside of it, unless the values are invariants of |
| 272 | the loop. */ |
| 273 | if (!number_of_iterations_exit (loop, exit, niter, false)) |
| 274 | { |
| 275 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 276 | fprintf (dump_file, " FAILED: number of iterations not known\n"); |
| 277 | return false; |
| 278 | } |
| 279 | |
Razya Ladelsky | c0399c4 | 2008-11-19 16:08:01 +0000 | [diff] [blame] | 280 | vect_dump = NULL; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 281 | simple_loop_info = vect_analyze_loop_form (loop); |
| 282 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 283 | for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 284 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 285 | gimple phi = gsi_stmt (gsi); |
| 286 | gimple reduc_stmt = NULL; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 287 | |
| 288 | /* ??? TODO: Change this into a generic function that |
| 289 | recognizes reductions. */ |
| 290 | if (!is_gimple_reg (PHI_RESULT (phi))) |
| 291 | continue; |
| 292 | if (simple_loop_info) |
| 293 | reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi); |
| 294 | |
| 295 | /* Create a reduction_info struct, initialize it and insert it to |
| 296 | the reduction list. */ |
| 297 | |
| 298 | if (reduc_stmt) |
| 299 | { |
| 300 | PTR *slot; |
| 301 | struct reduction_info *new_reduction; |
| 302 | |
| 303 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 304 | { |
| 305 | fprintf (dump_file, |
| 306 | "Detected reduction. reduction stmt is: \n"); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 307 | print_gimple_stmt (dump_file, reduc_stmt, 0, 0); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 308 | fprintf (dump_file, "\n"); |
| 309 | } |
| 310 | |
| 311 | new_reduction = XCNEW (struct reduction_info); |
| 312 | |
| 313 | new_reduction->reduc_stmt = reduc_stmt; |
| 314 | new_reduction->reduc_phi = phi; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 315 | new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 316 | slot = htab_find_slot (reduction_list, new_reduction, INSERT); |
| 317 | *slot = new_reduction; |
| 318 | } |
| 319 | } |
| 320 | |
Zdenek Dvorak | 7242560 | 2008-03-27 11:25:36 +0100 | [diff] [blame] | 321 | /* Get rid of the information created by the vectorizer functions. */ |
| 322 | destroy_loop_vec_info (simple_loop_info, true); |
| 323 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 324 | for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 325 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 326 | gimple phi = gsi_stmt (gsi); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 327 | struct reduction_info *red; |
| 328 | imm_use_iterator imm_iter; |
| 329 | use_operand_p use_p; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 330 | gimple reduc_phi; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 331 | tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit); |
| 332 | |
| 333 | if (is_gimple_reg (val)) |
| 334 | { |
| 335 | if (dump_file && (dump_flags & TDF_DETAILS)) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 336 | { |
| 337 | fprintf (dump_file, "phi is "); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 338 | print_gimple_stmt (dump_file, phi, 0, 0); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 339 | fprintf (dump_file, "arg of phi to exit: value "); |
| 340 | print_generic_expr (dump_file, val, 0); |
| 341 | fprintf (dump_file, " used outside loop\n"); |
| 342 | fprintf (dump_file, |
| 343 | " checking if it a part of reduction pattern: \n"); |
| 344 | } |
| 345 | if (htab_elements (reduction_list) == 0) |
| 346 | { |
| 347 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 348 | fprintf (dump_file, |
| 349 | " FAILED: it is not a part of reduction.\n"); |
| 350 | return false; |
| 351 | } |
| 352 | reduc_phi = NULL; |
| 353 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val) |
| 354 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 355 | if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 356 | { |
| 357 | reduc_phi = USE_STMT (use_p); |
| 358 | break; |
| 359 | } |
| 360 | } |
| 361 | red = reduction_phi (reduction_list, reduc_phi); |
| 362 | if (red == NULL) |
| 363 | { |
| 364 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 365 | fprintf (dump_file, |
| 366 | " FAILED: it is not a part of reduction.\n"); |
| 367 | return false; |
| 368 | } |
| 369 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 370 | { |
| 371 | fprintf (dump_file, "reduction phi is "); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 372 | print_gimple_stmt (dump_file, red->reduc_phi, 0, 0); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 373 | fprintf (dump_file, "reduction stmt is "); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 374 | print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 375 | } |
| 376 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 377 | } |
| 378 | } |
| 379 | |
| 380 | /* The iterations of the loop may communicate only through bivs whose |
| 381 | iteration space can be distributed efficiently. */ |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 382 | for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 383 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 384 | gimple phi = gsi_stmt (gsi); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 385 | tree def = PHI_RESULT (phi); |
| 386 | affine_iv iv; |
| 387 | |
Zdenek Dvorak | f017bf5 | 2009-03-04 18:50:20 +0100 | [diff] [blame] | 388 | if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 389 | { |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 390 | struct reduction_info *red; |
| 391 | |
| 392 | red = reduction_phi (reduction_list, phi); |
| 393 | if (red == NULL) |
| 394 | { |
| 395 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 396 | fprintf (dump_file, |
| 397 | " FAILED: scalar dependency between iterations\n"); |
| 398 | return false; |
| 399 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 400 | } |
| 401 | } |
| 402 | |
| 403 | /* We need to version the loop to verify assumptions in runtime. */ |
| 404 | if (!can_duplicate_loop_p (loop)) |
| 405 | { |
| 406 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 407 | fprintf (dump_file, " FAILED: cannot be duplicated\n"); |
| 408 | return false; |
| 409 | } |
| 410 | |
| 411 | /* Check for problems with dependences. If the loop can be reversed, |
| 412 | the iterations are independent. */ |
| 413 | datarefs = VEC_alloc (data_reference_p, heap, 10); |
| 414 | dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); |
| 415 | compute_data_dependences_for_loop (loop, true, &datarefs, |
| 416 | &dependence_relations); |
| 417 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 418 | dump_data_dependence_relations (dump_file, dependence_relations); |
| 419 | |
| 420 | trans = lambda_trans_matrix_new (1, 1); |
| 421 | LTM_MATRIX (trans)[0][0] = -1; |
| 422 | |
| 423 | if (lambda_transform_legal_p (trans, 1, dependence_relations)) |
| 424 | { |
| 425 | ret = true; |
| 426 | if (dump_file && (dump_flags & TDF_DETAILS)) |
| 427 | fprintf (dump_file, " SUCCESS: may be parallelized\n"); |
| 428 | } |
| 429 | else if (dump_file && (dump_flags & TDF_DETAILS)) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 430 | fprintf (dump_file, |
| 431 | " FAILED: data dependencies exist across iterations\n"); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 432 | |
| 433 | free_dependence_relations (dependence_relations); |
| 434 | free_data_refs (datarefs); |
| 435 | |
| 436 | return ret; |
| 437 | } |
| 438 | |
Sebastian Pop | 1d4af1e | 2008-01-16 02:44:04 +0000 | [diff] [blame] | 439 | /* Return true when LOOP contains basic blocks marked with the |
| 440 | BB_IRREDUCIBLE_LOOP flag. */ |
| 441 | |
| 442 | static inline bool |
| 443 | loop_has_blocks_with_irreducible_flag (struct loop *loop) |
| 444 | { |
| 445 | unsigned i; |
| 446 | basic_block *bbs = get_loop_body_in_dom_order (loop); |
| 447 | bool res = true; |
| 448 | |
| 449 | for (i = 0; i < loop->num_nodes; i++) |
| 450 | if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP) |
| 451 | goto end; |
| 452 | |
| 453 | res = false; |
| 454 | end: |
| 455 | free (bbs); |
| 456 | return res; |
| 457 | } |
| 458 | |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 459 | /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name. |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 460 | The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 461 | to their addresses that can be reused. The address of OBJ is known to |
| 462 | be invariant in the whole function. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 463 | |
| 464 | static tree |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 465 | take_address_of (tree obj, tree type, edge entry, htab_t decl_address) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 466 | { |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 467 | int uid; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 468 | void **dslot; |
| 469 | struct int_tree_map ielt, *nielt; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 470 | tree *var_p, name, bvar, addr; |
| 471 | gimple stmt; |
| 472 | gimple_seq stmts; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 473 | |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 474 | /* Since the address of OBJ is invariant, the trees may be shared. |
| 475 | Avoid rewriting unrelated parts of the code. */ |
| 476 | obj = unshare_expr (obj); |
| 477 | for (var_p = &obj; |
| 478 | handled_component_p (*var_p); |
| 479 | var_p = &TREE_OPERAND (*var_p, 0)) |
| 480 | continue; |
| 481 | uid = DECL_UID (*var_p); |
| 482 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 483 | ielt.uid = uid; |
| 484 | dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT); |
| 485 | if (!*dslot) |
| 486 | { |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 487 | addr = build_addr (*var_p, current_function_decl); |
| 488 | bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p)); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 489 | add_referenced_var (bvar); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 490 | stmt = gimple_build_assign (bvar, addr); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 491 | name = make_ssa_name (bvar, stmt); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 492 | gimple_assign_set_lhs (stmt, name); |
| 493 | gsi_insert_on_edge_immediate (entry, stmt); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 494 | |
| 495 | nielt = XNEW (struct int_tree_map); |
| 496 | nielt->uid = uid; |
| 497 | nielt->to = name; |
| 498 | *dslot = nielt; |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 499 | } |
| 500 | else |
| 501 | name = ((struct int_tree_map *) *dslot)->to; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 502 | |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 503 | if (var_p != &obj) |
| 504 | { |
| 505 | *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name); |
| 506 | name = force_gimple_operand (build_addr (obj, current_function_decl), |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 507 | &stmts, true, NULL_TREE); |
| 508 | if (!gimple_seq_empty_p (stmts)) |
| 509 | gsi_insert_seq_on_edge_immediate (entry, stmts); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 510 | } |
| 511 | |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 512 | if (TREE_TYPE (name) != type) |
| 513 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 514 | name = force_gimple_operand (fold_convert (type, name), &stmts, true, |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 515 | NULL_TREE); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 516 | if (!gimple_seq_empty_p (stmts)) |
| 517 | gsi_insert_seq_on_edge_immediate (entry, stmts); |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 518 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 519 | |
| 520 | return name; |
| 521 | } |
| 522 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 523 | /* Callback for htab_traverse. Create the initialization statement |
| 524 | for reduction described in SLOT, and place it at the preheader of |
| 525 | the loop described in DATA. */ |
| 526 | |
| 527 | static int |
| 528 | initialize_reductions (void **slot, void *data) |
| 529 | { |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 530 | tree init, c; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 531 | tree bvar, type, arg; |
| 532 | edge e; |
| 533 | |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 534 | struct reduction_info *const reduc = (struct reduction_info *) *slot; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 535 | struct loop *loop = (struct loop *) data; |
| 536 | |
| 537 | /* Create initialization in preheader: |
| 538 | reduction_variable = initialization value of reduction. */ |
| 539 | |
| 540 | /* In the phi node at the header, replace the argument coming |
| 541 | from the preheader with the reduction initialization value. */ |
| 542 | |
| 543 | /* Create a new variable to initialize the reduction. */ |
| 544 | type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi)); |
| 545 | bvar = create_tmp_var (type, "reduction"); |
| 546 | add_referenced_var (bvar); |
| 547 | |
Aldy Hernandez | c2255bc | 2009-06-12 22:06:47 +0000 | [diff] [blame^] | 548 | c = build_omp_clause (gimple_location (reduc->reduc_stmt), |
| 549 | OMP_CLAUSE_REDUCTION); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 550 | OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 551 | OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 552 | |
| 553 | init = omp_reduction_init (c, TREE_TYPE (bvar)); |
| 554 | reduc->init = init; |
| 555 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 556 | /* Replace the argument representing the initialization value |
| 557 | with the initialization value for the reduction (neutral |
| 558 | element for the particular operation, e.g. 0 for PLUS_EXPR, |
| 559 | 1 for MULT_EXPR, etc). |
| 560 | Keep the old value in a new variable "reduction_initial", |
| 561 | that will be taken in consideration after the parallel |
| 562 | computing is done. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 563 | |
| 564 | e = loop_preheader_edge (loop); |
| 565 | arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e); |
| 566 | /* Create new variable to hold the initial value. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 567 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 568 | SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 569 | (reduc->reduc_phi, loop_preheader_edge (loop)), init); |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 570 | reduc->initial_value = arg; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 571 | return 1; |
| 572 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 573 | |
| 574 | struct elv_data |
| 575 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 576 | struct walk_stmt_info info; |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 577 | edge entry; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 578 | htab_t decl_address; |
| 579 | bool changed; |
| 580 | }; |
| 581 | |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 582 | /* Eliminates references to local variables in *TP out of the single |
| 583 | entry single exit region starting at DTA->ENTRY. |
| 584 | DECL_ADDRESS contains addresses of the references that had their |
| 585 | address taken already. If the expression is changed, CHANGED is |
| 586 | set to true. Callback for walk_tree. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 587 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 588 | static tree |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 589 | eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 590 | { |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 591 | struct elv_data *const dta = (struct elv_data *) data; |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 592 | tree t = *tp, var, addr, addr_type, type, obj; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 593 | |
| 594 | if (DECL_P (t)) |
| 595 | { |
| 596 | *walk_subtrees = 0; |
| 597 | |
| 598 | if (!SSA_VAR_P (t) || DECL_EXTERNAL (t)) |
| 599 | return NULL_TREE; |
| 600 | |
| 601 | type = TREE_TYPE (t); |
| 602 | addr_type = build_pointer_type (type); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 603 | addr = take_address_of (t, addr_type, dta->entry, dta->decl_address); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 604 | *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr); |
| 605 | |
| 606 | dta->changed = true; |
| 607 | return NULL_TREE; |
| 608 | } |
| 609 | |
| 610 | if (TREE_CODE (t) == ADDR_EXPR) |
| 611 | { |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 612 | /* ADDR_EXPR may appear in two contexts: |
| 613 | -- as a gimple operand, when the address taken is a function invariant |
| 614 | -- as gimple rhs, when the resulting address in not a function |
| 615 | invariant |
| 616 | We do not need to do anything special in the latter case (the base of |
| 617 | the memory reference whose address is taken may be replaced in the |
| 618 | DECL_P case). The former case is more complicated, as we need to |
| 619 | ensure that the new address is still a gimple operand. Thus, it |
| 620 | is not sufficient to replace just the base of the memory reference -- |
| 621 | we need to move the whole computation of the address out of the |
| 622 | loop. */ |
| 623 | if (!is_gimple_val (t)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 624 | return NULL_TREE; |
| 625 | |
| 626 | *walk_subtrees = 0; |
Zdenek Dvorak | 8a171a5 | 2007-12-19 16:01:19 +0100 | [diff] [blame] | 627 | obj = TREE_OPERAND (t, 0); |
| 628 | var = get_base_address (obj); |
| 629 | if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 630 | return NULL_TREE; |
| 631 | |
| 632 | addr_type = TREE_TYPE (t); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 633 | addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 634 | *tp = addr; |
| 635 | |
| 636 | dta->changed = true; |
| 637 | return NULL_TREE; |
| 638 | } |
| 639 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 640 | if (!EXPR_P (t)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 641 | *walk_subtrees = 0; |
| 642 | |
| 643 | return NULL_TREE; |
| 644 | } |
| 645 | |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 646 | /* Moves the references to local variables in STMT out of the single |
| 647 | entry single exit region starting at ENTRY. DECL_ADDRESS contains |
| 648 | addresses of the references that had their address taken |
| 649 | already. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 650 | |
| 651 | static void |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 652 | eliminate_local_variables_stmt (edge entry, gimple stmt, |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 653 | htab_t decl_address) |
| 654 | { |
| 655 | struct elv_data dta; |
| 656 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 657 | memset (&dta.info, '\0', sizeof (dta.info)); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 658 | dta.entry = entry; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 659 | dta.decl_address = decl_address; |
| 660 | dta.changed = false; |
| 661 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 662 | walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 663 | |
| 664 | if (dta.changed) |
| 665 | update_stmt (stmt); |
| 666 | } |
| 667 | |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 668 | /* Eliminates the references to local variables from the single entry |
| 669 | single exit region between the ENTRY and EXIT edges. |
| 670 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 671 | This includes: |
| 672 | 1) Taking address of a local variable -- these are moved out of the |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 673 | region (and temporary variable is created to hold the address if |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 674 | necessary). |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 675 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 676 | 2) Dereferencing a local variable -- these are replaced with indirect |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 677 | references. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 678 | |
| 679 | static void |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 680 | eliminate_local_variables (edge entry, edge exit) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 681 | { |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 682 | basic_block bb; |
| 683 | VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 684 | unsigned i; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 685 | gimple_stmt_iterator gsi; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 686 | htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq, |
| 687 | free); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 688 | basic_block entry_bb = entry->src; |
| 689 | basic_block exit_bb = exit->dest; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 690 | |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 691 | gather_blocks_in_sese_region (entry_bb, exit_bb, &body); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 692 | |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 693 | for (i = 0; VEC_iterate (basic_block, body, i, bb); i++) |
| 694 | if (bb != entry_bb && bb != exit_bb) |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 695 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| 696 | eliminate_local_variables_stmt (entry, gsi_stmt (gsi), |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 697 | decl_address); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 698 | |
| 699 | htab_delete (decl_address); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 700 | VEC_free (basic_block, heap, body); |
| 701 | } |
| 702 | |
| 703 | /* Returns true if expression EXPR is not defined between ENTRY and |
| 704 | EXIT, i.e. if all its operands are defined outside of the region. */ |
| 705 | |
| 706 | static bool |
| 707 | expr_invariant_in_region_p (edge entry, edge exit, tree expr) |
| 708 | { |
| 709 | basic_block entry_bb = entry->src; |
| 710 | basic_block exit_bb = exit->dest; |
| 711 | basic_block def_bb; |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 712 | |
| 713 | if (is_gimple_min_invariant (expr)) |
| 714 | return true; |
| 715 | |
| 716 | if (TREE_CODE (expr) == SSA_NAME) |
| 717 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 718 | def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 719 | if (def_bb |
| 720 | && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb) |
| 721 | && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb)) |
| 722 | return false; |
| 723 | |
| 724 | return true; |
| 725 | } |
| 726 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 727 | return false; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 728 | } |
| 729 | |
| 730 | /* If COPY_NAME_P is true, creates and returns a duplicate of NAME. |
| 731 | The copies are stored to NAME_COPIES, if NAME was already duplicated, |
| 732 | its duplicate stored in NAME_COPIES is returned. |
| 733 | |
| 734 | Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also |
| 735 | duplicated, storing the copies in DECL_COPIES. */ |
| 736 | |
| 737 | static tree |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 738 | separate_decls_in_region_name (tree name, |
| 739 | htab_t name_copies, htab_t decl_copies, |
| 740 | bool copy_name_p) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 741 | { |
| 742 | tree copy, var, var_copy; |
| 743 | unsigned idx, uid, nuid; |
| 744 | struct int_tree_map ielt, *nielt; |
| 745 | struct name_to_copy_elt elt, *nelt; |
| 746 | void **slot, **dslot; |
| 747 | |
| 748 | if (TREE_CODE (name) != SSA_NAME) |
| 749 | return name; |
| 750 | |
| 751 | idx = SSA_NAME_VERSION (name); |
| 752 | elt.version = idx; |
| 753 | slot = htab_find_slot_with_hash (name_copies, &elt, idx, |
| 754 | copy_name_p ? INSERT : NO_INSERT); |
| 755 | if (slot && *slot) |
| 756 | return ((struct name_to_copy_elt *) *slot)->new_name; |
| 757 | |
| 758 | var = SSA_NAME_VAR (name); |
| 759 | uid = DECL_UID (var); |
| 760 | ielt.uid = uid; |
| 761 | dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT); |
| 762 | if (!*dslot) |
| 763 | { |
| 764 | var_copy = create_tmp_var (TREE_TYPE (var), get_name (var)); |
Jakub Jelinek | 36ad792 | 2007-12-03 23:35:39 +0100 | [diff] [blame] | 765 | DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 766 | add_referenced_var (var_copy); |
| 767 | nielt = XNEW (struct int_tree_map); |
| 768 | nielt->uid = uid; |
| 769 | nielt->to = var_copy; |
| 770 | *dslot = nielt; |
| 771 | |
| 772 | /* Ensure that when we meet this decl next time, we won't duplicate |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 773 | it again. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 774 | nuid = DECL_UID (var_copy); |
| 775 | ielt.uid = nuid; |
| 776 | dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT); |
| 777 | gcc_assert (!*dslot); |
| 778 | nielt = XNEW (struct int_tree_map); |
| 779 | nielt->uid = nuid; |
| 780 | nielt->to = var_copy; |
| 781 | *dslot = nielt; |
| 782 | } |
| 783 | else |
| 784 | var_copy = ((struct int_tree_map *) *dslot)->to; |
| 785 | |
| 786 | if (copy_name_p) |
| 787 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 788 | copy = duplicate_ssa_name (name, NULL); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 789 | nelt = XNEW (struct name_to_copy_elt); |
| 790 | nelt->version = idx; |
| 791 | nelt->new_name = copy; |
| 792 | nelt->field = NULL_TREE; |
| 793 | *slot = nelt; |
| 794 | } |
| 795 | else |
| 796 | { |
| 797 | gcc_assert (!slot); |
| 798 | copy = name; |
| 799 | } |
| 800 | |
| 801 | SSA_NAME_VAR (copy) = var_copy; |
| 802 | return copy; |
| 803 | } |
| 804 | |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 805 | /* Finds the ssa names used in STMT that are defined outside the |
| 806 | region between ENTRY and EXIT and replaces such ssa names with |
| 807 | their duplicates. The duplicates are stored to NAME_COPIES. Base |
| 808 | decls of all ssa names used in STMT (including those defined in |
| 809 | LOOP) are replaced with the new temporary variables; the |
| 810 | replacement decls are stored in DECL_COPIES. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 811 | |
| 812 | static void |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 813 | separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt, |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 814 | htab_t name_copies, htab_t decl_copies) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 815 | { |
| 816 | use_operand_p use; |
| 817 | def_operand_p def; |
| 818 | ssa_op_iter oi; |
| 819 | tree name, copy; |
| 820 | bool copy_name_p; |
| 821 | |
| 822 | mark_virtual_ops_for_renaming (stmt); |
| 823 | |
| 824 | FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 825 | { |
| 826 | name = DEF_FROM_PTR (def); |
| 827 | gcc_assert (TREE_CODE (name) == SSA_NAME); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 828 | copy = separate_decls_in_region_name (name, name_copies, decl_copies, |
| 829 | false); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 830 | gcc_assert (copy == name); |
| 831 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 832 | |
| 833 | FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 834 | { |
| 835 | name = USE_FROM_PTR (use); |
| 836 | if (TREE_CODE (name) != SSA_NAME) |
| 837 | continue; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 838 | |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 839 | copy_name_p = expr_invariant_in_region_p (entry, exit, name); |
| 840 | copy = separate_decls_in_region_name (name, name_copies, decl_copies, |
| 841 | copy_name_p); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 842 | SET_USE (use, copy); |
| 843 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 844 | } |
| 845 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 846 | /* Callback for htab_traverse. Adds a field corresponding to the reduction |
| 847 | specified in SLOT. The type is passed in DATA. */ |
| 848 | |
| 849 | static int |
| 850 | add_field_for_reduction (void **slot, void *data) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 851 | { |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 852 | |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 853 | struct reduction_info *const red = (struct reduction_info *) *slot; |
| 854 | tree const type = (tree) data; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 855 | tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt)); |
Aldy Hernandez | c2255bc | 2009-06-12 22:06:47 +0000 | [diff] [blame^] | 856 | tree field = build_decl (gimple_location (red->reduc_stmt), |
| 857 | FIELD_DECL, DECL_NAME (var), TREE_TYPE (var)); |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 858 | |
| 859 | insert_field_into_struct (type, field); |
| 860 | |
| 861 | red->field = field; |
| 862 | |
| 863 | return 1; |
| 864 | } |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 865 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 866 | /* Callback for htab_traverse. Adds a field corresponding to a ssa name |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 867 | described in SLOT. The type is passed in DATA. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 868 | |
| 869 | static int |
| 870 | add_field_for_name (void **slot, void *data) |
| 871 | { |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 872 | struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot; |
| 873 | tree type = (tree) data; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 874 | tree name = ssa_name (elt->version); |
| 875 | tree var = SSA_NAME_VAR (name); |
Aldy Hernandez | c2255bc | 2009-06-12 22:06:47 +0000 | [diff] [blame^] | 876 | tree field = build_decl (DECL_SOURCE_LOCATION (var), |
| 877 | FIELD_DECL, DECL_NAME (var), TREE_TYPE (var)); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 878 | |
| 879 | insert_field_into_struct (type, field); |
| 880 | elt->field = field; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 881 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 882 | return 1; |
| 883 | } |
| 884 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 885 | /* Callback for htab_traverse. A local result is the intermediate result |
| 886 | computed by a single |
Ralf Wildenhues | fa10bee | 2008-06-06 05:42:00 +0000 | [diff] [blame] | 887 | thread, or the initial value in case no iteration was executed. |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 888 | This function creates a phi node reflecting these values. |
| 889 | The phi's result will be stored in NEW_PHI field of the |
| 890 | reduction's data structure. */ |
| 891 | |
| 892 | static int |
| 893 | create_phi_for_local_result (void **slot, void *data) |
| 894 | { |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 895 | struct reduction_info *const reduc = (struct reduction_info *) *slot; |
| 896 | const struct loop *const loop = (const struct loop *) data; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 897 | edge e; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 898 | gimple new_phi; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 899 | basic_block store_bb; |
| 900 | tree local_res; |
| 901 | |
| 902 | /* STORE_BB is the block where the phi |
| 903 | should be stored. It is the destination of the loop exit. |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 904 | (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 905 | store_bb = FALLTHRU_EDGE (loop->latch)->dest; |
| 906 | |
| 907 | /* STORE_BB has two predecessors. One coming from the loop |
| 908 | (the reduction's result is computed at the loop), |
| 909 | and another coming from a block preceding the loop, |
| 910 | when no iterations |
| 911 | are executed (the initial value should be taken). */ |
| 912 | if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch)) |
| 913 | e = EDGE_PRED (store_bb, 1); |
| 914 | else |
| 915 | e = EDGE_PRED (store_bb, 0); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 916 | local_res |
| 917 | = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)), |
| 918 | NULL); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 919 | new_phi = create_phi_node (local_res, store_bb); |
| 920 | SSA_NAME_DEF_STMT (local_res) = new_phi; |
| 921 | add_phi_arg (new_phi, reduc->init, e); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 922 | add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt), |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 923 | FALLTHRU_EDGE (loop->latch)); |
| 924 | reduc->new_phi = new_phi; |
| 925 | |
| 926 | return 1; |
| 927 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 928 | |
| 929 | struct clsn_data |
| 930 | { |
| 931 | tree store; |
| 932 | tree load; |
| 933 | |
| 934 | basic_block store_bb; |
| 935 | basic_block load_bb; |
| 936 | }; |
| 937 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 938 | /* Callback for htab_traverse. Create an atomic instruction for the |
| 939 | reduction described in SLOT. |
| 940 | DATA annotates the place in memory the atomic operation relates to, |
| 941 | and the basic block it needs to be generated in. */ |
| 942 | |
| 943 | static int |
| 944 | create_call_for_reduction_1 (void **slot, void *data) |
| 945 | { |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 946 | struct reduction_info *const reduc = (struct reduction_info *) *slot; |
| 947 | struct clsn_data *const clsn_data = (struct clsn_data *) data; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 948 | gimple_stmt_iterator gsi; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 949 | tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi)); |
| 950 | tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load)); |
| 951 | tree load_struct; |
| 952 | basic_block bb; |
| 953 | basic_block new_bb; |
| 954 | edge e; |
| 955 | tree t, addr, addr_type, ref, x; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 956 | tree tmp_load, name; |
| 957 | gimple load; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 958 | |
| 959 | load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load); |
| 960 | t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE); |
| 961 | addr_type = build_pointer_type (type); |
| 962 | |
| 963 | addr = build_addr (t, current_function_decl); |
| 964 | |
| 965 | /* Create phi node. */ |
| 966 | bb = clsn_data->load_bb; |
| 967 | |
| 968 | e = split_block (bb, t); |
| 969 | new_bb = e->dest; |
| 970 | |
| 971 | tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL); |
| 972 | add_referenced_var (tmp_load); |
| 973 | tmp_load = make_ssa_name (tmp_load, NULL); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 974 | load = gimple_build_omp_atomic_load (tmp_load, addr); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 975 | SSA_NAME_DEF_STMT (tmp_load) = load; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 976 | gsi = gsi_start_bb (new_bb); |
| 977 | gsi_insert_after (&gsi, load, GSI_NEW_STMT); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 978 | |
| 979 | e = split_block (new_bb, load); |
| 980 | new_bb = e->dest; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 981 | gsi = gsi_start_bb (new_bb); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 982 | ref = tmp_load; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 983 | x = fold_build2 (reduc->reduction_code, |
| 984 | TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref, |
| 985 | PHI_RESULT (reduc->new_phi)); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 986 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 987 | name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true, |
| 988 | GSI_CONTINUE_LINKING); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 989 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 990 | gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 991 | return 1; |
| 992 | } |
| 993 | |
| 994 | /* Create the atomic operation at the join point of the threads. |
| 995 | REDUCTION_LIST describes the reductions in the LOOP. |
| 996 | LD_ST_DATA describes the shared data structure where |
| 997 | shared data is stored in and loaded from. */ |
| 998 | static void |
| 999 | create_call_for_reduction (struct loop *loop, htab_t reduction_list, |
| 1000 | struct clsn_data *ld_st_data) |
| 1001 | { |
| 1002 | htab_traverse (reduction_list, create_phi_for_local_result, loop); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1003 | /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1004 | ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest; |
| 1005 | htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data); |
| 1006 | } |
| 1007 | |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 1008 | /* Callback for htab_traverse. Loads the final reduction value at the |
| 1009 | join point of all threads, and inserts it in the right place. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1010 | |
| 1011 | static int |
| 1012 | create_loads_for_reductions (void **slot, void *data) |
| 1013 | { |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 1014 | struct reduction_info *const red = (struct reduction_info *) *slot; |
| 1015 | struct clsn_data *const clsn_data = (struct clsn_data *) data; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1016 | gimple stmt; |
| 1017 | gimple_stmt_iterator gsi; |
| 1018 | tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt)); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1019 | tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load)); |
| 1020 | tree load_struct; |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 1021 | tree name; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1022 | tree x; |
| 1023 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1024 | gsi = gsi_after_labels (clsn_data->load_bb); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1025 | load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load); |
| 1026 | load_struct = build3 (COMPONENT_REF, type, load_struct, red->field, |
| 1027 | NULL_TREE); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1028 | |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 1029 | x = load_struct; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1030 | name = PHI_RESULT (red->keep_res); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1031 | stmt = gimple_build_assign (name, x); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1032 | SSA_NAME_DEF_STMT (name) = stmt; |
| 1033 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1034 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1035 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1036 | for (gsi = gsi_start_phis (gimple_bb (red->keep_res)); |
| 1037 | !gsi_end_p (gsi); gsi_next (&gsi)) |
| 1038 | if (gsi_stmt (gsi) == red->keep_res) |
| 1039 | { |
| 1040 | remove_phi_node (&gsi, false); |
| 1041 | return 1; |
| 1042 | } |
| 1043 | gcc_unreachable (); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1044 | } |
| 1045 | |
| 1046 | /* Load the reduction result that was stored in LD_ST_DATA. |
| 1047 | REDUCTION_LIST describes the list of reductions that the |
Ralf Wildenhues | fa10bee | 2008-06-06 05:42:00 +0000 | [diff] [blame] | 1048 | loads should be generated for. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1049 | static void |
| 1050 | create_final_loads_for_reduction (htab_t reduction_list, |
| 1051 | struct clsn_data *ld_st_data) |
| 1052 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1053 | gimple_stmt_iterator gsi; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1054 | tree t; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1055 | gimple stmt; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1056 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1057 | gsi = gsi_after_labels (ld_st_data->load_bb); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1058 | t = build_fold_addr_expr (ld_st_data->store); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1059 | stmt = gimple_build_assign (ld_st_data->load, t); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1060 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1061 | gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); |
| 1062 | SSA_NAME_DEF_STMT (ld_st_data->load) = stmt; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1063 | |
| 1064 | htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data); |
| 1065 | |
| 1066 | } |
| 1067 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 1068 | /* Callback for htab_traverse. Store the neutral value for the |
| 1069 | particular reduction's operation, e.g. 0 for PLUS_EXPR, |
| 1070 | 1 for MULT_EXPR, etc. into the reduction field. |
| 1071 | The reduction is specified in SLOT. The store information is |
| 1072 | passed in DATA. */ |
| 1073 | |
| 1074 | static int |
| 1075 | create_stores_for_reduction (void **slot, void *data) |
| 1076 | { |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 1077 | struct reduction_info *const red = (struct reduction_info *) *slot; |
| 1078 | struct clsn_data *const clsn_data = (struct clsn_data *) data; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1079 | tree t; |
| 1080 | gimple stmt; |
| 1081 | gimple_stmt_iterator gsi; |
| 1082 | tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt)); |
| 1083 | |
| 1084 | gsi = gsi_last_bb (clsn_data->store_bb); |
| 1085 | t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE); |
| 1086 | stmt = gimple_build_assign (t, red->initial_value); |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 1087 | mark_virtual_ops_for_renaming (stmt); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1088 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 1089 | |
| 1090 | return 1; |
| 1091 | } |
| 1092 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1093 | /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and |
| 1094 | store to a field of STORE in STORE_BB for the ssa name and its duplicate |
| 1095 | specified in SLOT. */ |
| 1096 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1097 | static int |
| 1098 | create_loads_and_stores_for_name (void **slot, void *data) |
| 1099 | { |
Kaveh R. Ghazi | 3d9a9f9 | 2008-06-20 18:34:07 +0000 | [diff] [blame] | 1100 | struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot; |
| 1101 | struct clsn_data *const clsn_data = (struct clsn_data *) data; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1102 | tree t; |
| 1103 | gimple stmt; |
| 1104 | gimple_stmt_iterator gsi; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1105 | tree type = TREE_TYPE (elt->new_name); |
| 1106 | tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load)); |
| 1107 | tree load_struct; |
| 1108 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1109 | gsi = gsi_last_bb (clsn_data->store_bb); |
| 1110 | t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE); |
| 1111 | stmt = gimple_build_assign (t, ssa_name (elt->version)); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1112 | mark_virtual_ops_for_renaming (stmt); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1113 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1114 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1115 | gsi = gsi_last_bb (clsn_data->load_bb); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1116 | load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1117 | t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE); |
| 1118 | stmt = gimple_build_assign (elt->new_name, t); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1119 | SSA_NAME_DEF_STMT (elt->new_name) = stmt; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1120 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1121 | |
| 1122 | return 1; |
| 1123 | } |
| 1124 | |
| 1125 | /* Moves all the variables used in LOOP and defined outside of it (including |
| 1126 | the initial values of loop phi nodes, and *PER_THREAD if it is a ssa |
| 1127 | name) to a structure created for this purpose. The code |
| 1128 | |
| 1129 | while (1) |
| 1130 | { |
| 1131 | use (a); |
| 1132 | use (b); |
| 1133 | } |
| 1134 | |
| 1135 | is transformed this way: |
| 1136 | |
| 1137 | bb0: |
| 1138 | old.a = a; |
| 1139 | old.b = b; |
| 1140 | |
| 1141 | bb1: |
| 1142 | a' = new->a; |
| 1143 | b' = new->b; |
| 1144 | while (1) |
| 1145 | { |
| 1146 | use (a'); |
| 1147 | use (b'); |
| 1148 | } |
| 1149 | |
| 1150 | `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The |
| 1151 | pointer `new' is intentionally not initialized (the loop will be split to a |
| 1152 | separate function later, and `new' will be initialized from its arguments). |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1153 | LD_ST_DATA holds information about the shared data structure used to pass |
| 1154 | information among the threads. It is initialized here, and |
| 1155 | gen_parallel_loop will pass it to create_call_for_reduction that |
| 1156 | needs this information. REDUCTION_LIST describes the reductions |
| 1157 | in LOOP. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1158 | |
| 1159 | static void |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1160 | separate_decls_in_region (edge entry, edge exit, htab_t reduction_list, |
| 1161 | tree *arg_struct, tree *new_arg_struct, |
| 1162 | struct clsn_data *ld_st_data) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1163 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1164 | { |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1165 | basic_block bb1 = split_edge (entry); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1166 | basic_block bb0 = single_pred (bb1); |
| 1167 | htab_t name_copies = htab_create (10, name_to_copy_elt_hash, |
| 1168 | name_to_copy_elt_eq, free); |
| 1169 | htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq, |
| 1170 | free); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1171 | unsigned i; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1172 | tree type, type_name, nvar; |
| 1173 | gimple_stmt_iterator gsi; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1174 | struct clsn_data clsn_data; |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1175 | VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3); |
| 1176 | basic_block bb; |
| 1177 | basic_block entry_bb = bb1; |
| 1178 | basic_block exit_bb = exit->dest; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1179 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1180 | entry = single_succ_edge (entry_bb); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1181 | gather_blocks_in_sese_region (entry_bb, exit_bb, &body); |
| 1182 | |
| 1183 | for (i = 0; VEC_iterate (basic_block, body, i, bb); i++) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1184 | { |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1185 | if (bb != entry_bb && bb != exit_bb) |
| 1186 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1187 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| 1188 | separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi), |
| 1189 | name_copies, decl_copies); |
| 1190 | |
| 1191 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| 1192 | separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi), |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1193 | name_copies, decl_copies); |
| 1194 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1195 | } |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1196 | |
| 1197 | VEC_free (basic_block, heap, body); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1198 | |
Razya Ladelsky | c0399c4 | 2008-11-19 16:08:01 +0000 | [diff] [blame] | 1199 | if (htab_elements (name_copies) == 0 && reduction_list == 0) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1200 | { |
| 1201 | /* It may happen that there is nothing to copy (if there are only |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1202 | loop carried and external variables in the loop). */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1203 | *arg_struct = NULL; |
| 1204 | *new_arg_struct = NULL; |
| 1205 | } |
| 1206 | else |
| 1207 | { |
| 1208 | /* Create the type for the structure to store the ssa names to. */ |
| 1209 | type = lang_hooks.types.make_type (RECORD_TYPE); |
Aldy Hernandez | c2255bc | 2009-06-12 22:06:47 +0000 | [diff] [blame^] | 1210 | type_name = build_decl (BUILTINS_LOCATION, |
| 1211 | TYPE_DECL, create_tmp_var_name (".paral_data"), |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1212 | type); |
| 1213 | TYPE_NAME (type) = type_name; |
| 1214 | |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 1215 | htab_traverse (name_copies, add_field_for_name, type); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1216 | if (reduction_list && htab_elements (reduction_list) > 0) |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 1217 | { |
| 1218 | /* Create the fields for reductions. */ |
| 1219 | htab_traverse (reduction_list, add_field_for_reduction, |
| 1220 | type); |
| 1221 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1222 | layout_type (type); |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 1223 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1224 | /* Create the loads and stores. */ |
| 1225 | *arg_struct = create_tmp_var (type, ".paral_data_store"); |
| 1226 | add_referenced_var (*arg_struct); |
| 1227 | nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load"); |
| 1228 | add_referenced_var (nvar); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1229 | *new_arg_struct = make_ssa_name (nvar, NULL); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1230 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1231 | ld_st_data->store = *arg_struct; |
| 1232 | ld_st_data->load = *new_arg_struct; |
| 1233 | ld_st_data->store_bb = bb0; |
| 1234 | ld_st_data->load_bb = bb1; |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 1235 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1236 | htab_traverse (name_copies, create_loads_and_stores_for_name, |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1237 | ld_st_data); |
| 1238 | |
Razya Ladelsky | ae0bce6 | 2007-12-18 11:21:48 +0000 | [diff] [blame] | 1239 | /* Load the calculation from memory (after the join of the threads). */ |
| 1240 | |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1241 | if (reduction_list && htab_elements (reduction_list) > 0) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1242 | { |
Razya Ladelsky | 0eb7e7a | 2007-11-06 10:29:12 +0000 | [diff] [blame] | 1243 | htab_traverse (reduction_list, create_stores_for_reduction, |
| 1244 | ld_st_data); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1245 | clsn_data.load = make_ssa_name (nvar, NULL); |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1246 | clsn_data.load_bb = exit->dest; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1247 | clsn_data.store = ld_st_data->store; |
| 1248 | create_final_loads_for_reduction (reduction_list, &clsn_data); |
| 1249 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1250 | } |
| 1251 | |
| 1252 | htab_delete (decl_copies); |
| 1253 | htab_delete (name_copies); |
| 1254 | } |
| 1255 | |
| 1256 | /* Bitmap containing uids of functions created by parallelization. We cannot |
| 1257 | allocate it from the default obstack, as it must live across compilation |
| 1258 | of several functions; we make it gc allocated instead. */ |
| 1259 | |
| 1260 | static GTY(()) bitmap parallelized_functions; |
| 1261 | |
| 1262 | /* Returns true if FN was created by create_loop_fn. */ |
| 1263 | |
| 1264 | static bool |
| 1265 | parallelized_function_p (tree fn) |
| 1266 | { |
| 1267 | if (!parallelized_functions || !DECL_ARTIFICIAL (fn)) |
| 1268 | return false; |
| 1269 | |
| 1270 | return bitmap_bit_p (parallelized_functions, DECL_UID (fn)); |
| 1271 | } |
| 1272 | |
| 1273 | /* Creates and returns an empty function that will receive the body of |
| 1274 | a parallelized loop. */ |
| 1275 | |
| 1276 | static tree |
| 1277 | create_loop_fn (void) |
| 1278 | { |
| 1279 | char buf[100]; |
| 1280 | char *tname; |
| 1281 | tree decl, type, name, t; |
| 1282 | struct function *act_cfun = cfun; |
| 1283 | static unsigned loopfn_num; |
| 1284 | |
| 1285 | snprintf (buf, 100, "%s.$loopfn", current_function_name ()); |
| 1286 | ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++); |
| 1287 | clean_symbol_name (tname); |
| 1288 | name = get_identifier (tname); |
| 1289 | type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); |
| 1290 | |
Aldy Hernandez | c2255bc | 2009-06-12 22:06:47 +0000 | [diff] [blame^] | 1291 | decl = build_decl (BUILTINS_LOCATION, |
| 1292 | FUNCTION_DECL, name, type); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1293 | if (!parallelized_functions) |
| 1294 | parallelized_functions = BITMAP_GGC_ALLOC (); |
| 1295 | bitmap_set_bit (parallelized_functions, DECL_UID (decl)); |
| 1296 | |
| 1297 | TREE_STATIC (decl) = 1; |
| 1298 | TREE_USED (decl) = 1; |
| 1299 | DECL_ARTIFICIAL (decl) = 1; |
| 1300 | DECL_IGNORED_P (decl) = 0; |
| 1301 | TREE_PUBLIC (decl) = 0; |
| 1302 | DECL_UNINLINABLE (decl) = 1; |
| 1303 | DECL_EXTERNAL (decl) = 0; |
| 1304 | DECL_CONTEXT (decl) = NULL_TREE; |
| 1305 | DECL_INITIAL (decl) = make_node (BLOCK); |
| 1306 | |
Aldy Hernandez | c2255bc | 2009-06-12 22:06:47 +0000 | [diff] [blame^] | 1307 | t = build_decl (BUILTINS_LOCATION, |
| 1308 | RESULT_DECL, NULL_TREE, void_type_node); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1309 | DECL_ARTIFICIAL (t) = 1; |
| 1310 | DECL_IGNORED_P (t) = 1; |
| 1311 | DECL_RESULT (decl) = t; |
| 1312 | |
Aldy Hernandez | c2255bc | 2009-06-12 22:06:47 +0000 | [diff] [blame^] | 1313 | t = build_decl (BUILTINS_LOCATION, |
| 1314 | PARM_DECL, get_identifier (".paral_data_param"), |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1315 | ptr_type_node); |
| 1316 | DECL_ARTIFICIAL (t) = 1; |
| 1317 | DECL_ARG_TYPE (t) = ptr_type_node; |
| 1318 | DECL_CONTEXT (t) = decl; |
| 1319 | TREE_USED (t) = 1; |
| 1320 | DECL_ARGUMENTS (decl) = t; |
| 1321 | |
Andreas Krebbel | 182e0d7 | 2007-11-26 17:33:23 +0000 | [diff] [blame] | 1322 | allocate_struct_function (decl, false); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1323 | |
| 1324 | /* The call to allocate_struct_function clobbers CFUN, so we need to restore |
| 1325 | it. */ |
Tom Tromey | 5576d6f | 2007-11-16 00:11:47 +0000 | [diff] [blame] | 1326 | set_cfun (act_cfun); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1327 | |
| 1328 | return decl; |
| 1329 | } |
| 1330 | |
Sebastian Pop | 7d4fba4 | 2009-03-03 03:47:22 +0000 | [diff] [blame] | 1331 | /* Bases all the induction variables in LOOP on a single induction |
| 1332 | variable (unsigned with base 0 and step 1), whose final value is |
| 1333 | compared with *NIT. When the IV type precision has to be larger |
| 1334 | than *NIT type precision, *NIT is converted to the larger type, the |
| 1335 | conversion code is inserted before the loop, and *NIT is updated to |
| 1336 | the new definition. The induction variable is incremented in the |
| 1337 | loop latch. REDUCTION_LIST describes the reductions in LOOP. |
| 1338 | Return the induction variable that was created. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1339 | |
Sebastian Pop | 81b822d | 2008-12-11 07:23:02 +0000 | [diff] [blame] | 1340 | tree |
Sebastian Pop | 7d4fba4 | 2009-03-03 03:47:22 +0000 | [diff] [blame] | 1341 | canonicalize_loop_ivs (struct loop *loop, htab_t reduction_list, tree *nit) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1342 | { |
Sebastian Pop | 7d4fba4 | 2009-03-03 03:47:22 +0000 | [diff] [blame] | 1343 | unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit)); |
| 1344 | unsigned original_precision = precision; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1345 | tree res, type, var_before, val, atype, mtype; |
| 1346 | gimple_stmt_iterator gsi, psi; |
| 1347 | gimple phi, stmt; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1348 | bool ok; |
| 1349 | affine_iv iv; |
| 1350 | edge exit = single_dom_exit (loop); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1351 | struct reduction_info *red; |
Sebastian Pop | 7d4fba4 | 2009-03-03 03:47:22 +0000 | [diff] [blame] | 1352 | gimple_seq stmts; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1353 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1354 | for (psi = gsi_start_phis (loop->header); |
| 1355 | !gsi_end_p (psi); gsi_next (&psi)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1356 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1357 | phi = gsi_stmt (psi); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1358 | res = PHI_RESULT (phi); |
| 1359 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1360 | if (is_gimple_reg (res) && TYPE_PRECISION (TREE_TYPE (res)) > precision) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1361 | precision = TYPE_PRECISION (TREE_TYPE (res)); |
| 1362 | } |
| 1363 | |
| 1364 | type = lang_hooks.types.type_for_size (precision, 1); |
| 1365 | |
Sebastian Pop | 7d4fba4 | 2009-03-03 03:47:22 +0000 | [diff] [blame] | 1366 | if (original_precision != precision) |
| 1367 | { |
| 1368 | *nit = fold_convert (type, *nit); |
| 1369 | *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE); |
| 1370 | if (stmts) |
| 1371 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
| 1372 | } |
| 1373 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1374 | gsi = gsi_last_bb (loop->latch); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1375 | create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE, |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1376 | loop, &gsi, true, &var_before, NULL); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1377 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1378 | gsi = gsi_after_labels (loop->header); |
| 1379 | for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); ) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1380 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1381 | phi = gsi_stmt (psi); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1382 | res = PHI_RESULT (phi); |
| 1383 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1384 | if (!is_gimple_reg (res) || res == var_before) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1385 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1386 | gsi_next (&psi); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1387 | continue; |
| 1388 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1389 | |
Zdenek Dvorak | f017bf5 | 2009-03-04 18:50:20 +0100 | [diff] [blame] | 1390 | ok = simple_iv (loop, loop, res, &iv, true); |
Sebastian Pop | 81b822d | 2008-12-11 07:23:02 +0000 | [diff] [blame] | 1391 | |
| 1392 | if (reduction_list) |
| 1393 | red = reduction_phi (reduction_list, phi); |
| 1394 | else |
| 1395 | red = NULL; |
| 1396 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1397 | /* We preserve the reduction phi nodes. */ |
| 1398 | if (!ok && red) |
| 1399 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1400 | gsi_next (&psi); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1401 | continue; |
| 1402 | } |
| 1403 | else |
| 1404 | gcc_assert (ok); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1405 | remove_phi_node (&psi, false); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1406 | |
| 1407 | atype = TREE_TYPE (res); |
Jakub Jelinek | 36ad792 | 2007-12-03 23:35:39 +0100 | [diff] [blame] | 1408 | mtype = POINTER_TYPE_P (atype) ? sizetype : atype; |
| 1409 | val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step), |
| 1410 | fold_convert (mtype, var_before)); |
| 1411 | val = fold_build2 (POINTER_TYPE_P (atype) |
| 1412 | ? POINTER_PLUS_EXPR : PLUS_EXPR, |
| 1413 | atype, unshare_expr (iv.base), val); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1414 | val = force_gimple_operand_gsi (&gsi, val, false, NULL_TREE, true, |
| 1415 | GSI_SAME_STMT); |
| 1416 | stmt = gimple_build_assign (res, val); |
| 1417 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
| 1418 | SSA_NAME_DEF_STMT (res) = stmt; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1419 | } |
| 1420 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1421 | stmt = last_stmt (exit->src); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1422 | /* Make the loop exit if the control condition is not satisfied. */ |
| 1423 | if (exit->flags & EDGE_TRUE_VALUE) |
| 1424 | { |
| 1425 | edge te, fe; |
| 1426 | |
| 1427 | extract_true_false_edges_from_block (exit->src, &te, &fe); |
| 1428 | te->flags = EDGE_FALSE_VALUE; |
| 1429 | fe->flags = EDGE_TRUE_VALUE; |
| 1430 | } |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1431 | gimple_cond_set_code (stmt, LT_EXPR); |
| 1432 | gimple_cond_set_lhs (stmt, var_before); |
Sebastian Pop | 7d4fba4 | 2009-03-03 03:47:22 +0000 | [diff] [blame] | 1433 | gimple_cond_set_rhs (stmt, *nit); |
Sebastian Pop | 81b822d | 2008-12-11 07:23:02 +0000 | [diff] [blame] | 1434 | update_stmt (stmt); |
| 1435 | |
| 1436 | return var_before; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1437 | } |
| 1438 | |
| 1439 | /* Moves the exit condition of LOOP to the beginning of its header, and |
| 1440 | duplicates the part of the last iteration that gets disabled to the |
| 1441 | exit of the loop. NIT is the number of iterations of the loop |
| 1442 | (used to initialize the variables in the duplicated part). |
| 1443 | |
Ralf Wildenhues | fa10bee | 2008-06-06 05:42:00 +0000 | [diff] [blame] | 1444 | TODO: the common case is that latch of the loop is empty and immediately |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1445 | follows the loop exit. In this case, it would be better not to copy the |
| 1446 | body of the loop, but only move the entry of the loop directly before the |
| 1447 | exit check and increase the number of iterations of the loop by one. |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1448 | This may need some additional preconditioning in case NIT = ~0. |
| 1449 | REDUCTION_LIST describes the reductions in LOOP. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1450 | |
| 1451 | static void |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1452 | transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1453 | { |
| 1454 | basic_block *bbs, *nbbs, ex_bb, orig_header; |
| 1455 | unsigned n; |
| 1456 | bool ok; |
| 1457 | edge exit = single_dom_exit (loop), hpred; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1458 | tree control, control_name, res, t; |
| 1459 | gimple phi, nphi, cond_stmt, stmt; |
| 1460 | gimple_stmt_iterator gsi; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1461 | |
| 1462 | split_block_after_labels (loop->header); |
| 1463 | orig_header = single_succ (loop->header); |
| 1464 | hpred = single_succ_edge (loop->header); |
| 1465 | |
| 1466 | cond_stmt = last_stmt (exit->src); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1467 | control = gimple_cond_lhs (cond_stmt); |
| 1468 | gcc_assert (gimple_cond_rhs (cond_stmt) == nit); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1469 | |
| 1470 | /* Make sure that we have phi nodes on exit for all loop header phis |
| 1471 | (create_parallel_loop requires that). */ |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1472 | for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1473 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1474 | phi = gsi_stmt (gsi); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1475 | res = PHI_RESULT (phi); |
| 1476 | t = make_ssa_name (SSA_NAME_VAR (res), phi); |
| 1477 | SET_PHI_RESULT (phi, t); |
| 1478 | |
| 1479 | nphi = create_phi_node (res, orig_header); |
| 1480 | SSA_NAME_DEF_STMT (res) = nphi; |
| 1481 | add_phi_arg (nphi, t, hpred); |
| 1482 | |
| 1483 | if (res == control) |
| 1484 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1485 | gimple_cond_set_lhs (cond_stmt, t); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1486 | update_stmt (cond_stmt); |
| 1487 | control = t; |
| 1488 | } |
| 1489 | } |
| 1490 | |
| 1491 | bbs = get_loop_body_in_dom_order (loop); |
| 1492 | for (n = 0; bbs[n] != exit->src; n++) |
| 1493 | continue; |
| 1494 | nbbs = XNEWVEC (basic_block, n); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1495 | ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit, |
| 1496 | bbs + 1, n, nbbs); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1497 | gcc_assert (ok); |
| 1498 | free (bbs); |
| 1499 | ex_bb = nbbs[0]; |
| 1500 | free (nbbs); |
| 1501 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1502 | /* Other than reductions, the only gimple reg that should be copied |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1503 | out of the loop is the control variable. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1504 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1505 | control_name = NULL_TREE; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1506 | for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); ) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1507 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1508 | phi = gsi_stmt (gsi); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1509 | res = PHI_RESULT (phi); |
| 1510 | if (!is_gimple_reg (res)) |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1511 | { |
| 1512 | gsi_next (&gsi); |
| 1513 | continue; |
| 1514 | } |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1515 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1516 | /* Check if it is a part of reduction. If it is, |
| 1517 | keep the phi at the reduction's keep_res field. The |
| 1518 | PHI_RESULT of this phi is the resulting value of the reduction |
| 1519 | variable when exiting the loop. */ |
| 1520 | |
| 1521 | exit = single_dom_exit (loop); |
| 1522 | |
| 1523 | if (htab_elements (reduction_list) > 0) |
| 1524 | { |
| 1525 | struct reduction_info *red; |
| 1526 | |
| 1527 | tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit); |
| 1528 | |
| 1529 | red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val)); |
| 1530 | if (red) |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1531 | { |
| 1532 | red->keep_res = phi; |
| 1533 | gsi_next (&gsi); |
| 1534 | continue; |
| 1535 | } |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1536 | } |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1537 | gcc_assert (control_name == NULL_TREE |
| 1538 | && SSA_NAME_VAR (res) == SSA_NAME_VAR (control)); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1539 | control_name = res; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1540 | remove_phi_node (&gsi, false); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1541 | } |
| 1542 | gcc_assert (control_name != NULL_TREE); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1543 | |
| 1544 | /* Initialize the control variable to NIT. */ |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1545 | gsi = gsi_after_labels (ex_bb); |
| 1546 | nit = force_gimple_operand_gsi (&gsi, |
Zdenek Dvorak | 29ac1d9 | 2008-01-12 14:43:21 +0100 | [diff] [blame] | 1547 | fold_convert (TREE_TYPE (control_name), nit), |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1548 | false, NULL_TREE, false, GSI_SAME_STMT); |
| 1549 | stmt = gimple_build_assign (control_name, nit); |
| 1550 | gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); |
| 1551 | SSA_NAME_DEF_STMT (control_name) = stmt; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1552 | } |
| 1553 | |
| 1554 | /* Create the parallel constructs for LOOP as described in gen_parallel_loop. |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1555 | LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL. |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1556 | NEW_DATA is the variable that should be initialized from the argument |
| 1557 | of LOOP_FN. N_THREADS is the requested number of threads. Returns the |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1558 | basic block containing GIMPLE_OMP_PARALLEL tree. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1559 | |
| 1560 | static basic_block |
| 1561 | create_parallel_loop (struct loop *loop, tree loop_fn, tree data, |
| 1562 | tree new_data, unsigned n_threads) |
| 1563 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1564 | gimple_stmt_iterator gsi; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1565 | basic_block bb, paral_bb, for_bb, ex_bb; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1566 | tree t, param, res; |
| 1567 | gimple stmt, for_stmt, phi, cond_stmt; |
| 1568 | tree cvar, cvar_init, initvar, cvar_next, cvar_base, type; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1569 | edge exit, nexit, guard, end, e; |
| 1570 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1571 | /* Prepare the GIMPLE_OMP_PARALLEL statement. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1572 | bb = loop_preheader_edge (loop)->src; |
| 1573 | paral_bb = single_pred (bb); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1574 | gsi = gsi_last_bb (paral_bb); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1575 | |
Aldy Hernandez | c2255bc | 2009-06-12 22:06:47 +0000 | [diff] [blame^] | 1576 | t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1577 | OMP_CLAUSE_NUM_THREADS_EXPR (t) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1578 | = build_int_cst (integer_type_node, n_threads); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1579 | stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1580 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1581 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1582 | |
| 1583 | /* Initialize NEW_DATA. */ |
| 1584 | if (data) |
| 1585 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1586 | gsi = gsi_after_labels (bb); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1587 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1588 | param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL); |
| 1589 | stmt = gimple_build_assign (param, build_fold_addr_expr (data)); |
| 1590 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
| 1591 | SSA_NAME_DEF_STMT (param) = stmt; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1592 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1593 | stmt = gimple_build_assign (new_data, |
| 1594 | fold_convert (TREE_TYPE (new_data), param)); |
| 1595 | gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); |
| 1596 | SSA_NAME_DEF_STMT (new_data) = stmt; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1597 | } |
| 1598 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1599 | /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1600 | bb = split_loop_exit_edge (single_dom_exit (loop)); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1601 | gsi = gsi_last_bb (bb); |
| 1602 | gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1603 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1604 | /* Extract data for GIMPLE_OMP_FOR. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1605 | gcc_assert (loop->header == single_dom_exit (loop)->src); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1606 | cond_stmt = last_stmt (loop->header); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1607 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1608 | cvar = gimple_cond_lhs (cond_stmt); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1609 | cvar_base = SSA_NAME_VAR (cvar); |
| 1610 | phi = SSA_NAME_DEF_STMT (cvar); |
| 1611 | cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1612 | initvar = make_ssa_name (cvar_base, NULL); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1613 | SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)), |
| 1614 | initvar); |
| 1615 | cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); |
| 1616 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1617 | gsi = gsi_last_bb (loop->latch); |
| 1618 | gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next)); |
| 1619 | gsi_remove (&gsi, true); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1620 | |
| 1621 | /* Prepare cfg. */ |
| 1622 | for_bb = split_edge (loop_preheader_edge (loop)); |
| 1623 | ex_bb = split_loop_exit_edge (single_dom_exit (loop)); |
| 1624 | extract_true_false_edges_from_block (loop->header, &nexit, &exit); |
| 1625 | gcc_assert (exit == single_dom_exit (loop)); |
| 1626 | |
| 1627 | guard = make_edge (for_bb, ex_bb, 0); |
| 1628 | single_succ_edge (loop->latch)->flags = 0; |
| 1629 | end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1630 | for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1631 | { |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1632 | phi = gsi_stmt (gsi); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1633 | res = PHI_RESULT (phi); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1634 | stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)); |
| 1635 | add_phi_arg (phi, |
| 1636 | PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop)), |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1637 | guard); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1638 | add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop)), |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1639 | end); |
| 1640 | } |
| 1641 | e = redirect_edge_and_branch (exit, nexit->dest); |
| 1642 | PENDING_STMT (e) = NULL; |
| 1643 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1644 | /* Emit GIMPLE_OMP_FOR. */ |
| 1645 | gimple_cond_set_lhs (cond_stmt, cvar_base); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1646 | type = TREE_TYPE (cvar); |
Aldy Hernandez | c2255bc | 2009-06-12 22:06:47 +0000 | [diff] [blame^] | 1647 | t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1648 | OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC; |
| 1649 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1650 | for_stmt = gimple_build_omp_for (NULL, t, 1, NULL); |
| 1651 | gimple_omp_for_set_index (for_stmt, 0, initvar); |
| 1652 | gimple_omp_for_set_initial (for_stmt, 0, cvar_init); |
| 1653 | gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt)); |
| 1654 | gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt)); |
| 1655 | gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type, |
| 1656 | cvar_base, |
| 1657 | build_int_cst (type, 1))); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1658 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1659 | gsi = gsi_last_bb (for_bb); |
| 1660 | gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1661 | SSA_NAME_DEF_STMT (initvar) = for_stmt; |
| 1662 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1663 | /* Emit GIMPLE_OMP_CONTINUE. */ |
| 1664 | gsi = gsi_last_bb (loop->latch); |
| 1665 | stmt = gimple_build_omp_continue (cvar_next, cvar); |
| 1666 | gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); |
| 1667 | SSA_NAME_DEF_STMT (cvar_next) = stmt; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1668 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1669 | /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */ |
| 1670 | gsi = gsi_last_bb (ex_bb); |
| 1671 | gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1672 | |
| 1673 | return paral_bb; |
| 1674 | } |
| 1675 | |
| 1676 | /* Generates code to execute the iterations of LOOP in N_THREADS threads in |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1677 | parallel. NITER describes number of iterations of LOOP. |
Ralf Wildenhues | fa10bee | 2008-06-06 05:42:00 +0000 | [diff] [blame] | 1678 | REDUCTION_LIST describes the reductions existent in the LOOP. */ |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1679 | |
| 1680 | static void |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1681 | gen_parallel_loop (struct loop *loop, htab_t reduction_list, |
| 1682 | unsigned n_threads, struct tree_niter_desc *niter) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1683 | { |
| 1684 | struct loop *nloop; |
Jerry DeLisle | 9326236 | 2008-01-16 04:04:37 +0000 | [diff] [blame] | 1685 | loop_iterator li; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1686 | tree many_iterations_cond, type, nit; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1687 | tree arg_struct, new_arg_struct; |
| 1688 | gimple_seq stmts; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1689 | basic_block parallel_head; |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1690 | edge entry, exit; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1691 | struct clsn_data clsn_data; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1692 | unsigned prob; |
| 1693 | |
| 1694 | /* From |
| 1695 | |
| 1696 | --------------------------------------------------------------------- |
| 1697 | loop |
| 1698 | { |
| 1699 | IV = phi (INIT, IV + STEP) |
| 1700 | BODY1; |
| 1701 | if (COND) |
| 1702 | break; |
| 1703 | BODY2; |
| 1704 | } |
| 1705 | --------------------------------------------------------------------- |
| 1706 | |
| 1707 | with # of iterations NITER (possibly with MAY_BE_ZERO assumption), |
| 1708 | we generate the following code: |
| 1709 | |
| 1710 | --------------------------------------------------------------------- |
| 1711 | |
| 1712 | if (MAY_BE_ZERO |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1713 | || NITER < MIN_PER_THREAD * N_THREADS) |
| 1714 | goto original; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1715 | |
| 1716 | BODY1; |
| 1717 | store all local loop-invariant variables used in body of the loop to DATA. |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1718 | GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1719 | load the variables from DATA. |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1720 | GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1721 | BODY2; |
| 1722 | BODY1; |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1723 | GIMPLE_OMP_CONTINUE; |
| 1724 | GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR |
| 1725 | GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1726 | goto end; |
| 1727 | |
| 1728 | original: |
| 1729 | loop |
| 1730 | { |
| 1731 | IV = phi (INIT, IV + STEP) |
| 1732 | BODY1; |
| 1733 | if (COND) |
| 1734 | break; |
| 1735 | BODY2; |
| 1736 | } |
| 1737 | |
| 1738 | end: |
| 1739 | |
| 1740 | */ |
| 1741 | |
| 1742 | /* Create two versions of the loop -- in the old one, we know that the |
| 1743 | number of iterations is large enough, and we will transform it into the |
| 1744 | loop that will be split to loop_fn, the new one will be used for the |
| 1745 | remaining iterations. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1746 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1747 | type = TREE_TYPE (niter->niter); |
| 1748 | nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true, |
| 1749 | NULL_TREE); |
| 1750 | if (stmts) |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1751 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1752 | |
| 1753 | many_iterations_cond = |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1754 | fold_build2 (GE_EXPR, boolean_type_node, |
| 1755 | nit, build_int_cst (type, MIN_PER_THREAD * n_threads)); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1756 | many_iterations_cond |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1757 | = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, |
| 1758 | invert_truthvalue (unshare_expr (niter->may_be_zero)), |
| 1759 | many_iterations_cond); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1760 | many_iterations_cond |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1761 | = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1762 | if (stmts) |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1763 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1764 | if (!is_gimple_condexpr (many_iterations_cond)) |
| 1765 | { |
| 1766 | many_iterations_cond |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1767 | = force_gimple_operand (many_iterations_cond, &stmts, |
| 1768 | true, NULL_TREE); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1769 | if (stmts) |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1770 | gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1771 | } |
| 1772 | |
| 1773 | initialize_original_copy_tables (); |
| 1774 | |
| 1775 | /* We assume that the loop usually iterates a lot. */ |
| 1776 | prob = 4 * REG_BR_PROB_BASE / 5; |
| 1777 | nloop = loop_version (loop, many_iterations_cond, NULL, |
| 1778 | prob, prob, REG_BR_PROB_BASE - prob, true); |
| 1779 | update_ssa (TODO_update_ssa); |
| 1780 | free_original_copy_tables (); |
| 1781 | |
| 1782 | /* Base all the induction variables in LOOP on a single control one. */ |
Sebastian Pop | 7d4fba4 | 2009-03-03 03:47:22 +0000 | [diff] [blame] | 1783 | canonicalize_loop_ivs (loop, reduction_list, &nit); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1784 | |
| 1785 | /* Ensure that the exit condition is the first statement in the loop. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1786 | transform_to_exit_first_loop (loop, reduction_list, nit); |
| 1787 | |
Ralf Wildenhues | fa10bee | 2008-06-06 05:42:00 +0000 | [diff] [blame] | 1788 | /* Generate initializations for reductions. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1789 | if (htab_elements (reduction_list) > 0) |
| 1790 | htab_traverse (reduction_list, initialize_reductions, loop); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1791 | |
| 1792 | /* Eliminate the references to local variables from the loop. */ |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1793 | gcc_assert (single_exit (loop)); |
| 1794 | entry = loop_preheader_edge (loop); |
| 1795 | exit = single_dom_exit (loop); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1796 | |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1797 | eliminate_local_variables (entry, exit); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1798 | /* In the old loop, move all variables non-local to the loop to a structure |
| 1799 | and back, and create separate decls for the variables used in loop. */ |
Antoniu Pop | 9f9f72a | 2008-04-24 16:23:51 +0100 | [diff] [blame] | 1800 | separate_decls_in_region (entry, exit, reduction_list, &arg_struct, |
| 1801 | &new_arg_struct, &clsn_data); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1802 | |
| 1803 | /* Create the parallel constructs. */ |
| 1804 | parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct, |
| 1805 | new_arg_struct, n_threads); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1806 | if (htab_elements (reduction_list) > 0) |
| 1807 | create_call_for_reduction (loop, reduction_list, &clsn_data); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1808 | |
| 1809 | scev_reset (); |
| 1810 | |
| 1811 | /* Cancel the loop (it is simpler to do it here rather than to teach the |
| 1812 | expander to do it). */ |
| 1813 | cancel_loop_tree (loop); |
| 1814 | |
Sebastian Pop | 92a6bdb | 2008-01-16 02:46:46 +0000 | [diff] [blame] | 1815 | /* Free loop bound estimations that could contain references to |
| 1816 | removed statements. */ |
| 1817 | FOR_EACH_LOOP (li, loop, 0) |
| 1818 | free_numbers_of_iterations_estimates_loop (loop); |
| 1819 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1820 | /* Expand the parallel constructs. We do it directly here instead of running |
| 1821 | a separate expand_omp pass, since it is more efficient, and less likely to |
| 1822 | cause troubles with further analyses not being able to deal with the |
| 1823 | OMP trees. */ |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1824 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1825 | omp_expand_local (parallel_head); |
| 1826 | } |
| 1827 | |
Sebastian Pop | 9857228 | 2008-05-20 19:17:12 +0000 | [diff] [blame] | 1828 | /* Returns true when LOOP contains vector phi nodes. */ |
| 1829 | |
| 1830 | static bool |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1831 | loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED) |
Sebastian Pop | 9857228 | 2008-05-20 19:17:12 +0000 | [diff] [blame] | 1832 | { |
| 1833 | unsigned i; |
| 1834 | basic_block *bbs = get_loop_body_in_dom_order (loop); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1835 | gimple_stmt_iterator gsi; |
Sebastian Pop | 9857228 | 2008-05-20 19:17:12 +0000 | [diff] [blame] | 1836 | bool res = true; |
Sebastian Pop | 9857228 | 2008-05-20 19:17:12 +0000 | [diff] [blame] | 1837 | |
| 1838 | for (i = 0; i < loop->num_nodes; i++) |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1839 | for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) |
| 1840 | if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE) |
Sebastian Pop | 9857228 | 2008-05-20 19:17:12 +0000 | [diff] [blame] | 1841 | goto end; |
| 1842 | |
| 1843 | res = false; |
| 1844 | end: |
| 1845 | free (bbs); |
| 1846 | return res; |
| 1847 | } |
| 1848 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1849 | /* Detect parallel loops and generate parallel code using libgomp |
| 1850 | primitives. Returns true if some loop was parallelized, false |
| 1851 | otherwise. */ |
| 1852 | |
| 1853 | bool |
| 1854 | parallelize_loops (void) |
| 1855 | { |
| 1856 | unsigned n_threads = flag_tree_parallelize_loops; |
| 1857 | bool changed = false; |
| 1858 | struct loop *loop; |
| 1859 | struct tree_niter_desc niter_desc; |
| 1860 | loop_iterator li; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1861 | htab_t reduction_list; |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1862 | |
| 1863 | /* Do not parallelize loops in the functions created by parallelization. */ |
| 1864 | if (parallelized_function_p (cfun->decl)) |
| 1865 | return false; |
| 1866 | |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1867 | reduction_list = htab_create (10, reduction_info_hash, |
| 1868 | reduction_info_eq, free); |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1869 | init_stmt_vec_info_vec (); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1870 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1871 | FOR_EACH_LOOP (li, loop, 0) |
| 1872 | { |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1873 | htab_empty (reduction_list); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1874 | if (/* Do not bother with loops in cold areas. */ |
Jan Hubicka | efd8f75 | 2008-08-29 12:35:57 +0200 | [diff] [blame] | 1875 | optimize_loop_nest_for_size_p (loop) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1876 | /* Or loops that roll too little. */ |
| 1877 | || expected_loop_iterations (loop) <= n_threads |
| 1878 | /* And of course, the loop must be parallelizable. */ |
| 1879 | || !can_duplicate_loop_p (loop) |
Sebastian Pop | 1d4af1e | 2008-01-16 02:44:04 +0000 | [diff] [blame] | 1880 | || loop_has_blocks_with_irreducible_flag (loop) |
Sebastian Pop | 9857228 | 2008-05-20 19:17:12 +0000 | [diff] [blame] | 1881 | /* FIXME: the check for vector phi nodes could be removed. */ |
| 1882 | || loop_has_vector_phi_nodes (loop) |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1883 | || !loop_parallel_p (loop, reduction_list, &niter_desc)) |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1884 | continue; |
| 1885 | |
| 1886 | changed = true; |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1887 | gen_parallel_loop (loop, reduction_list, n_threads, &niter_desc); |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1888 | verify_flow_info (); |
| 1889 | verify_dominators (CDI_DOMINATORS); |
| 1890 | verify_loop_structure (); |
| 1891 | verify_loop_closed_ssa (); |
| 1892 | } |
| 1893 | |
Richard Biener | 726a989 | 2008-07-28 14:33:56 +0000 | [diff] [blame] | 1894 | free_stmt_vec_info_vec (); |
Razya Ladelsky | a509ebb | 2007-10-29 11:05:04 +0000 | [diff] [blame] | 1895 | htab_delete (reduction_list); |
Richard Guenther | 6b8ed14 | 2009-05-25 13:35:10 +0000 | [diff] [blame] | 1896 | |
| 1897 | /* Parallelization will cause new function calls to be inserted through |
| 1898 | which local variables will escape. Reset the points-to solutions |
| 1899 | for ESCAPED and CALLUSED. */ |
| 1900 | if (changed) |
| 1901 | { |
| 1902 | pt_solution_reset (&cfun->gimple_df->escaped); |
| 1903 | pt_solution_reset (&cfun->gimple_df->callused); |
| 1904 | } |
| 1905 | |
Zdenek Dvorak | 5f40b3c | 2007-09-15 23:53:45 +0200 | [diff] [blame] | 1906 | return changed; |
| 1907 | } |
| 1908 | |
| 1909 | #include "gt-tree-parloops.h" |