blob: deff2d5e08b1e578f07e1b148d2139c5ff616605 [file] [log] [blame]
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001/* Loop autoparallelization.
Jakub Jelinek99dee822021-01-04 10:26:59 +01002 Copyright (C) 2006-2021 Free Software Foundation, Inc.
Razya Ladelsky70837b72012-05-21 07:39:38 +00003 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02005
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
Nick Clifton6da7fc82009-02-10 17:59:08 +000010Software Foundation; either version 3, or (at your option) any later
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +020011version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
Nick Clifton6da7fc82009-02-10 17:59:08 +000019along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +020021
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
Andrew MacLeodc7131fb2015-07-08 00:53:03 +000025#include "backend.h"
Michael Collison40e23962015-01-09 20:18:42 +000026#include "tree.h"
Andrew MacLeod18f429e2013-11-14 19:39:38 +000027#include "gimple.h"
Andrew MacLeod957060b2015-10-29 13:57:32 +000028#include "cfghooks.h"
29#include "tree-pass.h"
Andrew MacLeodc7131fb2015-07-08 00:53:03 +000030#include "ssa.h"
Andrew MacLeod957060b2015-10-29 13:57:32 +000031#include "cgraph.h"
32#include "gimple-pretty-print.h"
Andrew MacLeodc7131fb2015-07-08 00:53:03 +000033#include "fold-const.h"
Andrew MacLeod45b0be92013-11-12 20:26:43 +000034#include "gimplify.h"
Andrew Macleod5be5c232013-11-13 23:54:17 +000035#include "gimple-iterator.h"
Andrew MacLeod18f429e2013-11-14 19:39:38 +000036#include "gimplify-me.h"
Andrew Macleod5be5c232013-11-13 23:54:17 +000037#include "gimple-walk.h"
Diego Novillod8a2d372013-11-19 07:31:09 -050038#include "stor-layout.h"
39#include "tree-nested.h"
Andrew MacLeod442b4902013-10-23 12:16:58 +000040#include "tree-cfg.h"
Andrew MacLeode28030c2013-10-23 18:55:46 +000041#include "tree-ssa-loop-ivopts.h"
42#include "tree-ssa-loop-manip.h"
43#include "tree-ssa-loop-niter.h"
Andrew MacLeod442b4902013-10-23 12:16:58 +000044#include "tree-ssa-loop.h"
45#include "tree-into-ssa.h"
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +020046#include "cfgloop.h"
Sebastian Pop1bd64972010-12-28 17:09:16 +000047#include "tree-scalar-evolution.h"
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +020048#include "langhooks.h"
Razya Ladelskya509ebb2007-10-29 11:05:04 +000049#include "tree-vectorizer.h"
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +000050#include "tree-hasher.h"
Andrew MacLeodc1bf2a32013-10-09 14:11:30 +000051#include "tree-parloops.h"
Martin Jambor629b3d72016-12-14 23:30:41 +010052#include "omp-general.h"
Andrew MacLeod0645c1a2013-10-17 17:41:07 +000053#include "omp-low.h"
Tom de Vries7c82d822015-06-05 15:57:34 +000054#include "tree-ssa.h"
Tom de Vries61d9c522016-01-18 12:52:32 +000055#include "tree-ssa-alias.h"
56#include "tree-eh.h"
57#include "gomp-constants.h"
58#include "tree-dfa.h"
Martin Liska314e6352017-08-08 06:46:51 +020059#include "stringpool.h"
60#include "attribs.h"
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +020061
62/* This pass tries to distribute iterations of loops into several threads.
63 The implementation is straightforward -- for each loop we test whether its
64 iterations are independent, and if it is the case (and some additional
65 conditions regarding profitability and correctness are satisfied), we
Richard Biener726a9892008-07-28 14:33:56 +000066 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
67 machinery do its job.
H.J. Lub8698a02009-11-25 10:55:54 +000068
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +020069 The most of the complexity is in bringing the code into shape expected
70 by the omp expanders:
Richard Biener726a9892008-07-28 14:33:56 +000071 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
72 variable and that the exit test is at the start of the loop body
73 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +020074 variables by accesses through pointers, and breaking up ssa chains
75 by storing the values incoming to the parallelized loop to a structure
76 passed to the new function as an argument (something similar is done
77 in omp gimplification, unfortunately only a small part of the code
78 can be shared).
79
80 TODO:
81 -- if there are several parallelizable loops in a function, it may be
82 possible to generate the threads just once (using synchronization to
83 ensure that cross-loop dependences are obeyed).
Razya Ladelsky70837b72012-05-21 07:39:38 +000084 -- handling of common reduction patterns for outer loops.
85
86 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
H.J. Lub8698a02009-11-25 10:55:54 +000087/*
Razya Ladelskya509ebb2007-10-29 11:05:04 +000088 Reduction handling:
Richard Biener31de92e2019-09-18 18:07:06 +000089 currently we use code inspired by vect_force_simple_reduction to detect
90 reduction patterns.
Razya Ladelskya509ebb2007-10-29 11:05:04 +000091 The code transformation will be introduced by an example.
H.J. Lub8698a02009-11-25 10:55:54 +000092
93
Razya Ladelskya509ebb2007-10-29 11:05:04 +000094parloop
95{
96 int sum=1;
97
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +000098 for (i = 0; i < N; i++)
Razya Ladelskya509ebb2007-10-29 11:05:04 +000099 {
100 x[i] = i + 3;
101 sum+=x[i];
102 }
103}
104
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000105gimple-like code:
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000106header_bb:
107
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000108 # sum_29 = PHI <sum_11(5), 1(3)>
109 # i_28 = PHI <i_12(5), 0(3)>
110 D.1795_8 = i_28 + 3;
111 x[i_28] = D.1795_8;
112 sum_11 = D.1795_8 + sum_29;
113 i_12 = i_28 + 1;
114 if (N_6(D) > i_12)
115 goto header_bb;
116
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000117
118exit_bb:
119
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000120 # sum_21 = PHI <sum_11(4)>
121 printf (&"%d"[0], sum_21);
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000122
123
124after reduction transformation (only relevant parts):
125
126parloop
127{
128
129....
130
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000131
Ralf Wildenhuesfa10bee2008-06-06 05:42:00 +0000132 # Storing the initial value given by the user. #
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000133
Razya Ladelskyae0bce62007-12-18 11:21:48 +0000134 .paral_data_store.32.sum.27 = 1;
H.J. Lub8698a02009-11-25 10:55:54 +0000135
136 #pragma omp parallel num_threads(4)
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000137
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000138 #pragma omp for schedule(static)
Razya Ladelskyae0bce62007-12-18 11:21:48 +0000139
140 # The neutral element corresponding to the particular
141 reduction's operation, e.g. 0 for PLUS_EXPR,
142 1 for MULT_EXPR, etc. replaces the user's initial value. #
143
144 # sum.27_29 = PHI <sum.27_11, 0>
145
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000146 sum.27_11 = D.1827_8 + sum.27_29;
Razya Ladelskyae0bce62007-12-18 11:21:48 +0000147
Richard Biener726a9892008-07-28 14:33:56 +0000148 GIMPLE_OMP_CONTINUE
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000149
150 # Adding this reduction phi is done at create_phi_for_local_result() #
151 # sum.27_56 = PHI <sum.27_11, 0>
Richard Biener726a9892008-07-28 14:33:56 +0000152 GIMPLE_OMP_RETURN
H.J. Lub8698a02009-11-25 10:55:54 +0000153
154 # Creating the atomic operation is done at
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000155 create_call_for_reduction_1() #
156
157 #pragma omp atomic_load
158 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
159 D.1840_60 = sum.27_56 + D.1839_59;
160 #pragma omp atomic_store (D.1840_60);
H.J. Lub8698a02009-11-25 10:55:54 +0000161
Richard Biener726a9892008-07-28 14:33:56 +0000162 GIMPLE_OMP_RETURN
H.J. Lub8698a02009-11-25 10:55:54 +0000163
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000164 # collecting the result after the join of the threads is done at
165 create_loads_for_reductions().
Razya Ladelskyae0bce62007-12-18 11:21:48 +0000166 The value computed by the threads is loaded from the
167 shared struct. #
168
H.J. Lub8698a02009-11-25 10:55:54 +0000169
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000170 .paral_data_load.33_52 = &.paral_data_store.32;
Razya Ladelskyae0bce62007-12-18 11:21:48 +0000171 sum_37 = .paral_data_load.33_52->sum.27;
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +0000172 sum_43 = D.1795_41 + sum_37;
173
174 exit bb:
175 # sum_21 = PHI <sum_43, sum_26>
176 printf (&"%d"[0], sum_21);
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000177
178...
179
180}
181
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000182*/
183
Richard Biener31de92e2019-09-18 18:07:06 +0000184/* Error reporting helper for parloops_is_simple_reduction below. GIMPLE
185 statement STMT is printed with a message MSG. */
186
187static void
188report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
189{
190 dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
191}
192
193/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
194 operation. Return true if the results of DEF_STMT_INFO are something
195 that can be accumulated by such a reduction. */
196
197static bool
198parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
199{
200 return (is_gimple_assign (def_stmt_info->stmt)
201 || is_gimple_call (def_stmt_info->stmt)
202 || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
203 || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
204 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
205 && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
206}
207
208/* Detect SLP reduction of the form:
209
210 #a1 = phi <a5, a0>
211 a2 = operation (a1)
212 a3 = operation (a2)
213 a4 = operation (a3)
214 a5 = operation (a4)
215
216 #a = phi <a5>
217
218 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
219 FIRST_STMT is the first reduction stmt in the chain
220 (a2 = operation (a1)).
221
222 Return TRUE if a reduction chain was detected. */
223
224static bool
225parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
226 gimple *first_stmt)
227{
228 class loop *loop = (gimple_bb (phi))->loop_father;
229 class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
230 enum tree_code code;
231 gimple *loop_use_stmt = NULL;
232 stmt_vec_info use_stmt_info;
233 tree lhs;
234 imm_use_iterator imm_iter;
235 use_operand_p use_p;
236 int nloop_uses, size = 0, n_out_of_loop_uses;
237 bool found = false;
238
239 if (loop != vect_loop)
240 return false;
241
242 auto_vec<stmt_vec_info, 8> reduc_chain;
243 lhs = PHI_RESULT (phi);
244 code = gimple_assign_rhs_code (first_stmt);
245 while (1)
246 {
247 nloop_uses = 0;
248 n_out_of_loop_uses = 0;
249 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
250 {
251 gimple *use_stmt = USE_STMT (use_p);
252 if (is_gimple_debug (use_stmt))
253 continue;
254
255 /* Check if we got back to the reduction phi. */
256 if (use_stmt == phi)
257 {
258 loop_use_stmt = use_stmt;
259 found = true;
260 break;
261 }
262
263 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
264 {
265 loop_use_stmt = use_stmt;
266 nloop_uses++;
267 }
268 else
269 n_out_of_loop_uses++;
270
271 /* There are can be either a single use in the loop or two uses in
272 phi nodes. */
273 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
274 return false;
275 }
276
277 if (found)
278 break;
279
280 /* We reached a statement with no loop uses. */
281 if (nloop_uses == 0)
282 return false;
283
284 /* This is a loop exit phi, and we haven't reached the reduction phi. */
285 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
286 return false;
287
288 if (!is_gimple_assign (loop_use_stmt)
289 || code != gimple_assign_rhs_code (loop_use_stmt)
290 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
291 return false;
292
293 /* Insert USE_STMT into reduction chain. */
294 use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
295 reduc_chain.safe_push (use_stmt_info);
296
297 lhs = gimple_assign_lhs (loop_use_stmt);
298 size++;
299 }
300
301 if (!found || loop_use_stmt != phi || size < 2)
302 return false;
303
304 /* Swap the operands, if needed, to make the reduction operand be the second
305 operand. */
306 lhs = PHI_RESULT (phi);
307 for (unsigned i = 0; i < reduc_chain.length (); ++i)
308 {
309 gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
310 if (gimple_assign_rhs2 (next_stmt) == lhs)
311 {
312 tree op = gimple_assign_rhs1 (next_stmt);
313 stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
314
315 /* Check that the other def is either defined in the loop
316 ("vect_internal_def"), or it's an induction (defined by a
317 loop-header phi-node). */
318 if (def_stmt_info
319 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
320 && parloops_valid_reduction_input_p (def_stmt_info))
321 {
322 lhs = gimple_assign_lhs (next_stmt);
323 continue;
324 }
325
326 return false;
327 }
328 else
329 {
330 tree op = gimple_assign_rhs2 (next_stmt);
331 stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
332
333 /* Check that the other def is either defined in the loop
334 ("vect_internal_def"), or it's an induction (defined by a
335 loop-header phi-node). */
336 if (def_stmt_info
337 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
338 && parloops_valid_reduction_input_p (def_stmt_info))
339 {
340 if (dump_enabled_p ())
341 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
342 next_stmt);
343
344 swap_ssa_operands (next_stmt,
345 gimple_assign_rhs1_ptr (next_stmt),
346 gimple_assign_rhs2_ptr (next_stmt));
347 update_stmt (next_stmt);
Richard Biener31de92e2019-09-18 18:07:06 +0000348 }
349 else
350 return false;
351 }
352
353 lhs = gimple_assign_lhs (next_stmt);
354 }
355
356 /* Build up the actual chain. */
357 for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
358 {
359 REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
360 REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
361 }
362 REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
363 REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;
364
365 /* Save the chain for further analysis in SLP detection. */
366 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
367 REDUC_GROUP_SIZE (reduc_chain[0]) = size;
368
369 return true;
370}
371
372/* Return true if we need an in-order reduction for operation CODE
373 on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
374 overflow must wrap. */
375
376static bool
377parloops_needs_fold_left_reduction_p (tree type, tree_code code,
378 bool need_wrapping_integral_overflow)
379{
380 /* CHECKME: check for !flag_finite_math_only too? */
381 if (SCALAR_FLOAT_TYPE_P (type))
382 switch (code)
383 {
384 case MIN_EXPR:
385 case MAX_EXPR:
386 return false;
387
388 default:
389 return !flag_associative_math;
390 }
391
392 if (INTEGRAL_TYPE_P (type))
393 {
394 if (!operation_no_trapping_overflow (type, code))
395 return true;
396 if (need_wrapping_integral_overflow
397 && !TYPE_OVERFLOW_WRAPS (type)
398 && operation_can_overflow (code))
399 return true;
400 return false;
401 }
402
403 if (SAT_FIXED_POINT_TYPE_P (type))
404 return true;
405
406 return false;
407}
408
409
410/* Function parloops_is_simple_reduction
411
412 (1) Detect a cross-iteration def-use cycle that represents a simple
413 reduction computation. We look for the following pattern:
414
415 loop_header:
416 a1 = phi < a0, a2 >
417 a3 = ...
418 a2 = operation (a3, a1)
419
420 or
421
422 a3 = ...
423 loop_header:
424 a1 = phi < a0, a2 >
425 a2 = operation (a3, a1)
426
427 such that:
428 1. operation is commutative and associative and it is safe to
429 change the order of the computation
430 2. no uses for a2 in the loop (a2 is used out of the loop)
431 3. no uses of a1 in the loop besides the reduction operation
432 4. no uses of a1 outside the loop.
433
434 Conditions 1,4 are tested here.
435 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
436
437 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
438 nested cycles.
439
440 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
441 reductions:
442
443 a1 = phi < a0, a2 >
444 inner loop (def of a3)
445 a2 = phi < a3 >
446
447 (4) Detect condition expressions, ie:
448 for (int i = 0; i < N; i++)
449 if (a[i] < val)
450 ret_val = a[i];
451
452*/
453
454static stmt_vec_info
455parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
456 bool *double_reduc,
457 bool need_wrapping_integral_overflow,
458 enum vect_reduction_type *v_reduc_type)
459{
460 gphi *phi = as_a <gphi *> (phi_info->stmt);
461 class loop *loop = (gimple_bb (phi))->loop_father;
462 class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
463 bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
464 gimple *phi_use_stmt = NULL;
465 enum tree_code orig_code, code;
466 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
467 tree type;
468 tree name;
469 imm_use_iterator imm_iter;
470 use_operand_p use_p;
471 bool phi_def;
472
473 *double_reduc = false;
474 *v_reduc_type = TREE_CODE_REDUCTION;
475
476 tree phi_name = PHI_RESULT (phi);
477 /* ??? If there are no uses of the PHI result the inner loop reduction
478 won't be detected as possibly double-reduction by vectorizable_reduction
479 because that tries to walk the PHI arg from the preheader edge which
480 can be constant. See PR60382. */
481 if (has_zero_uses (phi_name))
482 return NULL;
483 unsigned nphi_def_loop_uses = 0;
484 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
485 {
486 gimple *use_stmt = USE_STMT (use_p);
487 if (is_gimple_debug (use_stmt))
488 continue;
489
490 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
491 {
492 if (dump_enabled_p ())
493 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
494 "intermediate value used outside loop.\n");
495
496 return NULL;
497 }
498
499 nphi_def_loop_uses++;
500 phi_use_stmt = use_stmt;
501 }
502
503 edge latch_e = loop_latch_edge (loop);
504 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
505 if (TREE_CODE (loop_arg) != SSA_NAME)
506 {
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
509 "reduction: not ssa_name: %T\n", loop_arg);
510 return NULL;
511 }
512
513 stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
514 if (!def_stmt_info
515 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
516 return NULL;
517
518 if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
519 {
520 name = gimple_assign_lhs (def_stmt);
521 phi_def = false;
522 }
523 else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
524 {
525 name = PHI_RESULT (def_stmt);
526 phi_def = true;
527 }
528 else
529 {
530 if (dump_enabled_p ())
531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
532 "reduction: unhandled reduction operation: %G",
533 def_stmt_info->stmt);
534 return NULL;
535 }
536
537 unsigned nlatch_def_loop_uses = 0;
538 auto_vec<gphi *, 3> lcphis;
539 bool inner_loop_of_double_reduc = false;
540 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
541 {
542 gimple *use_stmt = USE_STMT (use_p);
543 if (is_gimple_debug (use_stmt))
544 continue;
545 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
546 nlatch_def_loop_uses++;
547 else
548 {
549 /* We can have more than one loop-closed PHI. */
550 lcphis.safe_push (as_a <gphi *> (use_stmt));
551 if (nested_in_vect_loop
552 && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
553 == vect_double_reduction_def))
554 inner_loop_of_double_reduc = true;
555 }
556 }
557
558 /* If this isn't a nested cycle or if the nested cycle reduction value
559 is used ouside of the inner loop we cannot handle uses of the reduction
560 value. */
561 if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
562 && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
563 {
564 if (dump_enabled_p ())
565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
566 "reduction used in loop.\n");
567 return NULL;
568 }
569
570 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
571 defined in the inner loop. */
572 if (phi_def)
573 {
574 gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
575 op1 = PHI_ARG_DEF (def_stmt, 0);
576
577 if (gimple_phi_num_args (def_stmt) != 1
578 || TREE_CODE (op1) != SSA_NAME)
579 {
580 if (dump_enabled_p ())
581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
582 "unsupported phi node definition.\n");
583
584 return NULL;
585 }
586
587 gimple *def1 = SSA_NAME_DEF_STMT (op1);
588 if (gimple_bb (def1)
589 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
590 && loop->inner
591 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
592 && is_gimple_assign (def1)
593 && is_a <gphi *> (phi_use_stmt)
594 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
595 {
596 if (dump_enabled_p ())
597 report_ploop_op (MSG_NOTE, def_stmt,
598 "detected double reduction: ");
599
600 *double_reduc = true;
601 return def_stmt_info;
602 }
603
604 return NULL;
605 }
606
607 /* If we are vectorizing an inner reduction we are executing that
608 in the original order only in case we are not dealing with a
609 double reduction. */
610 bool check_reduction = true;
611 if (flow_loop_nested_p (vect_loop, loop))
612 {
613 gphi *lcphi;
614 unsigned i;
615 check_reduction = false;
616 FOR_EACH_VEC_ELT (lcphis, i, lcphi)
617 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
618 {
619 gimple *use_stmt = USE_STMT (use_p);
620 if (is_gimple_debug (use_stmt))
621 continue;
622 if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
623 check_reduction = true;
624 }
625 }
626
627 gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
628 code = orig_code = gimple_assign_rhs_code (def_stmt);
629
630 if (nested_in_vect_loop && !check_reduction)
631 {
632 /* FIXME: Even for non-reductions code generation is funneled
633 through vectorizable_reduction for the stmt defining the
634 PHI latch value. So we have to artificially restrict ourselves
635 for the supported operations. */
636 switch (get_gimple_rhs_class (code))
637 {
638 case GIMPLE_BINARY_RHS:
639 case GIMPLE_TERNARY_RHS:
640 break;
641 default:
642 /* Not supported by vectorizable_reduction. */
643 if (dump_enabled_p ())
644 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
645 "nested cycle: not handled operation: ");
646 return NULL;
647 }
648 if (dump_enabled_p ())
649 report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
650 return def_stmt_info;
651 }
652
653 /* We can handle "res -= x[i]", which is non-associative by
654 simply rewriting this into "res += -x[i]". Avoid changing
655 gimple instruction for the first simple tests and only do this
656 if we're allowed to change code at all. */
657 if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
658 code = PLUS_EXPR;
659
660 if (code == COND_EXPR)
661 {
662 if (! nested_in_vect_loop)
663 *v_reduc_type = COND_REDUCTION;
664
665 op3 = gimple_assign_rhs1 (def_stmt);
666 if (COMPARISON_CLASS_P (op3))
667 {
668 op4 = TREE_OPERAND (op3, 1);
669 op3 = TREE_OPERAND (op3, 0);
670 }
671 if (op3 == phi_name || op4 == phi_name)
672 {
673 if (dump_enabled_p ())
674 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
675 "reduction: condition depends on previous"
676 " iteration: ");
677 return NULL;
678 }
679
680 op1 = gimple_assign_rhs2 (def_stmt);
681 op2 = gimple_assign_rhs3 (def_stmt);
682 }
683 else if (!commutative_tree_code (code) || !associative_tree_code (code))
684 {
685 if (dump_enabled_p ())
686 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
687 "reduction: not commutative/associative: ");
688 return NULL;
689 }
690 else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
691 {
692 op1 = gimple_assign_rhs1 (def_stmt);
693 op2 = gimple_assign_rhs2 (def_stmt);
694 }
695 else
696 {
697 if (dump_enabled_p ())
698 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
699 "reduction: not handled operation: ");
700 return NULL;
701 }
702
703 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
704 {
705 if (dump_enabled_p ())
706 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
707 "reduction: both uses not ssa_names: ");
708
709 return NULL;
710 }
711
712 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
713 if ((TREE_CODE (op1) == SSA_NAME
714 && !types_compatible_p (type,TREE_TYPE (op1)))
715 || (TREE_CODE (op2) == SSA_NAME
716 && !types_compatible_p (type, TREE_TYPE (op2)))
717 || (op3 && TREE_CODE (op3) == SSA_NAME
718 && !types_compatible_p (type, TREE_TYPE (op3)))
719 || (op4 && TREE_CODE (op4) == SSA_NAME
720 && !types_compatible_p (type, TREE_TYPE (op4))))
721 {
722 if (dump_enabled_p ())
723 {
724 dump_printf_loc (MSG_NOTE, vect_location,
725 "reduction: multiple types: operation type: "
726 "%T, operands types: %T,%T",
727 type, TREE_TYPE (op1), TREE_TYPE (op2));
728 if (op3)
729 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
730
731 if (op4)
732 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
733 dump_printf (MSG_NOTE, "\n");
734 }
735
736 return NULL;
737 }
738
739 /* Check whether it's ok to change the order of the computation.
740 Generally, when vectorizing a reduction we change the order of the
741 computation. This may change the behavior of the program in some
742 cases, so we need to check that this is ok. One exception is when
743 vectorizing an outer-loop: the inner-loop is executed sequentially,
744 and therefore vectorizing reductions in the inner-loop during
745 outer-loop vectorization is safe. */
746 if (check_reduction
747 && *v_reduc_type == TREE_CODE_REDUCTION
748 && parloops_needs_fold_left_reduction_p (type, code,
749 need_wrapping_integral_overflow))
750 *v_reduc_type = FOLD_LEFT_REDUCTION;
751
752 /* Reduction is safe. We're dealing with one of the following:
753 1) integer arithmetic and no trapv
754 2) floating point arithmetic, and special flags permit this optimization
755 3) nested cycle (i.e., outer loop vectorization). */
756 stmt_vec_info def1_info = loop_info->lookup_def (op1);
757 stmt_vec_info def2_info = loop_info->lookup_def (op2);
758 if (code != COND_EXPR && !def1_info && !def2_info)
759 {
760 if (dump_enabled_p ())
761 report_ploop_op (MSG_NOTE, def_stmt,
762 "reduction: no defs for operands: ");
763 return NULL;
764 }
765
766 /* Check that one def is the reduction def, defined by PHI,
767 the other def is either defined in the loop ("vect_internal_def"),
768 or it's an induction (defined by a loop-header phi-node). */
769
770 if (def2_info
771 && def2_info->stmt == phi
772 && (code == COND_EXPR
773 || !def1_info
774 || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
775 || parloops_valid_reduction_input_p (def1_info)))
776 {
777 if (dump_enabled_p ())
778 report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
779 return def_stmt_info;
780 }
781
782 if (def1_info
783 && def1_info->stmt == phi
784 && (code == COND_EXPR
785 || !def2_info
786 || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
787 || parloops_valid_reduction_input_p (def2_info)))
788 {
789 if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
790 {
791 /* Check if we can swap operands (just for simplicity - so that
792 the rest of the code can assume that the reduction variable
793 is always the last (second) argument). */
794 if (code == COND_EXPR)
795 {
796 /* Swap cond_expr by inverting the condition. */
797 tree cond_expr = gimple_assign_rhs1 (def_stmt);
798 enum tree_code invert_code = ERROR_MARK;
799 enum tree_code cond_code = TREE_CODE (cond_expr);
800
801 if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
802 {
803 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
804 invert_code = invert_tree_comparison (cond_code, honor_nans);
805 }
806 if (invert_code != ERROR_MARK)
807 {
808 TREE_SET_CODE (cond_expr, invert_code);
809 swap_ssa_operands (def_stmt,
810 gimple_assign_rhs2_ptr (def_stmt),
811 gimple_assign_rhs3_ptr (def_stmt));
812 }
813 else
814 {
815 if (dump_enabled_p ())
816 report_ploop_op (MSG_NOTE, def_stmt,
817 "detected reduction: cannot swap operands "
818 "for cond_expr");
819 return NULL;
820 }
821 }
822 else
823 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
824 gimple_assign_rhs2_ptr (def_stmt));
825
826 if (dump_enabled_p ())
827 report_ploop_op (MSG_NOTE, def_stmt,
828 "detected reduction: need to swap operands: ");
Richard Biener31de92e2019-09-18 18:07:06 +0000829 }
830 else
831 {
832 if (dump_enabled_p ())
833 report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
834 }
835
836 return def_stmt_info;
837 }
838
839 /* Try to find SLP reduction chain. */
840 if (! nested_in_vect_loop
841 && code != COND_EXPR
842 && orig_code != MINUS_EXPR
843 && parloops_is_slp_reduction (loop_info, phi, def_stmt))
844 {
845 if (dump_enabled_p ())
846 report_ploop_op (MSG_NOTE, def_stmt,
847 "reduction: detected reduction chain: ");
848
849 return def_stmt_info;
850 }
851
852 /* Look for the expression computing loop_arg from loop PHI result. */
853 if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
854 return def_stmt_info;
855
856 if (dump_enabled_p ())
857 {
858 report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
859 "reduction: unknown pattern: ");
860 }
861
862 return NULL;
863}
864
865/* Wrapper around vect_is_simple_reduction, which will modify code
866 in-place if it enables detection of more reductions. Arguments
867 as there. */
868
869stmt_vec_info
870parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
871 bool *double_reduc,
872 bool need_wrapping_integral_overflow)
873{
874 enum vect_reduction_type v_reduc_type;
875 stmt_vec_info def_info
876 = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
877 need_wrapping_integral_overflow,
878 &v_reduc_type);
879 if (def_info)
880 {
881 STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
882 STMT_VINFO_REDUC_DEF (phi_info) = def_info;
883 STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
884 STMT_VINFO_REDUC_DEF (def_info) = phi_info;
885 }
886 return def_info;
887}
888
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +0200889/* Minimal number of iterations of a loop that should be executed in each
890 thread. */
Martin Liska028d4092019-11-12 11:08:40 +0100891#define MIN_PER_THREAD param_parloops_min_per_thread
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +0200892
H.J. Lub8698a02009-11-25 10:55:54 +0000893/* Element of the hashtable, representing a
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000894 reduction in the current loop. */
895struct reduction_info
896{
Trevor Saunders355fe082015-09-20 00:52:59 +0000897 gimple *reduc_stmt; /* reduction statement. */
898 gimple *reduc_phi; /* The phi node defining the reduction. */
Richard Biener726a9892008-07-28 14:33:56 +0000899 enum tree_code reduction_code;/* code for the reduction operation. */
Jakub Jelinek5d1fd1d2010-12-18 22:07:12 +0100900 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
901 result. */
David Malcolm538dd0b2014-11-19 17:00:54 +0000902 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000903 of the reduction variable when existing the loop. */
Razya Ladelskyae0bce62007-12-18 11:21:48 +0000904 tree initial_value; /* The initial value of the reduction var before entering the loop. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000905 tree field; /* the name of the field in the parloop data structure intended for reduction. */
Tom de Vries61d9c522016-01-18 12:52:32 +0000906 tree reduc_addr; /* The address of the reduction variable for
907 openacc reductions. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000908 tree init; /* reduction initialization value. */
David Malcolm538dd0b2014-11-19 17:00:54 +0000909 gphi *new_phi; /* (helper field) Newly created phi node whose result
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000910 will be passed to the atomic operation. Represents
911 the local result each thread computed for the reduction
912 operation. */
913};
914
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000915/* Reduction info hashtable helpers. */
916
Richard Sandiford95fbe132015-06-25 17:06:24 +0000917struct reduction_hasher : free_ptr_hash <reduction_info>
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000918{
Trevor Saunders67f58942015-04-18 18:13:18 +0000919 static inline hashval_t hash (const reduction_info *);
920 static inline bool equal (const reduction_info *, const reduction_info *);
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000921};
922
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000923/* Equality and hash functions for hashtab code. */
924
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000925inline bool
Trevor Saunders67f58942015-04-18 18:13:18 +0000926reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000927{
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000928 return (a->reduc_phi == b->reduc_phi);
929}
930
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000931inline hashval_t
Trevor Saunders67f58942015-04-18 18:13:18 +0000932reduction_hasher::hash (const reduction_info *a)
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000933{
Jakub Jelinek5d1fd1d2010-12-18 22:07:12 +0100934 return a->reduc_version;
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000935}
936
Trevor Saundersc203e8a2014-06-24 13:21:35 +0000937typedef hash_table<reduction_hasher> reduction_info_table_type;
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000938
939
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000940static struct reduction_info *
Trevor Saunders355fe082015-09-20 00:52:59 +0000941reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000942{
943 struct reduction_info tmpred, *red;
944
Martin Liskab119c052019-05-03 14:37:22 +0200945 if (reduction_list->is_empty () || phi == NULL)
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000946 return NULL;
947
Tom de Vriesfdce4932015-10-13 14:54:01 +0000948 if (gimple_uid (phi) == (unsigned int)-1
949 || gimple_uid (phi) == 0)
950 return NULL;
951
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000952 tmpred.reduc_phi = phi;
Jakub Jelinek5d1fd1d2010-12-18 22:07:12 +0100953 tmpred.reduc_version = gimple_uid (phi);
Trevor Saundersc203e8a2014-06-24 13:21:35 +0000954 red = reduction_list->find (&tmpred);
Tom de Vriesfdce4932015-10-13 14:54:01 +0000955 gcc_assert (red == NULL || red->reduc_phi == phi);
Razya Ladelskya509ebb2007-10-29 11:05:04 +0000956
957 return red;
958}
959
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +0200960/* Element of hashtable of names to copy. */
961
962struct name_to_copy_elt
963{
964 unsigned version; /* The version of the name to copy. */
965 tree new_name; /* The new name used in the copy. */
966 tree field; /* The field of the structure used to pass the
967 value. */
968};
969
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000970/* Name copies hashtable helpers. */
971
Richard Sandiford95fbe132015-06-25 17:06:24 +0000972struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000973{
Trevor Saunders67f58942015-04-18 18:13:18 +0000974 static inline hashval_t hash (const name_to_copy_elt *);
975 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000976};
977
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +0200978/* Equality and hash functions for hashtab code. */
979
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000980inline bool
Trevor Saunders67f58942015-04-18 18:13:18 +0000981name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +0200982{
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +0200983 return a->version == b->version;
984}
985
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000986inline hashval_t
Trevor Saunders67f58942015-04-18 18:13:18 +0000987name_to_copy_hasher::hash (const name_to_copy_elt *a)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +0200988{
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +0200989 return (hashval_t) a->version;
990}
991
Trevor Saundersc203e8a2014-06-24 13:21:35 +0000992typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +0000993
Sebastian Popb305e3d2011-01-25 21:24:23 +0000994/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
995 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
996 represents the denominator for every element in the matrix. */
997typedef struct lambda_trans_matrix_s
998{
999 lambda_matrix matrix;
1000 int rowsize;
1001 int colsize;
1002 int denominator;
1003} *lambda_trans_matrix;
1004#define LTM_MATRIX(T) ((T)->matrix)
1005#define LTM_ROWSIZE(T) ((T)->rowsize)
1006#define LTM_COLSIZE(T) ((T)->colsize)
1007#define LTM_DENOMINATOR(T) ((T)->denominator)
1008
1009/* Allocate a new transformation matrix. */
1010
1011static lambda_trans_matrix
1012lambda_trans_matrix_new (int colsize, int rowsize,
1013 struct obstack * lambda_obstack)
1014{
1015 lambda_trans_matrix ret;
1016
1017 ret = (lambda_trans_matrix)
1018 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
1019 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
1020 LTM_ROWSIZE (ret) = rowsize;
1021 LTM_COLSIZE (ret) = colsize;
1022 LTM_DENOMINATOR (ret) = 1;
1023 return ret;
1024}
1025
1026/* Multiply a vector VEC by a matrix MAT.
1027 MAT is an M*N matrix, and VEC is a vector with length N. The result
1028 is stored in DEST which must be a vector of length M. */
1029
1030static void
1031lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
1032 lambda_vector vec, lambda_vector dest)
1033{
1034 int i, j;
1035
1036 lambda_vector_clear (dest, m);
1037 for (i = 0; i < m; i++)
1038 for (j = 0; j < n; j++)
1039 dest[i] += matrix[i][j] * vec[j];
1040}
1041
1042/* Return true if TRANS is a legal transformation matrix that respects
1043 the dependence vectors in DISTS and DIRS. The conservative answer
1044 is false.
1045
1046 "Wolfe proves that a unimodular transformation represented by the
1047 matrix T is legal when applied to a loop nest with a set of
1048 lexicographically non-negative distance vectors RDG if and only if
1049 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
1050 i.e.: if and only if it transforms the lexicographically positive
1051 distance vectors to lexicographically positive vectors. Note that
1052 a unimodular matrix must transform the zero vector (and only it) to
1053 the zero vector." S.Muchnick. */
1054
1055static bool
1056lambda_transform_legal_p (lambda_trans_matrix trans,
1057 int nb_loops,
Diego Novillo9771b262012-11-17 21:54:30 -05001058 vec<ddr_p> dependence_relations)
Sebastian Popb305e3d2011-01-25 21:24:23 +00001059{
1060 unsigned int i, j;
1061 lambda_vector distres;
1062 struct data_dependence_relation *ddr;
1063
1064 gcc_assert (LTM_COLSIZE (trans) == nb_loops
1065 && LTM_ROWSIZE (trans) == nb_loops);
1066
1067 /* When there are no dependences, the transformation is correct. */
Diego Novillo9771b262012-11-17 21:54:30 -05001068 if (dependence_relations.length () == 0)
Sebastian Popb305e3d2011-01-25 21:24:23 +00001069 return true;
1070
Diego Novillo9771b262012-11-17 21:54:30 -05001071 ddr = dependence_relations[0];
Sebastian Popb305e3d2011-01-25 21:24:23 +00001072 if (ddr == NULL)
1073 return true;
1074
1075 /* When there is an unknown relation in the dependence_relations, we
1076 know that it is no worth looking at this loop nest: give up. */
1077 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1078 return false;
1079
1080 distres = lambda_vector_new (nb_loops);
1081
1082 /* For each distance vector in the dependence graph. */
Diego Novillo9771b262012-11-17 21:54:30 -05001083 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
Sebastian Popb305e3d2011-01-25 21:24:23 +00001084 {
1085 /* Don't care about relations for which we know that there is no
1086 dependence, nor about read-read (aka. output-dependences):
1087 these data accesses can happen in any order. */
1088 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
1089 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
1090 continue;
1091
1092 /* Conservatively answer: "this transformation is not valid". */
1093 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1094 return false;
1095
1096 /* If the dependence could not be captured by a distance vector,
1097 conservatively answer that the transform is not valid. */
1098 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1099 return false;
1100
1101 /* Compute trans.dist_vect */
1102 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
1103 {
1104 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
1105 DDR_DIST_VECT (ddr, j), distres);
1106
1107 if (!lambda_vector_lexico_pos (distres, nb_loops))
1108 return false;
1109 }
1110 }
1111 return true;
1112}
Razya Ladelsky08dab972009-07-30 08:39:57 +00001113
1114/* Data dependency analysis. Returns true if the iterations of LOOP
1115 are independent on each other (that is, if we can execute them
1116 in parallel). */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001117
1118static bool
Martin Sebor99b1c312019-07-09 18:32:49 +00001119loop_parallel_p (class loop *loop, struct obstack * parloop_obstack)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001120{
Diego Novillo9771b262012-11-17 21:54:30 -05001121 vec<ddr_p> dependence_relations;
1122 vec<data_reference_p> datarefs;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001123 lambda_trans_matrix trans;
1124 bool ret = false;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001125
1126 if (dump_file && (dump_flags & TDF_DETAILS))
Razya Ladelsky48710222009-10-22 14:43:40 +00001127 {
1128 fprintf (dump_file, "Considering loop %d\n", loop->num);
1129 if (!loop->inner)
1130 fprintf (dump_file, "loop is innermost\n");
H.J. Lub8698a02009-11-25 10:55:54 +00001131 else
Razya Ladelsky48710222009-10-22 14:43:40 +00001132 fprintf (dump_file, "loop NOT innermost\n");
1133 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001134
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001135 /* Check for problems with dependences. If the loop can be reversed,
1136 the iterations are independent. */
Trevor Saunders00f96dc2013-12-20 20:34:33 +00001137 auto_vec<loop_p, 3> loop_nest;
Diego Novillo9771b262012-11-17 21:54:30 -05001138 datarefs.create (10);
Trevor Saunders07687832013-11-01 20:31:32 +00001139 dependence_relations.create (100);
Andrey Belevantsev9ca3d002012-01-25 17:11:50 +04001140 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
1141 &dependence_relations))
1142 {
1143 if (dump_file && (dump_flags & TDF_DETAILS))
1144 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
1145 ret = false;
1146 goto end;
1147 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001148 if (dump_file && (dump_flags & TDF_DETAILS))
1149 dump_data_dependence_relations (dump_file, dependence_relations);
1150
Laurynas Biveinisf873b202010-04-22 12:42:15 +00001151 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001152 LTM_MATRIX (trans)[0][0] = -1;
1153
1154 if (lambda_transform_legal_p (trans, 1, dependence_relations))
1155 {
1156 ret = true;
1157 if (dump_file && (dump_flags & TDF_DETAILS))
1158 fprintf (dump_file, " SUCCESS: may be parallelized\n");
1159 }
1160 else if (dump_file && (dump_flags & TDF_DETAILS))
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001161 fprintf (dump_file,
1162 " FAILED: data dependencies exist across iterations\n");
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001163
Andrey Belevantsev9ca3d002012-01-25 17:11:50 +04001164 end:
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001165 free_dependence_relations (dependence_relations);
1166 free_data_refs (datarefs);
1167
1168 return ret;
1169}
1170
Sebastian Pop1d4af1e2008-01-16 02:44:04 +00001171/* Return true when LOOP contains basic blocks marked with the
1172 BB_IRREDUCIBLE_LOOP flag. */
1173
1174static inline bool
Martin Sebor99b1c312019-07-09 18:32:49 +00001175loop_has_blocks_with_irreducible_flag (class loop *loop)
Sebastian Pop1d4af1e2008-01-16 02:44:04 +00001176{
1177 unsigned i;
1178 basic_block *bbs = get_loop_body_in_dom_order (loop);
1179 bool res = true;
1180
1181 for (i = 0; i < loop->num_nodes; i++)
1182 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
1183 goto end;
1184
1185 res = false;
1186 end:
1187 free (bbs);
1188 return res;
1189}
1190
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001191/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001192 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001193 to their addresses that can be reused. The address of OBJ is known to
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001194 be invariant in the whole function. Other needed statements are placed
1195 right before GSI. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001196
1197static tree
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001198take_address_of (tree obj, tree type, edge entry,
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001199 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001200{
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001201 int uid;
Richard Guenther83d59772012-08-10 09:20:29 +00001202 tree *var_p, name, addr;
David Malcolm538dd0b2014-11-19 17:00:54 +00001203 gassign *stmt;
Richard Biener726a9892008-07-28 14:33:56 +00001204 gimple_seq stmts;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001205
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001206 /* Since the address of OBJ is invariant, the trees may be shared.
1207 Avoid rewriting unrelated parts of the code. */
1208 obj = unshare_expr (obj);
1209 for (var_p = &obj;
1210 handled_component_p (*var_p);
1211 var_p = &TREE_OPERAND (*var_p, 0))
1212 continue;
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001213
Richard Guentherc9a410f2010-10-21 10:38:51 +00001214 /* Canonicalize the access to base on a MEM_REF. */
1215 if (DECL_P (*var_p))
1216 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
1217
1218 /* Assign a canonical SSA name to the address of the base decl used
1219 in the address and share it for all accesses and addresses based
1220 on it. */
1221 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001222 int_tree_map elt;
1223 elt.uid = uid;
1224 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
1225 if (!slot->to)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001226 {
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001227 if (gsi == NULL)
1228 return NULL;
Richard Guentherc9a410f2010-10-21 10:38:51 +00001229 addr = TREE_OPERAND (*var_p, 0);
Jakub Jelinek29b89442013-08-18 17:23:24 +02001230 const char *obj_name
1231 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
1232 if (obj_name)
1233 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
1234 else
Jakub Jelinekb731b392014-11-29 12:35:30 +01001235 name = make_ssa_name (TREE_TYPE (addr));
Richard Guenther83d59772012-08-10 09:20:29 +00001236 stmt = gimple_build_assign (name, addr);
Richard Biener726a9892008-07-28 14:33:56 +00001237 gsi_insert_on_edge_immediate (entry, stmt);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001238
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001239 slot->uid = uid;
1240 slot->to = name;
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001241 }
1242 else
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001243 name = slot->to;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001244
Richard Guentherc9a410f2010-10-21 10:38:51 +00001245 /* Express the address in terms of the canonical SSA name. */
1246 TREE_OPERAND (*var_p, 0) = name;
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001247 if (gsi == NULL)
1248 return build_fold_addr_expr_with_type (obj, type);
1249
Richard Bieneraa000592015-10-16 07:45:09 +00001250 name = force_gimple_operand (build_addr (obj),
Richard Guentherc9a410f2010-10-21 10:38:51 +00001251 &stmts, true, NULL_TREE);
1252 if (!gimple_seq_empty_p (stmts))
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001253 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001254
Richard Guentherc9a410f2010-10-21 10:38:51 +00001255 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001256 {
Richard Biener726a9892008-07-28 14:33:56 +00001257 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001258 NULL_TREE);
Richard Biener726a9892008-07-28 14:33:56 +00001259 if (!gimple_seq_empty_p (stmts))
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001260 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001261 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001262
1263 return name;
1264}
1265
Tom de Vries12efb1d2015-07-28 07:54:04 +00001266static tree
Trevor Saunders355fe082015-09-20 00:52:59 +00001267reduc_stmt_res (gimple *stmt)
Tom de Vries12efb1d2015-07-28 07:54:04 +00001268{
1269 return (gimple_code (stmt) == GIMPLE_PHI
1270 ? gimple_phi_result (stmt)
1271 : gimple_assign_lhs (stmt));
1272}
1273
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001274/* Callback for htab_traverse. Create the initialization statement
H.J. Lub8698a02009-11-25 10:55:54 +00001275 for reduction described in SLOT, and place it at the preheader of
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001276 the loop described in DATA. */
1277
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001278int
Martin Sebor99b1c312019-07-09 18:32:49 +00001279initialize_reductions (reduction_info **slot, class loop *loop)
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001280{
Thomas Schwingef2c9f712015-09-23 16:46:55 +02001281 tree init;
1282 tree type, arg;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001283 edge e;
1284
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001285 struct reduction_info *const reduc = *slot;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001286
H.J. Lub8698a02009-11-25 10:55:54 +00001287 /* Create initialization in preheader:
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001288 reduction_variable = initialization value of reduction. */
1289
H.J. Lub8698a02009-11-25 10:55:54 +00001290 /* In the phi node at the header, replace the argument coming
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001291 from the preheader with the reduction initialization value. */
1292
Thomas Schwingef2c9f712015-09-23 16:46:55 +02001293 /* Initialize the reduction. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001294 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
Thomas Schwingef2c9f712015-09-23 16:46:55 +02001295 init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
1296 reduc->reduction_code, type);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001297 reduc->init = init;
1298
H.J. Lub8698a02009-11-25 10:55:54 +00001299 /* Replace the argument representing the initialization value
1300 with the initialization value for the reduction (neutral
1301 element for the particular operation, e.g. 0 for PLUS_EXPR,
1302 1 for MULT_EXPR, etc).
1303 Keep the old value in a new variable "reduction_initial",
1304 that will be taken in consideration after the parallel
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00001305 computing is done. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001306
1307 e = loop_preheader_edge (loop);
1308 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
1309 /* Create new variable to hold the initial value. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001310
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001311 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00001312 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
Razya Ladelskyae0bce62007-12-18 11:21:48 +00001313 reduc->initial_value = arg;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001314 return 1;
1315}
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001316
1317struct elv_data
1318{
Richard Biener726a9892008-07-28 14:33:56 +00001319 struct walk_stmt_info info;
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001320 edge entry;
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001321 int_tree_htab_type *decl_address;
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001322 gimple_stmt_iterator *gsi;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001323 bool changed;
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001324 bool reset;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001325};
1326
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001327/* Eliminates references to local variables in *TP out of the single
1328 entry single exit region starting at DTA->ENTRY.
1329 DECL_ADDRESS contains addresses of the references that had their
1330 address taken already. If the expression is changed, CHANGED is
1331 set to true. Callback for walk_tree. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001332
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001333static tree
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001334eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001335{
Kaveh R. Ghazi3d9a9f92008-06-20 18:34:07 +00001336 struct elv_data *const dta = (struct elv_data *) data;
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001337 tree t = *tp, var, addr, addr_type, type, obj;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001338
1339 if (DECL_P (t))
1340 {
1341 *walk_subtrees = 0;
1342
1343 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
1344 return NULL_TREE;
1345
1346 type = TREE_TYPE (t);
1347 addr_type = build_pointer_type (type);
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001348 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
1349 dta->gsi);
1350 if (dta->gsi == NULL && addr == NULL_TREE)
1351 {
1352 dta->reset = true;
1353 return NULL_TREE;
1354 }
1355
Richard Guenther70f34812010-07-01 08:49:19 +00001356 *tp = build_simple_mem_ref (addr);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001357
1358 dta->changed = true;
1359 return NULL_TREE;
1360 }
1361
1362 if (TREE_CODE (t) == ADDR_EXPR)
1363 {
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001364 /* ADDR_EXPR may appear in two contexts:
1365 -- as a gimple operand, when the address taken is a function invariant
1366 -- as gimple rhs, when the resulting address in not a function
1367 invariant
1368 We do not need to do anything special in the latter case (the base of
1369 the memory reference whose address is taken may be replaced in the
1370 DECL_P case). The former case is more complicated, as we need to
1371 ensure that the new address is still a gimple operand. Thus, it
1372 is not sufficient to replace just the base of the memory reference --
1373 we need to move the whole computation of the address out of the
1374 loop. */
1375 if (!is_gimple_val (t))
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001376 return NULL_TREE;
1377
1378 *walk_subtrees = 0;
Zdenek Dvorak8a171a52007-12-19 16:01:19 +01001379 obj = TREE_OPERAND (t, 0);
1380 var = get_base_address (obj);
1381 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001382 return NULL_TREE;
1383
1384 addr_type = TREE_TYPE (t);
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001385 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
1386 dta->gsi);
1387 if (dta->gsi == NULL && addr == NULL_TREE)
1388 {
1389 dta->reset = true;
1390 return NULL_TREE;
1391 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001392 *tp = addr;
1393
1394 dta->changed = true;
1395 return NULL_TREE;
1396 }
1397
Richard Biener726a9892008-07-28 14:33:56 +00001398 if (!EXPR_P (t))
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001399 *walk_subtrees = 0;
1400
1401 return NULL_TREE;
1402}
1403
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001404/* Moves the references to local variables in STMT at *GSI out of the single
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001405 entry single exit region starting at ENTRY. DECL_ADDRESS contains
1406 addresses of the references that had their address taken
1407 already. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001408
1409static void
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001410eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001411 int_tree_htab_type *decl_address)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001412{
1413 struct elv_data dta;
Trevor Saunders355fe082015-09-20 00:52:59 +00001414 gimple *stmt = gsi_stmt (*gsi);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001415
Richard Biener726a9892008-07-28 14:33:56 +00001416 memset (&dta.info, '\0', sizeof (dta.info));
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001417 dta.entry = entry;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001418 dta.decl_address = decl_address;
1419 dta.changed = false;
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001420 dta.reset = false;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001421
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001422 if (gimple_debug_bind_p (stmt))
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001423 {
1424 dta.gsi = NULL;
1425 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
1426 eliminate_local_variables_1, &dta.info, NULL);
1427 if (dta.reset)
1428 {
1429 gimple_debug_bind_reset_value (stmt);
1430 dta.changed = true;
1431 }
1432 }
Jakub Jelinek29b89442013-08-18 17:23:24 +02001433 else if (gimple_clobber_p (stmt))
1434 {
Tom de Vries42fb90d2016-01-23 20:28:17 +00001435 unlink_stmt_vdef (stmt);
Jakub Jelinek29b89442013-08-18 17:23:24 +02001436 stmt = gimple_build_nop ();
1437 gsi_replace (gsi, stmt, false);
1438 dta.changed = true;
1439 }
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001440 else
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001441 {
1442 dta.gsi = gsi;
1443 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
1444 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001445
1446 if (dta.changed)
1447 update_stmt (stmt);
1448}
1449
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001450/* Eliminates the references to local variables from the single entry
1451 single exit region between the ENTRY and EXIT edges.
H.J. Lub8698a02009-11-25 10:55:54 +00001452
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001453 This includes:
H.J. Lub8698a02009-11-25 10:55:54 +00001454 1) Taking address of a local variable -- these are moved out of the
1455 region (and temporary variable is created to hold the address if
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001456 necessary).
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001457
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001458 2) Dereferencing a local variable -- these are replaced with indirect
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001459 references. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001460
1461static void
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001462eliminate_local_variables (edge entry, edge exit)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001463{
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001464 basic_block bb;
Trevor Saunders00f96dc2013-12-20 20:34:33 +00001465 auto_vec<basic_block, 3> body;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001466 unsigned i;
Richard Biener726a9892008-07-28 14:33:56 +00001467 gimple_stmt_iterator gsi;
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001468 bool has_debug_stmt = false;
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001469 int_tree_htab_type decl_address (10);
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001470 basic_block entry_bb = entry->src;
1471 basic_block exit_bb = exit->dest;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001472
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001473 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001474
Diego Novillo9771b262012-11-17 21:54:30 -05001475 FOR_EACH_VEC_ELT (body, i, bb)
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001476 if (bb != entry_bb && bb != exit_bb)
Patrick Palka6b37bda2016-04-06 23:07:21 +00001477 {
1478 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1479 if (is_gimple_debug (gsi_stmt (gsi)))
1480 {
1481 if (gimple_debug_bind_p (gsi_stmt (gsi)))
1482 has_debug_stmt = true;
1483 }
1484 else
1485 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
1486 }
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001487
1488 if (has_debug_stmt)
Diego Novillo9771b262012-11-17 21:54:30 -05001489 FOR_EACH_VEC_ELT (body, i, bb)
Jakub Jelinekcba1eb62010-11-05 12:15:28 +01001490 if (bb != entry_bb && bb != exit_bb)
1491 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1492 if (gimple_debug_bind_p (gsi_stmt (gsi)))
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001493 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001494}
1495
1496/* Returns true if expression EXPR is not defined between ENTRY and
1497 EXIT, i.e. if all its operands are defined outside of the region. */
1498
1499static bool
1500expr_invariant_in_region_p (edge entry, edge exit, tree expr)
1501{
1502 basic_block entry_bb = entry->src;
1503 basic_block exit_bb = exit->dest;
1504 basic_block def_bb;
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001505
1506 if (is_gimple_min_invariant (expr))
1507 return true;
1508
1509 if (TREE_CODE (expr) == SSA_NAME)
1510 {
Richard Biener726a9892008-07-28 14:33:56 +00001511 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001512 if (def_bb
1513 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
1514 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
1515 return false;
1516
1517 return true;
1518 }
1519
Richard Biener726a9892008-07-28 14:33:56 +00001520 return false;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001521}
1522
1523/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
1524 The copies are stored to NAME_COPIES, if NAME was already duplicated,
1525 its duplicate stored in NAME_COPIES is returned.
H.J. Lub8698a02009-11-25 10:55:54 +00001526
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001527 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
1528 duplicated, storing the copies in DECL_COPIES. */
1529
1530static tree
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001531separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
1532 int_tree_htab_type *decl_copies,
1533 bool copy_name_p)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001534{
1535 tree copy, var, var_copy;
1536 unsigned idx, uid, nuid;
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001537 struct int_tree_map ielt;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001538 struct name_to_copy_elt elt, *nelt;
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001539 name_to_copy_elt **slot;
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001540 int_tree_map *dslot;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001541
1542 if (TREE_CODE (name) != SSA_NAME)
1543 return name;
1544
1545 idx = SSA_NAME_VERSION (name);
1546 elt.version = idx;
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001547 slot = name_copies->find_slot_with_hash (&elt, idx,
1548 copy_name_p ? INSERT : NO_INSERT);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001549 if (slot && *slot)
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001550 return (*slot)->new_name;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001551
Richard Guenther70b5e7d2012-08-10 08:29:29 +00001552 if (copy_name_p)
1553 {
1554 copy = duplicate_ssa_name (name, NULL);
1555 nelt = XNEW (struct name_to_copy_elt);
1556 nelt->version = idx;
1557 nelt->new_name = copy;
1558 nelt->field = NULL_TREE;
1559 *slot = nelt;
1560 }
1561 else
1562 {
1563 gcc_assert (!slot);
1564 copy = name;
1565 }
1566
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001567 var = SSA_NAME_VAR (name);
Richard Guenther70b5e7d2012-08-10 08:29:29 +00001568 if (!var)
1569 return copy;
1570
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001571 uid = DECL_UID (var);
1572 ielt.uid = uid;
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001573 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
1574 if (!dslot->to)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001575 {
1576 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
Richard Bienereb72dc62020-04-22 10:40:51 +02001577 DECL_NOT_GIMPLE_REG_P (var_copy) = DECL_NOT_GIMPLE_REG_P (var);
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001578 dslot->uid = uid;
1579 dslot->to = var_copy;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001580
1581 /* Ensure that when we meet this decl next time, we won't duplicate
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001582 it again. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001583 nuid = DECL_UID (var_copy);
1584 ielt.uid = nuid;
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001585 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
1586 gcc_assert (!dslot->to);
1587 dslot->uid = nuid;
1588 dslot->to = var_copy;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001589 }
1590 else
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001591 var_copy = dslot->to;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001592
Richard Guentherb2ec94d2012-08-03 08:55:43 +00001593 replace_ssa_name_symbol (copy, var_copy);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001594 return copy;
1595}
1596
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001597/* Finds the ssa names used in STMT that are defined outside the
1598 region between ENTRY and EXIT and replaces such ssa names with
1599 their duplicates. The duplicates are stored to NAME_COPIES. Base
1600 decls of all ssa names used in STMT (including those defined in
1601 LOOP) are replaced with the new temporary variables; the
1602 replacement decls are stored in DECL_COPIES. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001603
1604static void
Trevor Saunders355fe082015-09-20 00:52:59 +00001605separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001606 name_to_copy_table_type *name_copies,
1607 int_tree_htab_type *decl_copies)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001608{
1609 use_operand_p use;
1610 def_operand_p def;
1611 ssa_op_iter oi;
1612 tree name, copy;
1613 bool copy_name_p;
1614
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001615 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001616 {
1617 name = DEF_FROM_PTR (def);
1618 gcc_assert (TREE_CODE (name) == SSA_NAME);
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001619 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
1620 false);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001621 gcc_assert (copy == name);
1622 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001623
1624 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001625 {
1626 name = USE_FROM_PTR (use);
1627 if (TREE_CODE (name) != SSA_NAME)
1628 continue;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001629
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01001630 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
1631 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
1632 copy_name_p);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001633 SET_USE (use, copy);
1634 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001635}
1636
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001637/* Finds the ssa names used in STMT that are defined outside the
1638 region between ENTRY and EXIT and replaces such ssa names with
1639 their duplicates. The duplicates are stored to NAME_COPIES. Base
1640 decls of all ssa names used in STMT (including those defined in
1641 LOOP) are replaced with the new temporary variables; the
1642 replacement decls are stored in DECL_COPIES. */
1643
1644static bool
Trevor Saunders355fe082015-09-20 00:52:59 +00001645separate_decls_in_region_debug (gimple *stmt,
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001646 name_to_copy_table_type *name_copies,
1647 int_tree_htab_type *decl_copies)
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001648{
1649 use_operand_p use;
1650 ssa_op_iter oi;
1651 tree var, name;
1652 struct int_tree_map ielt;
1653 struct name_to_copy_elt elt;
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001654 name_to_copy_elt **slot;
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001655 int_tree_map *dslot;
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001656
Jakub Jelinekddb555e2011-06-22 12:41:58 +02001657 if (gimple_debug_bind_p (stmt))
1658 var = gimple_debug_bind_get_var (stmt);
1659 else if (gimple_debug_source_bind_p (stmt))
1660 var = gimple_debug_source_bind_get_var (stmt);
1661 else
1662 return true;
Jakub Jelinek598e67d2012-02-29 18:43:56 +01001663 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
Jakub Jelinek4f2a9af2009-10-14 19:05:45 +02001664 return true;
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001665 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
1666 ielt.uid = DECL_UID (var);
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001667 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001668 if (!dslot)
1669 return true;
Jakub Jelinekddb555e2011-06-22 12:41:58 +02001670 if (gimple_debug_bind_p (stmt))
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001671 gimple_debug_bind_set_var (stmt, dslot->to);
Jakub Jelinekddb555e2011-06-22 12:41:58 +02001672 else if (gimple_debug_source_bind_p (stmt))
Trevor Saunders84baa4b2014-06-24 13:21:53 +00001673 gimple_debug_source_bind_set_var (stmt, dslot->to);
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001674
1675 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
1676 {
1677 name = USE_FROM_PTR (use);
1678 if (TREE_CODE (name) != SSA_NAME)
1679 continue;
1680
1681 elt.version = SSA_NAME_VERSION (name);
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001682 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001683 if (!slot)
1684 {
1685 gimple_debug_bind_reset_value (stmt);
1686 update_stmt (stmt);
1687 break;
1688 }
1689
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001690 SET_USE (use, (*slot)->new_name);
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00001691 }
1692
1693 return false;
1694}
1695
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00001696/* Callback for htab_traverse. Adds a field corresponding to the reduction
1697 specified in SLOT. The type is passed in DATA. */
1698
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001699int
1700add_field_for_reduction (reduction_info **slot, tree type)
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001701{
H.J. Lub8698a02009-11-25 10:55:54 +00001702
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001703 struct reduction_info *const red = *slot;
Tom de Vries12efb1d2015-07-28 07:54:04 +00001704 tree var = reduc_stmt_res (red->reduc_stmt);
Richard Bieneraa06a972013-05-06 15:06:41 +00001705 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1706 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00001707
1708 insert_field_into_struct (type, field);
1709
1710 red->field = field;
1711
1712 return 1;
1713}
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001714
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001715/* Callback for htab_traverse. Adds a field corresponding to a ssa name
H.J. Lub8698a02009-11-25 10:55:54 +00001716 described in SLOT. The type is passed in DATA. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001717
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001718int
1719add_field_for_name (name_to_copy_elt **slot, tree type)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001720{
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001721 struct name_to_copy_elt *const elt = *slot;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001722 tree name = ssa_name (elt->version);
Richard Guenther70b5e7d2012-08-10 08:29:29 +00001723 tree field = build_decl (UNKNOWN_LOCATION,
1724 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1725 TREE_TYPE (name));
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001726
1727 insert_field_into_struct (type, field);
1728 elt->field = field;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001729
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001730 return 1;
1731}
1732
H.J. Lub8698a02009-11-25 10:55:54 +00001733/* Callback for htab_traverse. A local result is the intermediate result
1734 computed by a single
Ralf Wildenhuesfa10bee2008-06-06 05:42:00 +00001735 thread, or the initial value in case no iteration was executed.
H.J. Lub8698a02009-11-25 10:55:54 +00001736 This function creates a phi node reflecting these values.
1737 The phi's result will be stored in NEW_PHI field of the
1738 reduction's data structure. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001739
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001740int
Martin Sebor99b1c312019-07-09 18:32:49 +00001741create_phi_for_local_result (reduction_info **slot, class loop *loop)
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001742{
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001743 struct reduction_info *const reduc = *slot;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001744 edge e;
David Malcolm538dd0b2014-11-19 17:00:54 +00001745 gphi *new_phi;
Tom de Vriese67d7a12015-07-31 06:26:44 +00001746 basic_block store_bb, continue_bb;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001747 tree local_res;
David Malcolm620e5942018-11-13 20:05:03 +00001748 location_t locus;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001749
H.J. Lub8698a02009-11-25 10:55:54 +00001750 /* STORE_BB is the block where the phi
1751 should be stored. It is the destination of the loop exit.
Richard Biener726a9892008-07-28 14:33:56 +00001752 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
Tom de Vriese67d7a12015-07-31 06:26:44 +00001753 continue_bb = single_pred (loop->latch);
1754 store_bb = FALLTHRU_EDGE (continue_bb)->dest;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001755
1756 /* STORE_BB has two predecessors. One coming from the loop
1757 (the reduction's result is computed at the loop),
H.J. Lub8698a02009-11-25 10:55:54 +00001758 and another coming from a block preceding the loop,
1759 when no iterations
1760 are executed (the initial value should be taken). */
Tom de Vriese67d7a12015-07-31 06:26:44 +00001761 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001762 e = EDGE_PRED (store_bb, 1);
1763 else
1764 e = EDGE_PRED (store_bb, 0);
Tom de Vries12efb1d2015-07-28 07:54:04 +00001765 tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1766 local_res = copy_ssa_name (lhs);
Andrew MacLeodf5045c92009-07-30 18:36:30 +00001767 locus = gimple_location (reduc->reduc_stmt);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001768 new_phi = create_phi_node (local_res, store_bb);
Dehao Chen9e227d62012-07-16 11:08:21 +00001769 add_phi_arg (new_phi, reduc->init, e, locus);
Tom de Vriese67d7a12015-07-31 06:26:44 +00001770 add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001771 reduc->new_phi = new_phi;
1772
1773 return 1;
1774}
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001775
1776struct clsn_data
1777{
1778 tree store;
1779 tree load;
1780
1781 basic_block store_bb;
1782 basic_block load_bb;
1783};
1784
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001785/* Callback for htab_traverse. Create an atomic instruction for the
H.J. Lub8698a02009-11-25 10:55:54 +00001786 reduction described in SLOT.
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001787 DATA annotates the place in memory the atomic operation relates to,
1788 and the basic block it needs to be generated in. */
1789
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001790int
1791create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001792{
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001793 struct reduction_info *const reduc = *slot;
Richard Biener726a9892008-07-28 14:33:56 +00001794 gimple_stmt_iterator gsi;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001795 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001796 tree load_struct;
1797 basic_block bb;
1798 basic_block new_bb;
1799 edge e;
Jakub Jelinek0f900df2009-11-28 17:21:00 +01001800 tree t, addr, ref, x;
Richard Biener726a9892008-07-28 14:33:56 +00001801 tree tmp_load, name;
Trevor Saunders355fe082015-09-20 00:52:59 +00001802 gimple *load;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001803
Tom de Vries61d9c522016-01-18 12:52:32 +00001804 if (reduc->reduc_addr == NULL_TREE)
1805 {
1806 load_struct = build_simple_mem_ref (clsn_data->load);
1807 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001808
Tom de Vries61d9c522016-01-18 12:52:32 +00001809 addr = build_addr (t);
1810 }
1811 else
1812 {
1813 /* Set the address for the atomic store. */
1814 addr = reduc->reduc_addr;
1815
1816 /* Remove the non-atomic store '*addr = sum'. */
1817 tree res = PHI_RESULT (reduc->keep_res);
1818 use_operand_p use_p;
1819 gimple *stmt;
1820 bool single_use_p = single_imm_use (res, &use_p, &stmt);
1821 gcc_assert (single_use_p);
1822 replace_uses_by (gimple_vdef (stmt),
1823 gimple_vuse (stmt));
1824 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1825 gsi_remove (&gsi, true);
1826 }
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001827
1828 /* Create phi node. */
1829 bb = clsn_data->load_bb;
1830
Richard Bienerb13c9072015-03-12 08:48:32 +00001831 gsi = gsi_last_bb (bb);
1832 e = split_block (bb, gsi_stmt (gsi));
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001833 new_bb = e->dest;
1834
Jakub Jelinekb731b392014-11-29 12:35:30 +01001835 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1836 tmp_load = make_ssa_name (tmp_load);
Jakub Jelinek28567c42018-11-08 18:13:04 +01001837 load = gimple_build_omp_atomic_load (tmp_load, addr,
1838 OMP_MEMORY_ORDER_RELAXED);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001839 SSA_NAME_DEF_STMT (tmp_load) = load;
Richard Biener726a9892008-07-28 14:33:56 +00001840 gsi = gsi_start_bb (new_bb);
1841 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001842
1843 e = split_block (new_bb, load);
1844 new_bb = e->dest;
Richard Biener726a9892008-07-28 14:33:56 +00001845 gsi = gsi_start_bb (new_bb);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001846 ref = tmp_load;
Richard Biener726a9892008-07-28 14:33:56 +00001847 x = fold_build2 (reduc->reduction_code,
1848 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1849 PHI_RESULT (reduc->new_phi));
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001850
Richard Biener726a9892008-07-28 14:33:56 +00001851 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1852 GSI_CONTINUE_LINKING);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001853
Jakub Jelinek28567c42018-11-08 18:13:04 +01001854 gimple *store = gimple_build_omp_atomic_store (name,
1855 OMP_MEMORY_ORDER_RELAXED);
1856 gsi_insert_after (&gsi, store, GSI_NEW_STMT);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001857 return 1;
1858}
1859
H.J. Lub8698a02009-11-25 10:55:54 +00001860/* Create the atomic operation at the join point of the threads.
1861 REDUCTION_LIST describes the reductions in the LOOP.
1862 LD_ST_DATA describes the shared data structure where
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001863 shared data is stored in and loaded from. */
1864static void
Martin Sebor99b1c312019-07-09 18:32:49 +00001865create_call_for_reduction (class loop *loop,
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001866 reduction_info_table_type *reduction_list,
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001867 struct clsn_data *ld_st_data)
1868{
Martin Sebor99b1c312019-07-09 18:32:49 +00001869 reduction_list->traverse <class loop *, create_phi_for_local_result> (loop);
Richard Biener726a9892008-07-28 14:33:56 +00001870 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
Tom de Vriese67d7a12015-07-31 06:26:44 +00001871 basic_block continue_bb = single_pred (loop->latch);
1872 ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001873 reduction_list
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001874 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001875}
1876
Razya Ladelskyae0bce62007-12-18 11:21:48 +00001877/* Callback for htab_traverse. Loads the final reduction value at the
1878 join point of all threads, and inserts it in the right place. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001879
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001880int
1881create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001882{
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001883 struct reduction_info *const red = *slot;
Trevor Saunders355fe082015-09-20 00:52:59 +00001884 gimple *stmt;
Richard Biener726a9892008-07-28 14:33:56 +00001885 gimple_stmt_iterator gsi;
Tom de Vries12efb1d2015-07-28 07:54:04 +00001886 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001887 tree load_struct;
Razya Ladelskyae0bce62007-12-18 11:21:48 +00001888 tree name;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001889 tree x;
1890
Tom de Vries79855462015-07-16 11:51:28 +00001891 /* If there's no exit phi, the result of the reduction is unused. */
1892 if (red->keep_res == NULL)
1893 return 1;
1894
Richard Biener726a9892008-07-28 14:33:56 +00001895 gsi = gsi_after_labels (clsn_data->load_bb);
Richard Guenther70f34812010-07-01 08:49:19 +00001896 load_struct = build_simple_mem_ref (clsn_data->load);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001897 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1898 NULL_TREE);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001899
Razya Ladelskyae0bce62007-12-18 11:21:48 +00001900 x = load_struct;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001901 name = PHI_RESULT (red->keep_res);
Richard Biener726a9892008-07-28 14:33:56 +00001902 stmt = gimple_build_assign (name, x);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001903
Richard Biener726a9892008-07-28 14:33:56 +00001904 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001905
Richard Biener726a9892008-07-28 14:33:56 +00001906 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1907 !gsi_end_p (gsi); gsi_next (&gsi))
1908 if (gsi_stmt (gsi) == red->keep_res)
1909 {
1910 remove_phi_node (&gsi, false);
1911 return 1;
1912 }
1913 gcc_unreachable ();
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001914}
1915
H.J. Lub8698a02009-11-25 10:55:54 +00001916/* Load the reduction result that was stored in LD_ST_DATA.
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001917 REDUCTION_LIST describes the list of reductions that the
Ralf Wildenhuesfa10bee2008-06-06 05:42:00 +00001918 loads should be generated for. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001919static void
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001920create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001921 struct clsn_data *ld_st_data)
1922{
Richard Biener726a9892008-07-28 14:33:56 +00001923 gimple_stmt_iterator gsi;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001924 tree t;
Trevor Saunders355fe082015-09-20 00:52:59 +00001925 gimple *stmt;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001926
Richard Biener726a9892008-07-28 14:33:56 +00001927 gsi = gsi_after_labels (ld_st_data->load_bb);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001928 t = build_fold_addr_expr (ld_st_data->store);
Richard Biener726a9892008-07-28 14:33:56 +00001929 stmt = gimple_build_assign (ld_st_data->load, t);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001930
Richard Biener726a9892008-07-28 14:33:56 +00001931 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001932
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001933 reduction_list
Trevor Saundersc203e8a2014-06-24 13:21:35 +00001934 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001935
1936}
1937
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00001938/* Callback for htab_traverse. Store the neutral value for the
1939 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1940 1 for MULT_EXPR, etc. into the reduction field.
H.J. Lub8698a02009-11-25 10:55:54 +00001941 The reduction is specified in SLOT. The store information is
1942 passed in DATA. */
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00001943
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001944int
1945create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00001946{
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001947 struct reduction_info *const red = *slot;
Richard Biener726a9892008-07-28 14:33:56 +00001948 tree t;
Trevor Saunders355fe082015-09-20 00:52:59 +00001949 gimple *stmt;
Richard Biener726a9892008-07-28 14:33:56 +00001950 gimple_stmt_iterator gsi;
Tom de Vries12efb1d2015-07-28 07:54:04 +00001951 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
Richard Biener726a9892008-07-28 14:33:56 +00001952
1953 gsi = gsi_last_bb (clsn_data->store_bb);
1954 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1955 stmt = gimple_build_assign (t, red->initial_value);
Richard Biener726a9892008-07-28 14:33:56 +00001956 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00001957
1958 return 1;
1959}
1960
Razya Ladelskya509ebb2007-10-29 11:05:04 +00001961/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1962 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1963 specified in SLOT. */
1964
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001965int
1966create_loads_and_stores_for_name (name_to_copy_elt **slot,
1967 struct clsn_data *clsn_data)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001968{
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00001969 struct name_to_copy_elt *const elt = *slot;
Richard Biener726a9892008-07-28 14:33:56 +00001970 tree t;
Trevor Saunders355fe082015-09-20 00:52:59 +00001971 gimple *stmt;
Richard Biener726a9892008-07-28 14:33:56 +00001972 gimple_stmt_iterator gsi;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001973 tree type = TREE_TYPE (elt->new_name);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001974 tree load_struct;
1975
Richard Biener726a9892008-07-28 14:33:56 +00001976 gsi = gsi_last_bb (clsn_data->store_bb);
1977 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1978 stmt = gimple_build_assign (t, ssa_name (elt->version));
Richard Biener726a9892008-07-28 14:33:56 +00001979 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001980
Richard Biener726a9892008-07-28 14:33:56 +00001981 gsi = gsi_last_bb (clsn_data->load_bb);
Richard Guenther70f34812010-07-01 08:49:19 +00001982 load_struct = build_simple_mem_ref (clsn_data->load);
Richard Biener726a9892008-07-28 14:33:56 +00001983 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1984 stmt = gimple_build_assign (elt->new_name, t);
Richard Biener726a9892008-07-28 14:33:56 +00001985 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001986
1987 return 1;
1988}
1989
1990/* Moves all the variables used in LOOP and defined outside of it (including
1991 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1992 name) to a structure created for this purpose. The code
H.J. Lub8698a02009-11-25 10:55:54 +00001993
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02001994 while (1)
1995 {
1996 use (a);
1997 use (b);
1998 }
1999
2000 is transformed this way:
2001
2002 bb0:
2003 old.a = a;
2004 old.b = b;
2005
2006 bb1:
2007 a' = new->a;
2008 b' = new->b;
2009 while (1)
2010 {
2011 use (a');
2012 use (b');
2013 }
2014
2015 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
2016 pointer `new' is intentionally not initialized (the loop will be split to a
2017 separate function later, and `new' will be initialized from its arguments).
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002018 LD_ST_DATA holds information about the shared data structure used to pass
H.J. Lub8698a02009-11-25 10:55:54 +00002019 information among the threads. It is initialized here, and
2020 gen_parallel_loop will pass it to create_call_for_reduction that
2021 needs this information. REDUCTION_LIST describes the reductions
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002022 in LOOP. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002023
2024static void
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00002025separate_decls_in_region (edge entry, edge exit,
Trevor Saundersc203e8a2014-06-24 13:21:35 +00002026 reduction_info_table_type *reduction_list,
H.J. Lub8698a02009-11-25 10:55:54 +00002027 tree *arg_struct, tree *new_arg_struct,
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01002028 struct clsn_data *ld_st_data)
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002029
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002030{
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01002031 basic_block bb1 = split_edge (entry);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002032 basic_block bb0 = single_pred (bb1);
Trevor Saundersc203e8a2014-06-24 13:21:35 +00002033 name_to_copy_table_type name_copies (10);
2034 int_tree_htab_type decl_copies (10);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002035 unsigned i;
Richard Biener726a9892008-07-28 14:33:56 +00002036 tree type, type_name, nvar;
2037 gimple_stmt_iterator gsi;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002038 struct clsn_data clsn_data;
Trevor Saunders00f96dc2013-12-20 20:34:33 +00002039 auto_vec<basic_block, 3> body;
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01002040 basic_block bb;
2041 basic_block entry_bb = bb1;
2042 basic_block exit_bb = exit->dest;
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00002043 bool has_debug_stmt = false;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002044
Richard Biener726a9892008-07-28 14:33:56 +00002045 entry = single_succ_edge (entry_bb);
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01002046 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
2047
Diego Novillo9771b262012-11-17 21:54:30 -05002048 FOR_EACH_VEC_ELT (body, i, bb)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002049 {
H.J. Lub8698a02009-11-25 10:55:54 +00002050 if (bb != entry_bb && bb != exit_bb)
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01002051 {
Richard Biener726a9892008-07-28 14:33:56 +00002052 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2053 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
Trevor Saundersc203e8a2014-06-24 13:21:35 +00002054 &name_copies, &decl_copies);
Richard Biener726a9892008-07-28 14:33:56 +00002055
2056 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00002057 {
Trevor Saunders355fe082015-09-20 00:52:59 +00002058 gimple *stmt = gsi_stmt (gsi);
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00002059
2060 if (is_gimple_debug (stmt))
2061 has_debug_stmt = true;
2062 else
2063 separate_decls_in_region_stmt (entry, exit, stmt,
Trevor Saundersc203e8a2014-06-24 13:21:35 +00002064 &name_copies, &decl_copies);
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00002065 }
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01002066 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002067 }
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01002068
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00002069 /* Now process debug bind stmts. We must not create decls while
2070 processing debug stmts, so we defer their processing so as to
2071 make sure we will have debug info for as many variables as
2072 possible (all of those that were dealt with in the loop above),
2073 and discard those for which we know there's nothing we can
2074 do. */
2075 if (has_debug_stmt)
Diego Novillo9771b262012-11-17 21:54:30 -05002076 FOR_EACH_VEC_ELT (body, i, bb)
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00002077 if (bb != entry_bb && bb != exit_bb)
2078 {
2079 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2080 {
Trevor Saunders355fe082015-09-20 00:52:59 +00002081 gimple *stmt = gsi_stmt (gsi);
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00002082
Jakub Jelinekddb555e2011-06-22 12:41:58 +02002083 if (is_gimple_debug (stmt))
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00002084 {
Trevor Saundersc203e8a2014-06-24 13:21:35 +00002085 if (separate_decls_in_region_debug (stmt, &name_copies,
2086 &decl_copies))
Alexandre Olivab5b8b0a2009-09-02 02:42:21 +00002087 {
2088 gsi_remove (&gsi, true);
2089 continue;
2090 }
2091 }
2092
2093 gsi_next (&gsi);
2094 }
2095 }
2096
Martin Liskab119c052019-05-03 14:37:22 +02002097 if (name_copies.is_empty () && reduction_list->is_empty ())
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002098 {
2099 /* It may happen that there is nothing to copy (if there are only
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002100 loop carried and external variables in the loop). */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002101 *arg_struct = NULL;
2102 *new_arg_struct = NULL;
2103 }
2104 else
2105 {
2106 /* Create the type for the structure to store the ssa names to. */
2107 type = lang_hooks.types.make_type (RECORD_TYPE);
Jakub Jelinek9ff70652010-12-07 12:27:37 +01002108 type_name = build_decl (UNKNOWN_LOCATION,
Aldy Hernandezc2255bc2009-06-12 22:06:47 +00002109 TYPE_DECL, create_tmp_var_name (".paral_data"),
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002110 type);
2111 TYPE_NAME (type) = type_name;
2112
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00002113 name_copies.traverse <tree, add_field_for_name> (type);
Martin Liskab119c052019-05-03 14:37:22 +02002114 if (reduction_list && !reduction_list->is_empty ())
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00002115 {
2116 /* Create the fields for reductions. */
Trevor Saundersc203e8a2014-06-24 13:21:35 +00002117 reduction_list->traverse <tree, add_field_for_reduction> (type);
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00002118 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002119 layout_type (type);
H.J. Lub8698a02009-11-25 10:55:54 +00002120
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002121 /* Create the loads and stores. */
2122 *arg_struct = create_tmp_var (type, ".paral_data_store");
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002123 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
Jakub Jelinekb731b392014-11-29 12:35:30 +01002124 *new_arg_struct = make_ssa_name (nvar);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002125
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002126 ld_st_data->store = *arg_struct;
2127 ld_st_data->load = *new_arg_struct;
2128 ld_st_data->store_bb = bb0;
2129 ld_st_data->load_bb = bb1;
Razya Ladelsky0eb7e7a2007-11-06 10:29:12 +00002130
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00002131 name_copies
2132 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
2133 (ld_st_data);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002134
Razya Ladelskyae0bce62007-12-18 11:21:48 +00002135 /* Load the calculation from memory (after the join of the threads). */
2136
Martin Liskab119c052019-05-03 14:37:22 +02002137 if (reduction_list && !reduction_list->is_empty ())
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002138 {
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00002139 reduction_list
Trevor Saundersc203e8a2014-06-24 13:21:35 +00002140 ->traverse <struct clsn_data *, create_stores_for_reduction>
2141 (ld_st_data);
Jakub Jelinekb731b392014-11-29 12:35:30 +01002142 clsn_data.load = make_ssa_name (nvar);
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01002143 clsn_data.load_bb = exit->dest;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002144 clsn_data.store = ld_st_data->store;
2145 create_final_loads_for_reduction (reduction_list, &clsn_data);
2146 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002147 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002148}
2149
Tom de Vriesa79b7ec2015-03-21 10:14:10 +00002150/* Returns true if FN was created to run in parallel. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002151
Richard Guenther62e0a1e2012-06-22 12:29:33 +00002152bool
Tom de Vriesa79b7ec2015-03-21 10:14:10 +00002153parallelized_function_p (tree fndecl)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002154{
Tom de Vriesa79b7ec2015-03-21 10:14:10 +00002155 cgraph_node *node = cgraph_node::get (fndecl);
2156 gcc_assert (node != NULL);
2157 return node->parallelized_function;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002158}
2159
2160/* Creates and returns an empty function that will receive the body of
2161 a parallelized loop. */
2162
2163static tree
Jakub Jelinek9ff70652010-12-07 12:27:37 +01002164create_loop_fn (location_t loc)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002165{
2166 char buf[100];
2167 char *tname;
2168 tree decl, type, name, t;
2169 struct function *act_cfun = cfun;
2170 static unsigned loopfn_num;
2171
Dehao Chen53682242012-09-19 19:56:42 +00002172 loc = LOCATION_LOCUS (loc);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002173 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
2174 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
2175 clean_symbol_name (tname);
2176 name = get_identifier (tname);
2177 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2178
Jakub Jelinek9ff70652010-12-07 12:27:37 +01002179 decl = build_decl (loc, FUNCTION_DECL, name, type);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002180 TREE_STATIC (decl) = 1;
2181 TREE_USED (decl) = 1;
2182 DECL_ARTIFICIAL (decl) = 1;
2183 DECL_IGNORED_P (decl) = 0;
2184 TREE_PUBLIC (decl) = 0;
2185 DECL_UNINLINABLE (decl) = 1;
2186 DECL_EXTERNAL (decl) = 0;
2187 DECL_CONTEXT (decl) = NULL_TREE;
2188 DECL_INITIAL (decl) = make_node (BLOCK);
Richard Biener01771d42016-07-21 12:25:00 +00002189 BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002190
Jakub Jelinek9ff70652010-12-07 12:27:37 +01002191 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002192 DECL_ARTIFICIAL (t) = 1;
2193 DECL_IGNORED_P (t) = 1;
2194 DECL_RESULT (decl) = t;
2195
Jakub Jelinek9ff70652010-12-07 12:27:37 +01002196 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002197 ptr_type_node);
2198 DECL_ARTIFICIAL (t) = 1;
2199 DECL_ARG_TYPE (t) = ptr_type_node;
2200 DECL_CONTEXT (t) = decl;
2201 TREE_USED (t) = 1;
2202 DECL_ARGUMENTS (decl) = t;
2203
Andreas Krebbel182e0d72007-11-26 17:33:23 +00002204 allocate_struct_function (decl, false);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002205
2206 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
2207 it. */
Tom Tromey5576d6f2007-11-16 00:11:47 +00002208 set_cfun (act_cfun);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002209
2210 return decl;
2211}
2212
Tom de Vries7c82d822015-06-05 15:57:34 +00002213/* Replace uses of NAME by VAL in block BB. */
H.J. Lub8698a02009-11-25 10:55:54 +00002214
Tom de Vries7c82d822015-06-05 15:57:34 +00002215static void
2216replace_uses_in_bb_by (tree name, tree val, basic_block bb)
2217{
Trevor Saunders355fe082015-09-20 00:52:59 +00002218 gimple *use_stmt;
Tom de Vries7c82d822015-06-05 15:57:34 +00002219 imm_use_iterator imm_iter;
2220
2221 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
2222 {
2223 if (gimple_bb (use_stmt) != bb)
2224 continue;
2225
2226 use_operand_p use_p;
2227 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2228 SET_USE (use_p, val);
2229 }
2230}
2231
Tom de Vries7c82d822015-06-05 15:57:34 +00002232/* Do transformation from:
2233
2234 <bb preheader>:
2235 ...
2236 goto <bb header>
2237
2238 <bb header>:
2239 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2240 sum_a = PHI <sum_init (preheader), sum_b (latch)>
2241 ...
2242 use (ivtmp_a)
2243 ...
2244 sum_b = sum_a + sum_update
2245 ...
2246 if (ivtmp_a < n)
2247 goto <bb latch>;
2248 else
2249 goto <bb exit>;
2250
2251 <bb latch>:
2252 ivtmp_b = ivtmp_a + 1;
2253 goto <bb header>
2254
2255 <bb exit>:
Tom de Vries712cb0b2015-07-07 16:25:22 +00002256 sum_z = PHI <sum_b (cond[1]), ...>
Tom de Vries7c82d822015-06-05 15:57:34 +00002257
2258 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
2259 that's <bb header>.
2260
2261 to:
2262
2263 <bb preheader>:
2264 ...
2265 goto <bb newheader>
2266
2267 <bb header>:
2268 ivtmp_a = PHI <ivtmp_c (latch)>
2269 sum_a = PHI <sum_c (latch)>
2270 ...
2271 use (ivtmp_a)
2272 ...
2273 sum_b = sum_a + sum_update
2274 ...
2275 goto <bb latch>;
2276
2277 <bb newheader>:
2278 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2279 sum_c = PHI <sum_init (preheader), sum_b (latch)>
2280 if (ivtmp_c < n + 1)
2281 goto <bb header>;
2282 else
Tom de Vries712cb0b2015-07-07 16:25:22 +00002283 goto <bb newexit>;
Tom de Vries7c82d822015-06-05 15:57:34 +00002284
2285 <bb latch>:
2286 ivtmp_b = ivtmp_a + 1;
2287 goto <bb newheader>
2288
Tom de Vries712cb0b2015-07-07 16:25:22 +00002289 <bb newexit>:
2290 sum_y = PHI <sum_c (newheader)>
2291
Tom de Vries7c82d822015-06-05 15:57:34 +00002292 <bb exit>:
Tom de Vries712cb0b2015-07-07 16:25:22 +00002293 sum_z = PHI <sum_y (newexit), ...>
Tom de Vries7c82d822015-06-05 15:57:34 +00002294
2295
2296 In unified diff format:
2297
2298 <bb preheader>:
2299 ...
2300- goto <bb header>
2301+ goto <bb newheader>
2302
2303 <bb header>:
2304- ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2305- sum_a = PHI <sum_init (preheader), sum_b (latch)>
2306+ ivtmp_a = PHI <ivtmp_c (latch)>
2307+ sum_a = PHI <sum_c (latch)>
2308 ...
2309 use (ivtmp_a)
2310 ...
2311 sum_b = sum_a + sum_update
2312 ...
2313- if (ivtmp_a < n)
2314- goto <bb latch>;
2315+ goto <bb latch>;
2316+
2317+ <bb newheader>:
2318+ ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2319+ sum_c = PHI <sum_init (preheader), sum_b (latch)>
2320+ if (ivtmp_c < n + 1)
2321+ goto <bb header>;
2322 else
2323 goto <bb exit>;
2324
2325 <bb latch>:
2326 ivtmp_b = ivtmp_a + 1;
2327- goto <bb header>
2328+ goto <bb newheader>
2329
Tom de Vries712cb0b2015-07-07 16:25:22 +00002330+ <bb newexit>:
2331+ sum_y = PHI <sum_c (newheader)>
2332
Tom de Vries7c82d822015-06-05 15:57:34 +00002333 <bb exit>:
Tom de Vries712cb0b2015-07-07 16:25:22 +00002334- sum_z = PHI <sum_b (cond[1]), ...>
2335+ sum_z = PHI <sum_y (newexit), ...>
Tom de Vries7c82d822015-06-05 15:57:34 +00002336
2337 Note: the example does not show any virtual phis, but these are handled more
2338 or less as reductions.
2339
2340
2341 Moves the exit condition of LOOP to the beginning of its header.
2342 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
2343 bound. */
2344
2345static void
Martin Sebor99b1c312019-07-09 18:32:49 +00002346transform_to_exit_first_loop_alt (class loop *loop,
Tom de Vries7c82d822015-06-05 15:57:34 +00002347 reduction_info_table_type *reduction_list,
2348 tree bound)
2349{
2350 basic_block header = loop->header;
2351 basic_block latch = loop->latch;
2352 edge exit = single_dom_exit (loop);
2353 basic_block exit_block = exit->dest;
2354 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2355 tree control = gimple_cond_lhs (cond_stmt);
2356 edge e;
2357
Tom de Vries338392e2015-07-07 16:25:12 +00002358 /* Rewriting virtuals into loop-closed ssa normal form makes this
2359 transformation simpler. It also ensures that the virtuals are in
2360 loop-closed ssa normal from after the transformation, which is required by
2361 create_parallel_loop. */
2362 rewrite_virtuals_into_loop_closed_ssa (loop);
Tom de Vries7c82d822015-06-05 15:57:34 +00002363
2364 /* Create the new_header block. */
2365 basic_block new_header = split_block_before_cond_jump (exit->src);
Tom de Vries712cb0b2015-07-07 16:25:22 +00002366 edge edge_at_split = single_pred_edge (new_header);
Tom de Vries7c82d822015-06-05 15:57:34 +00002367
2368 /* Redirect entry edge to new_header. */
2369 edge entry = loop_preheader_edge (loop);
2370 e = redirect_edge_and_branch (entry, new_header);
2371 gcc_assert (e == entry);
2372
2373 /* Redirect post_inc_edge to new_header. */
2374 edge post_inc_edge = single_succ_edge (latch);
2375 e = redirect_edge_and_branch (post_inc_edge, new_header);
2376 gcc_assert (e == post_inc_edge);
2377
2378 /* Redirect post_cond_edge to header. */
2379 edge post_cond_edge = single_pred_edge (latch);
2380 e = redirect_edge_and_branch (post_cond_edge, header);
2381 gcc_assert (e == post_cond_edge);
2382
Tom de Vries712cb0b2015-07-07 16:25:22 +00002383 /* Redirect edge_at_split to latch. */
2384 e = redirect_edge_and_branch (edge_at_split, latch);
2385 gcc_assert (e == edge_at_split);
Tom de Vries7c82d822015-06-05 15:57:34 +00002386
2387 /* Set the new loop bound. */
2388 gimple_cond_set_rhs (cond_stmt, bound);
Tom de Vries5a5fd952015-06-22 16:26:16 +00002389 update_stmt (cond_stmt);
Tom de Vries7c82d822015-06-05 15:57:34 +00002390
2391 /* Repair the ssa. */
2392 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
2393 edge_var_map *vm;
2394 gphi_iterator gsi;
Tom de Vries338392e2015-07-07 16:25:12 +00002395 int i;
Tom de Vries7c82d822015-06-05 15:57:34 +00002396 for (gsi = gsi_start_phis (header), i = 0;
2397 !gsi_end_p (gsi) && v->iterate (i, &vm);
2398 gsi_next (&gsi), i++)
2399 {
2400 gphi *phi = gsi.phi ();
2401 tree res_a = PHI_RESULT (phi);
2402
2403 /* Create new phi. */
2404 tree res_c = copy_ssa_name (res_a, phi);
2405 gphi *nphi = create_phi_node (res_c, new_header);
2406
2407 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
2408 replace_uses_in_bb_by (res_a, res_c, new_header);
2409
2410 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
2411 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
2412
Tom de Vries338392e2015-07-07 16:25:12 +00002413 /* Replace sum_b with sum_c in exit phi. */
Tom de Vries7c82d822015-06-05 15:57:34 +00002414 tree res_b = redirect_edge_var_map_def (vm);
Tom de Vries338392e2015-07-07 16:25:12 +00002415 replace_uses_in_bb_by (res_b, res_c, exit_block);
Tom de Vries7c82d822015-06-05 15:57:34 +00002416
2417 struct reduction_info *red = reduction_phi (reduction_list, phi);
2418 gcc_assert (virtual_operand_p (res_a)
2419 || res_a == control
2420 || red != NULL);
2421
2422 if (red)
2423 {
2424 /* Register the new reduction phi. */
2425 red->reduc_phi = nphi;
2426 gimple_set_uid (red->reduc_phi, red->reduc_version);
2427 }
2428 }
2429 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
Tom de Vries7c82d822015-06-05 15:57:34 +00002430
2431 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
2432 flush_pending_stmts (entry);
2433
2434 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
2435 flush_pending_stmts (post_inc_edge);
2436
Tom de Vriesd42ba2d2015-11-11 20:22:12 +00002437
2438 basic_block new_exit_block = NULL;
2439 if (!single_pred_p (exit->dest))
2440 {
2441 /* Create a new empty exit block, inbetween the new loop header and the
2442 old exit block. The function separate_decls_in_region needs this block
2443 to insert code that is active on loop exit, but not any other path. */
2444 new_exit_block = split_edge (exit);
2445 }
Tom de Vries712cb0b2015-07-07 16:25:22 +00002446
2447 /* Insert and register the reduction exit phis. */
Tom de Vries7c82d822015-06-05 15:57:34 +00002448 for (gphi_iterator gsi = gsi_start_phis (exit_block);
2449 !gsi_end_p (gsi);
2450 gsi_next (&gsi))
2451 {
2452 gphi *phi = gsi.phi ();
Tom de Vriesd42ba2d2015-11-11 20:22:12 +00002453 gphi *nphi = NULL;
Tom de Vries7c82d822015-06-05 15:57:34 +00002454 tree res_z = PHI_RESULT (phi);
Tom de Vriesd42ba2d2015-11-11 20:22:12 +00002455 tree res_c;
Tom de Vries712cb0b2015-07-07 16:25:22 +00002456
Tom de Vriesd42ba2d2015-11-11 20:22:12 +00002457 if (new_exit_block != NULL)
2458 {
2459 /* Now that we have a new exit block, duplicate the phi of the old
2460 exit block in the new exit block to preserve loop-closed ssa. */
2461 edge succ_new_exit_block = single_succ_edge (new_exit_block);
2462 edge pred_new_exit_block = single_pred_edge (new_exit_block);
2463 tree res_y = copy_ssa_name (res_z, phi);
2464 nphi = create_phi_node (res_y, new_exit_block);
2465 res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
2466 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
2467 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
2468 }
2469 else
2470 res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
Tom de Vries712cb0b2015-07-07 16:25:22 +00002471
Tom de Vries7c82d822015-06-05 15:57:34 +00002472 if (virtual_operand_p (res_z))
2473 continue;
2474
Trevor Saunders355fe082015-09-20 00:52:59 +00002475 gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
Tom de Vries7c82d822015-06-05 15:57:34 +00002476 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
2477 if (red != NULL)
Tom de Vriesd42ba2d2015-11-11 20:22:12 +00002478 red->keep_res = (nphi != NULL
2479 ? nphi
2480 : phi);
Tom de Vries7c82d822015-06-05 15:57:34 +00002481 }
2482
2483 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
2484 then we're still using some fields, so only bother about fields that are
2485 still used: header and latch.
2486 The loop has a new header bb, so we update it. The latch bb stays the
2487 same. */
2488 loop->header = new_header;
2489
2490 /* Recalculate dominance info. */
2491 free_dominance_info (CDI_DOMINATORS);
2492 calculate_dominance_info (CDI_DOMINATORS);
Tom de Vries4a4b6c42015-11-06 13:21:51 +00002493
2494 checking_verify_ssa (true, true);
Tom de Vries7c82d822015-06-05 15:57:34 +00002495}
2496
2497/* Tries to moves the exit condition of LOOP to the beginning of its header
2498 without duplication of the loop body. NIT is the number of iterations of the
2499 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
2500 transformation is successful. */
2501
2502static bool
Martin Sebor99b1c312019-07-09 18:32:49 +00002503try_transform_to_exit_first_loop_alt (class loop *loop,
Tom de Vries7c82d822015-06-05 15:57:34 +00002504 reduction_info_table_type *reduction_list,
2505 tree nit)
2506{
2507 /* Check whether the latch contains a single statement. */
Tom de Vries1b7f61e2015-06-08 11:53:27 +00002508 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
2509 return false;
Tom de Vries7c82d822015-06-05 15:57:34 +00002510
Tom de Vriesd95167e2016-01-11 09:38:28 +00002511 /* Check whether the latch contains no phis. */
2512 if (phi_nodes (loop->latch) != NULL)
2513 return false;
2514
Tom de Vries7c82d822015-06-05 15:57:34 +00002515 /* Check whether the latch contains the loop iv increment. */
2516 edge back = single_succ_edge (loop->latch);
2517 edge exit = single_dom_exit (loop);
2518 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2519 tree control = gimple_cond_lhs (cond_stmt);
2520 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
2521 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
2522 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
2523 return false;
2524
2525 /* Check whether there's no code between the loop condition and the latch. */
2526 if (!single_pred_p (loop->latch)
2527 || single_pred (loop->latch) != exit->src)
2528 return false;
2529
2530 tree alt_bound = NULL_TREE;
2531 tree nit_type = TREE_TYPE (nit);
2532
2533 /* Figure out whether nit + 1 overflows. */
2534 if (TREE_CODE (nit) == INTEGER_CST)
2535 {
Nathan Sidwellff22eb12017-07-18 13:22:50 +00002536 if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
Tom de Vries7c82d822015-06-05 15:57:34 +00002537 {
2538 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
2539 nit, build_one_cst (nit_type));
2540
2541 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
Tom de Vriesfd7b3ef2015-06-29 13:53:32 +00002542 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2543 return true;
Tom de Vries7c82d822015-06-05 15:57:34 +00002544 }
2545 else
2546 {
2547 /* Todo: Figure out if we can trigger this, if it's worth to handle
2548 optimally, and if we can handle it optimally. */
Tom de Vriesfd7b3ef2015-06-29 13:53:32 +00002549 return false;
Tom de Vries7c82d822015-06-05 15:57:34 +00002550 }
2551 }
Tom de Vriesfd7b3ef2015-06-29 13:53:32 +00002552
2553 gcc_assert (TREE_CODE (nit) == SSA_NAME);
2554
Tom de Vries4f75d602015-06-30 08:35:57 +00002555 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
2556 iv with base 0 and step 1 that is incremented in the latch, like this:
2557
2558 <bb header>:
2559 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
2560 ...
2561 if (iv_1 < nit)
2562 goto <bb latch>;
2563 else
2564 goto <bb exit>;
2565
2566 <bb latch>:
2567 iv_2 = iv_1 + 1;
2568 goto <bb header>;
2569
2570 The range of iv_1 is [0, nit]. The latch edge is taken for
2571 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
2572 number of latch executions is equal to nit.
2573
2574 The function max_loop_iterations gives us the maximum number of latch
2575 executions, so it gives us the maximum value of nit. */
2576 widest_int nit_max;
2577 if (!max_loop_iterations (loop, &nit_max))
2578 return false;
2579
2580 /* Check if nit + 1 overflows. */
Nathan Sidwellff22eb12017-07-18 13:22:50 +00002581 widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
Richard Sandiford032c80e2016-05-02 09:39:09 +00002582 if (nit_max >= type_max)
Tom de Vries4f75d602015-06-30 08:35:57 +00002583 return false;
2584
Trevor Saunders355fe082015-09-20 00:52:59 +00002585 gimple *def = SSA_NAME_DEF_STMT (nit);
Tom de Vriesfd7b3ef2015-06-29 13:53:32 +00002586
Tom de Vries4f75d602015-06-30 08:35:57 +00002587 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
Tom de Vriesfd7b3ef2015-06-29 13:53:32 +00002588 if (def
2589 && is_gimple_assign (def)
2590 && gimple_assign_rhs_code (def) == PLUS_EXPR)
Tom de Vries7c82d822015-06-05 15:57:34 +00002591 {
Tom de Vriesfd7b3ef2015-06-29 13:53:32 +00002592 tree op1 = gimple_assign_rhs1 (def);
2593 tree op2 = gimple_assign_rhs2 (def);
2594 if (integer_minus_onep (op1))
2595 alt_bound = op2;
2596 else if (integer_minus_onep (op2))
2597 alt_bound = op1;
Tom de Vries7c82d822015-06-05 15:57:34 +00002598 }
2599
Tom de Vries9f620bf2015-07-10 08:25:18 +00002600 /* If not found, insert nit + 1. */
Tom de Vries7c82d822015-06-05 15:57:34 +00002601 if (alt_bound == NULL_TREE)
Tom de Vries9f620bf2015-07-10 08:25:18 +00002602 {
2603 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
2604 build_int_cst_type (nit_type, 1));
2605
2606 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
2607
2608 alt_bound
2609 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
2610 GSI_CONTINUE_LINKING);
2611 }
Tom de Vries7c82d822015-06-05 15:57:34 +00002612
2613 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2614 return true;
2615}
2616
2617/* Moves the exit condition of LOOP to the beginning of its header. NIT is the
2618 number of iterations of the loop. REDUCTION_LIST describes the reductions in
2619 LOOP. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002620
2621static void
Martin Sebor99b1c312019-07-09 18:32:49 +00002622transform_to_exit_first_loop (class loop *loop,
Trevor Saundersc203e8a2014-06-24 13:21:35 +00002623 reduction_info_table_type *reduction_list,
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00002624 tree nit)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002625{
2626 basic_block *bbs, *nbbs, ex_bb, orig_header;
2627 unsigned n;
2628 bool ok;
2629 edge exit = single_dom_exit (loop), hpred;
Richard Biener726a9892008-07-28 14:33:56 +00002630 tree control, control_name, res, t;
David Malcolm538dd0b2014-11-19 17:00:54 +00002631 gphi *phi, *nphi;
2632 gassign *stmt;
2633 gcond *cond_stmt, *cond_nit;
Razya Ladelsky48710222009-10-22 14:43:40 +00002634 tree nit_1;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002635
2636 split_block_after_labels (loop->header);
2637 orig_header = single_succ (loop->header);
2638 hpred = single_succ_edge (loop->header);
2639
David Malcolm538dd0b2014-11-19 17:00:54 +00002640 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
Richard Biener726a9892008-07-28 14:33:56 +00002641 control = gimple_cond_lhs (cond_stmt);
2642 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002643
2644 /* Make sure that we have phi nodes on exit for all loop header phis
2645 (create_parallel_loop requires that). */
David Malcolm538dd0b2014-11-19 17:00:54 +00002646 for (gphi_iterator gsi = gsi_start_phis (loop->header);
2647 !gsi_end_p (gsi);
2648 gsi_next (&gsi))
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002649 {
David Malcolm538dd0b2014-11-19 17:00:54 +00002650 phi = gsi.phi ();
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002651 res = PHI_RESULT (phi);
Richard Guenther070ecdf2012-08-07 14:17:44 +00002652 t = copy_ssa_name (res, phi);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002653 SET_PHI_RESULT (phi, t);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002654 nphi = create_phi_node (res, orig_header);
Dehao Chen9e227d62012-07-16 11:08:21 +00002655 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002656
2657 if (res == control)
2658 {
Richard Biener726a9892008-07-28 14:33:56 +00002659 gimple_cond_set_lhs (cond_stmt, t);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002660 update_stmt (cond_stmt);
2661 control = t;
2662 }
2663 }
Razya Ladelsky12037892011-07-05 13:08:01 +00002664
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002665 bbs = get_loop_body_in_dom_order (loop);
Razya Ladelsky48710222009-10-22 14:43:40 +00002666
Razya Ladelsky69958392012-04-22 10:36:13 +00002667 for (n = 0; bbs[n] != exit->src; n++)
2668 continue;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002669 nbbs = XNEWVEC (basic_block, n);
Richard Biener726a9892008-07-28 14:33:56 +00002670 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
2671 bbs + 1, n, nbbs);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002672 gcc_assert (ok);
2673 free (bbs);
2674 ex_bb = nbbs[0];
2675 free (nbbs);
2676
H.J. Lub8698a02009-11-25 10:55:54 +00002677 /* Other than reductions, the only gimple reg that should be copied
Richard Biener726a9892008-07-28 14:33:56 +00002678 out of the loop is the control variable. */
Razya Ladelsky69958392012-04-22 10:36:13 +00002679 exit = single_dom_exit (loop);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002680 control_name = NULL_TREE;
David Malcolm538dd0b2014-11-19 17:00:54 +00002681 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
2682 !gsi_end_p (gsi); )
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002683 {
David Malcolm538dd0b2014-11-19 17:00:54 +00002684 phi = gsi.phi ();
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002685 res = PHI_RESULT (phi);
Richard Guentherea057352012-08-14 14:16:18 +00002686 if (virtual_operand_p (res))
Richard Biener726a9892008-07-28 14:33:56 +00002687 {
2688 gsi_next (&gsi);
2689 continue;
2690 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002691
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002692 /* Check if it is a part of reduction. If it is,
H.J. Lub8698a02009-11-25 10:55:54 +00002693 keep the phi at the reduction's keep_res field. The
2694 PHI_RESULT of this phi is the resulting value of the reduction
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002695 variable when exiting the loop. */
2696
Martin Liskab119c052019-05-03 14:37:22 +02002697 if (!reduction_list->is_empty ())
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002698 {
2699 struct reduction_info *red;
2700
2701 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002702 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
2703 if (red)
Richard Biener726a9892008-07-28 14:33:56 +00002704 {
2705 red->keep_res = phi;
2706 gsi_next (&gsi);
2707 continue;
2708 }
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002709 }
Richard Biener726a9892008-07-28 14:33:56 +00002710 gcc_assert (control_name == NULL_TREE
2711 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002712 control_name = res;
Richard Biener726a9892008-07-28 14:33:56 +00002713 remove_phi_node (&gsi, false);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002714 }
2715 gcc_assert (control_name != NULL_TREE);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002716
H.J. Lub8698a02009-11-25 10:55:54 +00002717 /* Initialize the control variable to number of iterations
Razya Ladelsky48710222009-10-22 14:43:40 +00002718 according to the rhs of the exit condition. */
David Malcolm538dd0b2014-11-19 17:00:54 +00002719 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
2720 cond_nit = as_a <gcond *> (last_stmt (exit->src));
Razya Ladelsky48710222009-10-22 14:43:40 +00002721 nit_1 = gimple_cond_rhs (cond_nit);
2722 nit_1 = force_gimple_operand_gsi (&gsi,
2723 fold_convert (TREE_TYPE (control_name), nit_1),
Richard Biener726a9892008-07-28 14:33:56 +00002724 false, NULL_TREE, false, GSI_SAME_STMT);
Razya Ladelsky48710222009-10-22 14:43:40 +00002725 stmt = gimple_build_assign (control_name, nit_1);
Richard Biener726a9892008-07-28 14:33:56 +00002726 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002727}
2728
2729/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
Richard Biener726a9892008-07-28 14:33:56 +00002730 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002731 NEW_DATA is the variable that should be initialized from the argument
Thomas Schwingef99c3552016-02-23 16:07:54 +01002732 of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
2733 that number is to be determined later. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002734
Tom de Vriesa6f4d492015-11-11 20:22:22 +00002735static void
Martin Sebor99b1c312019-07-09 18:32:49 +00002736create_parallel_loop (class loop *loop, tree loop_fn, tree data,
Tom de Vries61d9c522016-01-18 12:52:32 +00002737 tree new_data, unsigned n_threads, location_t loc,
2738 bool oacc_kernels_p)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002739{
Richard Biener726a9892008-07-28 14:33:56 +00002740 gimple_stmt_iterator gsi;
Tom de Vries61d9c522016-01-18 12:52:32 +00002741 basic_block for_bb, ex_bb, continue_bb;
Jakub Jelinek0f900df2009-11-28 17:21:00 +01002742 tree t, param;
David Malcolm538dd0b2014-11-19 17:00:54 +00002743 gomp_parallel *omp_par_stmt;
Trevor Saunders355fe082015-09-20 00:52:59 +00002744 gimple *omp_return_stmt1, *omp_return_stmt2;
2745 gimple *phi;
David Malcolm538dd0b2014-11-19 17:00:54 +00002746 gcond *cond_stmt;
2747 gomp_for *for_stmt;
2748 gomp_continue *omp_cont_stmt;
Richard Biener726a9892008-07-28 14:33:56 +00002749 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002750 edge exit, nexit, guard, end, e;
2751
Tom de Vries61d9c522016-01-18 12:52:32 +00002752 if (oacc_kernels_p)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002753 {
Thomas Schwinge25651632017-05-12 11:02:55 +02002754 gcc_checking_assert (lookup_attribute ("oacc kernels",
2755 DECL_ATTRIBUTES (cfun->decl)));
Thomas Schwingeb0f271c2017-05-12 11:18:34 +02002756 /* Indicate to later processing that this is a parallelized OpenACC
2757 kernels construct. */
2758 DECL_ATTRIBUTES (cfun->decl)
2759 = tree_cons (get_identifier ("oacc kernels parallelized"),
2760 NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002761 }
Tom de Vries61d9c522016-01-18 12:52:32 +00002762 else
2763 {
Thomas Schwingeb0f271c2017-05-12 11:18:34 +02002764 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
2765
Tom de Vries61d9c522016-01-18 12:52:32 +00002766 basic_block bb = loop_preheader_edge (loop)->src;
2767 basic_block paral_bb = single_pred (bb);
2768 gsi = gsi_last_bb (paral_bb);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002769
Thomas Schwingef99c3552016-02-23 16:07:54 +01002770 gcc_checking_assert (n_threads != 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00002771 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2772 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2773 = build_int_cst (integer_type_node, n_threads);
2774 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2775 gimple_set_location (omp_par_stmt, loc);
2776
2777 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2778
2779 /* Initialize NEW_DATA. */
2780 if (data)
2781 {
2782 gassign *assign_stmt;
2783
2784 gsi = gsi_after_labels (bb);
2785
2786 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2787 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2788 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2789
2790 assign_stmt = gimple_build_assign (new_data,
2791 fold_convert (TREE_TYPE (new_data), param));
2792 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2793 }
2794
2795 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2796 bb = split_loop_exit_edge (single_dom_exit (loop));
2797 gsi = gsi_last_bb (bb);
2798 omp_return_stmt1 = gimple_build_omp_return (false);
2799 gimple_set_location (omp_return_stmt1, loc);
2800 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2801 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002802
Richard Biener726a9892008-07-28 14:33:56 +00002803 /* Extract data for GIMPLE_OMP_FOR. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002804 gcc_assert (loop->header == single_dom_exit (loop)->src);
David Malcolm538dd0b2014-11-19 17:00:54 +00002805 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002806
Richard Biener726a9892008-07-28 14:33:56 +00002807 cvar = gimple_cond_lhs (cond_stmt);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002808 cvar_base = SSA_NAME_VAR (cvar);
2809 phi = SSA_NAME_DEF_STMT (cvar);
2810 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
Jakub Jelinekb731b392014-11-29 12:35:30 +01002811 initvar = copy_ssa_name (cvar);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002812 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2813 initvar);
2814 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2815
Jakub Jelinek1dff453d2010-10-20 23:15:49 +02002816 gsi = gsi_last_nondebug_bb (loop->latch);
Richard Biener726a9892008-07-28 14:33:56 +00002817 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2818 gsi_remove (&gsi, true);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002819
2820 /* Prepare cfg. */
2821 for_bb = split_edge (loop_preheader_edge (loop));
2822 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2823 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2824 gcc_assert (exit == single_dom_exit (loop));
2825
2826 guard = make_edge (for_bb, ex_bb, 0);
Jan Hubicka357067f2017-06-29 18:40:53 +02002827 /* FIXME: What is the probability? */
2828 guard->probability = profile_probability::guessed_never ();
Tom de Vriese67d7a12015-07-31 06:26:44 +00002829 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2830 loop->latch = split_edge (single_succ_edge (loop->latch));
2831 single_pred_edge (loop->latch)->flags = 0;
Jan Hubicka357067f2017-06-29 18:40:53 +02002832 end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
Tom de Vriese67d7a12015-07-31 06:26:44 +00002833 rescan_loop_exit (end, true, false);
2834
David Malcolm538dd0b2014-11-19 17:00:54 +00002835 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2836 !gsi_end_p (gpi); gsi_next (&gpi))
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002837 {
David Malcolm620e5942018-11-13 20:05:03 +00002838 location_t locus;
David Malcolm538dd0b2014-11-19 17:00:54 +00002839 gphi *phi = gpi.phi ();
Tom de Vries7781d262015-07-16 11:51:38 +00002840 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
Trevor Saunders355fe082015-09-20 00:52:59 +00002841 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
David Malcolm538dd0b2014-11-19 17:00:54 +00002842
Tom de Vries7781d262015-07-16 11:51:38 +00002843 /* If the exit phi is not connected to a header phi in the same loop, this
2844 value is not modified in the loop, and we're done with this phi. */
2845 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2846 && gimple_bb (def_stmt) == loop->header))
Tom de Vries1c5211b2016-01-11 12:08:38 +00002847 {
2848 locus = gimple_phi_arg_location_from_edge (phi, exit);
2849 add_phi_arg (phi, def, guard, locus);
2850 add_phi_arg (phi, def, end, locus);
2851 continue;
2852 }
Andrew MacLeodf5045c92009-07-30 18:36:30 +00002853
Tom de Vries7781d262015-07-16 11:51:38 +00002854 gphi *stmt = as_a <gphi *> (def_stmt);
Andrew MacLeodf5045c92009-07-30 18:36:30 +00002855 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
H.J. Lub8698a02009-11-25 10:55:54 +00002856 locus = gimple_phi_arg_location_from_edge (stmt,
Andrew MacLeodf5045c92009-07-30 18:36:30 +00002857 loop_preheader_edge (loop));
Dehao Chen9e227d62012-07-16 11:08:21 +00002858 add_phi_arg (phi, def, guard, locus);
Andrew MacLeodf5045c92009-07-30 18:36:30 +00002859
2860 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2861 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
Dehao Chen9e227d62012-07-16 11:08:21 +00002862 add_phi_arg (phi, def, end, locus);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002863 }
2864 e = redirect_edge_and_branch (exit, nexit->dest);
2865 PENDING_STMT (e) = NULL;
2866
Richard Biener726a9892008-07-28 14:33:56 +00002867 /* Emit GIMPLE_OMP_FOR. */
Tom de Vries61d9c522016-01-18 12:52:32 +00002868 if (oacc_kernels_p)
Thomas Schwingeb0f271c2017-05-12 11:18:34 +02002869 /* Parallelized OpenACC kernels constructs use gang parallelism. See also
2870 omp-offload.c:execute_oacc_device_lower. */
Tom de Vries61d9c522016-01-18 12:52:32 +00002871 t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2872 else
2873 {
2874 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
Martin Liska028d4092019-11-12 11:08:40 +01002875 int chunk_size = param_parloops_chunk_size;
2876 switch (param_parloops_schedule)
Tom de Vries61d9c522016-01-18 12:52:32 +00002877 {
Martin Liska028d4092019-11-12 11:08:40 +01002878 case PARLOOPS_SCHEDULE_STATIC:
Tom de Vries61d9c522016-01-18 12:52:32 +00002879 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2880 break;
Martin Liska028d4092019-11-12 11:08:40 +01002881 case PARLOOPS_SCHEDULE_DYNAMIC:
Tom de Vries61d9c522016-01-18 12:52:32 +00002882 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2883 break;
Martin Liska028d4092019-11-12 11:08:40 +01002884 case PARLOOPS_SCHEDULE_GUIDED:
Tom de Vries61d9c522016-01-18 12:52:32 +00002885 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2886 break;
Martin Liska028d4092019-11-12 11:08:40 +01002887 case PARLOOPS_SCHEDULE_AUTO:
Tom de Vries61d9c522016-01-18 12:52:32 +00002888 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2889 chunk_size = 0;
2890 break;
Martin Liska028d4092019-11-12 11:08:40 +01002891 case PARLOOPS_SCHEDULE_RUNTIME:
Tom de Vries61d9c522016-01-18 12:52:32 +00002892 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2893 chunk_size = 0;
2894 break;
2895 default:
2896 gcc_unreachable ();
2897 }
2898 if (chunk_size != 0)
2899 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2900 = build_int_cst (integer_type_node, chunk_size);
2901 }
2902
2903 for_stmt = gimple_build_omp_for (NULL,
2904 (oacc_kernels_p
2905 ? GF_OMP_FOR_KIND_OACC_LOOP
2906 : GF_OMP_FOR_KIND_FOR),
2907 t, 1, NULL);
2908
Richard Biener726a9892008-07-28 14:33:56 +00002909 gimple_cond_set_lhs (cond_stmt, cvar_base);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002910 type = TREE_TYPE (cvar);
Jakub Jelinek9ff70652010-12-07 12:27:37 +01002911 gimple_set_location (for_stmt, loc);
Richard Biener726a9892008-07-28 14:33:56 +00002912 gimple_omp_for_set_index (for_stmt, 0, initvar);
2913 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2914 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2915 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2916 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2917 cvar_base,
2918 build_int_cst (type, 1)));
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002919
Richard Biener726a9892008-07-28 14:33:56 +00002920 gsi = gsi_last_bb (for_bb);
2921 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002922 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2923
Richard Biener726a9892008-07-28 14:33:56 +00002924 /* Emit GIMPLE_OMP_CONTINUE. */
Tom de Vriese67d7a12015-07-31 06:26:44 +00002925 continue_bb = single_pred (loop->latch);
2926 gsi = gsi_last_bb (continue_bb);
David Malcolm538dd0b2014-11-19 17:00:54 +00002927 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2928 gimple_set_location (omp_cont_stmt, loc);
2929 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2930 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002931
Richard Biener726a9892008-07-28 14:33:56 +00002932 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2933 gsi = gsi_last_bb (ex_bb);
David Malcolm538dd0b2014-11-19 17:00:54 +00002934 omp_return_stmt2 = gimple_build_omp_return (true);
2935 gimple_set_location (omp_return_stmt2, loc);
2936 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002937
Richard Guenthercd7d9fd2012-03-05 14:36:18 +00002938 /* After the above dom info is hosed. Re-compute it. */
2939 free_dominance_info (CDI_DOMINATORS);
2940 calculate_dominance_info (CDI_DOMINATORS);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002941}
2942
Tom de Vriesc75c35e2018-03-21 12:25:03 +00002943/* Return number of phis in bb. If COUNT_VIRTUAL_P is false, don't count the
2944 virtual phi. */
2945
2946static unsigned int
2947num_phis (basic_block bb, bool count_virtual_p)
2948{
2949 unsigned int nr_phis = 0;
2950 gphi_iterator gsi;
2951 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2952 {
2953 if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2954 continue;
2955
2956 nr_phis++;
2957 }
2958
2959 return nr_phis;
2960}
2961
Razya Ladelsky08dab972009-07-30 08:39:57 +00002962/* Generates code to execute the iterations of LOOP in N_THREADS
Thomas Schwingef99c3552016-02-23 16:07:54 +01002963 threads in parallel, which can be 0 if that number is to be determined
2964 later.
Razya Ladelsky08dab972009-07-30 08:39:57 +00002965
2966 NITER describes number of iterations of LOOP.
Ralf Wildenhuesfa10bee2008-06-06 05:42:00 +00002967 REDUCTION_LIST describes the reductions existent in the LOOP. */
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002968
2969static void
Martin Sebor99b1c312019-07-09 18:32:49 +00002970gen_parallel_loop (class loop *loop,
Trevor Saundersc203e8a2014-06-24 13:21:35 +00002971 reduction_info_table_type *reduction_list,
Martin Sebor99b1c312019-07-09 18:32:49 +00002972 unsigned n_threads, class tree_niter_desc *niter,
Tom de Vries61d9c522016-01-18 12:52:32 +00002973 bool oacc_kernels_p)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002974{
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002975 tree many_iterations_cond, type, nit;
Richard Biener726a9892008-07-28 14:33:56 +00002976 tree arg_struct, new_arg_struct;
2977 gimple_seq stmts;
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01002978 edge entry, exit;
Razya Ladelskya509ebb2007-10-29 11:05:04 +00002979 struct clsn_data clsn_data;
Jakub Jelinek9ff70652010-12-07 12:27:37 +01002980 location_t loc;
Trevor Saunders355fe082015-09-20 00:52:59 +00002981 gimple *cond_stmt;
Razya Ladelsky768da0d2012-05-20 11:41:45 +00002982 unsigned int m_p_thread=2;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02002983
2984 /* From
2985
2986 ---------------------------------------------------------------------
2987 loop
2988 {
2989 IV = phi (INIT, IV + STEP)
2990 BODY1;
2991 if (COND)
2992 break;
2993 BODY2;
2994 }
2995 ---------------------------------------------------------------------
2996
2997 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2998 we generate the following code:
2999
3000 ---------------------------------------------------------------------
3001
3002 if (MAY_BE_ZERO
Razya Ladelskya509ebb2007-10-29 11:05:04 +00003003 || NITER < MIN_PER_THREAD * N_THREADS)
3004 goto original;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003005
3006 BODY1;
3007 store all local loop-invariant variables used in body of the loop to DATA.
Richard Biener726a9892008-07-28 14:33:56 +00003008 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003009 load the variables from DATA.
Richard Biener726a9892008-07-28 14:33:56 +00003010 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003011 BODY2;
3012 BODY1;
Richard Biener726a9892008-07-28 14:33:56 +00003013 GIMPLE_OMP_CONTINUE;
3014 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
3015 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003016 goto end;
3017
3018 original:
3019 loop
3020 {
3021 IV = phi (INIT, IV + STEP)
3022 BODY1;
3023 if (COND)
3024 break;
3025 BODY2;
3026 }
3027
3028 end:
3029
3030 */
3031
3032 /* Create two versions of the loop -- in the old one, we know that the
3033 number of iterations is large enough, and we will transform it into the
3034 loop that will be split to loop_fn, the new one will be used for the
3035 remaining iterations. */
Razya Ladelskya509ebb2007-10-29 11:05:04 +00003036
Razya Ladelsky768da0d2012-05-20 11:41:45 +00003037 /* We should compute a better number-of-iterations value for outer loops.
3038 That is, if we have
3039
3040 for (i = 0; i < n; ++i)
3041 for (j = 0; j < m; ++j)
3042 ...
3043
3044 we should compute nit = n * m, not nit = n.
3045 Also may_be_zero handling would need to be adjusted. */
3046
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003047 type = TREE_TYPE (niter->niter);
3048 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
3049 NULL_TREE);
3050 if (stmts)
Richard Biener726a9892008-07-28 14:33:56 +00003051 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003052
Tom de Vries61d9c522016-01-18 12:52:32 +00003053 if (!oacc_kernels_p)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003054 {
Tom de Vries61d9c522016-01-18 12:52:32 +00003055 if (loop->inner)
3056 m_p_thread=2;
3057 else
3058 m_p_thread=MIN_PER_THREAD;
3059
Thomas Schwingef99c3552016-02-23 16:07:54 +01003060 gcc_checking_assert (n_threads != 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00003061 many_iterations_cond =
3062 fold_build2 (GE_EXPR, boolean_type_node,
Richard Bienera851ce02017-11-17 13:15:34 +00003063 nit, build_int_cst (type, m_p_thread * n_threads - 1));
Tom de Vries61d9c522016-01-18 12:52:32 +00003064
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003065 many_iterations_cond
Tom de Vries61d9c522016-01-18 12:52:32 +00003066 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3067 invert_truthvalue (unshare_expr (niter->may_be_zero)),
3068 many_iterations_cond);
3069 many_iterations_cond
3070 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003071 if (stmts)
Richard Biener726a9892008-07-28 14:33:56 +00003072 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
Tom de Vries61d9c522016-01-18 12:52:32 +00003073 if (!is_gimple_condexpr (many_iterations_cond))
3074 {
3075 many_iterations_cond
3076 = force_gimple_operand (many_iterations_cond, &stmts,
3077 true, NULL_TREE);
3078 if (stmts)
3079 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
3080 stmts);
3081 }
3082
3083 initialize_original_copy_tables ();
3084
3085 /* We assume that the loop usually iterates a lot. */
Tom de Vries61d9c522016-01-18 12:52:32 +00003086 loop_version (loop, many_iterations_cond, NULL,
Jan Hubickaaf2bbc52017-07-01 22:46:40 +02003087 profile_probability::likely (),
3088 profile_probability::unlikely (),
3089 profile_probability::likely (),
3090 profile_probability::unlikely (), true);
Tom de Vries61d9c522016-01-18 12:52:32 +00003091 update_ssa (TODO_update_ssa);
3092 free_original_copy_tables ();
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003093 }
3094
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003095 /* Base all the induction variables in LOOP on a single control one. */
Sebastian Popc80a5402010-03-31 18:37:13 +00003096 canonicalize_loop_ivs (loop, &nit, true);
Tom de Vriesc75c35e2018-03-21 12:25:03 +00003097 if (num_phis (loop->header, false) != reduction_list->elements () + 1)
3098 {
3099 /* The call to canonicalize_loop_ivs above failed to "base all the
3100 induction variables in LOOP on a single control one". Do damage
3101 control. */
3102 basic_block preheader = loop_preheader_edge (loop)->src;
3103 basic_block cond_bb = single_pred (preheader);
3104 gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
3105 gimple_cond_make_true (cond);
3106 update_stmt (cond);
3107 /* We've gotten rid of the duplicate loop created by loop_version, but
3108 we can't undo whatever canonicalize_loop_ivs has done.
3109 TODO: Fix this properly by ensuring that the call to
3110 canonicalize_loop_ivs succeeds. */
3111 if (dump_file
3112 && (dump_flags & TDF_DETAILS))
3113 fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
3114 " aborting transformation\n", loop->num);
3115 return;
3116 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003117
Tom de Vries7c82d822015-06-05 15:57:34 +00003118 /* Ensure that the exit condition is the first statement in the loop.
3119 The common case is that latch of the loop is empty (apart from the
3120 increment) and immediately follows the loop exit test. Attempt to move the
3121 entry of the loop directly before the exit check and increase the number of
3122 iterations of the loop by one. */
Tom de Vriesa5a57bf2015-07-24 15:00:59 +00003123 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
3124 {
3125 if (dump_file
3126 && (dump_flags & TDF_DETAILS))
3127 fprintf (dump_file,
3128 "alternative exit-first loop transform succeeded"
3129 " for loop %d\n", loop->num);
3130 }
3131 else
Tom de Vries7c82d822015-06-05 15:57:34 +00003132 {
Tom de Vries61d9c522016-01-18 12:52:32 +00003133 if (oacc_kernels_p)
3134 n_threads = 1;
3135
Tom de Vries7c82d822015-06-05 15:57:34 +00003136 /* Fall back on the method that handles more cases, but duplicates the
3137 loop body: move the exit condition of LOOP to the beginning of its
3138 header, and duplicate the part of the last iteration that gets disabled
3139 to the exit of the loop. */
3140 transform_to_exit_first_loop (loop, reduction_list, nit);
3141 }
Razya Ladelskya509ebb2007-10-29 11:05:04 +00003142
Ralf Wildenhuesfa10bee2008-06-06 05:42:00 +00003143 /* Generate initializations for reductions. */
Martin Liskab119c052019-05-03 14:37:22 +02003144 if (!reduction_list->is_empty ())
Martin Sebor99b1c312019-07-09 18:32:49 +00003145 reduction_list->traverse <class loop *, initialize_reductions> (loop);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003146
3147 /* Eliminate the references to local variables from the loop. */
Antoniu Pop9f9f72a2008-04-24 16:23:51 +01003148 gcc_assert (single_exit (loop));
3149 entry = loop_preheader_edge (loop);
3150 exit = single_dom_exit (loop);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003151
Tom de Vries61d9c522016-01-18 12:52:32 +00003152 /* This rewrites the body in terms of new variables. This has already
3153 been done for oacc_kernels_p in pass_lower_omp/lower_omp (). */
3154 if (!oacc_kernels_p)
3155 {
3156 eliminate_local_variables (entry, exit);
3157 /* In the old loop, move all variables non-local to the loop to a
3158 structure and back, and create separate decls for the variables used in
3159 loop. */
3160 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
3161 &new_arg_struct, &clsn_data);
3162 }
3163 else
3164 {
3165 arg_struct = NULL_TREE;
3166 new_arg_struct = NULL_TREE;
3167 clsn_data.load = NULL_TREE;
3168 clsn_data.load_bb = exit->dest;
3169 clsn_data.store = NULL_TREE;
3170 clsn_data.store_bb = NULL;
3171 }
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003172
3173 /* Create the parallel constructs. */
Jakub Jelinek9ff70652010-12-07 12:27:37 +01003174 loc = UNKNOWN_LOCATION;
3175 cond_stmt = last_stmt (loop->header);
3176 if (cond_stmt)
3177 loc = gimple_location (cond_stmt);
Tom de Vries61d9c522016-01-18 12:52:32 +00003178 create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
3179 n_threads, loc, oacc_kernels_p);
Martin Liskab119c052019-05-03 14:37:22 +02003180 if (!reduction_list->is_empty ())
Razya Ladelskya509ebb2007-10-29 11:05:04 +00003181 create_call_for_reduction (loop, reduction_list, &clsn_data);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003182
3183 scev_reset ();
3184
Sebastian Pop92a6bdb2008-01-16 02:46:46 +00003185 /* Free loop bound estimations that could contain references to
3186 removed statements. */
Richard Bieneradb7eaa2017-06-19 07:26:50 +00003187 free_numbers_of_iterations_estimates (cfun);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003188}
3189
Sebastian Pop98572282008-05-20 19:17:12 +00003190/* Returns true when LOOP contains vector phi nodes. */
3191
3192static bool
Martin Sebor99b1c312019-07-09 18:32:49 +00003193loop_has_vector_phi_nodes (class loop *loop ATTRIBUTE_UNUSED)
Sebastian Pop98572282008-05-20 19:17:12 +00003194{
3195 unsigned i;
3196 basic_block *bbs = get_loop_body_in_dom_order (loop);
David Malcolm538dd0b2014-11-19 17:00:54 +00003197 gphi_iterator gsi;
Sebastian Pop98572282008-05-20 19:17:12 +00003198 bool res = true;
Sebastian Pop98572282008-05-20 19:17:12 +00003199
3200 for (i = 0; i < loop->num_nodes; i++)
Richard Biener726a9892008-07-28 14:33:56 +00003201 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
David Malcolm538dd0b2014-11-19 17:00:54 +00003202 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
Sebastian Pop98572282008-05-20 19:17:12 +00003203 goto end;
3204
3205 res = false;
3206 end:
3207 free (bbs);
3208 return res;
3209}
3210
Razya Ladelsky08dab972009-07-30 08:39:57 +00003211/* Create a reduction_info struct, initialize it with REDUC_STMT
3212 and PHI, insert it to the REDUCTION_LIST. */
3213
3214static void
Trevor Saundersc203e8a2014-06-24 13:21:35 +00003215build_new_reduction (reduction_info_table_type *reduction_list,
Trevor Saunders355fe082015-09-20 00:52:59 +00003216 gimple *reduc_stmt, gphi *phi)
Razya Ladelsky08dab972009-07-30 08:39:57 +00003217{
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00003218 reduction_info **slot;
Razya Ladelsky08dab972009-07-30 08:39:57 +00003219 struct reduction_info *new_reduction;
Tom de Vries12efb1d2015-07-28 07:54:04 +00003220 enum tree_code reduction_code;
Razya Ladelsky08dab972009-07-30 08:39:57 +00003221
3222 gcc_assert (reduc_stmt);
H.J. Lub8698a02009-11-25 10:55:54 +00003223
Jakub Jelinekd0ee55a2017-07-28 09:11:51 +02003224 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
3225 {
3226 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
3227 gimple *def1 = SSA_NAME_DEF_STMT (op1);
3228 reduction_code = gimple_assign_rhs_code (def1);
3229 }
3230 else
3231 reduction_code = gimple_assign_rhs_code (reduc_stmt);
3232 /* Check for OpenMP supported reduction. */
3233 switch (reduction_code)
3234 {
3235 case PLUS_EXPR:
3236 case MULT_EXPR:
3237 case MAX_EXPR:
3238 case MIN_EXPR:
3239 case BIT_IOR_EXPR:
3240 case BIT_XOR_EXPR:
3241 case BIT_AND_EXPR:
3242 case TRUTH_OR_EXPR:
3243 case TRUTH_XOR_EXPR:
3244 case TRUTH_AND_EXPR:
3245 break;
3246 default:
3247 return;
3248 }
3249
Razya Ladelsky08dab972009-07-30 08:39:57 +00003250 if (dump_file && (dump_flags & TDF_DETAILS))
3251 {
3252 fprintf (dump_file,
Tom de Vries430002b2015-11-20 12:48:17 +00003253 "Detected reduction. reduction stmt is:\n");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003254 print_gimple_stmt (dump_file, reduc_stmt, 0);
Razya Ladelsky08dab972009-07-30 08:39:57 +00003255 fprintf (dump_file, "\n");
3256 }
H.J. Lub8698a02009-11-25 10:55:54 +00003257
Razya Ladelsky08dab972009-07-30 08:39:57 +00003258 new_reduction = XCNEW (struct reduction_info);
H.J. Lub8698a02009-11-25 10:55:54 +00003259
Razya Ladelsky08dab972009-07-30 08:39:57 +00003260 new_reduction->reduc_stmt = reduc_stmt;
3261 new_reduction->reduc_phi = phi;
Jakub Jelinek5d1fd1d2010-12-18 22:07:12 +01003262 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
Tom de Vries12efb1d2015-07-28 07:54:04 +00003263 new_reduction->reduction_code = reduction_code;
Trevor Saundersc203e8a2014-06-24 13:21:35 +00003264 slot = reduction_list->find_slot (new_reduction, INSERT);
Razya Ladelsky08dab972009-07-30 08:39:57 +00003265 *slot = new_reduction;
3266}
3267
Jakub Jelinek5d1fd1d2010-12-18 22:07:12 +01003268/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
3269
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00003270int
3271set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
Jakub Jelinek5d1fd1d2010-12-18 22:07:12 +01003272{
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00003273 struct reduction_info *const red = *slot;
Jakub Jelinek5d1fd1d2010-12-18 22:07:12 +01003274 gimple_set_uid (red->reduc_phi, red->reduc_version);
3275 return 1;
3276}
3277
Richard Sandiford32c91df2018-07-31 14:23:16 +00003278/* Return true if the type of reduction performed by STMT_INFO is suitable
Richard Sandifordb781a132018-01-13 18:01:24 +00003279 for this pass. */
3280
3281static bool
Richard Sandiford32c91df2018-07-31 14:23:16 +00003282valid_reduction_p (stmt_vec_info stmt_info)
Richard Sandifordb781a132018-01-13 18:01:24 +00003283{
3284 /* Parallelization would reassociate the operation, which isn't
3285 allowed for in-order reductions. */
Richard Sandifordb781a132018-01-13 18:01:24 +00003286 vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
3287 return reduc_type != FOLD_LEFT_REDUCTION;
3288}
3289
Razya Ladelsky08dab972009-07-30 08:39:57 +00003290/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
3291
3292static void
Trevor Saundersc203e8a2014-06-24 13:21:35 +00003293gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
Razya Ladelsky08dab972009-07-30 08:39:57 +00003294{
David Malcolm538dd0b2014-11-19 17:00:54 +00003295 gphi_iterator gsi;
Razya Ladelsky08dab972009-07-30 08:39:57 +00003296 loop_vec_info simple_loop_info;
Jakub Jelinek846b1a12017-02-04 08:44:13 +01003297 auto_vec<gphi *, 4> double_reduc_phis;
3298 auto_vec<gimple *, 4> double_reduc_stmts;
Razya Ladelsky08dab972009-07-30 08:39:57 +00003299
Richard Bienerca823c82018-06-25 11:04:01 +00003300 vec_info_shared shared;
3301 simple_loop_info = vect_analyze_loop_form (loop, &shared);
Tom de Vries1e6a7b02015-07-27 20:05:19 +00003302 if (simple_loop_info == NULL)
Tom de Vries1cabb202015-11-23 09:45:38 +00003303 goto gather_done;
Razya Ladelsky08dab972009-07-30 08:39:57 +00003304
3305 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3306 {
David Malcolm538dd0b2014-11-19 17:00:54 +00003307 gphi *phi = gsi.phi ();
Razya Ladelsky08dab972009-07-30 08:39:57 +00003308 affine_iv iv;
3309 tree res = PHI_RESULT (phi);
3310 bool double_reduc;
3311
Richard Guentherea057352012-08-14 14:16:18 +00003312 if (virtual_operand_p (res))
Razya Ladelsky08dab972009-07-30 08:39:57 +00003313 continue;
3314
Tom de Vries1e6a7b02015-07-27 20:05:19 +00003315 if (simple_iv (loop, loop, res, &iv, true))
3316 continue;
3317
Richard Sandiford32c91df2018-07-31 14:23:16 +00003318 stmt_vec_info reduc_stmt_info
Richard Biener31de92e2019-09-18 18:07:06 +00003319 = parloops_force_simple_reduction (simple_loop_info,
3320 simple_loop_info->lookup_stmt (phi),
3321 &double_reduc, true);
Richard Sandiford32c91df2018-07-31 14:23:16 +00003322 if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
Tom de Vries1e6a7b02015-07-27 20:05:19 +00003323 continue;
3324
Tom de Vries12efb1d2015-07-28 07:54:04 +00003325 if (double_reduc)
3326 {
Jakub Jelinek846b1a12017-02-04 08:44:13 +01003327 if (loop->inner->inner != NULL)
Tom de Vries12efb1d2015-07-28 07:54:04 +00003328 continue;
3329
Jakub Jelinek846b1a12017-02-04 08:44:13 +01003330 double_reduc_phis.safe_push (phi);
Richard Sandiford32c91df2018-07-31 14:23:16 +00003331 double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
Jakub Jelinek846b1a12017-02-04 08:44:13 +01003332 continue;
Tom de Vries12efb1d2015-07-28 07:54:04 +00003333 }
3334
Richard Sandiford32c91df2018-07-31 14:23:16 +00003335 build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
Razya Ladelsky08dab972009-07-30 08:39:57 +00003336 }
Richard Sandiford2c515552017-08-04 10:41:12 +00003337 delete simple_loop_info;
Jakub Jelinek846b1a12017-02-04 08:44:13 +01003338
3339 if (!double_reduc_phis.is_empty ())
3340 {
Richard Bienerca823c82018-06-25 11:04:01 +00003341 vec_info_shared shared;
3342 simple_loop_info = vect_analyze_loop_form (loop->inner, &shared);
Jakub Jelinek846b1a12017-02-04 08:44:13 +01003343 if (simple_loop_info)
3344 {
3345 gphi *phi;
3346 unsigned int i;
3347
3348 FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
3349 {
3350 affine_iv iv;
3351 tree res = PHI_RESULT (phi);
3352 bool double_reduc;
3353
3354 use_operand_p use_p;
3355 gimple *inner_stmt;
3356 bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
3357 gcc_assert (single_use_p);
3358 if (gimple_code (inner_stmt) != GIMPLE_PHI)
3359 continue;
3360 gphi *inner_phi = as_a <gphi *> (inner_stmt);
3361 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
3362 &iv, true))
3363 continue;
3364
Richard Sandiford32c91df2018-07-31 14:23:16 +00003365 stmt_vec_info inner_phi_info
3366 = simple_loop_info->lookup_stmt (inner_phi);
3367 stmt_vec_info inner_reduc_stmt_info
Richard Biener31de92e2019-09-18 18:07:06 +00003368 = parloops_force_simple_reduction (simple_loop_info,
3369 inner_phi_info,
3370 &double_reduc, true);
Jakub Jelinek846b1a12017-02-04 08:44:13 +01003371 gcc_assert (!double_reduc);
Richard Sandiford32c91df2018-07-31 14:23:16 +00003372 if (!inner_reduc_stmt_info
3373 || !valid_reduction_p (inner_reduc_stmt_info))
Jakub Jelinek846b1a12017-02-04 08:44:13 +01003374 continue;
3375
3376 build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
3377 }
Richard Sandiford2c515552017-08-04 10:41:12 +00003378 delete simple_loop_info;
Jakub Jelinek846b1a12017-02-04 08:44:13 +01003379 }
3380 }
Jakub Jelinek5d1fd1d2010-12-18 22:07:12 +01003381
Tom de Vries1cabb202015-11-23 09:45:38 +00003382 gather_done:
Martin Liskab119c052019-05-03 14:37:22 +02003383 if (reduction_list->is_empty ())
Tom de Vries1cabb202015-11-23 09:45:38 +00003384 return;
3385
Jakub Jelinek5d1fd1d2010-12-18 22:07:12 +01003386 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
Richard Sandiford6ef709e2018-07-31 14:26:31 +00003387 and delete simple_loop_info, we can set gimple_uid of reduc_phi stmts only
Tom de Vriesfdce4932015-10-13 14:54:01 +00003388 now. */
3389 basic_block bb;
3390 FOR_EACH_BB_FN (bb, cfun)
3391 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3392 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
Trevor Saundersc203e8a2014-06-24 13:21:35 +00003393 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
Razya Ladelsky08dab972009-07-30 08:39:57 +00003394}
3395
3396/* Try to initialize NITER for code generation part. */
3397
3398static bool
Martin Sebor99b1c312019-07-09 18:32:49 +00003399try_get_loop_niter (loop_p loop, class tree_niter_desc *niter)
Razya Ladelsky08dab972009-07-30 08:39:57 +00003400{
3401 edge exit = single_dom_exit (loop);
3402
3403 gcc_assert (exit);
3404
3405 /* We need to know # of iterations, and there should be no uses of values
3406 defined inside loop outside of it, unless the values are invariants of
3407 the loop. */
3408 if (!number_of_iterations_exit (loop, exit, niter, false))
3409 {
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3411 fprintf (dump_file, " FAILED: number of iterations not known\n");
3412 return false;
3413 }
3414
3415 return true;
3416}
3417
Tom de Vries61d9c522016-01-18 12:52:32 +00003418/* Return the default def of the first function argument. */
3419
3420static tree
3421get_omp_data_i_param (void)
3422{
3423 tree decl = DECL_ARGUMENTS (cfun->decl);
3424 gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
3425 return ssa_default_def (cfun, decl);
3426}
3427
3428/* For PHI in loop header of LOOP, look for pattern:
3429
3430 <bb preheader>
3431 .omp_data_i = &.omp_data_arr;
3432 addr = .omp_data_i->sum;
3433 sum_a = *addr;
3434
3435 <bb header>:
3436 sum_b = PHI <sum_a (preheader), sum_c (latch)>
3437
3438 and return addr. Otherwise, return NULL_TREE. */
3439
3440static tree
Martin Sebor99b1c312019-07-09 18:32:49 +00003441find_reduc_addr (class loop *loop, gphi *phi)
Tom de Vries61d9c522016-01-18 12:52:32 +00003442{
3443 edge e = loop_preheader_edge (loop);
3444 tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
3445 gimple *stmt = SSA_NAME_DEF_STMT (arg);
3446 if (!gimple_assign_single_p (stmt))
3447 return NULL_TREE;
3448 tree memref = gimple_assign_rhs1 (stmt);
3449 if (TREE_CODE (memref) != MEM_REF)
3450 return NULL_TREE;
3451 tree addr = TREE_OPERAND (memref, 0);
3452
3453 gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
3454 if (!gimple_assign_single_p (stmt2))
3455 return NULL_TREE;
3456 tree compref = gimple_assign_rhs1 (stmt2);
3457 if (TREE_CODE (compref) != COMPONENT_REF)
3458 return NULL_TREE;
3459 tree addr2 = TREE_OPERAND (compref, 0);
3460 if (TREE_CODE (addr2) != MEM_REF)
3461 return NULL_TREE;
3462 addr2 = TREE_OPERAND (addr2, 0);
3463 if (TREE_CODE (addr2) != SSA_NAME
3464 || addr2 != get_omp_data_i_param ())
3465 return NULL_TREE;
3466
3467 return addr;
3468}
3469
Razya Ladelsky08dab972009-07-30 08:39:57 +00003470/* Try to initialize REDUCTION_LIST for code generation part.
3471 REDUCTION_LIST describes the reductions. */
3472
3473static bool
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00003474try_create_reduction_list (loop_p loop,
Tom de Vries61d9c522016-01-18 12:52:32 +00003475 reduction_info_table_type *reduction_list,
3476 bool oacc_kernels_p)
Razya Ladelsky08dab972009-07-30 08:39:57 +00003477{
3478 edge exit = single_dom_exit (loop);
David Malcolm538dd0b2014-11-19 17:00:54 +00003479 gphi_iterator gsi;
Razya Ladelsky08dab972009-07-30 08:39:57 +00003480
3481 gcc_assert (exit);
3482
Tom de Vriesf993a852015-11-20 10:25:26 +00003483 /* Try to get rid of exit phis. */
3484 final_value_replacement_loop (loop);
3485
Razya Ladelsky08dab972009-07-30 08:39:57 +00003486 gather_scalar_reductions (loop, reduction_list);
3487
H.J. Lub8698a02009-11-25 10:55:54 +00003488
Razya Ladelsky08dab972009-07-30 08:39:57 +00003489 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3490 {
David Malcolm538dd0b2014-11-19 17:00:54 +00003491 gphi *phi = gsi.phi ();
Razya Ladelsky08dab972009-07-30 08:39:57 +00003492 struct reduction_info *red;
3493 imm_use_iterator imm_iter;
3494 use_operand_p use_p;
Trevor Saunders355fe082015-09-20 00:52:59 +00003495 gimple *reduc_phi;
Razya Ladelsky08dab972009-07-30 08:39:57 +00003496 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3497
Jakub Jelinek425f5fd2019-05-10 10:20:38 +02003498 if (!virtual_operand_p (val))
Razya Ladelsky08dab972009-07-30 08:39:57 +00003499 {
Jakub Jelinek425f5fd2019-05-10 10:20:38 +02003500 if (TREE_CODE (val) != SSA_NAME)
3501 {
3502 if (dump_file && (dump_flags & TDF_DETAILS))
3503 fprintf (dump_file,
3504 " FAILED: exit PHI argument invariant.\n");
3505 return false;
3506 }
3507
Razya Ladelsky08dab972009-07-30 08:39:57 +00003508 if (dump_file && (dump_flags & TDF_DETAILS))
3509 {
3510 fprintf (dump_file, "phi is ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003511 print_gimple_stmt (dump_file, phi, 0);
Razya Ladelsky08dab972009-07-30 08:39:57 +00003512 fprintf (dump_file, "arg of phi to exit: value ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003513 print_generic_expr (dump_file, val);
Razya Ladelsky08dab972009-07-30 08:39:57 +00003514 fprintf (dump_file, " used outside loop\n");
3515 fprintf (dump_file,
Tom de Vries430002b2015-11-20 12:48:17 +00003516 " checking if it is part of reduction pattern:\n");
Razya Ladelsky08dab972009-07-30 08:39:57 +00003517 }
Martin Liskab119c052019-05-03 14:37:22 +02003518 if (reduction_list->is_empty ())
Razya Ladelsky08dab972009-07-30 08:39:57 +00003519 {
3520 if (dump_file && (dump_flags & TDF_DETAILS))
3521 fprintf (dump_file,
3522 " FAILED: it is not a part of reduction.\n");
3523 return false;
3524 }
3525 reduc_phi = NULL;
3526 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
3527 {
Jakub Jelinek4942af92010-11-20 13:14:48 +01003528 if (!gimple_debug_bind_p (USE_STMT (use_p))
3529 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
Razya Ladelsky08dab972009-07-30 08:39:57 +00003530 {
3531 reduc_phi = USE_STMT (use_p);
3532 break;
3533 }
3534 }
3535 red = reduction_phi (reduction_list, reduc_phi);
3536 if (red == NULL)
3537 {
3538 if (dump_file && (dump_flags & TDF_DETAILS))
3539 fprintf (dump_file,
3540 " FAILED: it is not a part of reduction.\n");
3541 return false;
3542 }
Tom de Vries23fab8a2016-01-10 09:12:03 +00003543 if (red->keep_res != NULL)
3544 {
3545 if (dump_file && (dump_flags & TDF_DETAILS))
3546 fprintf (dump_file,
3547 " FAILED: reduction has multiple exit phis.\n");
3548 return false;
3549 }
3550 red->keep_res = phi;
Razya Ladelsky08dab972009-07-30 08:39:57 +00003551 if (dump_file && (dump_flags & TDF_DETAILS))
3552 {
3553 fprintf (dump_file, "reduction phi is ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003554 print_gimple_stmt (dump_file, red->reduc_phi, 0);
Razya Ladelsky08dab972009-07-30 08:39:57 +00003555 fprintf (dump_file, "reduction stmt is ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003556 print_gimple_stmt (dump_file, red->reduc_stmt, 0);
Razya Ladelsky08dab972009-07-30 08:39:57 +00003557 }
3558 }
3559 }
3560
3561 /* The iterations of the loop may communicate only through bivs whose
3562 iteration space can be distributed efficiently. */
3563 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3564 {
David Malcolm538dd0b2014-11-19 17:00:54 +00003565 gphi *phi = gsi.phi ();
Razya Ladelsky08dab972009-07-30 08:39:57 +00003566 tree def = PHI_RESULT (phi);
3567 affine_iv iv;
3568
Richard Guentherea057352012-08-14 14:16:18 +00003569 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
Razya Ladelsky08dab972009-07-30 08:39:57 +00003570 {
3571 struct reduction_info *red;
3572
3573 red = reduction_phi (reduction_list, phi);
3574 if (red == NULL)
3575 {
3576 if (dump_file && (dump_flags & TDF_DETAILS))
3577 fprintf (dump_file,
3578 " FAILED: scalar dependency between iterations\n");
3579 return false;
3580 }
3581 }
3582 }
3583
Tom de Vries61d9c522016-01-18 12:52:32 +00003584 if (oacc_kernels_p)
3585 {
3586 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
3587 gsi_next (&gsi))
3588 {
3589 gphi *phi = gsi.phi ();
3590 tree def = PHI_RESULT (phi);
3591 affine_iv iv;
3592
3593 if (!virtual_operand_p (def)
3594 && !simple_iv (loop, loop, def, &iv, true))
3595 {
3596 tree addr = find_reduc_addr (loop, phi);
3597 if (addr == NULL_TREE)
3598 return false;
3599 struct reduction_info *red = reduction_phi (reduction_list, phi);
3600 red->reduc_addr = addr;
3601 }
3602 }
3603 }
Razya Ladelsky08dab972009-07-30 08:39:57 +00003604
3605 return true;
3606}
3607
Tom de Vries3907c6c2016-01-10 12:44:57 +00003608/* Return true if LOOP contains phis with ADDR_EXPR in args. */
3609
3610static bool
Martin Sebor99b1c312019-07-09 18:32:49 +00003611loop_has_phi_with_address_arg (class loop *loop)
Tom de Vries3907c6c2016-01-10 12:44:57 +00003612{
3613 basic_block *bbs = get_loop_body (loop);
3614 bool res = false;
3615
3616 unsigned i, j;
3617 gphi_iterator gsi;
3618 for (i = 0; i < loop->num_nodes; i++)
3619 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3620 {
3621 gphi *phi = gsi.phi ();
3622 for (j = 0; j < gimple_phi_num_args (phi); j++)
3623 {
3624 tree arg = gimple_phi_arg_def (phi, j);
3625 if (TREE_CODE (arg) == ADDR_EXPR)
3626 {
3627 /* This should be handled by eliminate_local_variables, but that
3628 function currently ignores phis. */
3629 res = true;
3630 goto end;
3631 }
3632 }
3633 }
3634 end:
3635 free (bbs);
Tom de Vries61d9c522016-01-18 12:52:32 +00003636
3637 return res;
3638}
3639
3640/* Return true if memory ref REF (corresponding to the stmt at GSI in
3641 REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
3642 or the statements in REGIONS_BB[I + n]. REF_IS_STORE indicates if REF is a
3643 store. Ignore conflicts with SKIP_STMT. */
3644
3645static bool
3646ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
3647 bool ref_is_store, vec<basic_block> region_bbs,
3648 unsigned int i, gimple *skip_stmt)
3649{
3650 basic_block bb = region_bbs[i];
3651 gsi_next (&gsi);
3652
3653 while (true)
3654 {
3655 for (; !gsi_end_p (gsi);
3656 gsi_next (&gsi))
3657 {
3658 gimple *stmt = gsi_stmt (gsi);
3659 if (stmt == skip_stmt)
3660 {
3661 if (dump_file)
3662 {
3663 fprintf (dump_file, "skipping reduction store: ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003664 print_gimple_stmt (dump_file, stmt, 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00003665 }
3666 continue;
3667 }
3668
3669 if (!gimple_vdef (stmt)
3670 && !gimple_vuse (stmt))
3671 continue;
3672
3673 if (gimple_code (stmt) == GIMPLE_RETURN)
3674 continue;
3675
3676 if (ref_is_store)
3677 {
3678 if (ref_maybe_used_by_stmt_p (stmt, ref))
3679 {
3680 if (dump_file)
3681 {
3682 fprintf (dump_file, "Stmt ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003683 print_gimple_stmt (dump_file, stmt, 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00003684 }
3685 return true;
3686 }
3687 }
3688 else
3689 {
3690 if (stmt_may_clobber_ref_p_1 (stmt, ref))
3691 {
3692 if (dump_file)
3693 {
3694 fprintf (dump_file, "Stmt ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003695 print_gimple_stmt (dump_file, stmt, 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00003696 }
3697 return true;
3698 }
3699 }
3700 }
3701 i++;
3702 if (i == region_bbs.length ())
3703 break;
3704 bb = region_bbs[i];
3705 gsi = gsi_start_bb (bb);
3706 }
3707
3708 return false;
3709}
3710
3711/* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
3712 in parallel with REGION_BBS containing the loop. Return the stores of
3713 reduction results in REDUCTION_STORES. */
3714
3715static bool
3716oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3717 reduction_info_table_type *reduction_list,
3718 bitmap reduction_stores)
3719{
3720 tree omp_data_i = get_omp_data_i_param ();
3721
3722 unsigned i;
3723 basic_block bb;
3724 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3725 {
3726 if (bitmap_bit_p (in_loop_bbs, bb->index))
3727 continue;
3728
3729 gimple_stmt_iterator gsi;
3730 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3731 gsi_next (&gsi))
3732 {
3733 gimple *stmt = gsi_stmt (gsi);
3734 gimple *skip_stmt = NULL;
3735
3736 if (is_gimple_debug (stmt)
3737 || gimple_code (stmt) == GIMPLE_COND)
3738 continue;
3739
3740 ao_ref ref;
3741 bool ref_is_store = false;
3742 if (gimple_assign_load_p (stmt))
3743 {
3744 tree rhs = gimple_assign_rhs1 (stmt);
3745 tree base = get_base_address (rhs);
3746 if (TREE_CODE (base) == MEM_REF
3747 && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
3748 continue;
3749
3750 tree lhs = gimple_assign_lhs (stmt);
3751 if (TREE_CODE (lhs) == SSA_NAME
3752 && has_single_use (lhs))
3753 {
3754 use_operand_p use_p;
3755 gimple *use_stmt;
Tom de Vriesf45ce172019-06-16 07:47:15 +00003756 struct reduction_info *red;
Tom de Vries61d9c522016-01-18 12:52:32 +00003757 single_imm_use (lhs, &use_p, &use_stmt);
Tom de Vriesf45ce172019-06-16 07:47:15 +00003758 if (gimple_code (use_stmt) == GIMPLE_PHI
3759 && (red = reduction_phi (reduction_list, use_stmt)))
Tom de Vries61d9c522016-01-18 12:52:32 +00003760 {
Tom de Vries61d9c522016-01-18 12:52:32 +00003761 tree val = PHI_RESULT (red->keep_res);
3762 if (has_single_use (val))
3763 {
3764 single_imm_use (val, &use_p, &use_stmt);
3765 if (gimple_store_p (use_stmt))
3766 {
3767 unsigned int id
3768 = SSA_NAME_VERSION (gimple_vdef (use_stmt));
3769 bitmap_set_bit (reduction_stores, id);
3770 skip_stmt = use_stmt;
3771 if (dump_file)
3772 {
3773 fprintf (dump_file, "found reduction load: ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003774 print_gimple_stmt (dump_file, stmt, 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00003775 }
3776 }
3777 }
3778 }
3779 }
3780
3781 ao_ref_init (&ref, rhs);
3782 }
3783 else if (gimple_store_p (stmt))
3784 {
3785 ao_ref_init (&ref, gimple_assign_lhs (stmt));
3786 ref_is_store = true;
3787 }
3788 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3789 continue;
3790 else if (!gimple_has_side_effects (stmt)
3791 && !gimple_could_trap_p (stmt)
Martin Jambor36bbc052018-10-22 10:27:50 +02003792 && !stmt_could_throw_p (cfun, stmt)
Tom de Vries61d9c522016-01-18 12:52:32 +00003793 && !gimple_vdef (stmt)
3794 && !gimple_vuse (stmt))
3795 continue;
Marek Polacek8e4284d2016-09-26 15:50:13 +00003796 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
Tom de Vries61d9c522016-01-18 12:52:32 +00003797 continue;
3798 else if (gimple_code (stmt) == GIMPLE_RETURN)
3799 continue;
3800 else
3801 {
3802 if (dump_file)
3803 {
3804 fprintf (dump_file, "Unhandled stmt in entry/exit: ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003805 print_gimple_stmt (dump_file, stmt, 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00003806 }
3807 return false;
3808 }
3809
3810 if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3811 i, skip_stmt))
3812 {
3813 if (dump_file)
3814 {
3815 fprintf (dump_file, "conflicts with entry/exit stmt: ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003816 print_gimple_stmt (dump_file, stmt, 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00003817 }
3818 return false;
3819 }
3820 }
3821 }
3822
3823 return true;
3824}
3825
3826/* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3827 gang_pos == 0, except when the stores are REDUCTION_STORES. Return true
3828 if any changes were made. */
3829
3830static bool
3831oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3832 bitmap reduction_stores)
3833{
3834 tree gang_pos = NULL_TREE;
3835 bool changed = false;
3836
3837 unsigned i;
3838 basic_block bb;
3839 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3840 {
3841 if (bitmap_bit_p (in_loop_bbs, bb->index))
3842 continue;
3843
3844 gimple_stmt_iterator gsi;
3845 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3846 {
3847 gimple *stmt = gsi_stmt (gsi);
3848
3849 if (!gimple_store_p (stmt))
3850 {
3851 /* Update gsi to point to next stmt. */
3852 gsi_next (&gsi);
3853 continue;
3854 }
3855
3856 if (bitmap_bit_p (reduction_stores,
3857 SSA_NAME_VERSION (gimple_vdef (stmt))))
3858 {
3859 if (dump_file)
3860 {
3861 fprintf (dump_file,
3862 "skipped reduction store for single-gang"
3863 " neutering: ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003864 print_gimple_stmt (dump_file, stmt, 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00003865 }
3866
3867 /* Update gsi to point to next stmt. */
3868 gsi_next (&gsi);
3869 continue;
3870 }
3871
3872 changed = true;
3873
3874 if (gang_pos == NULL_TREE)
3875 {
3876 tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
3877 gcall *gang_single
3878 = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
3879 gang_pos = make_ssa_name (integer_type_node);
3880 gimple_call_set_lhs (gang_single, gang_pos);
3881 gimple_stmt_iterator start
3882 = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
3883 tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
3884 gimple_set_vuse (gang_single, vuse);
3885 gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
3886 }
3887
3888 if (dump_file)
3889 {
3890 fprintf (dump_file,
3891 "found store that needs single-gang neutering: ");
Martin Liskaef6cb4c2017-05-16 16:51:02 +02003892 print_gimple_stmt (dump_file, stmt, 0);
Tom de Vries61d9c522016-01-18 12:52:32 +00003893 }
3894
3895 {
3896 /* Split block before store. */
3897 gimple_stmt_iterator gsi2 = gsi;
3898 gsi_prev (&gsi2);
3899 edge e;
3900 if (gsi_end_p (gsi2))
3901 {
3902 e = split_block_after_labels (bb);
3903 gsi2 = gsi_last_bb (bb);
3904 }
3905 else
3906 e = split_block (bb, gsi_stmt (gsi2));
3907 basic_block bb2 = e->dest;
3908
3909 /* Split block after store. */
3910 gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
3911 edge e2 = split_block (bb2, gsi_stmt (gsi3));
3912 basic_block bb3 = e2->dest;
3913
3914 gimple *cond
3915 = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
3916 NULL_TREE, NULL_TREE);
3917 gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
3918
3919 edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
Jan Hubicka357067f2017-06-29 18:40:53 +02003920 /* FIXME: What is the probability? */
3921 e3->probability = profile_probability::guessed_never ();
Tom de Vries61d9c522016-01-18 12:52:32 +00003922 e->flags = EDGE_TRUE_VALUE;
3923
3924 tree vdef = gimple_vdef (stmt);
3925 tree vuse = gimple_vuse (stmt);
3926
3927 tree phi_res = copy_ssa_name (vdef);
3928 gphi *new_phi = create_phi_node (phi_res, bb3);
3929 replace_uses_by (vdef, phi_res);
3930 add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
3931 add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
3932
3933 /* Update gsi to point to next stmt. */
3934 bb = bb3;
3935 gsi = gsi_start_bb (bb);
3936 }
3937 }
3938 }
3939
3940 return changed;
3941}
3942
3943/* Return true if the statements before and after the LOOP can be executed in
3944 parallel with the function containing the loop. Resolve conflicting stores
3945 outside LOOP by guarding them such that only a single gang executes them. */
3946
3947static bool
Martin Sebor99b1c312019-07-09 18:32:49 +00003948oacc_entry_exit_ok (class loop *loop,
Tom de Vries61d9c522016-01-18 12:52:32 +00003949 reduction_info_table_type *reduction_list)
3950{
3951 basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3952 vec<basic_block> region_bbs
3953 = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3954
3955 bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3956 bitmap_clear (in_loop_bbs);
3957 for (unsigned int i = 0; i < loop->num_nodes; i++)
3958 bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3959
3960 bitmap reduction_stores = BITMAP_ALLOC (NULL);
3961 bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3962 reduction_stores);
3963
3964 if (res)
3965 {
3966 bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3967 reduction_stores);
3968 if (changed)
3969 {
3970 free_dominance_info (CDI_DOMINATORS);
3971 calculate_dominance_info (CDI_DOMINATORS);
3972 }
3973 }
3974
Martin Liska4089c342016-05-19 17:07:28 +02003975 region_bbs.release ();
Tom de Vries61d9c522016-01-18 12:52:32 +00003976 free (loop_bbs);
3977
3978 BITMAP_FREE (in_loop_bbs);
3979 BITMAP_FREE (reduction_stores);
3980
Tom de Vries3907c6c2016-01-10 12:44:57 +00003981 return res;
3982}
3983
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003984/* Detect parallel loops and generate parallel code using libgomp
3985 primitives. Returns true if some loop was parallelized, false
3986 otherwise. */
3987
Tom de Vries09489eb2015-03-18 18:55:38 +00003988static bool
Tom de Vries61d9c522016-01-18 12:52:32 +00003989parallelize_loops (bool oacc_kernels_p)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003990{
Thomas Schwingef99c3552016-02-23 16:07:54 +01003991 unsigned n_threads;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003992 bool changed = false;
Martin Sebor99b1c312019-07-09 18:32:49 +00003993 class loop *loop;
3994 class loop *skip_loop = NULL;
3995 class tree_niter_desc niter_desc;
Laurynas Biveinisf873b202010-04-22 12:42:15 +00003996 struct obstack parloop_obstack;
Razya Ladelsky8adfe012010-01-28 14:24:25 +00003997 HOST_WIDE_INT estimated;
Laurynas Biveinisf873b202010-04-22 12:42:15 +00003998
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02003999 /* Do not parallelize loops in the functions created by parallelization. */
Tom de Vries61d9c522016-01-18 12:52:32 +00004000 if (!oacc_kernels_p
4001 && parallelized_function_p (cfun->decl))
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02004002 return false;
Tom de Vries61d9c522016-01-18 12:52:32 +00004003
4004 /* Do not parallelize loops in offloaded functions. */
4005 if (!oacc_kernels_p
Martin Jambor629b3d72016-12-14 23:30:41 +01004006 && oacc_get_fn_attrib (cfun->decl) != NULL)
Tom de Vries61d9c522016-01-18 12:52:32 +00004007 return false;
4008
Razya Ladelsky8adfe012010-01-28 14:24:25 +00004009 if (cfun->has_nonlocal_label)
4010 return false;
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02004011
Thomas Schwingef99c3552016-02-23 16:07:54 +01004012 /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
4013 the argument to -ftree-parallelize-loops. */
4014 if (oacc_kernels_p)
4015 n_threads = 0;
4016 else
4017 n_threads = flag_tree_parallelize_loops;
4018
Laurynas Biveinisf873b202010-04-22 12:42:15 +00004019 gcc_obstack_init (&parloop_obstack);
Trevor Saundersc203e8a2014-06-24 13:21:35 +00004020 reduction_info_table_type reduction_list (10);
Razya Ladelskya509ebb2007-10-29 11:05:04 +00004021
Tom de Vries61d9c522016-01-18 12:52:32 +00004022 calculate_dominance_info (CDI_DOMINATORS);
4023
Richard Bienerf0bd40b2013-11-19 15:19:09 +00004024 FOR_EACH_LOOP (loop, 0)
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02004025 {
Tom de Vriese67d7a12015-07-31 06:26:44 +00004026 if (loop == skip_loop)
4027 {
Tom de Vries61d9c522016-01-18 12:52:32 +00004028 if (!loop->in_oacc_kernels_region
4029 && dump_file && (dump_flags & TDF_DETAILS))
Tom de Vriese67d7a12015-07-31 06:26:44 +00004030 fprintf (dump_file,
4031 "Skipping loop %d as inner loop of parallelized loop\n",
4032 loop->num);
4033
4034 skip_loop = loop->inner;
4035 continue;
4036 }
4037 else
4038 skip_loop = NULL;
4039
Lawrence Crowl4a8fb1a2013-04-26 00:28:35 +00004040 reduction_list.empty ();
Tom de Vries61d9c522016-01-18 12:52:32 +00004041
4042 if (oacc_kernels_p)
4043 {
4044 if (!loop->in_oacc_kernels_region)
4045 continue;
4046
4047 /* Don't try to parallelize inner loops in an oacc kernels region. */
4048 if (loop->inner)
4049 skip_loop = loop->inner;
4050
4051 if (dump_file && (dump_flags & TDF_DETAILS))
4052 fprintf (dump_file,
4053 "Trying loop %d with header bb %d in oacc kernels"
4054 " region\n", loop->num, loop->header->index);
4055 }
4056
Razya Ladelsky48710222009-10-22 14:43:40 +00004057 if (dump_file && (dump_flags & TDF_DETAILS))
4058 {
4059 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
4060 if (loop->inner)
4061 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
4062 else
4063 fprintf (dump_file, "loop %d is innermost\n",loop->num);
4064 }
H.J. Lub8698a02009-11-25 10:55:54 +00004065
Razya Ladelsky48710222009-10-22 14:43:40 +00004066 if (!single_dom_exit (loop))
4067 {
H.J. Lub8698a02009-11-25 10:55:54 +00004068
Razya Ladelsky48710222009-10-22 14:43:40 +00004069 if (dump_file && (dump_flags & TDF_DETAILS))
4070 fprintf (dump_file, "loop is !single_dom_exit\n");
H.J. Lub8698a02009-11-25 10:55:54 +00004071
Razya Ladelsky08dab972009-07-30 08:39:57 +00004072 continue;
Razya Ladelsky48710222009-10-22 14:43:40 +00004073 }
Razya Ladelsky08dab972009-07-30 08:39:57 +00004074
4075 if (/* And of course, the loop must be parallelizable. */
4076 !can_duplicate_loop_p (loop)
Sebastian Pop1d4af1e2008-01-16 02:44:04 +00004077 || loop_has_blocks_with_irreducible_flag (loop)
Razya Ladelsky8adfe012010-01-28 14:24:25 +00004078 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
Sebastian Pop98572282008-05-20 19:17:12 +00004079 /* FIXME: the check for vector phi nodes could be removed. */
Razya Ladelsky69958392012-04-22 10:36:13 +00004080 || loop_has_vector_phi_nodes (loop))
Razya Ladelsky08dab972009-07-30 08:39:57 +00004081 continue;
Richard Guenthere5b332c2012-04-12 10:13:22 +00004082
Richard Bienera851ce02017-11-17 13:15:34 +00004083 estimated = estimated_loop_iterations_int (loop);
Richard Guenthere5b332c2012-04-12 10:13:22 +00004084 if (estimated == -1)
Richard Bienera851ce02017-11-17 13:15:34 +00004085 estimated = get_likely_max_loop_iterations_int (loop);
Sebastian Pop87d4d0e2009-08-12 14:18:17 +00004086 /* FIXME: Bypass this check as graphite doesn't update the
Richard Guenthere5b332c2012-04-12 10:13:22 +00004087 count and frequency correctly now. */
Sebastian Pop87d4d0e2009-08-12 14:18:17 +00004088 if (!flag_loop_parallelize_all
Tom de Vries61d9c522016-01-18 12:52:32 +00004089 && !oacc_kernels_p
Richard Guenthere5b332c2012-04-12 10:13:22 +00004090 && ((estimated != -1
Richard Bienera851ce02017-11-17 13:15:34 +00004091 && (estimated
4092 < ((HOST_WIDE_INT) n_threads
4093 * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
Sebastian Pop87d4d0e2009-08-12 14:18:17 +00004094 /* Do not bother with loops in cold areas. */
4095 || optimize_loop_nest_for_size_p (loop)))
Razya Ladelsky08dab972009-07-30 08:39:57 +00004096 continue;
H.J. Lub8698a02009-11-25 10:55:54 +00004097
Razya Ladelsky08dab972009-07-30 08:39:57 +00004098 if (!try_get_loop_niter (loop, &niter_desc))
4099 continue;
4100
Tom de Vries61d9c522016-01-18 12:52:32 +00004101 if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
Razya Ladelsky08dab972009-07-30 08:39:57 +00004102 continue;
4103
Tom de Vries3907c6c2016-01-10 12:44:57 +00004104 if (loop_has_phi_with_address_arg (loop))
4105 continue;
4106
Richard Bienera851ce02017-11-17 13:15:34 +00004107 if (!loop->can_be_parallel
Laurynas Biveinisf873b202010-04-22 12:42:15 +00004108 && !loop_parallel_p (loop, &parloop_obstack))
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02004109 continue;
4110
Tom de Vries61d9c522016-01-18 12:52:32 +00004111 if (oacc_kernels_p
4112 && !oacc_entry_exit_ok (loop, &reduction_list))
4113 {
4114 if (dump_file)
4115 fprintf (dump_file, "entry/exit not ok: FAILED\n");
4116 continue;
4117 }
4118
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02004119 changed = true;
Tom de Vriese67d7a12015-07-31 06:26:44 +00004120 skip_loop = loop->inner;
Richard Biener558b3182017-02-01 14:05:09 +00004121
David Malcolmbbeeac92018-11-13 16:10:13 +00004122 if (dump_enabled_p ())
4123 {
4124 dump_user_location_t loop_loc = find_loop_location (loop);
4125 if (loop->inner)
4126 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4127 "parallelizing outer loop %d\n", loop->num);
4128 else
4129 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4130 "parallelizing inner loop %d\n", loop->num);
4131 }
Tom de Vries61d9c522016-01-18 12:52:32 +00004132
Trevor Saundersc203e8a2014-06-24 13:21:35 +00004133 gen_parallel_loop (loop, &reduction_list,
Tom de Vries61d9c522016-01-18 12:52:32 +00004134 n_threads, &niter_desc, oacc_kernels_p);
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02004135 }
4136
Laurynas Biveinisf873b202010-04-22 12:42:15 +00004137 obstack_free (&parloop_obstack, NULL);
Richard Guenther6b8ed142009-05-25 13:35:10 +00004138
4139 /* Parallelization will cause new function calls to be inserted through
Richard Guentherd086d312010-04-12 15:20:48 +00004140 which local variables will escape. Reset the points-to solution
4141 for ESCAPED. */
Richard Guenther6b8ed142009-05-25 13:35:10 +00004142 if (changed)
Richard Guentherd086d312010-04-12 15:20:48 +00004143 pt_solution_reset (&cfun->gimple_df->escaped);
Richard Guenther6b8ed142009-05-25 13:35:10 +00004144
Zdenek Dvorak5f40b3c2007-09-15 23:53:45 +02004145 return changed;
4146}
4147
Andrew MacLeodc1bf2a32013-10-09 14:11:30 +00004148/* Parallelization. */
4149
Andrew MacLeodc1bf2a32013-10-09 14:11:30 +00004150namespace {
4151
4152const pass_data pass_data_parallelize_loops =
4153{
4154 GIMPLE_PASS, /* type */
4155 "parloops", /* name */
4156 OPTGROUP_LOOP, /* optinfo_flags */
Andrew MacLeodc1bf2a32013-10-09 14:11:30 +00004157 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
4158 ( PROP_cfg | PROP_ssa ), /* properties_required */
4159 0, /* properties_provided */
4160 0, /* properties_destroyed */
4161 0, /* todo_flags_start */
Richard Biener3bea3412014-05-06 13:35:40 +00004162 0, /* todo_flags_finish */
Andrew MacLeodc1bf2a32013-10-09 14:11:30 +00004163};
4164
4165class pass_parallelize_loops : public gimple_opt_pass
4166{
4167public:
4168 pass_parallelize_loops (gcc::context *ctxt)
Tom de Vries61d9c522016-01-18 12:52:32 +00004169 : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
4170 oacc_kernels_p (false)
Andrew MacLeodc1bf2a32013-10-09 14:11:30 +00004171 {}
4172
4173 /* opt_pass methods: */
Thomas Schwingef99c3552016-02-23 16:07:54 +01004174 virtual bool gate (function *)
4175 {
4176 if (oacc_kernels_p)
4177 return flag_openacc;
4178 else
4179 return flag_tree_parallelize_loops > 1;
4180 }
Trevor Saundersbe55bfe2014-04-17 12:37:34 +00004181 virtual unsigned int execute (function *);
Tom de Vries61d9c522016-01-18 12:52:32 +00004182 opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
4183 void set_pass_param (unsigned int n, bool param)
4184 {
4185 gcc_assert (n == 0);
4186 oacc_kernels_p = param;
4187 }
Andrew MacLeodc1bf2a32013-10-09 14:11:30 +00004188
Tom de Vries61d9c522016-01-18 12:52:32 +00004189 private:
4190 bool oacc_kernels_p;
Andrew MacLeodc1bf2a32013-10-09 14:11:30 +00004191}; // class pass_parallelize_loops
4192
Trevor Saundersbe55bfe2014-04-17 12:37:34 +00004193unsigned
4194pass_parallelize_loops::execute (function *fun)
4195{
Tom de Vriese9ff08b2016-01-11 08:55:16 +00004196 tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4197 if (nthreads == NULL_TREE)
4198 return 0;
4199
Tom de Vries12db08142016-01-18 12:52:23 +00004200 bool in_loop_pipeline = scev_initialized_p ();
4201 if (!in_loop_pipeline)
4202 loop_optimizer_init (LOOPS_NORMAL
4203 | LOOPS_HAVE_RECORDED_EXITS);
4204
4205 if (number_of_loops (fun) <= 1)
4206 return 0;
4207
4208 if (!in_loop_pipeline)
4209 {
4210 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
4211 scev_initialize ();
4212 }
4213
4214 unsigned int todo = 0;
Tom de Vries61d9c522016-01-18 12:52:32 +00004215 if (parallelize_loops (oacc_kernels_p))
Tom de Vries18751892014-11-13 10:51:58 +00004216 {
4217 fun->curr_properties &= ~(PROP_gimple_eomp);
Tom de Vriese67d7a12015-07-31 06:26:44 +00004218
Mikhail Maltsevb2b29372015-10-28 01:05:53 +00004219 checking_verify_loop_structure ();
Tom de Vriese67d7a12015-07-31 06:26:44 +00004220
Tom de Vries12db08142016-01-18 12:52:23 +00004221 todo |= TODO_update_ssa;
Tom de Vries18751892014-11-13 10:51:58 +00004222 }
4223
Tom de Vries12db08142016-01-18 12:52:23 +00004224 if (!in_loop_pipeline)
4225 {
4226 scev_finalize ();
4227 loop_optimizer_finalize ();
4228 }
4229
4230 return todo;
Trevor Saundersbe55bfe2014-04-17 12:37:34 +00004231}
4232
Andrew MacLeodc1bf2a32013-10-09 14:11:30 +00004233} // anon namespace
4234
4235gimple_opt_pass *
4236make_pass_parallelize_loops (gcc::context *ctxt)
4237{
4238 return new pass_parallelize_loops (ctxt);
4239}