blob: c636a174e361e4905ce28532e9390f45e25524d1 [file] [log] [blame]
/* Expansion pass for OMP directives. Outlines regions of certain OMP
directives to separate functions, converts others into explicit calls to the
runtime library (libgomp) and so forth
Copyright (C) 2005-2022 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs.h"
#include "cgraph.h"
#include "pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "cfganal.h"
#include "internal-fn.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "splay-tree.h"
#include "cfgloop.h"
#include "omp-general.h"
#include "omp-offload.h"
#include "tree-cfgcleanup.h"
#include "alloc-pool.h"
#include "symbol-summary.h"
#include "gomp-constants.h"
#include "gimple-pretty-print.h"
#include "stringpool.h"
#include "attribs.h"
#include "tree-eh.h"
#include "opts.h"
/* OMP region information. Every parallel and workshare
directive is enclosed between two markers, the OMP_* directive
and a corresponding GIMPLE_OMP_RETURN statement.
Regions form a tree linked through OUTER, INNER and NEXT. Nodes are
allocated zero-initialized by new_omp_region and released by
omp_free_regions. */
struct omp_region
{
/* The enclosing region, or NULL for a toplevel region. */
struct omp_region *outer;
/* First child region. new_omp_region pushes new children at the head
of this list, so this is the most recently created child. */
struct omp_region *inner;
/* Next peer region (same OUTER), or the next toplevel region. */
struct omp_region *next;
/* Block containing the omp directive as its last stmt. */
basic_block entry;
/* Block containing the GIMPLE_OMP_RETURN as its last stmt. NULL when
the region has no exit marker. */
basic_block exit;
/* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. May be
NULL. */
basic_block cont;
/* If this is a combined parallel+workshare region, this is a list
of additional arguments needed by the combined parallel+workshare
library call. Filled in by determine_parallel_type. */
vec<tree, va_gc> *ws_args;
/* The code for the omp directive of this region. */
enum gimple_code type;
/* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
enum omp_clause_schedule_kind sched_kind;
/* Schedule modifiers. Tested against OMP_CLAUSE_SCHEDULE_MONOTONIC and
OMP_CLAUSE_SCHEDULE_NONMONOTONIC when choosing the library entry
point. */
unsigned char sched_modifiers;
/* True if this is a combined parallel+workshare region. Set on both
the parallel region and its inner workshare region. */
bool is_combined_parallel;
/* Copy of fd.lastprivate_conditional != 0. */
bool has_lastprivate_conditional;
/* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
a depend clause. */
gomp_ordered *ord_stmt;
};
/* Head of the list of toplevel OMP regions. new_omp_region pushes
toplevel regions here; omp_free_regions resets it to NULL. */
static struct omp_region *root_omp_region;
/* NOTE(review): not referenced in the visible part of this file;
presumably records whether any outlined child function has been
dumped -- confirm against its users. */
static bool omp_any_child_fn_dumped;
/* Forward declarations of helpers defined later in this file. The last
argument of expand_omp_build_assign (AFTER in its definition) defaults
to false. */
static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
bool = false);
static gphi *find_phi_with_arg_on_edge (tree, edge);
static void expand_omp (struct omp_region *region);
/* Predicate: has REGION been marked as a combined parallel+workshare
   region?  Simply reports the flag stored in REGION.  */

static inline bool
is_combined_parallel (struct omp_region *region)
{
  const bool combined = region->is_combined_parallel;
  return combined;
}
/* WS_ENTRY_BB is the block ending in a workshare directive that is
   nested directly inside a parallel directive.  Return true if no data
   dependency prevents expanding the pair as one combined
   parallel+workshare library call.

   The combined call to the child function may need extra arguments for
   GIMPLE_OMP_FOR regions, and those arguments are sometimes computed
   from variables handed from parent to child through
   'struct .omp_data_s'.  For instance:

	#pragma omp parallel for schedule (guided, i * 4)
	for (j ...)

   is lowered into:

	# BLOCK 2 (PAR_ENTRY_BB)
	.omp_data_o.i = i;
	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)

	# BLOCK 3 (WS_ENTRY_BB)
	.omp_data_i = &.omp_data_o;
	D.1667 = .omp_data_i->i;
	D.1598 = D.1667 * 4;
	#pragma omp for schedule (guided, D.1598)

   Once the parallel region is outlined, the call to 'bar.omp_fn.0'
   would need D.1598 in its argument list, yet that value is only
   computed *after* the call site, so the transformation is not
   possible in general.

   Rather than analysing WS_ENTRY_BB for such definitions, this
   predicate is conservative: sections are always accepted, and a loop
   is accepted only when it is a plain worksharing 'for', iterates over
   'long', and its bounds, step and chunk size (if any) are all GIMPLE
   minimal invariants.

   FIXME.  With SSA form available here the code of block 3 could just
   be hoisted into block 2.  There is no dataflow information at this
   point, and hacking something up is not worth the aggravation.  */

static bool
workshare_safe_to_combine_p (basic_block ws_entry_bb)
{
  gimple *ws_stmt = last_stmt (ws_entry_bb);

  /* Sections have no loop header operands that could block combining.  */
  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    return true;

  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
    return false;

  struct omp_for_data fd;
  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);

  /* A collapsed nest needs a compile-time constant upper bound.  */
  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
    return false;

  if (fd.iter_type != long_integer_type_node)
    return false;

  /* FIXME.  This gives up too easily.  Non-constant operands usually
     involve variables mapped into fields of .omp_data_s for sharing
     with the child function; appropriate data flow could see through
     that.  */
  const bool bounds_invariant = (is_gimple_min_invariant (fd.loop.n1)
                                 && is_gimple_min_invariant (fd.loop.n2)
                                 && is_gimple_min_invariant (fd.loop.step));

  return (bounds_invariant
          && (!fd.chunk_size || is_gimple_min_invariant (fd.chunk_size)));
}
/* Return the chunk size to use for a SCHEDULE clause whose chunk is
   CHUNK_SIZE.  When SIMD_SCHEDULE is set, the chunk is non-zero and
   omp_max_vf () is not known to be 1, the result is
   (CHUNK_SIZE + vf - 1) & -vf; otherwise CHUNK_SIZE is returned
   unchanged.  NOTE(review): the masking rounds up to a multiple of vf
   only if vf is a power of two -- confirm omp_max_vf guarantees that.  */

static tree
omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
{
  if (simd_schedule && !integer_zerop (chunk_size))
    {
      poly_uint64 vf = omp_max_vf ();
      if (!known_eq (vf, 1U))
        {
          tree type = TREE_TYPE (chunk_size);
          tree biased = fold_build2 (PLUS_EXPR, type, chunk_size,
                                     build_int_cst (type, vf - 1));
          return fold_build2 (BIT_AND_EXPR, type, biased,
                              build_int_cst (type, -vf));
        }
    }

  return chunk_size;
}
/* Build the vector of extra arguments that a combined
   parallel+workshare library call needs.  PAR_STMT is the parallel
   directive and WS_STMT the workshare directive being expanded.

   For a loop the vector holds the lower bound, upper bound and step,
   each converted to 'long', followed by the (possibly simd-adjusted)
   chunk size when the loop has one.  If the loop was combined into the
   parallel, the bounds are taken from the first two _looptemp_ clauses
   of PAR_STMT instead of the loop header.

   For sections the vector holds a single unsigned count of
   sections.  */

static vec<tree, va_gc> *
get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
{
  const location_t loc = gimple_location (ws_stmt);
  vec<tree, va_gc> *ws_args;

  gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt);
  if (for_stmt)
    {
      struct omp_for_data fd;
      omp_extract_for_data (for_stmt, &fd, NULL);

      tree lower = fd.loop.n1;
      tree upper = fd.loop.n2;

      if (gimple_omp_for_combined_into_p (for_stmt))
        {
          tree first_temp
            = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
                               OMP_CLAUSE__LOOPTEMP_);
          gcc_assert (first_temp);
          lower = OMP_CLAUSE_DECL (first_temp);

          tree second_temp = omp_find_clause (OMP_CLAUSE_CHAIN (first_temp),
                                              OMP_CLAUSE__LOOPTEMP_);
          gcc_assert (second_temp);
          upper = OMP_CLAUSE_DECL (second_temp);
        }

      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));

      /* Bounds and step go in first, all as 'long'.  */
      tree header_ops[3] = { lower, upper, fd.loop.step };
      for (unsigned i = 0; i < 3; i++)
        ws_args->quick_push (fold_convert_loc (loc, long_integer_type_node,
                                               header_ops[i]));

      if (fd.chunk_size)
        {
          tree chunk = fold_convert_loc (loc, long_integer_type_node,
                                         fd.chunk_size);
          ws_args->quick_push (omp_adjust_chunk_size (chunk,
                                                      fd.simd_schedule));
        }

      return ws_args;
    }

  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
    {
      /* The number of sections equals the number of edges leaving the
         GIMPLE_OMP_SECTIONS_SWITCH block, not counting the one that
         leads to the exit of the sections region.  */
      basic_block switch_bb = single_succ (gimple_bb (ws_stmt));
      tree count = build_int_cst (unsigned_type_node,
                                  EDGE_COUNT (switch_bb->succs) - 1);
      vec_alloc (ws_args, 1);
      ws_args->quick_push (count);
      return ws_args;
    }

  gcc_unreachable ();
}
/* Discover whether REGION is a combined parallel+workshare region. */
static void
determine_parallel_type (struct omp_region *region)
{
basic_block par_entry_bb, par_exit_bb;
basic_block ws_entry_bb, ws_exit_bb;
if (region == NULL || region->inner == NULL
|| region->exit == NULL || region->inner->exit == NULL
|| region->inner->cont == NULL)
return;
/* We only support parallel+for and parallel+sections. */
if (region->type != GIMPLE_OMP_PARALLEL
|| (region->inner->type != GIMPLE_OMP_FOR
&& region->inner->type != GIMPLE_OMP_SECTIONS))
return;
/* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
WS_EXIT_BB -> PAR_EXIT_BB. */
par_entry_bb = region->entry;
par_exit_bb = region->exit;
ws_entry_bb = region->inner->entry;
ws_exit_bb = region->inner->exit;
/* Give up for task reductions on the parallel, while it is implementable,
adding another big set of APIs or slowing down the normal paths is
not acceptable. */
tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
return;
if (single_succ (par_entry_bb) == ws_entry_bb
&& single_succ (ws_exit_bb) == par_exit_bb
&& workshare_safe_to_combine_p (ws_entry_bb)
&& (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
|| (last_and_only_stmt (ws_entry_bb)
&& last_and_only_stmt (par_exit_bb))))
{
gimple *par_stmt = last_stmt (par_entry_bb);
gimple *ws_stmt = last_stmt (ws_entry_bb);
if (region->inner->type == GIMPLE_OMP_FOR)
{
/* If this is a combined parallel loop, we need to determine
whether or not to use the combined library calls. There
are two cases where we do not apply the transformation:
static loops and any kind of ordered loop. In the first
case, we already open code the loop so there is no need
to do anything else. In the latter case, the combined
parallel loop call would still need extra synchronization
to implement ordered semantics, so there would not be any
gain in using the combined call. */
tree clauses = gimple_omp_for_clauses (ws_stmt);
tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
if (c == NULL
|| ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
== OMP_CLAUSE_SCHEDULE_STATIC)
|| omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
|| omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
|| ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
&& POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
return;
}
else if (region->inner->type == GIMPLE_OMP_SECTIONS
&& (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
OMP_CLAUSE__REDUCTEMP_)
|| omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
OMP_CLAUSE__CONDTEMP_)))
return;
region->is_combined_parallel = true;
region->inner->is_combined_parallel = true;
region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
}
}
/* Debugging dumps for parallel regions. Prototypes for the three
non-static dump entry points defined below; presumably kept external
so they can be invoked by name from a debugger -- confirm. */
void dump_omp_region (FILE *, struct omp_region *, int);
void debug_omp_region (struct omp_region *);
void debug_all_omp_regions (void);
/* Dump to FILE the parallel region tree rooted at REGION, followed by
   all of REGION's peers.  Each region prints its entry block and
   directive code, then its children indented by four further columns,
   then its GIMPLE_OMP_CONTINUE block (if any) and its GIMPLE_OMP_RETURN
   block (or a note that there is none).  INDENT is the number of
   columns to indent REGION itself.

   A NULL REGION prints nothing, so the debugger entry points are safe
   to call when no region tree exists.  */

void
dump_omp_region (FILE *file, struct omp_region *region, int indent)
{
  /* Peers are walked iteratively; only nesting recurses, so the stack
     depth follows the nesting depth rather than the number of peers.  */
  for (; region; region = region->next)
    {
      fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
               gimple_code_name[region->type]);

      if (region->inner)
        dump_omp_region (file, region->inner, indent + 4);

      if (region->cont)
        fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
                 region->cont->index);

      if (region->exit)
        fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
                 region->exit->index);
      else
        fprintf (file, "%*s[no exit marker]\n", indent, "");
    }
}
/* Debugger entry point: dump REGION, its children and its peers to
   stderr, starting at column zero.  */

DEBUG_FUNCTION void
debug_omp_region (struct omp_region *region)
{
  const int top_level_indent = 0;
  dump_omp_region (stderr, region, top_level_indent);
}
/* Debugger entry point: dump every toplevel OMP region (and everything
   nested inside) to stderr.  Does nothing when no region tree exists,
   i.e. when ROOT_OMP_REGION is NULL, which is its state after
   omp_free_regions.  */

DEBUG_FUNCTION void
debug_all_omp_regions (void)
{
  /* dump_omp_region dereferences its argument, so do not hand it an
     empty tree.  */
  if (!root_omp_region)
    return;

  dump_omp_region (stderr, root_omp_region, 0);
}
/* Allocate a zero-initialized region of directive code TYPE whose
   entry block is BB and link it into the region tree.  With a non-NULL
   PARENT the new region becomes the first entry of PARENT's list of
   inner regions; otherwise it becomes the first entry of the toplevel
   list headed by ROOT_OMP_REGION.  Returns the new region.  */

static struct omp_region *
new_omp_region (basic_block bb, enum gimple_code type,
                struct omp_region *parent)
{
  struct omp_region *region = XCNEW (struct omp_region);

  /* Pick the list this region is pushed onto: the parent's children
     for a nested region, the toplevel list otherwise.  */
  struct omp_region **list_head = parent ? &parent->inner : &root_omp_region;

  region->outer = parent;
  region->entry = bb;
  region->type = type;
  region->next = *list_head;
  *list_head = region;

  return region;
}
/* Free REGION together with every region nested inside it.  REGION's
   own peers are not touched.  */

static void
free_omp_region_1 (struct omp_region *region)
{
  struct omp_region *child = region->inner;

  while (child)
    {
      /* Fetch the peer link before the child is freed.  */
      struct omp_region *following = child->next;
      free_omp_region_1 (child);
      child = following;
    }

  free (region);
}
/* Free every toplevel region and everything nested inside, then mark
   the region tree as empty by clearing ROOT_OMP_REGION.  */

void
omp_free_regions (void)
{
  struct omp_region *toplevel = root_omp_region;

  while (toplevel)
    {
      /* Fetch the peer link before the region is freed.  */
      struct omp_region *following = toplevel->next;
      free_omp_region_1 (toplevel);
      toplevel = following;
    }

  root_omp_region = NULL;
}
/* Convenience wrapper: decompose the condition tree COND into a
   comparison code and two operands and build a GIMPLE_COND from them
   with both branch labels left as NULL_TREE.  */

static gcond *
gimple_build_cond_empty (tree cond)
{
  tree lhs, rhs;
  enum tree_code pred_code;

  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);

  gcond *stmt = gimple_build_cond (pred_code, lhs, rhs,
                                   NULL_TREE, NULL_TREE);
  return stmt;
}
/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
Add CHILD_FNDECL to decl chain of the supercontext of the block
ENTRY_BLOCK - this is the block which originally contained the
code from which CHILD_FNDECL was created.
Together, these actions ensure that the debug info for the outlined
function will be emitted with the correct lexical scope. */
static void
adjust_context_and_scope (struct omp_region *region, tree entry_block,
tree child_fndecl)
{
tree parent_fndecl = NULL_TREE;
gimple *entry_stmt;
/* OMP expansion expands inner regions before outer ones, so if
we e.g. have explicit task region nested in parallel region, when
expanding the task region current_function_decl will be the original
source function, but we actually want to use as context the child
function of the parallel. */
for (region = region->outer;
region && parent_fndecl == NULL_TREE; region = region->outer)
switch (region->type)
{
case GIMPLE_OMP_PARALLEL:
case GIMPLE_OMP_TASK:
case GIMPLE_OMP_TEAMS:
entry_stmt = last_stmt (region->entry);
parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
break;
case GIMPLE_OMP_TARGET:
entry_stmt = last_stmt (region->entry);
parent_fndecl
= gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
break;
default:
break;
}
if (parent_fndecl == NULL_TREE)
parent_fndecl = current_function_decl;
DECL_CONTEXT (child_fndecl) = parent_fndecl;
if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
{
tree b = BLOCK_SUPERCONTEXT (entry_block);
if (TREE_CODE (b) == BLOCK)
{
DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
BLOCK_VARS (b) = child_fndecl;
}
}
}
/* Build the function calls to GOMP_parallel etc to actually
generate the parallel operation. REGION is the parallel region
being expanded. BB is the block where to insert the code.
ENTRY_STMT is the parallel directive; it supplies the clauses, the data
argument and the child function. WS_ARGS
will be set if this is a call to a combined parallel+workshare
construct, it contains the list of additional arguments needed by
the workshare construct.
The emitted call receives, in order: the address of the child function,
the address of the data argument (or a null pointer), the number of
threads, the WS_ARGS (if any) and the proc_bind flags. */
static void
expand_parallel_call (struct omp_region *region, basic_block bb,
gomp_parallel *entry_stmt,
vec<tree, va_gc> *ws_args)
{
tree t, t1, t2, val, cond, c, clauses, flags;
gimple_stmt_iterator gsi;
gimple *stmt;
enum built_in_function start_ix;
int start_ix2;
location_t clause_loc;
vec<tree, va_gc> *args;
clauses = gimple_omp_parallel_clauses (entry_stmt);
/* Determine what flavor of GOMP_parallel we will be
emitting. */
start_ix = BUILT_IN_GOMP_PARALLEL;
/* A _reductemp_ clause selects the reductions entry point and takes
precedence over any combined parallel+workshare form. */
tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
if (rtmp)
start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
else if (is_combined_parallel (region))
{
switch (region->inner->type)
{
case GIMPLE_OMP_FOR:
gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
/* START_IX2 is computed as an offset that is added to
BUILT_IN_GOMP_PARALLEL_LOOP_STATIC below.
NOTE(review): the literal offsets 3, 6 and 7 rely on the declaration
order of the parallel-loop builtins -- confirm against the builtin
definitions before changing them. */
switch (region->inner->sched_kind)
{
case OMP_CLAUSE_SCHEDULE_RUNTIME:
/* For lastprivate(conditional:), our implementation
requires monotonic behavior. */
if (region->inner->has_lastprivate_conditional != 0)
start_ix2 = 3;
else if ((region->inner->sched_modifiers
& OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
start_ix2 = 6;
else if ((region->inner->sched_modifiers
& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
start_ix2 = 7;
else
start_ix2 = 3;
break;
case OMP_CLAUSE_SCHEDULE_DYNAMIC:
case OMP_CLAUSE_SCHEDULE_GUIDED:
/* Without the monotonic modifier and without
lastprivate(conditional:), use the offset 3 past the schedule kind;
otherwise fall through to the offset equal to the schedule kind. */
if ((region->inner->sched_modifiers
& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
&& !region->inner->has_lastprivate_conditional)
{
start_ix2 = 3 + region->inner->sched_kind;
break;
}
/* FALLTHRU */
default:
start_ix2 = region->inner->sched_kind;
break;
}
start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
start_ix = (enum built_in_function) start_ix2;
break;
case GIMPLE_OMP_SECTIONS:
start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
break;
default:
gcc_unreachable ();
}
}
/* By default, the value of NUM_THREADS is zero (selected at run time)
and there is no conditional. */
cond = NULL_TREE;
val = build_int_cst (unsigned_type_node, 0);
flags = build_int_cst (unsigned_type_node, 0);
c = omp_find_clause (clauses, OMP_CLAUSE_IF);
if (c)
cond = OMP_CLAUSE_IF_EXPR (c);
c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
if (c)
{
val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
clause_loc = OMP_CLAUSE_LOCATION (c);
}
else
clause_loc = gimple_location (entry_stmt);
c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
if (c)
flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
/* Ensure 'val' is of the correct type. */
val = fold_convert_loc (clause_loc, unsigned_type_node, val);
/* If we found the clause 'if (cond)', build either
(cond == 0) or (cond ? val : 1u). The first form is used when VAL
is zero: it yields 1 (a single thread) when the condition is false
and 0 (selected at run time) when it is true. */
if (cond)
{
cond = gimple_boolify (cond);
if (integer_zerop (val))
val = fold_build2_loc (clause_loc,
EQ_EXPR, unsigned_type_node, cond,
build_int_cst (TREE_TYPE (cond), 0));
else
{
/* VAL is not known to be zero, so materialize `cond ? val : 1u' with
explicit control flow: COND_BB branches to THEN_BB (which stores VAL)
or to ELSE_BB (which stores 1), and both fall through to BB. */
basic_block cond_bb, then_bb, else_bb;
edge e, e_then, e_else;
tree tmp_then, tmp_else, tmp_join, tmp_var;
tmp_var = create_tmp_var (TREE_TYPE (val));
if (gimple_in_ssa_p (cfun))
{
tmp_then = make_ssa_name (tmp_var);
tmp_else = make_ssa_name (tmp_var);
tmp_join = make_ssa_name (tmp_var);
}
else
{
/* Outside SSA form all three names are the same temporary. */
tmp_then = tmp_var;
tmp_else = tmp_var;
tmp_join = tmp_var;
}
/* Split BB after its labels; the first half becomes COND_BB and the
call is emitted into the second half, which stays in BB. */
e = split_block_after_labels (bb);
cond_bb = e->src;
bb = e->dest;
remove_edge (e);
then_bb = create_empty_bb (cond_bb);
else_bb = create_empty_bb (then_bb);
set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
stmt = gimple_build_cond_empty (cond);
gsi = gsi_start_bb (cond_bb);
gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
gsi = gsi_start_bb (then_bb);
expand_omp_build_assign (&gsi, tmp_then, val, true);
gsi = gsi_start_bb (else_bb);
expand_omp_build_assign (&gsi, tmp_else,
build_int_cst (unsigned_type_node, 1),
true);
make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
add_bb_to_loop (then_bb, cond_bb->loop_father);
add_bb_to_loop (else_bb, cond_bb->loop_father);
e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
if (gimple_in_ssa_p (cfun))
{
/* In SSA form the two definitions are merged by a PHI in BB. */
gphi *phi = create_phi_node (tmp_join, bb);
add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
}
val = tmp_join;
}
gsi = gsi_start_bb (bb);
val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
}
/* Assemble the call arguments and emit the call after the last
non-debug statement of BB. */
gsi = gsi_last_nondebug_bb (bb);
t = gimple_omp_parallel_data_arg (entry_stmt);
if (t == NULL)
t1 = null_pointer_node;
else
t1 = build_fold_addr_expr (t);
tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
t2 = build_fold_addr_expr (child_fndecl);
/* Four fixed arguments plus however many WS_ARGS there are. */
vec_alloc (args, 4 + vec_safe_length (ws_args));
args->quick_push (t2);
args->quick_push (t1);
args->quick_push (val);
if (ws_args)
args->splice (*ws_args);
args->quick_push (flags);
t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
builtin_decl_explicit (start_ix), args);
if (rtmp)
{
/* For the reductions variant, store the value returned by the call
into the _reductemp_ decl, converting through a pointer-sized
integer. */
tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
fold_convert (type,
fold_convert (pointer_sized_int_node, t)));
}
force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
}
/* Build the function call to GOMP_task to actually
generate the task operation. BB is the block where to insert the code.
REGION is the task region being expanded; for a taskloop its outer
region must be the GIMPLE_OMP_FOR of kind taskloop. ENTRY_STMT is the
task directive supplying the clauses, data argument, child function and
copy function. For a taskloop the call is to GOMP_taskloop or
GOMP_taskloop_ull instead. */
static void
expand_task_call (struct omp_region *region, basic_block bb,
gomp_task *entry_stmt)
{
tree t1, t2, t3;
gimple_stmt_iterator gsi;
location_t loc = gimple_location (entry_stmt);
tree clauses = gimple_omp_task_clauses (entry_stmt);
tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
/* Flags known at compile time; flags that depend on run-time
conditions (if on taskloop, final) are added to FLAGS further down. */
unsigned int iflags
= (untied ? GOMP_TASK_FLAG_UNTIED : 0)
| (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
| (depend ? GOMP_TASK_FLAG_DEPEND : 0);
bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
tree num_tasks = NULL_TREE;
bool ull = false;
if (taskloop_p)
{
gimple *g = last_stmt (region->outer->entry);
gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
&& gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
struct omp_for_data fd;
omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
/* The first two _looptemp_ clauses of the task provide the start and
end variables. NOTE(review): both lookups are used without a NULL
check, so the clauses are assumed to be present. */
startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
OMP_CLAUSE__LOOPTEMP_);
startvar = OMP_CLAUSE_DECL (startvar);
endvar = OMP_CLAUSE_DECL (endvar);
step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
if (fd.loop.cond_code == LT_EXPR)
iflags |= GOMP_TASK_FLAG_UP;
tree tclauses = gimple_omp_for_clauses (g);
/* NUM_TASKS carries either the num_tasks or the grainsize expression
(the latter marked with GOMP_TASK_FLAG_GRAINSIZE), or zero when
neither clause is present. */
num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
if (num_tasks)
{
if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
iflags |= GOMP_TASK_FLAG_STRICT;
num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
}
else
{
num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
if (num_tasks)
{
iflags |= GOMP_TASK_FLAG_GRAINSIZE;
if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
iflags |= GOMP_TASK_FLAG_STRICT;
num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
}
else
num_tasks = integer_zero_node;
}
num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
/* Without an if clause the taskloop is unconditionally marked with
GOMP_TASK_FLAG_IF; with one, the flag is added at run time below. */
if (ifc == NULL_TREE)
iflags |= GOMP_TASK_FLAG_IF;
if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
iflags |= GOMP_TASK_FLAG_NOGROUP;
/* Selects the unsigned long long variant of the taskloop call. */
ull = fd.iter_type == long_long_unsigned_type_node;
if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
iflags |= GOMP_TASK_FLAG_REDUCTION;
}
else
{
/* The priority and detach flags are only set for plain tasks. */
if (priority)
iflags |= GOMP_TASK_FLAG_PRIORITY;
if (detach)
iflags |= GOMP_TASK_FLAG_DETACH;
}
tree flags = build_int_cst (unsigned_type_node, iflags);
tree cond = boolean_true_node;
if (ifc)
{
if (taskloop_p)
{
/* The taskloop call has no separate condition argument, so fold the
if clause into FLAGS: add GOMP_TASK_FLAG_IF when the condition holds.
GOMP_TASK_FLAG_IF is not in IFLAGS on this path, so the addition
behaves like a bitwise OR provided the flag is a distinct bit. */
tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
build_int_cst (unsigned_type_node,
GOMP_TASK_FLAG_IF),
build_int_cst (unsigned_type_node, 0));
flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
flags, t);
}
else
cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
}
if (finalc)
{
/* Add GOMP_TASK_FLAG_FINAL to FLAGS at run time when the final
expression is true. */
tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
build_int_cst (unsigned_type_node,
GOMP_TASK_FLAG_FINAL),
build_int_cst (unsigned_type_node, 0));
flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
}
/* From here on DEPEND, PRIORITY and DETACH hold the argument values
for the call rather than the clauses. */
if (depend)
depend = OMP_CLAUSE_DECL (depend);
else
depend = build_int_cst (ptr_type_node, 0);
if (priority)
priority = fold_convert (integer_type_node,
OMP_CLAUSE_PRIORITY_EXPR (priority));
else
priority = integer_zero_node;
gsi = gsi_last_nondebug_bb (bb);
detach = (detach
? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
: null_pointer_node);
/* T2 is the address of the data argument, T1 of the child function
and T3 of the copy function; missing ones are null pointers. */
tree t = gimple_omp_task_data_arg (entry_stmt);
if (t == NULL)
t2 = null_pointer_node;
else
t2 = build_fold_addr_expr_loc (loc, t);
t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
t = gimple_omp_task_copy_fn (entry_stmt);
if (t == NULL)
t3 = null_pointer_node;
else
t3 = build_fold_addr_expr_loc (loc, t);
/* The taskloop call takes 11 arguments and has no condition, depend
or detach argument; the task call takes 10. */
if (taskloop_p)
t = build_call_expr (ull
? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
: builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
11, t1, t2, t3,
gimple_omp_task_arg_size (entry_stmt),
gimple_omp_task_arg_align (entry_stmt), flags,
num_tasks, priority, startvar, endvar, step);
else
t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
10, t1, t2, t3,
gimple_omp_task_arg_size (entry_stmt),
gimple_omp_task_arg_align (entry_stmt), cond, flags,
depend, priority, detach);
force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
}
/* Emit the library call that implements a taskwait with a depend
   clause.  ENTRY_STMT carries the clauses and BB is the block the call
   is appended to.  Nothing is emitted when there is no depend clause.
   The nowait variant of the builtin is chosen when a nowait clause is
   present; the call's single argument is the depend clause's decl.  */

static void
expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
{
  tree clauses = gimple_omp_task_clauses (entry_stmt);

  tree depend_clause = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
  if (depend_clause == NULL_TREE)
    return;
  tree depend = OMP_CLAUSE_DECL (depend_clause);

  enum built_in_function f;
  if (omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE)
    f = BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT;
  else
    f = BUILT_IN_GOMP_TASKWAIT_DEPEND;

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
  tree call = build_call_expr (builtin_decl_explicit (f), 1, depend);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}
/* Emit the call to GOMP_teams_reg that runs a host teams construct.
   ENTRY_STMT is the teams directive and BB the block the call is
   appended to.  The call receives the address of the child function,
   the address of the data argument (or a null pointer), the num_teams
   upper expression and the thread_limit expression, both converted to
   unsigned with 0 standing in for an absent clause, and a trailing
   zero reserved for future use.  */

static void
expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
{
  tree clauses = gimple_omp_teams_clauses (entry_stmt);

  tree teams_clause = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
  tree num_teams
    = (teams_clause == NULL_TREE
       ? build_int_cst (unsigned_type_node, 0)
       : fold_convert (unsigned_type_node,
                       OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (teams_clause)));

  tree limit_clause = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
  tree thread_limit
    = (limit_clause == NULL_TREE
       ? build_int_cst (unsigned_type_node, 0)
       : fold_convert (unsigned_type_node,
                       OMP_CLAUSE_THREAD_LIMIT_EXPR (limit_clause)));

  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);

  tree data_arg = gimple_omp_teams_data_arg (entry_stmt);
  tree data_addr = (data_arg == NULL
                    ? null_pointer_node
                    : build_fold_addr_expr (data_arg));
  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
  tree fn_addr = build_fold_addr_expr (child_fndecl);

  vec<tree, va_gc> *args;
  vec_alloc (args, 5);
  args->quick_push (fn_addr);
  args->quick_push (data_addr);
  args->quick_push (num_teams);
  args->quick_push (thread_limit);
  /* For future extensibility.  */
  args->quick_push (build_zero_cst (unsigned_type_node));

  tree call
    = build_call_expr_loc_vec (UNKNOWN_LOCATION,
                               builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
                               args);
  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
}
/* Link the DECLs held in vector V into a list through their DECL_CHAIN
   fields, keeping the vector's order, and return the head of the list
   (NULL_TREE when there are no elements to visit).  */

static tree
vec2chain (vec<tree, va_gc> *v)
{
  tree head = NULL_TREE;
  tree decl;
  unsigned idx;

  /* Walk backwards so each element can point at its successor.  */
  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, idx, decl)
    {
      DECL_CHAIN (decl) = head;
      head = decl;
    }

  return head;
}
/* Remove barriers in REGION->EXIT's block. Note that this is only
valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
removed.
The removal is done by marking the GIMPLE_OMP_RETURN that ends each
predecessor block of the exit block as nowait, and only when no
addressable variable is found (see below). */
static void
remove_exit_barrier (struct omp_region *region)
{
gimple_stmt_iterator gsi;
basic_block exit_bb;
edge_iterator ei;
edge e;
gimple *stmt;
/* Tri-state: -1 means not yet computed, 0 means none found, 1 means at
least one addressable variable found. Computed at most once, on the
first candidate barrier. */
int any_addressable_vars = -1;
exit_bb = region->exit;
/* If the parallel region doesn't return, we don't have REGION->EXIT
block at all. */
if (! exit_bb)
return;
/* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
statements that can appear in between are extremely limited -- no
memory operations at all. Here, we allow nothing at all, so the
only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
gsi = gsi_last_nondebug_bb (exit_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
gsi_prev_nondebug (&gsi);
if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
return;
/* Look at the last non-debug statement of every predecessor block. */
FOR_EACH_EDGE (e, ei, exit_bb->preds)
{
gsi = gsi_last_nondebug_bb (e->src);
if (gsi_end_p (gsi))
continue;
stmt = gsi_stmt (gsi);
/* Only a GIMPLE_OMP_RETURN that is not already nowait is a
candidate. */
if (gimple_code (stmt) == GIMPLE_OMP_RETURN
&& !gimple_omp_return_nowait_p (stmt))
{
/* OpenMP 3.0 tasks unfortunately prevent this optimization
in many cases. If there could be tasks queued, the barrier
might be needed to let the tasks run before some local
variable of the parallel that the task uses as shared
runs out of scope. The task can be spawned either
from within current function (this would be easy to check)
or from some function it calls and gets passed an address
of such a variable. */
if (any_addressable_vars < 0)
{
gomp_parallel *parallel_stmt
= as_a <gomp_parallel *> (last_stmt (region->entry));
tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
tree local_decls, block, decl;
unsigned ix;
any_addressable_vars = 0;
/* First check the local decls of the child function. */
FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
if (TREE_ADDRESSABLE (decl))
{
any_addressable_vars = 1;
break;
}
/* Then check the variables of the lexical blocks from STMT's block
outwards, stopping after the block of the parallel statement. */
for (block = gimple_block (stmt);
!any_addressable_vars
&& block
&& TREE_CODE (block) == BLOCK;
block = BLOCK_SUPERCONTEXT (block))
{
for (local_decls = BLOCK_VARS (block);
local_decls;
local_decls = DECL_CHAIN (local_decls))
if (TREE_ADDRESSABLE (local_decls))
{
any_addressable_vars = 1;
break;
}
if (block == gimple_block (parallel_stmt))
break;
}
}
/* With no addressable variable in sight the workshare's barrier is
redundant, so drop it. */
if (!any_addressable_vars)
gimple_omp_return_set_nowait (stmt);
}
}
}
/* Apply remove_exit_barrier to REGION if it is a parallel region, then
   recurse into every region nested inside REGION.  REGION's own peers
   are not visited.  */

static void
remove_exit_barriers (struct omp_region *region)
{
  if (region->type == GIMPLE_OMP_PARALLEL)
    remove_exit_barrier (region);

  for (struct omp_region *child = region->inner; child; child = child->next)
    remove_exit_barriers (child);
}
/* Redirect calls to omp_get_thread_num () and omp_get_num_threads ()
   in the current function to the corresponding builtins.

   The library functions cannot be declared const, yet within one
   parallel body their results do not change, so inside such a body
   they can be replaced by __builtin_omp_get_{thread_num,num_threads} (),
   which are declared const.  The same holds for a task body, with one
   exception: in an untied task omp_get_thread_num () may change at any
   task scheduling point, so it is left alone there.  ENTRY_STMT is the
   directive whose body is being processed.  */

static void
optimize_omp_library_calls (gimple *entry_stmt)
{
  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);

  bool untied_task = false;
  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
    untied_task = omp_find_clause (gimple_omp_task_clauses (entry_stmt),
                                   OMP_CLAUSE_UNTIED) != NULL;

  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      gimple_stmt_iterator gsi = gsi_start_bb (bb);
      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *call = gsi_stmt (gsi);
          if (!is_gimple_call (call))
            continue;

          /* Only direct calls to external, public declarations without
             a body are candidates.  */
          tree decl = gimple_call_fndecl (call);
          if (!decl
              || !DECL_EXTERNAL (decl)
              || !TREE_PUBLIC (decl)
              || DECL_INITIAL (decl) != NULL)
            continue;

          tree built_in;
          if (DECL_NAME (decl) == thr_num_id)
            {
              /* In #pragma omp task untied omp_get_thread_num () can
                 change during the execution of the task region.  */
              if (untied_task)
                continue;
              built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
            }
          else if (DECL_NAME (decl) == num_thr_id)
            built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
          else
            continue;

          /* The callee must really be the library function: same
             assembler name and no arguments.  */
          if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
              || gimple_call_num_args (call) != 0)
            continue;

          if (flag_exceptions && !TREE_NOTHROW (decl))
            continue;

          /* And it must be a function whose return type is compatible
             with the builtin's.  */
          if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
              || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
                                      TREE_TYPE (TREE_TYPE (built_in))))
            continue;

          gimple_call_set_fndecl (call, built_in);
        }
    }
}
/* walk_tree callback used by expand_omp_build_assign and
   expand_omp_build_cond.  Return *TP (non-NULL) if it is a variable with
   a DECL_VALUE_EXPR, meaning the containing statement needs to be
   regimplified; otherwise return NULL_TREE.  As a side effect, the
   invariant flags of any ADDR_EXPR visited are recomputed.  */

static tree
expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
{
  tree node = *tp;

  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
  if (VAR_P (node) && DECL_HAS_VALUE_EXPR_P (node))
    return node;

  if (TREE_CODE (node) == ADDR_EXPR)
    recompute_tree_invariant_for_addr_expr (node);

  /* Do not descend into types or declarations.  */
  *walk_subtrees = !(TYPE_P (node) || DECL_P (node));
  return NULL_TREE;
}
/* Emit the assignment TO = FROM next to *GSI_P: after it when AFTER is
   true, before it otherwise.  FROM is first passed through
   force_gimple_operand_gsi (in "simple" mode when TO is an addressable
   decl).  If either operand of the new statement contains a variable
   with a DECL_VALUE_EXPR, the statement's operands are regimplified.  */

static void
expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
                         bool after)
{
  const bool want_simple = DECL_P (to) && TREE_ADDRESSABLE (to);
  gimple *assign;

  if (after)
    {
      from = force_gimple_operand_gsi (gsi_p, from, want_simple, NULL_TREE,
                                       false, GSI_CONTINUE_LINKING);
      assign = gimple_build_assign (to, from);
      gsi_insert_after (gsi_p, assign, GSI_CONTINUE_LINKING);
    }
  else
    {
      from = force_gimple_operand_gsi (gsi_p, from, want_simple, NULL_TREE,
                                       true, GSI_SAME_STMT);
      assign = gimple_build_assign (to, from);
      gsi_insert_before (gsi_p, assign, GSI_SAME_STMT);
    }

  /* Nothing more to do unless one of the operands asks for
     regimplification; TO is only inspected when FROM does not.  */
  if (!walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
      && !walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
    return;

  gimple_stmt_iterator assign_gsi = gsi_for_stmt (assign);
  gimple_regimplify_operands (assign, &assign_gsi);
}
/* Build the condition LHS CODE RHS (with no labels) and insert it after
   *GSI_P when AFTER is true, before it otherwise.  If either operand
   contains a variable with a DECL_VALUE_EXPR, the operands of the new
   statement are regimplified.  Return the new condition statement.  */

static gcond *
expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
                       tree lhs, tree rhs, bool after = false)
{
  gcond *cond = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);

  if (!after)
    gsi_insert_before (gsi_p, cond, GSI_SAME_STMT);
  else
    gsi_insert_after (gsi_p, cond, GSI_CONTINUE_LINKING);

  /* The right-hand side is only walked when the left-hand side did not
     already request regimplification.  */
  bool regimplify
    = walk_tree (gimple_cond_lhs_ptr (cond), expand_omp_regimplify_p,
                 NULL, NULL) != NULL_TREE;
  if (!regimplify)
    regimplify
      = walk_tree (gimple_cond_rhs_ptr (cond), expand_omp_regimplify_p,
                   NULL, NULL) != NULL_TREE;

  if (regimplify)
    {
      gimple_stmt_iterator cond_gsi = gsi_for_stmt (cond);
      gimple_regimplify_operands (cond, &cond_gsi);
    }

  return cond;
}
/* Expand the OpenMP parallel, task or teams directive starting at REGION.
Unless the child function already has a CFG, the body of the region is
moved into the directive's child function with move_sese_region_to_fn and
the child is registered with the callgraph. Afterwards the directive is
handed to expand_parallel_call, expand_teams_call or expand_task_call.
A task statement flagged as a taskwait is handled separately through
expand_taskwait_call. */
static void
expand_omp_taskreg (struct omp_region *region)
{
basic_block entry_bb, exit_bb, new_bb;
struct function *child_cfun;
tree child_fn, block, t;
gimple_stmt_iterator gsi;
gimple *entry_stmt, *stmt;
edge e;
vec<tree, va_gc> *ws_args;
entry_stmt = last_stmt (region->entry);
/* A task statement flagged as a taskwait is not outlined: remove the
directive statement and let expand_taskwait_call handle its block. */
if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
&& gimple_omp_task_taskwait_p (entry_stmt))
{
new_bb = region->entry;
gsi = gsi_last_nondebug_bb (region->entry);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
gsi_remove (&gsi, true);
expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
return;
}
child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
child_cfun = DECL_STRUCT_FUNCTION (child_fn);
entry_bb = region->entry;
/* EXIT_BB is region->cont for a task and region->exit otherwise. */
if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
exit_bb = region->cont;
else
exit_bb = region->exit;
/* WS_ARGS is only taken from the region for a combined parallel. */
if (is_combined_parallel (region))
ws_args = region->ws_args;
else
ws_args = NULL;
if (child_cfun->cfg)
{
/* Due to inlining, it may happen that we have already outlined
the region, in which case all we need to do is make the
sub-graph unreachable and emit the parallel call. */
edge entry_succ_e, exit_succ_e;
entry_succ_e = single_succ_edge (entry_bb);
gsi = gsi_last_nondebug_bb (entry_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
|| gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
|| gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
gsi_remove (&gsi, true);
new_bb = entry_bb;
/* Connect the entry block to the successor of EXIT_BB before the old
body is removed. */
if (exit_bb)
{
exit_succ_e = single_succ_edge (exit_bb);
make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
}
remove_edge_and_dominated_blocks (entry_succ_e);
}
else
{
unsigned srcidx, dstidx, num;
/* If the parallel region needs data sent from the parent
function, then the very first statement (except possible
tree profile counter updates) of the parallel body
is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
&.OMP_DATA_O is passed as an argument to the child function,
we need to replace it with the argument as seen by the child
function.
In most cases, this will end up being the identity assignment
.OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
a function call that has been inlined, the original PARM_DECL
.OMP_DATA_I may have been converted into a different local
variable. In which case, we need to keep the assignment. */
if (gimple_omp_taskreg_data_arg (entry_stmt))
{
basic_block entry_succ_bb
= single_succ_p (entry_bb) ? single_succ (entry_bb)
: FALLTHRU_EDGE (entry_bb)->dest;
tree arg;
gimple *parcopy_stmt = NULL;
/* Scan for the first two-operand assignment whose RHS is the address
of the data argument. Running off the end of the block trips the
assert inside the loop. */
for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
{
gimple *stmt;
gcc_assert (!gsi_end_p (gsi));
stmt = gsi_stmt (gsi);
if (gimple_code (stmt) != GIMPLE_ASSIGN)
continue;
if (gimple_num_ops (stmt) == 2)
{
tree arg = gimple_assign_rhs1 (stmt);
/* We ignore the subcode because we're
effectively doing a STRIP_NOPS. */
if (TREE_CODE (arg) == ADDR_EXPR
&& (TREE_OPERAND (arg, 0)
== gimple_omp_taskreg_data_arg (entry_stmt)))
{
parcopy_stmt = stmt;
break;
}
}
}
gcc_assert (parcopy_stmt != NULL);
arg = DECL_ARGUMENTS (child_fn);
if (!gimple_in_ssa_p (cfun))
{
/* GSI still points at PARCOPY_STMT here. If the copy already assigns
to the child's parameter, delete it; otherwise make it read from
the parameter. */
if (gimple_assign_lhs (parcopy_stmt) == arg)
gsi_remove (&gsi, true);
else
{
/* ?? Is setting the subcode really necessary ?? */
gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
gimple_assign_set_rhs1 (parcopy_stmt, arg);
}
}
else
{
tree lhs = gimple_assign_lhs (parcopy_stmt);
gcc_assert (SSA_NAME_VAR (lhs) == arg);
/* We'd like to set the rhs to the default def in the child_fn,
but it's too early to create ssa names in the child_fn.
Instead, we set the rhs to the parm. In
move_sese_region_to_fn, we introduce a default def for the
parm, map the parm to its default def, and once we encounter
this stmt, replace the parm with the default def. */
gimple_assign_set_rhs1 (parcopy_stmt, arg);
update_stmt (parcopy_stmt);
}
}
/* Declare local variables needed in CHILD_CFUN. */
block = DECL_INITIAL (child_fn);
BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
/* The gimplifier could record temporaries in parallel/task block
rather than in containing function's local_decls chain,
which would mean cgraph missed finalizing them. Do it now. */
for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
varpool_node::finalize_decl (t);
DECL_SAVED_TREE (child_fn) = NULL;
/* We'll create a CFG for child_fn, so no gimple body is needed. */
gimple_set_body (child_fn, NULL);
TREE_USED (block) = 1;
/* Reset DECL_CONTEXT on function arguments. */
for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
DECL_CONTEXT (t) = child_fn;
/* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
so that it can be moved to the child function. */
gsi = gsi_last_nondebug_bb (entry_bb);
stmt = gsi_stmt (gsi);
gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
|| gimple_code (stmt) == GIMPLE_OMP_TASK
|| gimple_code (stmt) == GIMPLE_OMP_TEAMS));
e = split_block (entry_bb, stmt);
gsi_remove (&gsi, true);
entry_bb = e->dest;
/* For anything but a task the new entry block simply falls through.
For a task, the branch edge out of the entry block (asserted to lead
to region->exit) is replaced by a temporary abnormal edge E2 from the
split-off block, and the GIMPLE_OMP_RETURN ending region->exit is
removed. */
edge e2 = NULL;
if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
else
{
e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
gcc_assert (e2->dest == region->exit);
remove_edge (BRANCH_EDGE (entry_bb));
set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
gsi = gsi_last_nondebug_bb (region->exit);
gcc_assert (!gsi_end_p (gsi)
&& gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
gsi_remove (&gsi, true);
}
/* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
if (exit_bb)
{
gsi = gsi_last_nondebug_bb (exit_bb);
gcc_assert (!gsi_end_p (gsi)
&& (gimple_code (gsi_stmt (gsi))
== (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
stmt = gimple_build_return (NULL);
gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
gsi_remove (&gsi, true);
}
/* Move the parallel region into CHILD_CFUN. In SSA form the SSA data
structures of the child are initialized first and no block is passed
to move_sese_region_to_fn; otherwise the directive's block is. */
if (gimple_in_ssa_p (cfun))
{
init_tree_ssa (child_cfun);
init_ssa_operands (child_cfun);
child_cfun->gimple_df->in_ssa_p = true;
block = NULL_TREE;
}
else
block = gimple_block (entry_stmt);
new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
if (exit_bb)
single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
/* Remove the temporary abnormal edge created for a task. Without an
EXIT_BB, NEW_BB gets a fallthru edge to that edge's destination. */
if (e2)
{
basic_block dest_bb = e2->dest;
if (!exit_bb)
make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
remove_edge (e2);
set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
}
/* When the OMP expansion process cannot guarantee an up-to-date
loop tree arrange for the child function to fixup loops. */
if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
/* Remove non-local VAR_DECLs from child_cfun->local_decls list.
The vector is compacted in place: entries whose DECL_CONTEXT is the
current (parent) function are dropped, the rest are shifted down. */
num = vec_safe_length (child_cfun->local_decls);
for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
{
t = (*child_cfun->local_decls)[srcidx];
if (DECL_CONTEXT (t) == cfun->decl)
continue;
if (srcidx != dstidx)
(*child_cfun->local_decls)[dstidx] = t;
dstidx++;
}
if (dstidx != num)
vec_safe_truncate (child_cfun->local_decls, dstidx);
/* Inform the callgraph about the new function. */
child_cfun->curr_properties = cfun->curr_properties;
child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
cgraph_node *node = cgraph_node::get_create (child_fn);
node->parallelized_function = 1;
cgraph_node::add_new_function (child_fn, true);
/* The child only gets an assembler name here when the current function
already has one and the child does not. */
bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
&& !DECL_ASSEMBLER_NAME_SET_P (child_fn);
/* Fix the callgraph edges for child_cfun. Those for cfun will be
fixed in a following pass. */
/* NOTE(review): between push_cfun and pop_cfun below, cfun presumably
refers to CHILD_CFUN -- confirm against push_cfun. */
push_cfun (child_cfun);
if (need_asm)
assign_assembler_name_if_needed (child_fn);
if (optimize)
optimize_omp_library_calls (entry_stmt);
update_max_bb_count ();
cgraph_edge::rebuild_edges ();
/* Some EH regions might become dead, see PR34608. If
pass_cleanup_cfg isn't the first pass to happen with the
new child, these dead EH edges might cause problems.
Clean them up now. */
if (flag_exceptions)
{
basic_block bb;
bool changed = false;
FOR_EACH_BB_FN (bb, cfun)
changed |= gimple_purge_dead_eh_edges (bb);
if (changed)
cleanup_tree_cfg ();
}
if (gimple_in_ssa_p (cfun))
update_ssa (TODO_update_ssa);
if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
verify_loop_structure ();
pop_cfun ();
/* Dump the child function when a dump file is open and we are not in
SSA form, and record that a child function has been dumped. */
if (dump_file && !gimple_in_ssa_p (cfun))
{
omp_any_child_fn_dumped = true;
dump_function_header (dump_file, child_fn, dump_flags);
dump_function_to_file (child_fn, dump_file, dump_flags);
}
}
adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
/* Hand over to the expander matching the kind of directive. */
if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
expand_parallel_call (region, new_bb,
as_a <gomp_parallel *> (entry_stmt), ws_args);
else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
else
expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
}
/* Information about one member of an OpenACC collapsed loop nest, as
filled in by expand_oacc_collapse_init and consumed by
expand_oacc_collapse_vars. */
struct oacc_collapse
{
tree base; /* Base value: the gimplified initial bound (n1) of the loop. */
tree iters; /* Number of steps (iterations), computed in the difference type. */
tree step; /* Step size, converted to the difference type. */
tree tile; /* Tile increment (if tiled): result of the IFN_GOACC_TILE call, else NULL. */
tree outer; /* Tile iterator var: a ".outer" temporary when tiling, else the loop variable. */
};
/* Helper for expand_oacc_for.  Work out, for every member of the
   collapsed loop nest described by FD, its base, step and iteration
   count and record them in COUNTS.  Arithmetic on differences is done in
   DIFF_TYPE.  Any code needed is emitted before GSI; tile-size calls are
   given location LOC.  Return the product of all iteration counts, i.e.
   the bound of the combined outer loop, expressed in BOUND_TYPE.  */

static tree
expand_oacc_collapse_init (const struct omp_for_data *fd,
                           gimple_stmt_iterator *gsi,
                           oacc_collapse *counts, tree diff_type,
                           tree bound_type, location_t loc)
{
  tree tile_list = fd->tiling;
  tree product = build_int_cst (bound_type, 1);

  gcc_assert (integer_onep (fd->loop.step));
  gcc_assert (integer_zerop (fd->loop.n1));

  /* When tiling, the first operand of the tile clause applies to the
     innermost loop, and we work outwards from there.  Seems
     backwards, but whatever.  */
  int ix = fd->collapse;
  while (ix-- != 0)
    {
      const omp_for_data_loop *member = &fd->loops[ix];
      oacc_collapse *slot = &counts[ix];
      tree iter_type = TREE_TYPE (member->v);

      gcc_assert (member->cond_code == LT_EXPR
                  || member->cond_code == GT_EXPR);

      /* Pointer iterators do their arithmetic in sizetype.  */
      tree arith_type = POINTER_TYPE_P (iter_type) ? sizetype : iter_type;

      if (!tile_list)
        {
          slot->tile = NULL;
          slot->outer = member->v;
        }
      else
        {
          tree nest_depth = build_int_cst (integer_type_node, fd->collapse);
          tree member_no = build_int_cst (integer_type_node, ix);
          tree tile_size = TREE_VALUE (tile_list);
          gcall *tile_call
            = gimple_build_call_internal (IFN_GOACC_TILE, 5, nest_depth,
                                          member_no, tile_size,
                                          /* gwv-outer=*/integer_zero_node,
                                          /* gwv-inner=*/integer_zero_node);

          slot->outer = create_tmp_var (iter_type, ".outer");
          slot->tile = create_tmp_var (diff_type, ".tile");
          gimple_call_set_lhs (tile_call, slot->tile);
          gimple_set_location (tile_call, loc);
          gsi_insert_before (gsi, tile_call, GSI_SAME_STMT);

          tile_list = TREE_CHAIN (tile_list);
        }

      const bool ascending = member->cond_code == LT_EXPR;
      tree unit = build_int_cst (diff_type, ascending ? 1 : -1);

      tree lower = force_gimple_operand_gsi (gsi, member->n1, true, NULL_TREE,
                                             true, GSI_SAME_STMT);
      tree upper = force_gimple_operand_gsi (gsi, member->n2, true, NULL_TREE,
                                             true, GSI_SAME_STMT);

      /* Convert the step, avoiding possible unsigned->signed overflow.  */
      tree step = member->step;
      bool flip = !ascending && TYPE_UNSIGNED (TREE_TYPE (step));
      if (flip)
        step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
      step = fold_convert (diff_type, step);
      if (flip)
        step = fold_build1 (NEGATE_EXPR, diff_type, step);
      step = force_gimple_operand_gsi (gsi, step, true, NULL_TREE,
                                       true, GSI_SAME_STMT);

      /* Determine the range, avoiding possible unsigned->signed overflow.  */
      flip = !ascending && TYPE_UNSIGNED (iter_type);
      tree minuend = fold_convert (arith_type, flip ? lower : upper);
      tree subtrahend = fold_convert (arith_type, flip ? upper : lower);
      tree span = fold_build2 (MINUS_EXPR, arith_type, minuend, subtrahend);
      span = fold_convert (diff_type, span);
      if (flip)
        span = fold_build1 (NEGATE_EXPR, diff_type, span);
      span = force_gimple_operand_gsi (gsi, span, true, NULL_TREE,
                                       true, GSI_SAME_STMT);

      /* Determine number of iterations: (span - unit + step) / step.  */
      tree niter = fold_build2 (MINUS_EXPR, diff_type, span, unit);
      niter = fold_build2 (PLUS_EXPR, diff_type, niter, step);
      niter = fold_build2 (TRUNC_DIV_EXPR, diff_type, niter, step);
      niter = force_gimple_operand_gsi (gsi, niter, true, NULL_TREE,
                                        true, GSI_SAME_STMT);

      slot->base = lower;
      slot->iters = niter;
      slot->step = step;

      product = fold_build2 (MULT_EXPR, bound_type, product,
                             fold_convert (bound_type, niter));
    }

  return product;
}
/* Emit initializers for collapsed loop members. INNER is true if
this is for the element loop of a TILE. IVAR is the outer
loop iteration variable, from which collapsed loop iteration values
are calculated. COUNTS array has been initialized by
expand_oacc_collapse_inits. */
static void
expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
gimple_stmt_iterator *gsi,
const oacc_collapse *counts, tree ivar,
tree diff_type)
{
tree ivar_type = TREE_TYPE (ivar);
/* The most rapidly changing iteration variable is the innermost
one. */
for (int ix = fd->collapse; ix--;)
{
const omp_for_data_loop *loop = &fd->loops[ix];
const oacc_collapse *collapse = &counts[ix];
tree v = inner ? loop->v : collapse->outer;
tree iter_type = TREE_TYPE (v);
tree plus_type = iter_type;
enum tree_code plus_code = PLUS_EXPR;
tree expr;
if (POINTER_TYPE_P (iter_type))
{
plus_code = POINTER_PLUS_EXPR;
plus_type = sizetype;
}
expr = ivar;
if (ix)
{
tree mod = fold_convert (ivar_type, collapse->iters);
ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
true, GSI_SAME_STMT);
}
expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
fold_convert (diff_type, collapse->step));
expr = fold_build2 (plus_code, iter_type,
inner ? collapse->outer : collapse->base,
fold_convert (plus_type, expr));
expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
true, GSI_SAME_STMT);
gassign *ass = gimple_build_assign (v, expr);
gsi_insert_before (gsi, ass, GSI_SAME_STMT);
}
}
/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
of the combined collapse > 1 loop constructs, generate code like:
if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
if (cond3 is <)
adj = STEP3 - 1;
else
adj = STEP3 + 1;
count3 = (adj + N32 - N31) / STEP3;
if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
if (cond2 is <)
adj = STEP2 - 1;
else
adj = STEP2 + 1;
count2 = (adj + N22 - N21) / STEP2;
if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
if (cond1 is <)
adj = STEP1 - 1;
else
adj = STEP1 + 1;
count1 = (adj + N12 - N11) / STEP1;
count = count1 * count2 * count3;
Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
count = 0;
and set ZERO_ITER_BB to that bb. If this isn't the outermost
of the combined loop constructs, just initialize COUNTS array
from the _looptemp_ clauses. For loop nests with non-rectangular
loops, do this only for the rectangular loops. Then pick
the loops which reference outer vars in their bound expressions
and the loops which they refer to and for this sub-nest compute
number of iterations. For triangular loops use Faulhaber's formula,
otherwise as a fallback, compute by iterating the loops.
If e.g. the sub-nest is
for (I = N11; I COND1 N12; I += STEP1)
for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
do:
COUNT = 0;
for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
for (tmpj = M21 * tmpi + N21;
tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
{
int tmpk1 = M31 * tmpj + N31;
int tmpk2 = M32 * tmpj + N32;
if (tmpk1 COND3 tmpk2)
{
if (COND3 is <)
adj = STEP3 - 1;
else
adj = STEP3 + 1;
COUNT += (adj + tmpk2 - tmpk1) / STEP3;
}
}
and finally multiply the counts of the rectangular loops not
in the sub-nest with COUNT. Also, store in counts[fd->last_nonrect]
the number of iterations of the loops from fd->first_nonrect
to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
by the counts of rectangular loops not referenced in any non-rectangular
loops sandwiched in between those. */
/* NOTE: It *could* be better to moosh all of the BBs together,
creating one larger BB with all the computation and the unexpected
jump at the end. I.e.
bool zero3, zero2, zero1, zero;
zero3 = N32 c3 N31;
count3 = (N32 - N31) /[cl] STEP3;
zero2 = N22 c2 N21;
count2 = (N22 - N21) /[cl] STEP2;
zero1 = N12 c1 N11;
count1 = (N12 - N11) /[cl] STEP1;
zero = zero3 || zero2 || zero1;
count = count1 * count2 * count3;
if (__builtin_expect(zero, false)) goto zero_iter_bb;
After all, we expect the zero=false, and thus we expect to have to
evaluate all of the comparison expressions, so short-circuiting
oughtn't be a win. Since the condition isn't protecting a
denominator, we're not concerned about divide-by-zero, so we can
fully evaluate count even if a numerator turned out to be wrong.
It seems like putting this all together would create much better
scheduling opportunities, and less pressure on the chip's branch
predictor. */
static void
expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
basic_block &entry_bb, tree *counts,
basic_block &zero_iter1_bb, int &first_zero_iter1,
basic_block &zero_iter2_bb, int &first_zero_iter2,
basic_block &l2_dom_bb)
{
tree t, type = TREE_TYPE (fd->loop.v);
edge e, ne;
int i;
/* Collapsed loops need work for expansion into SSA form. */
gcc_assert (!gimple_in_ssa_p (cfun));
if (gimple_omp_for_combined_into_p (fd->for_stmt)
&& TREE_CODE (fd->loop.n2) != INTEGER_CST)
{
gcc_assert (fd->ordered == 0);
/* First two _looptemp_ clauses are for istart/iend, counts[0]
isn't supposed to be handled, as the inner loop doesn't
use it. */
tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
for (i = 0; i < fd->collapse; i++)
{
innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
if (i)
counts[i] = OMP_CLAUSE_DECL (innerc);
else
counts[0] = NULL_TREE;
}
if (fd->non_rect
&& fd->last_nonrect == fd->first_nonrect + 1
&& !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
{
tree c[4];
for (i = 0; i < 4; i++)
{
innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
c[i] = OMP_CLAUSE_DECL (innerc);
}
counts[0] = c[0];
fd->first_inner_iterations = c[1];
fd->factor = c[2];
fd->adjn1 = c[3];
}
return;
}
for (i = fd->collapse; i < fd->ordered; i++)
{
tree itype = TREE_TYPE (fd->loops[i].v);
counts[i] = NULL_TREE;
t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
fold_convert (itype, fd->loops[i].n1),
fold_convert (itype, fd->loops[i].n2));
if (t && integer_zerop (t))
{
for (i = fd->collapse; i < fd->ordered; i++)
counts[i] = build_int_cst (type, 0);
break;
}
}
bool rect_count_seen = false;
for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
{
tree itype = TREE_TYPE (fd->loops[i].v);
if (i >= fd->collapse && counts[i])
continue;
if (fd->non_rect)
{
/* Skip loops that use outer iterators in their expressions
during this phase. */
if (fd->loops[i].m1 || fd->loops[i].m2)
{
counts[i] = build_zero_cst (type);
continue;
}
}
if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
&& ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
fold_convert (itype, fd->loops[i].n1),
fold_convert (itype, fd->loops[i].n2)))
== NULL_TREE || !integer_onep (t)))
{
gcond *cond_stmt;
tree n1, n2;
n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
true, GSI_SAME_STMT);
n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
true, GSI_SAME_STMT);
cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
n1, n2);
e = split_block (entry_bb, cond_stmt);
basic_block &zero_iter_bb
= i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
int &first_zero_iter
= i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
if (zero_iter_bb == NULL)
{
gassign *assign_stmt;
first_zero_iter = i;
zero_iter_bb = create_empty_bb (entry_bb);
add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
*gsi = gsi_after_labels (zero_iter_bb);
if (i < fd->collapse)
assign_stmt = gimple_build_assign (fd->loop.n2,
build_zero_cst (type));
else
{
counts[i] = create_tmp_reg (type, ".count");
assign_stmt
= gimple_build_assign (counts[i], build_zero_cst (type));
}
gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
entry_bb);
}
ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
ne->probability = profile_probability::very_unlikely ();
e->flags = EDGE_TRUE_VALUE;
e->probability = ne->probability.invert ();
if (l2_dom_bb == NULL)
l2_dom_bb = entry_bb;
entry_bb = e->dest;
*gsi = gsi_last_nondebug_bb (entry_bb);
}
if (POINTER_TYPE_P (itype))
itype = signed_type_for (itype);
t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
? -1 : 1));
t = fold_build2 (PLUS_EXPR, itype,
fold_convert (itype, fd->loops[i].step), t);
t = fold_build2 (PLUS_EXPR, itype, t,
fold_convert (itype, fd->loops[i].n2));
t = fold_build2 (MINUS_EXPR, itype, t,
fold_convert (itype, fd->loops[i].n1));
/* ?? We could probably use CEIL_DIV_EXPR instead of
TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
generate the same code in the end because generically we
don't know that the values involved must be negative for
GT?? */
if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
t = fold_build2 (TRUNC_DIV_EXPR, itype,
fold_build1 (NEGATE_EXPR, itype, t),
fold_build1 (NEGATE_EXPR, itype,
fold_convert (itype,
fd->loops[i].step)));
else
t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
fold_convert (itype, fd->loops[i].step));
t = fold_convert (type, t);
if (TREE_CODE (t) == INTEGER_CST)
counts[i] = t;
else
{
if (i < fd->collapse || i != first_zero_iter2)
counts[i] = create_tmp_reg (type, ".count");
expand_omp_build_assign (gsi, counts[i], t);
}
if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
{
if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
continue;
if (!rect_count_seen)
{
t = counts[i];
rect_count_seen = true;
}
else
t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
expand_omp_build_assign (gsi, fd->loop.n2, t);
}
}
if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
{
gcc_assert (fd->last_nonrect != -1);
counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
expand_omp_build_assign (gsi, counts[fd->last_nonrect],
build_zero_cst (type));
for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
if (fd->loops[i].m1
|| fd->loops[i].m2
|| fd->loops[i].non_rect_referenced)
break;
if (i == fd->last_nonrect
&& fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
&& !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
&& !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
{
int o = fd->first_nonrect;
tree itype = TREE_TYPE (fd->loops[o].v);
tree n1o = create_tmp_reg (itype, ".n1o");
t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
expand_omp_build_assign (gsi, n1o, t);
tree n2o = create_tmp_reg (itype, ".n2o");
t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
expand_omp_build_assign (gsi, n2o, t);
if (fd->loops[i].m1 && fd->loops[i].m2)
t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
unshare_expr (fd->loops[i].m1));
else if (fd->loops[i].m1)
t = fold_unary (NEGATE_EXPR, itype,
unshare_expr (fd->loops[i].m1));
else
t = unshare_expr (fd->loops[i].m2);
tree m2minusm1
= force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
true, GSI_SAME_STMT);
gimple_stmt_iterator gsi2 = *gsi;
gsi_prev (&gsi2);
e = split_block (entry_bb, gsi_stmt (gsi2));
e = split_block (e->dest, (gimple *) NULL);
basic_block bb1 = e->src;
entry_bb = e->dest;
*gsi = gsi_after_labels (entry_bb);
gsi2 = gsi_after_labels (bb1);
tree ostep = fold_convert (itype, fd->loops[o].step);
t = build_int_cst (itype, (fd->loops[o].cond_code
== LT_EXPR ? -1 : 1));
t = fold_build2 (PLUS_EXPR, itype, ostep, t);
t = fold_build2 (PLUS_EXPR, itype, t, n2o);
t = fold_build2 (MINUS_EXPR, itype, t, n1o);
if (TYPE_UNSIGNED (itype)
&& fd->loops[o].cond_code == GT_EXPR)
t = fold_build2 (TRUNC_DIV_EXPR, itype,
fold_build1 (NEGATE_EXPR, itype, t),
fold_build1 (NEGATE_EXPR, itype, ostep));
else
t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
tree outer_niters
= force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
true, GSI_SAME_STMT);
t = fold_build2 (MINUS_EXPR, itype, outer_niters,
build_one_cst (itype));
t = fold_build2 (MULT_EXPR, itype, t, ostep);
t = fold_build2 (PLUS_EXPR, itype, n1o, t);
tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
true, GSI_SAME_STMT);
tree n1, n2, n1e, n2e;
t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
if (fd->loops[i].m1)
{
n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
}
else
n1 = t;
n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
true, GSI_SAME_STMT);
t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
if (fd->loops[i].m2)
{
n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
}
else
n2 = t;
n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
true, GSI_SAME_STMT);
t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
if (fd->loops[i].m1)
{
n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
}
else
n1e = t;
n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
true, GSI_SAME_STMT);
t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
if (fd->loops[i].m2)
{
n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
}
else
n2e = t;
n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
true, GSI_SAME_STMT);
gcond *cond_stmt
= expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
n1, n2);
e = split_block (bb1, cond_stmt);
e->flags = EDGE_TRUE_VALUE;
e->probability = profile_probability::likely ().guessed ();
basic_block bb2 = e->dest;
gsi2 = gsi_after_labels (bb2);
cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
n1e, n2e);
e = split_block (bb2, cond_stmt);
e->flags = EDGE_TRUE_VALUE;
e->probability = profile_probability::likely ().guessed ();
gsi2 = gsi_after_labels (e->dest);
tree step = fold_convert (itype, fd->loops[i].step);
t = build_int_cst (itype, (fd->loops[i].cond_code
== LT_EXPR ? -1 : 1));
t = fold_build2 (PLUS_EXPR, itype, step, t);
t = fold_build2 (PLUS_EXPR, itype, t, n2);
t = fold_build2 (MINUS_EXPR, itype, t, n1);
if (TYPE_UNSIGNED (itype)
&& fd->loops[i].cond_code == GT_EXPR)
t = fold_build2 (TRUNC_DIV_EXPR, itype,
fold_build1 (NEGATE_EXPR, itype, t),
fold_build1 (NEGATE_EXPR, itype, step));
else
t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
tree first_inner_iterations
= force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
true, GSI_SAME_STMT);
t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
if (TYPE_UNSIGNED (itype)
&& fd->loops[i].cond_code == GT_EXPR)
t = fold_build2 (TRUNC_DIV_EXPR, itype,
fold_build1 (NEGATE_EXPR, itype, t),
fold_build1 (NEGATE_EXPR, itype, step));
else
t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
tree factor
= force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
true, GSI_SAME_STMT);
t = fold_build2 (MINUS_EXPR, itype, outer_niters,
build_one_cst (itype));
t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
t = fold_build2 (MULT_EXPR, itype, factor, t);
t = fold_build2 (PLUS_EXPR, itype,
fold_build2 (MULT_EXPR, itype, outer_niters,
first_inner_iterations), t);
expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
fold_convert (type, t));
basic_block bb3 = create_empty_bb (bb1);
add_bb_to_loop (bb3, bb1->loop_father);
e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
e->probability = profile_probability::unlikely ().guessed ();
gsi2 = gsi_after_labels (bb3);
cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
n1e, n2e);
e = split_block (bb3, cond_stmt);
e->flags = EDGE_TRUE_VALUE;
e->probability = profile_probability::likely ().guessed ();
basic_block bb4 = e->dest;
ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
ne->probability = e->probability.invert ();
basic_block bb5 = create_empty_bb (bb2);
add_bb_to_loop (bb5, bb2->loop_father);
ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
ne->probability = profile_probability::unlikely ().guessed ();
for (int j = 0; j < 2; j++)
{
gsi2 = gsi_after_labels (j ? bb5 : bb4);
t = fold_build2 (MINUS_EXPR, itype,
unshare_expr (fd->loops[i].n1),
unshare_expr (fd->loops[i].n2));
t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
tree tem
= force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
true, GSI_SAME_STMT);
t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
t = fold_build2 (MINUS_EXPR, itype, tem, t);
tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
true, GSI_SAME_STMT);
t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
if (fd->loops[i].m1)
{
n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
}
else
n1 = t;
n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
true, GSI_SAME_STMT);
t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
if (fd->loops[i].m2)
{
n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
}
else
n2 = t;
n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
true, GSI_SAME_STMT);
expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
n1, n2);
e = split_block (gsi_bb (gsi2), cond_stmt);
e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
e->probability = profile_probability::unlikely ().guessed ();
ne = make_edge (e->src, bb1,
j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
ne->probability = e->probability.invert ();
gsi2 = gsi_after_labels (e->dest);
t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
make_edge (e->dest, bb1, EDGE_FALLTHRU);
}
set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
if (fd->first_nonrect + 1 == fd->last_nonrect)
{
fd->first_inner_iterations = first_inner_iterations;
fd->factor = factor;
fd->adjn1 = n1o;
}
}
else
{
/* Fallback implementation. Evaluate the loops with m1/m2
non-NULL as well as their outer loops at runtime using temporaries
instead of the original iteration variables, and in the
body just bump the counter. */
gimple_stmt_iterator gsi2 = *gsi;
gsi_prev (&gsi2);
e = split_block (entry_bb, gsi_stmt (gsi2));
e = split_block (e->dest, (gimple *) NULL);
basic_block cur_bb = e->src;
basic_block next_bb = e->dest;
entry_bb = e->dest;
*gsi = gsi_after_labels (entry_bb);
tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
memset (vs, 0, fd->last_nonrect * sizeof (tree));
for (i = 0; i <= fd->last_nonrect; i++)
{
if (fd->loops[i].m1 == NULL_TREE
&& fd->loops[i].m2 == NULL_TREE
&& !fd->loops[i].non_rect_referenced)
continue;
tree itype = TREE_TYPE (fd->loops[i].v);
gsi2 = gsi_after_labels (cur_bb);
tree n1, n2;
t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
if (fd->loops[i].m1 == NULL_TREE)
n1 = t;
else if (POINTER_TYPE_P (itype))
{
gcc_assert (integer_onep (fd->loops[i].m1));
t = unshare_expr (fd->loops[i].n1);
n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
}
else
{
n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
n1 = fold_build2 (MULT_EXPR, itype,
vs[i - fd->loops[i].outer], n1);
n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
}
n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
true, GSI_SAME_STMT);
if (i < fd->last_nonrect)
{
vs[i] = create_tmp_reg (itype, ".it");
expand_omp_build_assign (&gsi2, vs[i], n1);
}
t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
if (fd->loops[i].m2 == NULL_TREE)
n2 = t;
else if (POINTER_TYPE_P (itype))
{
gcc_assert (integer_onep (fd->loops[i].m2));
t = unshare_expr (fd->loops[i].n2);
n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
}
else
{
n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
n2 = fold_build2 (MULT_EXPR, itype,
vs[i - fd->loops[i].outer], n2);
n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
}
n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
true, GSI_SAME_STMT);
if (POINTER_TYPE_P (itype))
itype = signed_type_for (itype);
if (i == fd->last_nonrect)
{
gcond *cond_stmt
= expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
n1, n2);
e = split_block (cur_bb, cond_stmt);
e->flags = EDGE_TRUE_VALUE;
ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
e->probability = profile_probability::likely ().guessed ();
ne->probability = e->probability.invert ();
gsi2 = gsi_after_labels (e->dest);
t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
? -1 : 1));
t = fold_build2 (PLUS_EXPR, itype,
fold_convert (itype, fd->loops[i].step), t);
t = fold_build2 (PLUS_EXPR, itype, t,
fold_convert (itype, n2));
t = fold_build2 (MINUS_EXPR, itype, t,
fold_convert (itype, n1));
tree step = fold_convert (itype, fd->loops[i].step);
if (TYPE_UNSIGNED (itype)
&& fd->loops[i].cond_code == GT_EXPR)
t = fold_build2 (TRUNC_DIV_EXPR, itype,
fold_build1 (NEGATE_EXPR, itype, t),
fold_build1 (NEGATE_EXPR, itype, step));
else
t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
t = fold_convert (type, t);
t = fold_build2 (PLUS_EXPR, type,
counts[fd->last_nonrect], t);
t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
true, GSI_SAME_STMT);
expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
break;
}
e = split_block (cur_bb, last_stmt (cur_bb));
basic_block new_cur_bb = create_empty_bb (cur_bb);
add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
gsi2 = gsi_after_labels (e->dest);
tree step = fold_convert (itype,
unshare_expr (fd->loops[i].step));
if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
t = fold_build_pointer_plus (vs[i], step);
else
t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
true, GSI_SAME_STMT);
expand_omp_build_assign (&gsi2, vs[i], t);
ne = split_block (e->dest, last_stmt (e->dest));
gsi2 = gsi_after_labels (ne->dest);
expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
edge e3, e4;
if (next_bb == entry_bb)
{
e3 = find_edge (ne->dest, next_bb);
e3->flags = EDGE_FALSE_VALUE;
}
else
e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
e4->probability = profile_probability::likely ().guessed ();
e3->probability = e4->probability.invert ();
basic_block esrc = e->src;
make_edge (e->src, ne->dest, EDGE_FALLTHRU);
cur_bb = new_cur_bb;
basic_block latch_bb = next_bb;
next_bb = e->dest;
remove_edge (e);
set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
}
}
t = NULL_TREE;
for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
if (!fd->loops[i].non_rect_referenced
&& fd->loops[i].m1 == NULL_TREE
&& fd->loops[i].m2 == NULL_TREE)
{
if (t == NULL_TREE)
t = counts[i];
else
t = fold_build2 (MULT_EXPR, type, t, counts[i]);
}
if (t)
{
t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
}
if (!rect_count_seen)
t = counts[fd->last_nonrect];
else
t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
counts[fd->last_nonrect]);
expand_omp_build_assign (gsi, fd->loop.n2, t);
}
else if (fd->non_rect)
{
tree t = fd->loop.n2;
gcc_assert (TREE_CODE (t) == INTEGER_CST);
int non_rect_referenced = 0, non_rect = 0;
for (i = 0; i < fd->collapse; i++)
{
if ((i < fd->first_nonrect || i > fd->last_nonrect)
&& !integer_zerop (counts[i]))
t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
if (fd->loops[i].non_rect_referenced)
non_rect_referenced++;
if (fd->loops[i].m1 || fd->loops[i].m2)
non_rect++;
}
gcc_assert (non_rect == 1 && non_rect_referenced == 1);
counts[fd->last_nonrect] = t;
}
}
/* Helper function for expand_omp_{for_*,simd}. Generate code like:
T = V;
V3 = N31 + (T % count3) * STEP3;
T = T / count3;
V2 = N21 + (T % count2) * STEP2;
T = T / count2;
V1 = N11 + T * STEP1;
if this loop doesn't have an inner loop construct combined with it.
If it does have an inner loop construct combined with it and the
iteration count isn't a known constant, store values from counts array
into its _looptemp_ temporaries instead.
For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
inclusive), use the count of all those loops together, and either
find quadratic etc. equation roots, or as a fallback, do:
COUNT = 0;
for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
for (tmpj = M21 * tmpi + N21;
tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
{
int tmpk1 = M31 * tmpj + N31;
int tmpk2 = M32 * tmpj + N32;
if (tmpk1 COND3 tmpk2)
{
if (COND3 is <)
adj = STEP3 - 1;
else
adj = STEP3 + 1;
int temp = (adj + tmpk2 - tmpk1) / STEP3;
if (COUNT + temp > T)
{
V1 = tmpi;
V2 = tmpj;
V3 = tmpk1 + (T - COUNT) * STEP3;
goto done;
}
else
COUNT += temp;
}
}
done:;
but for optional innermost or outermost rectangular loops that aren't
referenced by other loop expressions keep doing the division/modulo. */
static void
expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
tree *counts, tree *nonrect_bounds,
gimple *inner_stmt, tree startvar)
{
int i;
if (gimple_omp_for_combined_p (fd->for_stmt))
{
/* If fd->loop.n2 is constant, then no propagation of the counts
is needed, they are constant. */
if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
return;
tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
? gimple_omp_taskreg_clauses (inner_stmt)
: gimple_omp_for_clauses (inner_stmt);
/* First two _looptemp_ clauses are for istart/iend, counts[0]
isn't supposed to be handled, as the inner loop doesn't
use it. */
tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
int count = 0;
if (fd->non_rect
&& fd->last_nonrect == fd->first_nonrect + 1
&& !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
count = 4;
for (i = 0; i < fd->collapse + count; i++)
{
innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
OMP_CLAUSE__LOOPTEMP_);
gcc_assert (innerc);
if (i)
{
tree tem = OMP_CLAUSE_DECL (innerc);
tree t;
if (i < fd->collapse)
t = counts[i];
else
switch (i - fd->collapse)
{
case 0: t = counts[0]; break;
case 1: t = fd->first_inner_iterations; break;
case 2: t = fd->factor; break;
case 3: t = fd->adjn1; break;
default: gcc_unreachable ();
}
t = fold_convert (TREE_TYPE (tem), t);
t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
gassign *stmt = gimple_build_assign (tem, t);
gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
}
}
return;
}
tree type = TREE_TYPE (fd->loop.v);
tree tem = create_tmp_reg (type, ".tem");
gassign *stmt = gimple_build_assign (tem, startvar);
gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
for (i = fd->collapse - 1; i >= 0; i--)
{
tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
itype = vtype;
if (POINTER_TYPE_P (vtype))
itype = signed_type_for (vtype);
if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
else
t = tem;
if (i == fd->last_nonrect)
{
t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
tree stopval = t;
tree idx = create_tmp_reg (type, ".count");
expand_omp_build_assign (gsi, idx,
build_zero_cst (type), true);
basic_block bb_triang = NULL, bb_triang_dom = NULL;
if (fd->first_nonrect + 1 == fd->last_nonrect
&& (TREE_CODE (fd->loop.n2) == INTEGER_CST
|| fd->first_inner_iterations)
&& (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
!= CODE_FOR_nothing)
&& !integer_zerop (fd->loop.n2))
{
tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
tree itype = TREE_TYPE (fd->loops[i].v);
tree first_inner_iterations = fd->first_inner_iterations;
tree factor = fd->factor;
gcond *cond_stmt
= expand_omp_build_cond (gsi, NE_EXPR, factor,
build_zero_cst (TREE_TYPE (factor)));
edge e = split_block (gsi_bb (*gsi), cond_stmt);
basic_block bb0 = e->src;
e->flags = EDGE_TRUE_VALUE;
e->probability = profile_probability::likely ();
bb_triang_dom = bb0;
*gsi = gsi_after_labels (e->dest);
tree slltype = long_long_integer_type_node;
tree ulltype = long_long_unsigned_type_node;
tree stopvalull = fold_convert (ulltype, stopval);
stopvalull
= force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
first_inner_iterations
= fold_convert (slltype, first_inner_iterations);
first_inner_iterations
= force_gimple_operand_gsi (gsi, first_inner_iterations, true,
NULL_TREE, false,
GSI_CONTINUE_LINKING);
factor = fold_convert (slltype, factor);
factor
= force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
tree first_inner_iterationsd
= fold_build1 (FLOAT_EXPR, double_type_node,
first_inner_iterations);
first_inner_iterationsd
= force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
NULL_TREE, false,
GSI_CONTINUE_LINKING);
tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
factor);
factord = force_gimple_operand_gsi (gsi, factord, true,
NULL_TREE, false,
GSI_CONTINUE_LINKING);
tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
stopvalull);
stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
NULL_TREE, false,
GSI_CONTINUE_LINKING);
/* Temporarily disable flag_rounding_math, values will be
decimal numbers divided by 2 and worst case imprecisions
due to too large values ought to be caught later by the
checks for fallback. */
int save_flag_rounding_math = flag_rounding_math;
flag_rounding_math = 0;
t = fold_build2 (RDIV_EXPR, double_type_node, factord,
build_real (double_type_node, dconst2));
tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
first_inner_iterationsd, t);
t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
GSI_CONTINUE_LINKING);
t = fold_build2 (MULT_EXPR, double_type_node, factord,
build_real (double_type_node, dconst2));
t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
t = fold_build2 (PLUS_EXPR, double_type_node, t,
fold_build2 (MULT_EXPR, double_type_node,
t3, t3));
flag_rounding_math = save_flag_rounding_math;
t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
GSI_CONTINUE_LINKING);
if (flag_exceptions
&& cfun->can_throw_non_call_exceptions
&& operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
{
tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
build_zero_cst (double_type_node));
tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
cond_stmt = gimple_build_cond (NE_EXPR, tem,
boolean_false_node,
NULL_TREE, NULL_TREE);
}
else
cond_stmt
= gimple_build_cond (LT_EXPR, t,
build_zero_cst (double_type_node),
NULL_TREE, NULL_TREE);
gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
e = split_block (gsi_bb (*gsi), cond_stmt);
basic_block bb1 = e->src;
e->flags = EDGE_FALSE_VALUE;
e->probability = profile_probability::very_likely ();
*gsi = gsi_after_labels (e->dest);
gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
tree sqrtr = create_tmp_var (double_type_node);
gimple_call_set_lhs (call, sqrtr);
gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
tree c = create_tmp_var (ulltype);
tree d = create_tmp_var (ulltype);
expand_omp_build_assign (gsi, c, t, true);
t = fold_build2 (MINUS_EXPR, ulltype, c,
build_one_cst (ulltype));
t = fold_build2 (MULT_EXPR, ulltype, c, t);
t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
t = fold_build2 (MULT_EXPR, ulltype,
fold_convert (ulltype, fd->factor), t);
tree t2
= fold_build2 (MULT_EXPR, ulltype, c,
fold_convert (ulltype,
fd->first_inner_iterations));
t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
expand_omp_build_assign (gsi, d, t, true);
t = fold_build2 (MULT_EXPR, ulltype,
fold_convert (ulltype, fd->factor), c);
t = fold_build2 (PLUS_EXPR, ulltype,
t, fold_convert (ulltype,
fd->first_inner_iterations));
t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
GSI_CONTINUE_LINKING);
cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
NULL_TREE, NULL_TREE);
gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
e = split_block (gsi_bb (*gsi), cond_stmt);
basic_block bb2 = e->src;
e->flags = EDGE_TRUE_VALUE;
e->probability = profile_probability::very_likely ();
*gsi = gsi_after_labels (e->dest);
t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
GSI_CONTINUE_LINKING);
cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
NULL_TREE, NULL_TREE);
gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
e = split_block (gsi_bb (*gsi), cond_stmt);
basic_block bb3 = e->src;
e->flags = EDGE_FALSE_VALUE;
e->probability = profile_probability::very_likely ();
*gsi = gsi_after_labels (e->dest);
t = fold_convert (itype, c);
t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
GSI_CONTINUE_LINKING);
expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
t2 = fold_convert (itype, t2);
t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
if (fd->loops[i].m1)
{
t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
}
expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
bb_triang = e->src;
*gsi = gsi_after_labels (e->dest);
remove_edge (e);
e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
e->probability = profile_probability::very_unlikely ();
e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
e->probability = profile_probability::very_unlikely ();
e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
e->probability = profile_probability::very_unlikely ();
basic_block bb4 = create_empty_bb (bb0);
add_bb_to_loop (bb4, bb0->loop_father);
e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
e->probability = profile_probability::unlikely ();
make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
t2 = fold_build2 (TRUNC_DIV_EXPR, type,
counts[i], counts[i - 1]);
t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
GSI_CONTINUE_LINKING);
t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
t = fold_convert (itype, t);
t2 = fold_convert (itype, t2);
t = fold_build2 (MULT_EXPR, itype, t,
fold_convert (itype, fd->loops[i].step));
t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
t2 = fold_build2 (MULT_EXPR, itype, t2,
fold_convert (itype, fd->loops[i - 1].step));
t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
if (fd->loops[i].m1)
{
t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
fd->loops[i - 1].v);
t = fold_build2 (PLUS_EXPR, itype, t, t2);
}
t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
false, GSI_CONTINUE_LINKING);
stmt = gimple_build_assign (fd->loops[i].v, t);
gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
}
/* Fallback implementation. Evaluate the loops in between
(inclusive) fd->first_nonrect and fd->last_nonrect at
runtime using temporaries instead of the original iteration
variables, in the body just bump the counter and compare
with the desired value. */
gimple_stmt_iterator gsi2 = *gsi;
basic_block entry_bb = gsi_bb (gsi2);
edge e = split_block (entry_bb, gsi_stmt (gsi2));
e = split_block (e->dest, (gimple *) NULL);
basic_block dom_bb = NULL;
basic_block cur_bb = e->src;
basic_block next_bb = e->dest;
entry_bb = e->dest;
*gsi = gsi_after_labels (entry_bb);
tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
tree n1 = NULL_TREE, n2 = NULL_TREE;
memset (vs, 0, fd->last_nonrect * sizeof (tree));
for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
{
tree itype = TREE_TYPE (fd->loops[j].v);
bool rect_p = (fd->loops[j].m1 == NULL_TREE
&& fd->loops[j].m2 == NULL_TREE
&& !fd->loops[j].non_rect_referenced);
gsi2 = gsi_after_labels (cur_bb);
t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
if (fd->loops[j].m1 == NULL_TREE)
n1 = rect_p ? build_zero_cst (type) : t;
else if (POINTER_TYPE_P (itype))
{
gcc_assert (integer_onep (fd->loops[j].m1));
t = unshare_expr (fd->loops[j].n1);
n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
}
else
{
n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
n1 = fold_build2 (MULT_EXPR, itype,
vs[j - fd->loops[j].outer], n1);
n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
}
n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
true, GSI_SAME_STMT);
if (j < fd->last_nonrect)
{
vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
expand_omp_build_assign (&gsi2, vs[j], n1);
}
t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
if (fd->loops[j].m2 == NULL_TREE)
n2 = rect_p ? counts[j] : t;
else if (POINTER_TYPE_P (itype))
{
gcc_assert (integer_onep (fd->loops[j].m2));
t = unshare_expr (fd->loops[j].n2);
n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
}
else
{
n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
n2 = fold_build2 (MULT_EXPR, itype,
vs[j - fd->loops[j].outer], n2);
n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
}
n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
true, GSI_SAME_STMT);
if (POINTER_TYPE_P (itype))
itype = signed_type_for (itype);
if (j == fd->last_nonrect)
{
gcond *cond_stmt
= expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
n1, n2);
e = split_block (cur_bb, cond_stmt);
e->flags = EDGE_TRUE_VALUE;
edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
e->probability = profile_probability::likely ().guessed ();
ne->probability = e->probability.invert ();
gsi2 = gsi_after_labels (e->dest);
t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
? -1 : 1));
t = fold_build2 (PLUS_EXPR, itype,
fold_convert (itype, fd->loops[j].step), t);
t = fold_build2 (PLUS_EXPR, itype, t,
fold_convert (itype, n2));
t = fold_build2 (MINUS_EXPR, itype, t,
fold_convert (itype, n1));
tree step = fold_convert (itype, fd->loops[j].step);
if (TYPE_UNSIGNED (itype)
&& fd->loops[j].cond_code == GT_EXPR)
t = fold_build2 (TRUNC_DIV_EXPR, itype,
fold_build1 (NEGATE_EXPR, itype, t),
fold_build1 (NEGATE_EXPR, itype, step));
else
t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
t = fold_convert (type, t);
t = fold_build2 (PLUS_EXPR, type, idx, t);
t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
true, GSI_SAME_STMT);
e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
cond_stmt
= gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
NULL_TREE);
gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
e = split_block (gsi_bb (gsi2), cond_stmt);
e->flags = EDGE_TRUE_VALUE;
e->probability = profile_probability::likely ().guessed ();
ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
ne->probability = e->probability.invert ();
gsi2 = gsi_after_labels (e->dest);
expand_omp_build_assign (&gsi2, idx, t);
set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
break;
}
e = split_block (cur_bb, last_stmt (cur_bb));
basic_block new_cur_bb = create_empty_bb (cur_bb);
add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
gsi2 = gsi_after_labels (e->dest);
if (rect_p)
t = fold_build2 (PLUS_EXPR, type, vs[j],
build_one_cst (type));
else
{
tree step
= fold_convert (itype, unshare_expr (fd->loops[j].step));
if (POINTER_TYPE_P (vtype))
t = fold_build_pointer_plus (vs[j], step);
else