| /* Expansion pass for OMP directives. Outlines regions of certain OMP |
| directives to separate functions, converts others into explicit calls to the |
| runtime library (libgomp) and so forth |
| |
| Copyright (C) 2005-2022 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "memmodel.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "gimple.h" |
| #include "cfghooks.h" |
| #include "tree-pass.h" |
| #include "ssa.h" |
| #include "optabs.h" |
| #include "cgraph.h" |
| #include "pretty-print.h" |
| #include "diagnostic-core.h" |
| #include "fold-const.h" |
| #include "stor-layout.h" |
| #include "cfganal.h" |
| #include "internal-fn.h" |
| #include "gimplify.h" |
| #include "gimple-iterator.h" |
| #include "gimplify-me.h" |
| #include "gimple-walk.h" |
| #include "tree-cfg.h" |
| #include "tree-into-ssa.h" |
| #include "tree-ssa.h" |
| #include "splay-tree.h" |
| #include "cfgloop.h" |
| #include "omp-general.h" |
| #include "omp-offload.h" |
| #include "tree-cfgcleanup.h" |
| #include "alloc-pool.h" |
| #include "symbol-summary.h" |
| #include "gomp-constants.h" |
| #include "gimple-pretty-print.h" |
| #include "stringpool.h" |
| #include "attribs.h" |
| #include "tree-eh.h" |
| #include "opts.h" |
| |
| /* OMP region information. Every parallel and workshare |
| directive is enclosed between two markers, the OMP_* directive |
| and a corresponding GIMPLE_OMP_RETURN statement. |
| |
| Regions are linked into a tree through OUTER, INNER and NEXT. |
| new_omp_region pushes every new region at the head of its parent's |
| INNER list (or of ROOT_OMP_REGION when it has no parent), so a |
| sibling list runs from the most recently created region to the |
| oldest one. */ |
| |
| struct omp_region |
| { |
| /* The enclosing region. NULL for a toplevel region. */ |
| struct omp_region *outer; |
| |
| /* First child region, i.e. the head of the list of nested regions |
| that are chained through their NEXT fields. */ |
| struct omp_region *inner; |
| |
| /* Next peer region (a sibling under the same OUTER). */ |
| struct omp_region *next; |
| |
| /* Block containing the omp directive as its last stmt. */ |
| basic_block entry; |
| |
| /* Block containing the GIMPLE_OMP_RETURN as its last stmt. NULL when |
| the region has no exit marker. */ |
| basic_block exit; |
| |
| /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. May be |
| NULL. */ |
| basic_block cont; |
| |
| /* If this is a combined parallel+workshare region, this is a list |
| of additional arguments needed by the combined parallel+workshare |
| library call. determine_parallel_type fills it in from |
| get_ws_args_for. */ |
| vec<tree, va_gc> *ws_args; |
| |
| /* The code for the omp directive of this region. */ |
| enum gimple_code type; |
| |
| /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */ |
| enum omp_clause_schedule_kind sched_kind; |
| |
| /* Schedule modifiers. expand_parallel_call tests these against |
| OMP_CLAUSE_SCHEDULE_MONOTONIC and OMP_CLAUSE_SCHEDULE_NONMONOTONIC. */ |
| unsigned char sched_modifiers; |
| |
| /* True if this is a combined parallel+workshare region. |
| determine_parallel_type sets it on both the parallel region and its |
| inner workshare region. */ |
| bool is_combined_parallel; |
| |
| /* Copy of fd.lastprivate_conditional != 0. expand_parallel_call |
| consults it when picking the combined loop builtin. */ |
| bool has_lastprivate_conditional; |
| |
| /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has |
| a depend clause. */ |
| gomp_ordered *ord_stmt; |
| }; |
| |
| static struct omp_region *root_omp_region; /* Head of the list of toplevel |
| regions: new_omp_region pushes parentless regions onto it and |
| omp_free_regions resets it to NULL. */ |
| static bool omp_any_child_fn_dumped; /* NOTE(review): not referenced in this |
| part of the file; presumably records whether a child function has been |
| dumped -- confirm against its users. */ |
| |
| static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree, |
| bool = false); /* Forward declaration; the last parameter defaults to |
| false. */ |
| static gphi *find_phi_with_arg_on_edge (tree, edge); /* Forward declaration. */ |
| static void expand_omp (struct omp_region *region); /* Forward declaration. */ |
| |
| /* Accessor for the combined parallel+workshare flag of REGION. Returns |
| the value stored in REGION->is_combined_parallel; REGION is dereferenced |
| unconditionally and so must not be NULL. */ |
| |
| static inline bool |
| is_combined_parallel (struct omp_region *region) |
| { |
| const bool combined = region->is_combined_parallel; |
| return combined; |
| } |
| |
| /* WS_ENTRY_BB is the block ending in the workshare directive that is |
| nested directly inside a parallel region. Return true if no data |
| dependency prevents expanding the enclosing parallel directive as a |
| combined parallel+workshare region. |
| |
| A combined parallel+workshare call may have to pass extra arguments |
| to the runtime for GIMPLE_OMP_FOR regions. Those arguments can be |
| computed from variables that the parent hands to the child through |
| 'struct .omp_data_s'. For instance: |
| |
| #pragma omp parallel for schedule (guided, i * 4) |
| for (j ...) |
| |
| is lowered into: |
| |
| # BLOCK 2 (PAR_ENTRY_BB) |
| .omp_data_o.i = i; |
| #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598) |
| |
| # BLOCK 3 (WS_ENTRY_BB) |
| .omp_data_i = &.omp_data_o; |
| D.1667 = .omp_data_i->i; |
| D.1598 = D.1667 * 4; |
| #pragma omp for schedule (guided, D.1598) |
| |
| Once the parallel region is outlined, the call to 'bar.omp_fn.0' |
| needs D.1598 in its argument list, yet that value is only computed |
| *after* the call site, so the transformation cannot be done. |
| |
| The test implemented here is conservative: a sections construct is |
| always accepted; a loop is accepted only if it is a plain 'for' kind, |
| iterates in 'long', has a constant end bound when collapsed, and its |
| start, end, step and (if present) chunk size are all gimple minimal |
| invariants. |
| |
| FIXME. With SSA form available at this point the code of block 3 |
| could simply be hoisted into block 2. Without dataflow information |
| that is not worth the aggravation. */ |
| |
| static bool |
| workshare_safe_to_combine_p (basic_block ws_entry_bb) |
| { |
| gimple *stmt = last_stmt (ws_entry_bb); |
| |
| /* A sections construct is always accepted. */ |
| if (gimple_code (stmt) == GIMPLE_OMP_SECTIONS) |
| return true; |
| |
| /* Anything else must be a loop, and only the plain 'for' kind |
| qualifies. */ |
| gcc_assert (gimple_code (stmt) == GIMPLE_OMP_FOR); |
| if (gimple_omp_for_kind (stmt) != GF_OMP_FOR_KIND_FOR) |
| return false; |
| |
| struct omp_for_data fd; |
| omp_extract_for_data (as_a <gomp_for *> (stmt), &fd, NULL); |
| |
| const bool collapsed_nonconst_bound |
| = fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST; |
| if (collapsed_nonconst_bound || fd.iter_type != long_integer_type_node) |
| return false; |
| |
| /* FIXME. This gives up too easily. Non-constant operands will |
| likely involve variables mapped into fields of .omp_data_s for |
| sharing with the child function; with appropriate data flow it |
| would be possible to see through that. */ |
| const bool bounds_invariant |
| = (is_gimple_min_invariant (fd.loop.n1) |
| && is_gimple_min_invariant (fd.loop.n2) |
| && is_gimple_min_invariant (fd.loop.step)); |
| if (!bounds_invariant) |
| return false; |
| |
| return !fd.chunk_size || is_gimple_min_invariant (fd.chunk_size); |
| } |
| |
| /* Return the chunk size to use for a SCHEDULE clause whose chunk is |
| CHUNK_SIZE. SIMD_SCHEDULE tells whether the simd modifier is present. |
| |
| CHUNK_SIZE is returned unchanged when there is no simd modifier, when |
| it is zero, or when omp_max_vf () is known to be 1. Otherwise the |
| result is (CHUNK_SIZE + VF - 1) & -VF computed in CHUNK_SIZE's type, |
| VF being omp_max_vf (). */ |
| |
| static tree |
| omp_adjust_chunk_size (tree chunk_size, bool simd_schedule) |
| { |
| if (simd_schedule && !integer_zerop (chunk_size)) |
| { |
| poly_uint64 vf = omp_max_vf (); |
| if (!known_eq (vf, 1U)) |
| { |
| tree type = TREE_TYPE (chunk_size); |
| tree bias = build_int_cst (type, vf - 1); |
| tree biased = fold_build2 (PLUS_EXPR, type, chunk_size, bias); |
| tree mask = build_int_cst (type, -vf); |
| return fold_build2 (BIT_AND_EXPR, type, biased, mask); |
| } |
| } |
| |
| return chunk_size; |
| } |
| |
| /* Build the vector of extra arguments that a combined |
| parallel+workshare call needs. PAR_STMT is the parallel directive and |
| WS_STMT the workshare directive being expanded. |
| |
| For a GIMPLE_OMP_FOR the vector holds the start bound, end bound and |
| step converted to 'long', followed by the (simd-adjusted) chunk size |
| when the loop has one. If the loop is combined into the parallel, |
| the bounds come from the first two _LOOPTEMP_ clauses of PAR_STMT |
| instead of from the loop header. |
| |
| For a GIMPLE_OMP_SECTIONS the vector holds a single unsigned |
| constant, the number of sections. |
| |
| Any other kind of WS_STMT is a bug in the caller. */ |
| |
| static vec<tree, va_gc> * |
| get_ws_args_for (gimple *par_stmt, gimple *ws_stmt) |
| { |
| location_t loc = gimple_location (ws_stmt); |
| vec<tree, va_gc> *ws_args; |
| |
| if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt)) |
| { |
| struct omp_for_data fd; |
| omp_extract_for_data (for_stmt, &fd, NULL); |
| |
| tree lower = fd.loop.n1; |
| tree upper = fd.loop.n2; |
| if (gimple_omp_for_combined_into_p (for_stmt)) |
| { |
| /* The first two _LOOPTEMP_ clauses carry the bounds. */ |
| tree temp = omp_find_clause (gimple_omp_parallel_clauses (par_stmt), |
| OMP_CLAUSE__LOOPTEMP_); |
| gcc_assert (temp); |
| lower = OMP_CLAUSE_DECL (temp); |
| temp = omp_find_clause (OMP_CLAUSE_CHAIN (temp), |
| OMP_CLAUSE__LOOPTEMP_); |
| gcc_assert (temp); |
| upper = OMP_CLAUSE_DECL (temp); |
| } |
| |
| const bool has_chunk = fd.chunk_size != 0; |
| vec_alloc (ws_args, has_chunk ? 4 : 3); |
| |
| tree bounds[3] = { lower, upper, fd.loop.step }; |
| for (unsigned i = 0; i < 3; i++) |
| ws_args->quick_push (fold_convert_loc (loc, long_integer_type_node, |
| bounds[i])); |
| |
| if (has_chunk) |
| { |
| tree chunk = fold_convert_loc (loc, long_integer_type_node, |
| fd.chunk_size); |
| ws_args->quick_push (omp_adjust_chunk_size (chunk, fd.simd_schedule)); |
| } |
| |
| return ws_args; |
| } |
| |
| if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS) |
| { |
| /* The block after WS_STMT ends in the GIMPLE_OMP_SECTIONS_SWITCH |
| statement. It has one outgoing edge per section plus one to the |
| exit of the sections region, which is not counted. */ |
| basic_block switch_bb = single_succ (gimple_bb (ws_stmt)); |
| tree count = build_int_cst (unsigned_type_node, |
| EDGE_COUNT (switch_bb->succs) - 1); |
| vec_alloc (ws_args, 1); |
| ws_args->quick_push (count); |
| return ws_args; |
| } |
| |
| gcc_unreachable (); |
| } |
| |
| /* Discover whether REGION is a combined parallel+workshare region. |
| |
| If it is, set is_combined_parallel on both REGION and REGION->inner |
| and store the extra runtime arguments computed by get_ws_args_for in |
| REGION->ws_args. Otherwise return with REGION untouched. REGION may |
| be NULL. */ |
| |
| static void |
| determine_parallel_type (struct omp_region *region) |
| { |
| basic_block par_entry_bb, par_exit_bb; |
| basic_block ws_entry_bb, ws_exit_bb; |
| |
| if (region == NULL || region->inner == NULL |
| || region->exit == NULL || region->inner->exit == NULL |
| || region->inner->cont == NULL) |
| return; /* No inner region, or an exit/continue block is missing. */ |
| |
| /* We only support parallel+for and parallel+sections. */ |
| if (region->type != GIMPLE_OMP_PARALLEL |
| || (region->inner->type != GIMPLE_OMP_FOR |
| && region->inner->type != GIMPLE_OMP_SECTIONS)) |
| return; |
| |
| /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and |
| WS_EXIT_BB -> PAR_EXIT_BB. The blocks are collected here; the |
| nesting test itself is the first part of the condition further |
| down. */ |
| par_entry_bb = region->entry; |
| par_exit_bb = region->exit; |
| ws_entry_bb = region->inner->entry; |
| ws_exit_bb = region->inner->exit; |
| |
| /* Give up for task reductions on the parallel, while it is implementable, |
| adding another big set of APIs or slowing down the normal paths is |
| not acceptable. A _REDUCTEMP_ clause on the parallel is what |
| triggers this. */ |
| tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb)); |
| if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_)) |
| return; |
| |
| if (single_succ (par_entry_bb) == ws_entry_bb |
| && single_succ (ws_exit_bb) == par_exit_bb |
| && workshare_safe_to_combine_p (ws_entry_bb) |
| && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb)) |
| || (last_and_only_stmt (ws_entry_bb) |
| && last_and_only_stmt (par_exit_bb)))) |
| { |
| gimple *par_stmt = last_stmt (par_entry_bb); |
| gimple *ws_stmt = last_stmt (ws_entry_bb); |
| |
| if (region->inner->type == GIMPLE_OMP_FOR) |
| { |
| /* If this is a combined parallel loop, we need to determine |
| whether or not to use the combined library calls. The |
| transformation is not applied to loops without a schedule |
| clause, to static loops and to any kind of ordered loop. |
| For static loops we already open code the loop so there is |
| no need to do anything else. For ordered loops the combined |
| parallel loop call would still need extra synchronization |
| to implement ordered semantics, so there would not be any |
| gain in using the combined call. The test also gives up |
| when the loop has a _REDUCTEMP_ clause or a _CONDTEMP_ |
| clause whose decl has pointer type. Note that C is reused |
| for the _CONDTEMP_ lookup in the last operand. */ |
| tree clauses = gimple_omp_for_clauses (ws_stmt); |
| tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE); |
| if (c == NULL |
| || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK) |
| == OMP_CLAUSE_SCHEDULE_STATIC) |
| || omp_find_clause (clauses, OMP_CLAUSE_ORDERED) |
| || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_) |
| || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_)) |
| && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c))))) |
| return; |
| } |
| else if (region->inner->type == GIMPLE_OMP_SECTIONS |
| && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt), |
| OMP_CLAUSE__REDUCTEMP_) |
| || omp_find_clause (gimple_omp_sections_clauses (ws_stmt), |
| OMP_CLAUSE__CONDTEMP_))) |
| return; /* Sections with _REDUCTEMP_ or _CONDTEMP_ are not combined. */ |
| |
| region->is_combined_parallel = true; |
| region->inner->is_combined_parallel = true; |
| region->ws_args = get_ws_args_for (par_stmt, ws_stmt); |
| } |
| } |
| |
| /* Debugging dumps for parallel regions. These prototypes declare the |
| non-static dump routines ahead of their definitions. */ |
| void dump_omp_region (FILE *, struct omp_region *, int); |
| void debug_omp_region (struct omp_region *); |
| void debug_all_omp_regions (void); |
| |
| /* Dump the parallel region tree rooted at REGION to FILE. |
| |
| REGION and all the peers reachable through its NEXT chain are printed |
| at indentation INDENT; the regions nested in each of them are printed |
| recursively with four more columns of indentation. For every region |
| the entry block and directive code come first, then the nested |
| regions, then the GIMPLE_OMP_CONTINUE block if there is one, and |
| finally the GIMPLE_OMP_RETURN block or a note that the region has no |
| exit marker. |
| |
| REGION may be NULL, in which case nothing is printed. This keeps |
| debug_all_omp_regions usable from a debugger while ROOT_OMP_REGION is |
| empty. */ |
| |
| void |
| dump_omp_region (FILE *file, struct omp_region *region, int indent) |
| { |
| /* Walk the peers iteratively; only nesting needs recursion. */ |
| for (; region; region = region->next) |
| { |
| fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index, |
| gimple_code_name[region->type]); |
| |
| if (region->inner) |
| dump_omp_region (file, region->inner, indent + 4); |
| |
| if (region->cont) |
| fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "", |
| region->cont->index); |
| |
| if (region->exit) |
| fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "", |
| region->exit->index); |
| else |
| fprintf (file, "%*s[no exit marker]\n", indent, ""); |
| } |
| } |
| |
| /* Debugger entry point: dump REGION, as dump_omp_region does, to stderr |
| starting at indentation 0. */ |
| |
| DEBUG_FUNCTION void |
| debug_omp_region (struct omp_region *region) |
| { |
| FILE *out = stderr; |
| dump_omp_region (out, region, 0); |
| } |
| |
| /* Debugger entry point: dump the region list headed by ROOT_OMP_REGION |
| to stderr starting at indentation 0. */ |
| |
| DEBUG_FUNCTION void |
| debug_all_omp_regions (void) |
| { |
| struct omp_region *all_regions = root_omp_region; |
| dump_omp_region (stderr, all_regions, 0); |
| } |
| |
| /* Allocate a zero-initialized region of directive code TYPE whose entry |
| block is BB, link it in as the newest child of PARENT and return it. |
| When PARENT is NULL the region is a toplevel one and is pushed onto |
| ROOT_OMP_REGION instead. */ |
| |
| static struct omp_region * |
| new_omp_region (basic_block bb, enum gimple_code type, |
| struct omp_region *parent) |
| { |
| struct omp_region *fresh = XCNEW (struct omp_region); |
| |
| /* The list to join: the nested regions of PARENT, or the toplevel |
| list when there is no parent. */ |
| struct omp_region **head = parent ? &parent->inner : &root_omp_region; |
| |
| fresh->type = type; |
| fresh->entry = bb; |
| fresh->outer = parent; |
| |
| /* Push at the front of the chosen list. */ |
| fresh->next = *head; |
| *head = fresh; |
| |
| return fresh; |
| } |
| |
| /* Free REGION together with every region nested inside it. The peers |
| of REGION (its NEXT chain) are left alone. */ |
| |
| static void |
| free_omp_region_1 (struct omp_region *region) |
| { |
| struct omp_region *child = region->inner; |
| |
| while (child) |
| { |
| /* Fetch the sibling first; CHILD is gone after the call. */ |
| struct omp_region *sibling = child->next; |
| free_omp_region_1 (child); |
| child = sibling; |
| } |
| |
| free (region); |
| } |
| |
| /* Free every toplevel region along with everything nested in it and |
| leave ROOT_OMP_REGION set to NULL. */ |
| |
| void |
| omp_free_regions (void) |
| { |
| while (root_omp_region) |
| { |
| /* Unlink the head before releasing it. */ |
| struct omp_region *top = root_omp_region; |
| root_omp_region = top->next; |
| free_omp_region_1 (top); |
| } |
| } |
| |
| /* Build and return a GIMPLE_COND for the condition tree COND. The |
| comparison code and operands are extracted from COND; both label |
| operands of the new statement are NULL_TREE. */ |
| |
| static gcond * |
| gimple_build_cond_empty (tree cond) |
| { |
| tree op0, op1; |
| enum tree_code code; |
| |
| gimple_cond_get_ops_from_tree (cond, &code, &op0, &op1); |
| |
| gcond *stmt = gimple_build_cond (code, op0, op1, NULL_TREE, NULL_TREE); |
| return stmt; |
| } |
| |
| /* Set DECL_CONTEXT of CHILD_FNDECL to the function that lexically |
| contains REGION, and add CHILD_FNDECL to the variables of the |
| supercontext of ENTRY_BLOCK, the block that originally contained the |
| code CHILD_FNDECL was created from. |
| |
| Both steps together make the debug info of the outlined function come |
| out in the correct lexical scope. Nothing is chained when |
| ENTRY_BLOCK is NULL_TREE or not a BLOCK, or when its supercontext is |
| not a BLOCK. */ |
| |
| static void |
| adjust_context_and_scope (struct omp_region *region, tree entry_block, |
| tree child_fndecl) |
| { |
| tree context_fn = NULL_TREE; |
| |
| /* Inner regions are expanded before outer ones. With e.g. an explicit |
| task nested in a parallel, current_function_decl is still the |
| original source function while the task is expanded, but the wanted |
| context is the child function of the parallel. So look outwards |
| for the nearest enclosing region that provides a child function. */ |
| for (struct omp_region *encl = region->outer; |
| encl && context_fn == NULL_TREE; encl = encl->outer) |
| { |
| const enum gimple_code kind = encl->type; |
| |
| if (kind == GIMPLE_OMP_PARALLEL |
| || kind == GIMPLE_OMP_TASK |
| || kind == GIMPLE_OMP_TEAMS) |
| context_fn = gimple_omp_taskreg_child_fn (last_stmt (encl->entry)); |
| else if (kind == GIMPLE_OMP_TARGET) |
| { |
| gomp_target *tgt = as_a <gomp_target *> (last_stmt (encl->entry)); |
| context_fn = gimple_omp_target_child_fn (tgt); |
| } |
| } |
| |
| /* No enclosing outlined region: the current function is the context. */ |
| if (context_fn == NULL_TREE) |
| context_fn = current_function_decl; |
| DECL_CONTEXT (child_fndecl) = context_fn; |
| |
| if (entry_block == NULL_TREE || TREE_CODE (entry_block) != BLOCK) |
| return; |
| |
| tree scope = BLOCK_SUPERCONTEXT (entry_block); |
| if (TREE_CODE (scope) != BLOCK) |
| return; |
| |
| /* Prepend CHILD_FNDECL to the variables of SCOPE. */ |
| DECL_CHAIN (child_fndecl) = BLOCK_VARS (scope); |
| BLOCK_VARS (scope) = child_fndecl; |
| } |
| |
| /* Build the function calls to GOMP_parallel etc to actually |
| generate the parallel operation. REGION is the parallel region |
| being expanded. BB is the block where to insert the code. WS_ARGS |
| will be set if this is a call to a combined parallel+workshare |
| construct, it contains the list of additional arguments needed by |
| the workshare construct. ENTRY_STMT is the parallel directive; its |
| clauses, data argument and child function are read from it. |
| |
| The emitted call receives, in this order: the address of the child |
| function, the address of the data argument (or a null pointer), the |
| number of threads, the elements of WS_ARGS if any, and the flags. |
| When the parallel has both an if clause and a nonzero num_threads |
| value, BB is split and a small diamond is built in front of the call |
| to select the thread count. */ |
| |
| static void |
| expand_parallel_call (struct omp_region *region, basic_block bb, |
| gomp_parallel *entry_stmt, |
| vec<tree, va_gc> *ws_args) |
| { |
| tree t, t1, t2, val, cond, c, clauses, flags; |
| gimple_stmt_iterator gsi; |
| gimple *stmt; |
| enum built_in_function start_ix; |
| int start_ix2; |
| location_t clause_loc; |
| vec<tree, va_gc> *args; |
| |
| clauses = gimple_omp_parallel_clauses (entry_stmt); |
| |
| /* Determine what flavor of GOMP_parallel we will be |
| emitting. A _REDUCTEMP_ clause takes precedence over a combined |
| parallel+workshare region. */ |
| start_ix = BUILT_IN_GOMP_PARALLEL; |
| tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_); |
| if (rtmp) |
| start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS; |
| else if (is_combined_parallel (region)) |
| { |
| switch (region->inner->type) |
| { |
| case GIMPLE_OMP_FOR: |
| gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO); |
| switch (region->inner->sched_kind) |
| { |
| case OMP_CLAUSE_SCHEDULE_RUNTIME: |
| /* For lastprivate(conditional:), our implementation |
| requires monotonic behavior. */ |
| if (region->inner->has_lastprivate_conditional != 0) |
| start_ix2 = 3; |
| else if ((region->inner->sched_modifiers |
| & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0) |
| start_ix2 = 6; |
| else if ((region->inner->sched_modifiers |
| & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0) |
| start_ix2 = 7; |
| else |
| start_ix2 = 3; |
| break; |
| case OMP_CLAUSE_SCHEDULE_DYNAMIC: |
| case OMP_CLAUSE_SCHEDULE_GUIDED: |
| if ((region->inner->sched_modifiers |
| & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0 |
| && !region->inner->has_lastprivate_conditional) |
| { |
| start_ix2 = 3 + region->inner->sched_kind; |
| break; |
| } |
| /* FALLTHRU */ |
| default: |
| start_ix2 = region->inner->sched_kind; |
| break; |
| } |
| start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC; /* START_IX2 was an |
| offset relative to the STATIC variant of the combined loop builtins. */ |
| start_ix = (enum built_in_function) start_ix2; |
| break; |
| case GIMPLE_OMP_SECTIONS: |
| start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* By default, the value of NUM_THREADS is zero (selected at run time) |
| and there is no conditional. FLAGS defaults to zero as well. */ |
| cond = NULL_TREE; |
| val = build_int_cst (unsigned_type_node, 0); |
| flags = build_int_cst (unsigned_type_node, 0); |
| |
| c = omp_find_clause (clauses, OMP_CLAUSE_IF); |
| if (c) |
| cond = OMP_CLAUSE_IF_EXPR (c); |
| |
| c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS); |
| if (c) |
| { |
| val = OMP_CLAUSE_NUM_THREADS_EXPR (c); |
| clause_loc = OMP_CLAUSE_LOCATION (c); |
| } |
| else |
| clause_loc = gimple_location (entry_stmt); |
| |
| c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND); |
| if (c) |
| flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c)); |
| |
| /* Ensure 'val' is of the correct type. */ |
| val = fold_convert_loc (clause_loc, unsigned_type_node, val); |
| |
| /* If we found the clause 'if (cond)', build either |
| (cond == 0) or (cond ? val : 1u). The first form is used when VAL |
| is zero: it yields 0 when COND holds and 1 when it does not. The |
| second form needs control flow and is built as a diamond below. */ |
| if (cond) |
| { |
| cond = gimple_boolify (cond); |
| |
| if (integer_zerop (val)) |
| val = fold_build2_loc (clause_loc, |
| EQ_EXPR, unsigned_type_node, cond, |
| build_int_cst (TREE_TYPE (cond), 0)); |
| else |
| { |
| basic_block cond_bb, then_bb, else_bb; |
| edge e, e_then, e_else; |
| tree tmp_then, tmp_else, tmp_join, tmp_var; |
| |
| tmp_var = create_tmp_var (TREE_TYPE (val)); |
| if (gimple_in_ssa_p (cfun)) |
| { |
| tmp_then = make_ssa_name (tmp_var); |
| tmp_else = make_ssa_name (tmp_var); |
| tmp_join = make_ssa_name (tmp_var); |
| } |
| else |
| { |
| tmp_then = tmp_var; |
| tmp_else = tmp_var; |
| tmp_join = tmp_var; |
| } |
| |
| e = split_block_after_labels (bb); /* COND_BB keeps the labels, BB the |
| rest of the original block. */ |
| cond_bb = e->src; |
| bb = e->dest; |
| remove_edge (e); |
| |
| then_bb = create_empty_bb (cond_bb); |
| else_bb = create_empty_bb (then_bb); |
| set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); |
| set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb); |
| |
| stmt = gimple_build_cond_empty (cond); |
| gsi = gsi_start_bb (cond_bb); |
| gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); |
| |
| gsi = gsi_start_bb (then_bb); |
| expand_omp_build_assign (&gsi, tmp_then, val, true); |
| |
| gsi = gsi_start_bb (else_bb); |
| expand_omp_build_assign (&gsi, tmp_else, |
| build_int_cst (unsigned_type_node, 1), |
| true); |
| |
| make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); |
| make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE); |
| add_bb_to_loop (then_bb, cond_bb->loop_father); |
| add_bb_to_loop (else_bb, cond_bb->loop_father); |
| e_then = make_edge (then_bb, bb, EDGE_FALLTHRU); |
| e_else = make_edge (else_bb, bb, EDGE_FALLTHRU); |
| |
| if (gimple_in_ssa_p (cfun)) |
| { |
| gphi *phi = create_phi_node (tmp_join, bb); /* Merge both arms. */ |
| add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION); |
| add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION); |
| } |
| |
| val = tmp_join; |
| } |
| |
| gsi = gsi_start_bb (bb); |
| val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| } |
| |
| gsi = gsi_last_nondebug_bb (bb); |
| t = gimple_omp_parallel_data_arg (entry_stmt); |
| if (t == NULL) |
| t1 = null_pointer_node; |
| else |
| t1 = build_fold_addr_expr (t); |
| tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt); |
| t2 = build_fold_addr_expr (child_fndecl); |
| |
| vec_alloc (args, 4 + vec_safe_length (ws_args)); /* Four fixed arguments |
| plus the workshare ones. */ |
| args->quick_push (t2); |
| args->quick_push (t1); |
| args->quick_push (val); |
| if (ws_args) |
| args->splice (*ws_args); |
| args->quick_push (flags); |
| |
| t = build_call_expr_loc_vec (UNKNOWN_LOCATION, |
| builtin_decl_explicit (start_ix), args); |
| |
| if (rtmp) |
| { |
| tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp)); /* The call's result is |
| stored in the _REDUCTEMP_ decl, converted through a pointer-sized |
| integer. */ |
| t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp), |
| fold_convert (type, |
| fold_convert (pointer_sized_int_node, t))); |
| } |
| force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| } |
| |
| /* Build the function call to GOMP_task to actually |
| generate the task operation. BB is the block where to insert the code. |
| ENTRY_STMT is the task directive and REGION its region. |
| |
| When ENTRY_STMT is a taskloop task, the call built is to the taskloop |
| builtin (its ULL variant when the iteration type is unsigned long |
| long) and the loop information is taken from the GIMPLE_OMP_FOR that |
| ends the entry block of REGION->outer. */ |
| |
| static void |
| expand_task_call (struct omp_region *region, basic_block bb, |
| gomp_task *entry_stmt) |
| { |
| tree t1, t2, t3; |
| gimple_stmt_iterator gsi; |
| location_t loc = gimple_location (entry_stmt); |
| |
| tree clauses = gimple_omp_task_clauses (entry_stmt); |
| |
| tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF); |
| tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED); |
| tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE); |
| tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); |
| tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL); |
| tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY); |
| tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH); |
| |
| unsigned int iflags /* Flag bits that are known at compile time. */ |
| = (untied ? GOMP_TASK_FLAG_UNTIED : 0) |
| | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0) |
| | (depend ? GOMP_TASK_FLAG_DEPEND : 0); |
| |
| bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt); |
| tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE; |
| tree num_tasks = NULL_TREE; |
| bool ull = false; |
| if (taskloop_p) |
| { |
| gimple *g = last_stmt (region->outer->entry); /* The enclosing region |
| must be the taskloop GIMPLE_OMP_FOR, as asserted next. */ |
| gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR |
| && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP); |
| struct omp_for_data fd; |
| omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL); |
| startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); /* The first |
| two _LOOPTEMP_ clauses of the task give the start and end variables. */ |
| endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar), |
| OMP_CLAUSE__LOOPTEMP_); |
| startvar = OMP_CLAUSE_DECL (startvar); |
| endvar = OMP_CLAUSE_DECL (endvar); |
| step = fold_convert_loc (loc, fd.iter_type, fd.loop.step); |
| if (fd.loop.cond_code == LT_EXPR) |
| iflags |= GOMP_TASK_FLAG_UP; |
| tree tclauses = gimple_omp_for_clauses (g); |
| num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS); |
| if (num_tasks) |
| { |
| if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks)) |
| iflags |= GOMP_TASK_FLAG_STRICT; |
| num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks); |
| } |
| else |
| { |
| num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE); /* With no |
| num_tasks clause the same argument carries the grainsize, marked |
| by GOMP_TASK_FLAG_GRAINSIZE, or zero when neither is present. */ |
| if (num_tasks) |
| { |
| iflags |= GOMP_TASK_FLAG_GRAINSIZE; |
| if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks)) |
| iflags |= GOMP_TASK_FLAG_STRICT; |
| num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks); |
| } |
| else |
| num_tasks = integer_zero_node; |
| } |
| num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks); |
| if (ifc == NULL_TREE) |
| iflags |= GOMP_TASK_FLAG_IF; /* Without an if clause the flag is always |
| set; with one it is added to FLAGS at run time further down. */ |
| if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP)) |
| iflags |= GOMP_TASK_FLAG_NOGROUP; |
| ull = fd.iter_type == long_long_unsigned_type_node; |
| if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION)) |
| iflags |= GOMP_TASK_FLAG_REDUCTION; |
| } |
| else |
| { |
| if (priority) |
| iflags |= GOMP_TASK_FLAG_PRIORITY; |
| if (detach) |
| iflags |= GOMP_TASK_FLAG_DETACH; |
| } |
| |
| tree flags = build_int_cst (unsigned_type_node, iflags); |
| |
| tree cond = boolean_true_node; /* Only passed to the non-taskloop call. */ |
| if (ifc) |
| { |
| if (taskloop_p) |
| { |
| tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); |
| t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, |
| build_int_cst (unsigned_type_node, |
| GOMP_TASK_FLAG_IF), |
| build_int_cst (unsigned_type_node, 0)); |
| flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, |
| flags, t); /* T is GOMP_TASK_FLAG_IF or 0; IFLAGS does not |
| contain that flag on this path. */ |
| } |
| else |
| cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc)); |
| } |
| |
| if (finalc) |
| { |
| tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc)); |
| t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t, |
| build_int_cst (unsigned_type_node, |
| GOMP_TASK_FLAG_FINAL), |
| build_int_cst (unsigned_type_node, 0)); |
| flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t); |
| } |
| if (depend) |
| depend = OMP_CLAUSE_DECL (depend); |
| else |
| depend = build_int_cst (ptr_type_node, 0); /* Null pointer argument. */ |
| if (priority) |
| priority = fold_convert (integer_type_node, |
| OMP_CLAUSE_PRIORITY_EXPR (priority)); |
| else |
| priority = integer_zero_node; |
| |
| gsi = gsi_last_nondebug_bb (bb); |
| |
| detach = (detach |
| ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach)) |
| : null_pointer_node); |
| |
| tree t = gimple_omp_task_data_arg (entry_stmt); |
| if (t == NULL) |
| t2 = null_pointer_node; |
| else |
| t2 = build_fold_addr_expr_loc (loc, t); |
| t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt)); |
| t = gimple_omp_task_copy_fn (entry_stmt); |
| if (t == NULL) |
| t3 = null_pointer_node; |
| else |
| t3 = build_fold_addr_expr_loc (loc, t); |
| |
| /* T1 is the child function, T2 the data argument and T3 the copy |
| function; T2 and T3 are null pointers when absent. */ |
| if (taskloop_p) |
| t = build_call_expr (ull |
| ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL) |
| : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP), |
| 11, t1, t2, t3, |
| gimple_omp_task_arg_size (entry_stmt), |
| gimple_omp_task_arg_align (entry_stmt), flags, |
| num_tasks, priority, startvar, endvar, step); |
| else |
| t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK), |
| 10, t1, t2, t3, |
| gimple_omp_task_arg_size (entry_stmt), |
| gimple_omp_task_arg_align (entry_stmt), cond, flags, |
| depend, priority, detach); |
| |
| force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| } |
| |
| /* Emit the runtime call implementing a taskwait with a depend clause. |
| ENTRY_STMT is the directive and BB the block the call is inserted |
| into, after its last non-debug statement. |
| |
| Nothing is emitted when ENTRY_STMT has no depend clause. Otherwise |
| the decl of the depend clause is the single argument of the call, and |
| a nowait clause selects the NOWAIT variant of the builtin. */ |
| |
| static void |
| expand_taskwait_call (basic_block bb, gomp_task *entry_stmt) |
| { |
| tree clauses = gimple_omp_task_clauses (entry_stmt); |
| tree dep_clause = omp_find_clause (clauses, OMP_CLAUSE_DEPEND); |
| |
| if (dep_clause != NULL_TREE) |
| { |
| tree dep_arg = OMP_CLAUSE_DECL (dep_clause); |
| |
| enum built_in_function fn_code = BUILT_IN_GOMP_TASKWAIT_DEPEND; |
| if (omp_find_clause (clauses, OMP_CLAUSE_NOWAIT) != NULL_TREE) |
| fn_code = BUILT_IN_GOMP_TASKWAIT_DEPEND_NOWAIT; |
| |
| gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); |
| tree call = build_call_expr (builtin_decl_explicit (fn_code), 1, dep_arg); |
| |
| force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| } |
| } |
| |
| /* Emit the call to GOMP_teams_reg that implements a host teams |
| construct. ENTRY_STMT is the teams directive and BB the block the |
| call is inserted into, after its last non-debug statement. |
| |
| The call receives the address of the child function, the address of |
| the data argument (or a null pointer), the upper num_teams expression |
| and the thread_limit expression, each converted to unsigned and |
| defaulting to 0 when the clause is absent, and a trailing zero. */ |
| |
| static void |
| expand_teams_call (basic_block bb, gomp_teams *entry_stmt) |
| { |
| tree clauses = gimple_omp_teams_clauses (entry_stmt); |
| |
| tree teams_clause = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS); |
| tree num_teams |
| = (teams_clause != NULL_TREE |
| ? fold_convert (unsigned_type_node, |
| OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (teams_clause)) |
| : build_int_cst (unsigned_type_node, 0)); |
| |
| tree limit_clause = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT); |
| tree thread_limit |
| = (limit_clause != NULL_TREE |
| ? fold_convert (unsigned_type_node, |
| OMP_CLAUSE_THREAD_LIMIT_EXPR (limit_clause)) |
| : build_int_cst (unsigned_type_node, 0)); |
| |
| gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb); |
| |
| tree data = gimple_omp_teams_data_arg (entry_stmt); |
| tree data_addr = null_pointer_node; |
| if (data != NULL) |
| data_addr = build_fold_addr_expr (data); |
| tree fn_addr = build_fold_addr_expr (gimple_omp_teams_child_fn (entry_stmt)); |
| |
| vec<tree, va_gc> *args; |
| vec_alloc (args, 5); |
| args->quick_push (fn_addr); |
| args->quick_push (data_addr); |
| args->quick_push (num_teams); |
| args->quick_push (thread_limit); |
| /* Last argument is reserved for future extensibility. */ |
| args->quick_push (build_zero_cst (unsigned_type_node)); |
| |
| tree call |
| = build_call_expr_loc_vec (UNKNOWN_LOCATION, |
| builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG), |
| args); |
| |
| force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| } |
| |
| /* Link the DECLs in vector V through their DECL_CHAIN fields, keeping |
| the order of V, and return the first one. The last DECL's chain is |
| set to NULL_TREE. Returns NULL_TREE when V is NULL or empty. */ |
| |
| static tree |
| vec2chain (vec<tree, va_gc> *v) |
| { |
| tree head = NULL_TREE; |
| |
| /* Walk backwards so that each element can point at its successor. */ |
| for (unsigned ix = vec_safe_length (v); ix-- > 0; ) |
| { |
| tree decl = (*v)[ix]; |
| DECL_CHAIN (decl) = head; |
| head = decl; |
| } |
| |
| return head; |
| } |
| |
| /* Remove barriers in REGION->EXIT's block. Note that this is only |
| valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region |
| is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that |
| left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be |
| removed. |
| |
| A barrier is removed by marking the workshare's GIMPLE_OMP_RETURN as |
| nowait. That is done only when no addressable variable is found among |
| the child function's local decls or in the BLOCK_VARS of the blocks |
| walked from the workshare's return statement up to the parallel |
| statement's block. */ |
| |
| static void |
| remove_exit_barrier (struct omp_region *region) |
| { |
| gimple_stmt_iterator gsi; |
| basic_block exit_bb; |
| edge_iterator ei; |
| edge e; |
| gimple *stmt; |
| int any_addressable_vars = -1; /* Negative until computed, then 0 or 1. */ |
| |
| exit_bb = region->exit; |
| |
| /* If the parallel region doesn't return, we don't have REGION->EXIT |
| block at all. */ |
| if (! exit_bb) |
| return; |
| |
| /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The |
| workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of |
| statements that can appear in between are extremely limited -- no |
| memory operations at all. Here, we allow nothing at all, so the |
| only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */ |
| gsi = gsi_last_nondebug_bb (exit_bb); |
| gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); |
| gsi_prev_nondebug (&gsi); |
| if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL) |
| return; |
| |
| FOR_EACH_EDGE (e, ei, exit_bb->preds) /* Check each predecessor's last stmt. */ |
| { |
| gsi = gsi_last_nondebug_bb (e->src); |
| if (gsi_end_p (gsi)) |
| continue; |
| stmt = gsi_stmt (gsi); |
| if (gimple_code (stmt) == GIMPLE_OMP_RETURN |
| && !gimple_omp_return_nowait_p (stmt)) |
| { |
| /* OpenMP 3.0 tasks unfortunately prevent this optimization |
| in many cases. If there could be tasks queued, the barrier |
| might be needed to let the tasks run before some local |
| variable of the parallel that the task uses as shared |
| runs out of scope. The task can be spawned either |
| from within current function (this would be easy to check) |
| or from some function it calls and gets passed an address |
| of such a variable. |
| |
| The scan below runs at most once per call of this function; |
| its result is cached in ANY_ADDRESSABLE_VARS. */ |
| if (any_addressable_vars < 0) |
| { |
| gomp_parallel *parallel_stmt |
| = as_a <gomp_parallel *> (last_stmt (region->entry)); |
| tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt); |
| tree local_decls, block, decl; |
| unsigned ix; |
| |
| any_addressable_vars = 0; |
| FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl) |
| if (TREE_ADDRESSABLE (decl)) |
| { |
| any_addressable_vars = 1; |
| break; |
| } |
| for (block = gimple_block (stmt); |
| !any_addressable_vars |
| && block |
| && TREE_CODE (block) == BLOCK; |
| block = BLOCK_SUPERCONTEXT (block)) |
| { |
| for (local_decls = BLOCK_VARS (block); |
| local_decls; |
| local_decls = DECL_CHAIN (local_decls)) |
| if (TREE_ADDRESSABLE (local_decls)) |
| { |
| any_addressable_vars = 1; |
| break; |
| } |
| if (block == gimple_block (parallel_stmt)) /* Stop at the parallel's block. */ |
| break; |
| } |
| } |
| if (!any_addressable_vars) |
| gimple_omp_return_set_nowait (stmt); |
| } |
| } |
| } |
| |
| /* Apply remove_exit_barrier to every GIMPLE_OMP_PARALLEL region in the |
| tree rooted at REGION: REGION itself and, recursively, each region on |
| the chain of regions nested directly inside it. */ |
| |
| static void |
| remove_exit_barriers (struct omp_region *region) |
| { |
| if (region->type == GIMPLE_OMP_PARALLEL) |
| remove_exit_barrier (region); |
| |
| for (struct omp_region *child = region->inner; child; child = child->next) |
| remove_exit_barriers (child); |
| } |
| |
| /* Redirect calls to omp_get_thread_num () and omp_get_num_threads () in |
| the current function to __builtin_omp_get_{thread_num,num_threads} (). |
| The library functions can't be declared const, but within one parallel |
| body their results are constant, and the builtins are declared const. |
| The same holds for a task body, except that in an untied task |
| omp_get_thread_num () can change at any task scheduling point, so it is |
| left alone there. ENTRY_STMT is the directive whose body is being |
| processed. */ |
| |
| static void |
| optimize_omp_library_calls (gimple *entry_stmt) |
| { |
| tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM); |
| tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS); |
| tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree); |
| tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree); |
| |
| bool untied_task = false; |
| if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) |
| untied_task = omp_find_clause (gimple_omp_task_clauses (entry_stmt), |
| OMP_CLAUSE_UNTIED) != NULL; |
| |
| basic_block bb; |
| FOR_EACH_BB_FN (bb, cfun) |
| for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); |
| gsi_next (&gsi)) |
| { |
| gimple *stmt = gsi_stmt (gsi); |
| if (!is_gimple_call (stmt)) |
| continue; |
| |
| /* Only direct calls to external, public functions without a body |
| are candidates. */ |
| tree callee = gimple_call_fndecl (stmt); |
| if (!callee |
| || !DECL_EXTERNAL (callee) |
| || !TREE_PUBLIC (callee) |
| || DECL_INITIAL (callee) != NULL) |
| continue; |
| |
| tree replacement; |
| if (DECL_NAME (callee) == thr_num_id) |
| { |
| /* In #pragma omp task untied omp_get_thread_num () can change |
| during the execution of the task region. */ |
| if (untied_task) |
| continue; |
| replacement = thr_num_tree; |
| } |
| else if (DECL_NAME (callee) == num_thr_id) |
| replacement = num_thr_tree; |
| else |
| continue; |
| |
| /* The callee must really be the library function: same assembler |
| name, no arguments, not throwing when exceptions are enabled, and |
| a compatible return type. */ |
| if (DECL_ASSEMBLER_NAME (callee) != DECL_ASSEMBLER_NAME (replacement) |
| || gimple_call_num_args (stmt) != 0) |
| continue; |
| |
| if (flag_exceptions && !TREE_NOTHROW (callee)) |
| continue; |
| |
| if (TREE_CODE (TREE_TYPE (callee)) != FUNCTION_TYPE |
| || !types_compatible_p (TREE_TYPE (TREE_TYPE (callee)), |
| TREE_TYPE (TREE_TYPE (replacement)))) |
| continue; |
| |
| gimple_call_set_fndecl (stmt, replacement); |
| } |
| } |
| |
| /* walk_tree callback used by expand_omp_build_assign and |
| expand_omp_build_cond. Return *TP when it needs to be regimplified, |
| NULL_TREE otherwise. WALK_SUBTREES is cleared for types and decls so |
| that the walk does not descend into them. */ |
| |
| static tree |
| expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *) |
| { |
| tree node = *tp; |
| |
| /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */ |
| if (VAR_P (node) && DECL_HAS_VALUE_EXPR_P (node)) |
| return node; |
| |
| if (TREE_CODE (node) == ADDR_EXPR) |
| recompute_tree_invariant_for_addr_expr (node); |
| |
| *walk_subtrees = !(TYPE_P (node) || DECL_P (node)); |
| return NULL_TREE; |
| } |
| |
| /* Insert the assignment TO = FROM next to *GSI_P: after it when AFTER is |
| true, before it otherwise. FROM is gimplified first; the operands of the |
| new statement are regimplified afterwards when expand_omp_regimplify_p |
| flags either side. */ |
| |
| static void |
| expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from, |
| bool after) |
| { |
| /* Request a simple operand when TO is an addressable DECL. */ |
| bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to); |
| gimple *assign; |
| |
| if (after) |
| { |
| from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| assign = gimple_build_assign (to, from); |
| gsi_insert_after (gsi_p, assign, GSI_CONTINUE_LINKING); |
| } |
| else |
| { |
| from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE, |
| true, GSI_SAME_STMT); |
| assign = gimple_build_assign (to, from); |
| gsi_insert_before (gsi_p, assign, GSI_SAME_STMT); |
| } |
| |
| /* TO is walked only when FROM did not already request regimplification. */ |
| bool regimplify |
| = walk_tree (&from, expand_omp_regimplify_p, NULL, NULL) != NULL_TREE; |
| if (!regimplify) |
| regimplify |
| = walk_tree (&to, expand_omp_regimplify_p, NULL, NULL) != NULL_TREE; |
| |
| if (regimplify) |
| { |
| gimple_stmt_iterator assign_gsi = gsi_for_stmt (assign); |
| gimple_regimplify_operands (assign, &assign_gsi); |
| } |
| } |
| |
| /* Build the condition LHS CODE RHS and insert it next to *GSI_P: after it |
| when AFTER is true, before it otherwise. The operands are regimplified |
| when expand_omp_regimplify_p flags either of them. Return the new |
| GIMPLE_COND. */ |
| |
| static gcond * |
| expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code, |
| tree lhs, tree rhs, bool after = false) |
| { |
| gcond *cond = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE); |
| |
| if (!after) |
| gsi_insert_before (gsi_p, cond, GSI_SAME_STMT); |
| else |
| gsi_insert_after (gsi_p, cond, GSI_CONTINUE_LINKING); |
| |
| /* The RHS is walked only when the LHS did not already request |
| regimplification. */ |
| bool regimplify |
| = walk_tree (gimple_cond_lhs_ptr (cond), expand_omp_regimplify_p, |
| NULL, NULL) != NULL_TREE; |
| if (!regimplify) |
| regimplify |
| = walk_tree (gimple_cond_rhs_ptr (cond), expand_omp_regimplify_p, |
| NULL, NULL) != NULL_TREE; |
| |
| if (regimplify) |
| { |
| gimple_stmt_iterator cond_gsi = gsi_for_stmt (cond); |
| gimple_regimplify_operands (cond, &cond_gsi); |
| } |
| |
| return cond; |
| } |
| |
| /* Expand the OpenMP parallel or task directive starting at REGION. |
| |
| The directive ending REGION->entry may be a GIMPLE_OMP_PARALLEL, |
| GIMPLE_OMP_TASK or GIMPLE_OMP_TEAMS. A task for which |
| gimple_omp_task_taskwait_p holds is simply replaced by the call emitted |
| by expand_taskwait_call. Otherwise, unless the child function already |
| has a CFG, the body of the region is moved into the child function, and |
| finally the runtime call is emitted through expand_parallel_call, |
| expand_teams_call or expand_task_call. */ |
| |
| static void |
| expand_omp_taskreg (struct omp_region *region) |
| { |
| basic_block entry_bb, exit_bb, new_bb; |
| struct function *child_cfun; |
| tree child_fn, block, t; |
| gimple_stmt_iterator gsi; |
| gimple *entry_stmt, *stmt; |
| edge e; |
| vec<tree, va_gc> *ws_args; |
| |
| entry_stmt = last_stmt (region->entry); |
| if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK |
| && gimple_omp_task_taskwait_p (entry_stmt)) |
| { |
| new_bb = region->entry; |
| gsi = gsi_last_nondebug_bb (region->entry); |
| gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK); |
| gsi_remove (&gsi, true); |
| expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt)); |
| return; |
| } |
| |
| child_fn = gimple_omp_taskreg_child_fn (entry_stmt); |
| child_cfun = DECL_STRUCT_FUNCTION (child_fn); |
| |
| entry_bb = region->entry; |
| if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK) |
| exit_bb = region->cont; |
| else |
| exit_bb = region->exit; |
| |
| if (is_combined_parallel (region)) |
| ws_args = region->ws_args; |
| else |
| ws_args = NULL; |
| |
| if (child_cfun->cfg) |
| { |
| /* Due to inlining, it may happen that we have already outlined |
| the region, in which case all we need to do is make the |
| sub-graph unreachable and emit the parallel call. */ |
| edge entry_succ_e, exit_succ_e; |
| |
| entry_succ_e = single_succ_edge (entry_bb); |
| |
| gsi = gsi_last_nondebug_bb (entry_bb); |
| gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL |
| || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK |
| || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS); |
| gsi_remove (&gsi, true); |
| |
| new_bb = entry_bb; |
| if (exit_bb) |
| { |
| exit_succ_e = single_succ_edge (exit_bb); |
| make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU); |
| } |
| remove_edge_and_dominated_blocks (entry_succ_e); |
| } |
| else |
| { |
| unsigned srcidx, dstidx, num; |
| |
| /* If the parallel region needs data sent from the parent |
| function, then the very first statement (except possible |
| tree profile counter updates) of the parallel body |
| is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since |
| &.OMP_DATA_O is passed as an argument to the child function, |
| we need to replace it with the argument as seen by the child |
| function. |
| |
| In most cases, this will end up being the identity assignment |
| .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had |
| a function call that has been inlined, the original PARM_DECL |
| .OMP_DATA_I may have been converted into a different local |
| variable. In which case, we need to keep the assignment. */ |
| if (gimple_omp_taskreg_data_arg (entry_stmt)) |
| { |
| basic_block entry_succ_bb |
| = single_succ_p (entry_bb) ? single_succ (entry_bb) |
| : FALLTHRU_EDGE (entry_bb)->dest; |
| tree arg; |
| gimple *parcopy_stmt = NULL; |
| |
| for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi)) |
| { |
| gimple *stmt; |
| |
| gcc_assert (!gsi_end_p (gsi)); |
| stmt = gsi_stmt (gsi); |
| if (gimple_code (stmt) != GIMPLE_ASSIGN) |
| continue; |
| |
| if (gimple_num_ops (stmt) == 2) |
| { |
| tree arg = gimple_assign_rhs1 (stmt); |
| |
| /* We ignore the subcode because we're |
| effectively doing a STRIP_NOPS. */ |
| |
| if (TREE_CODE (arg) == ADDR_EXPR |
| && (TREE_OPERAND (arg, 0) |
| == gimple_omp_taskreg_data_arg (entry_stmt))) |
| { |
| parcopy_stmt = stmt; |
| break; |
| } |
| } |
| } |
| |
| gcc_assert (parcopy_stmt != NULL); |
| arg = DECL_ARGUMENTS (child_fn); |
| |
| if (!gimple_in_ssa_p (cfun)) |
| { |
| if (gimple_assign_lhs (parcopy_stmt) == arg) |
| gsi_remove (&gsi, true); |
| else |
| { |
| /* ?? Is setting the subcode really necessary ?? */ |
| gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg)); |
| gimple_assign_set_rhs1 (parcopy_stmt, arg); |
| } |
| } |
| else |
| { |
| tree lhs = gimple_assign_lhs (parcopy_stmt); |
| gcc_assert (SSA_NAME_VAR (lhs) == arg); |
| /* We'd like to set the rhs to the default def in the child_fn, |
| but it's too early to create ssa names in the child_fn. |
| Instead, we set the rhs to the parm. In |
| move_sese_region_to_fn, we introduce a default def for the |
| parm, map the parm to its default def, and once we encounter |
| this stmt, replace the parm with the default def. */ |
| gimple_assign_set_rhs1 (parcopy_stmt, arg); |
| update_stmt (parcopy_stmt); |
| } |
| } |
| |
| /* Declare local variables needed in CHILD_CFUN. */ |
| block = DECL_INITIAL (child_fn); |
| BLOCK_VARS (block) = vec2chain (child_cfun->local_decls); |
| /* The gimplifier could record temporaries in parallel/task block |
| rather than in containing function's local_decls chain, |
| which would mean cgraph missed finalizing them. Do it now. */ |
| for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t)) |
| if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t)) |
| varpool_node::finalize_decl (t); |
| DECL_SAVED_TREE (child_fn) = NULL; |
| /* We'll create a CFG for child_fn, so no gimple body is needed. */ |
| gimple_set_body (child_fn, NULL); |
| TREE_USED (block) = 1; |
| |
| /* Reset DECL_CONTEXT on function arguments. */ |
| for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t)) |
| DECL_CONTEXT (t) = child_fn; |
| |
| /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL, GIMPLE_OMP_TASK or |
| GIMPLE_OMP_TEAMS, so that it can be moved to the child function. */ |
| gsi = gsi_last_nondebug_bb (entry_bb); |
| stmt = gsi_stmt (gsi); |
| gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL |
| || gimple_code (stmt) == GIMPLE_OMP_TASK |
| || gimple_code (stmt) == GIMPLE_OMP_TEAMS)); |
| e = split_block (entry_bb, stmt); |
| gsi_remove (&gsi, true); |
| entry_bb = e->dest; |
| edge e2 = NULL; /* Stays NULL unless ENTRY_STMT is a GIMPLE_OMP_TASK. */ |
| if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK) |
| single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU; |
| else |
| { |
| e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL); |
| gcc_assert (e2->dest == region->exit); |
| remove_edge (BRANCH_EDGE (entry_bb)); |
| set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src); |
| gsi = gsi_last_nondebug_bb (region->exit); |
| gcc_assert (!gsi_end_p (gsi) |
| && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN); |
| gsi_remove (&gsi, true); |
| } |
| |
| /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */ |
| if (exit_bb) |
| { |
| gsi = gsi_last_nondebug_bb (exit_bb); |
| gcc_assert (!gsi_end_p (gsi) |
| && (gimple_code (gsi_stmt (gsi)) |
| == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN))); |
| stmt = gimple_build_return (NULL); |
| gsi_insert_after (&gsi, stmt, GSI_SAME_STMT); |
| gsi_remove (&gsi, true); |
| } |
| |
| /* Move the parallel region into CHILD_CFUN. */ |
| |
| if (gimple_in_ssa_p (cfun)) |
| { |
| init_tree_ssa (child_cfun); |
| init_ssa_operands (child_cfun); |
| child_cfun->gimple_df->in_ssa_p = true; |
| block = NULL_TREE; |
| } |
| else |
| block = gimple_block (entry_stmt); |
| |
| new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block); |
| if (exit_bb) |
| single_succ_edge (new_bb)->flags = EDGE_FALLTHRU; |
| if (e2) |
| { |
| basic_block dest_bb = e2->dest; |
| if (!exit_bb) |
| make_edge (new_bb, dest_bb, EDGE_FALLTHRU); |
| remove_edge (e2); |
| set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb); |
| } |
| /* When the OMP expansion process cannot guarantee an up-to-date |
| loop tree arrange for the child function to fixup loops. */ |
| if (loops_state_satisfies_p (LOOPS_NEED_FIXUP)) |
| child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP; |
| |
| /* Remove non-local VAR_DECLs from child_cfun->local_decls list. |
| Entries whose DECL_CONTEXT is still the parent function are dropped |
| and the remaining ones are compacted in place. */ |
| num = vec_safe_length (child_cfun->local_decls); |
| for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++) |
| { |
| t = (*child_cfun->local_decls)[srcidx]; |
| if (DECL_CONTEXT (t) == cfun->decl) |
| continue; |
| if (srcidx != dstidx) |
| (*child_cfun->local_decls)[dstidx] = t; |
| dstidx++; |
| } |
| if (dstidx != num) |
| vec_safe_truncate (child_cfun->local_decls, dstidx); |
| |
| /* Inform the callgraph about the new function. */ |
| child_cfun->curr_properties = cfun->curr_properties; |
| child_cfun->has_simduid_loops |= cfun->has_simduid_loops; |
| child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops; |
| cgraph_node *node = cgraph_node::get_create (child_fn); |
| node->parallelized_function = 1; |
| cgraph_node::add_new_function (child_fn, true); |
| |
| bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl) |
| && !DECL_ASSEMBLER_NAME_SET_P (child_fn); |
| |
| /* Fix the callgraph edges for child_cfun. Those for cfun will be |
| fixed in a following pass. */ |
| push_cfun (child_cfun); |
| if (need_asm) |
| assign_assembler_name_if_needed (child_fn); |
| |
| if (optimize) |
| optimize_omp_library_calls (entry_stmt); |
| update_max_bb_count (); |
| cgraph_edge::rebuild_edges (); |
| |
| /* Some EH regions might become dead, see PR34608. If |
| pass_cleanup_cfg isn't the first pass to happen with the |
| new child, these dead EH edges might cause problems. |
| Clean them up now. */ |
| if (flag_exceptions) |
| { |
| basic_block bb; |
| bool changed = false; |
| |
| FOR_EACH_BB_FN (bb, cfun) |
| changed |= gimple_purge_dead_eh_edges (bb); |
| if (changed) |
| cleanup_tree_cfg (); |
| } |
| if (gimple_in_ssa_p (cfun)) |
| update_ssa (TODO_update_ssa); |
| if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP)) |
| verify_loop_structure (); |
| pop_cfun (); |
| |
| if (dump_file && !gimple_in_ssa_p (cfun)) |
| { |
| omp_any_child_fn_dumped = true; |
| dump_function_header (dump_file, child_fn, dump_flags); |
| dump_function_to_file (child_fn, dump_file, dump_flags); |
| } |
| } |
| |
| adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn); |
| |
| if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL) |
| expand_parallel_call (region, new_bb, |
| as_a <gomp_parallel *> (entry_stmt), ws_args); |
| else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS) |
| expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt)); |
| else |
| expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt)); |
| } |
| |
| /* Information about members of an OpenACC collapsed loop nest. One |
| element per collapsed loop; filled in by expand_oacc_collapse_init and |
| read by expand_oacc_collapse_vars. */ |
| |
| struct oacc_collapse |
| { |
| tree base; /* Base value (the loop's gimplified initial value). */ |
| tree iters; /* Number of steps (iterations of this loop). */ |
| tree step; /* Step size, converted to the difference type. */ |
| tree tile; /* Tile increment (if tiled), otherwise NULL. */ |
| tree outer; /* Tile iterator var; the loop's own variable if not tiled. */ |
| }; |
| |
| /* Helper for expand_oacc_for. Determine collapsed loop information. |
| Fill in COUNTS array. Emit any initialization code before GSI. |
| Return the calculated outer loop bound of BOUND_TYPE. |
| |
| FD describes the loop nest; its combined loop is required to start at |
| zero with a step of one. DIFF_TYPE is the type in which steps, ranges |
| and iteration counts are computed. When FD->tiling is set, an |
| IFN_GOACC_TILE call with location LOC is emitted for each loop. The |
| returned bound is the product of the per-loop iteration counts. */ |
| |
| static tree |
| expand_oacc_collapse_init (const struct omp_for_data *fd, |
| gimple_stmt_iterator *gsi, |
| oacc_collapse *counts, tree diff_type, |
| tree bound_type, location_t loc) |
| { |
| tree tiling = fd->tiling; |
| tree total = build_int_cst (bound_type, 1); |
| int ix; |
| |
| gcc_assert (integer_onep (fd->loop.step)); |
| gcc_assert (integer_zerop (fd->loop.n1)); |
| |
| /* When tiling, the first operand of the tile clause applies to the |
| innermost loop, and we work outwards from there. Seems |
| backwards, but whatever. */ |
| for (ix = fd->collapse; ix--;) |
| { |
| const omp_for_data_loop *loop = &fd->loops[ix]; |
| |
| tree iter_type = TREE_TYPE (loop->v); |
| tree plus_type = iter_type; |
| |
| gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR); |
| |
| if (POINTER_TYPE_P (iter_type)) |
| plus_type = sizetype; |
| |
| if (tiling) |
| { |
| tree num = build_int_cst (integer_type_node, fd->collapse); |
| tree loop_no = build_int_cst (integer_type_node, ix); |
| tree tile = TREE_VALUE (tiling); |
| gcall *call |
| = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile, |
| /* gwv-outer=*/integer_zero_node, |
| /* gwv-inner=*/integer_zero_node); |
| |
| counts[ix].outer = create_tmp_var (iter_type, ".outer"); |
| counts[ix].tile = create_tmp_var (diff_type, ".tile"); |
| gimple_call_set_lhs (call, counts[ix].tile); |
| gimple_set_location (call, loc); |
| gsi_insert_before (gsi, call, GSI_SAME_STMT); |
| |
| tiling = TREE_CHAIN (tiling); |
| } |
| else |
| { |
| counts[ix].tile = NULL; |
| counts[ix].outer = loop->v; |
| } |
| |
| tree b = loop->n1; |
| tree e = loop->n2; |
| tree s = loop->step; |
| bool up = loop->cond_code == LT_EXPR; |
| tree dir = build_int_cst (diff_type, up ? +1 : -1); |
| bool negating; |
| tree expr; |
| |
| b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| |
| /* Convert the step, avoiding possible unsigned->signed overflow. |
| For a downward loop with an unsigned step, negate before and |
| after the conversion. */ |
| negating = !up && TYPE_UNSIGNED (TREE_TYPE (s)); |
| if (negating) |
| s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s); |
| s = fold_convert (diff_type, s); |
| if (negating) |
| s = fold_build1 (NEGATE_EXPR, diff_type, s); |
| s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| |
| /* Determine the range, avoiding possible unsigned->signed overflow. |
| For a downward loop over an unsigned type, compute B - E and |
| negate the converted result. */ |
| negating = !up && TYPE_UNSIGNED (iter_type); |
| expr = fold_build2 (MINUS_EXPR, plus_type, |
| fold_convert (plus_type, negating ? b : e), |
| fold_convert (plus_type, negating ? e : b)); |
| expr = fold_convert (diff_type, expr); |
| if (negating) |
| expr = fold_build1 (NEGATE_EXPR, diff_type, expr); |
| tree range = force_gimple_operand_gsi |
| (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT); |
| |
| /* Determine number of iterations: (RANGE - DIR + S) / S. */ |
| expr = fold_build2 (MINUS_EXPR, diff_type, range, dir); |
| expr = fold_build2 (PLUS_EXPR, diff_type, expr, s); |
| expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s); |
| |
| tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| |
| counts[ix].base = b; |
| counts[ix].iters = iters; |
| counts[ix].step = s; |
| |
| total = fold_build2 (MULT_EXPR, bound_type, total, |
| fold_convert (bound_type, iters)); |
| } |
| |
| return total; |
| } |
| |
| /* Emit initializers for collapsed loop members. INNER is true if |
| this is for the element loop of a TILE. IVAR is the outer |
| loop iteration variable, from which collapsed loop iteration values |
| are calculated. COUNTS array has been initialized by |
| expand_oacc_collapse_init. The code is inserted before GSI, and |
| DIFF_TYPE is the type in which each offset (index times step) is |
| computed. */ |
| |
| static void |
| expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner, |
| gimple_stmt_iterator *gsi, |
| const oacc_collapse *counts, tree ivar, |
| tree diff_type) |
| { |
| tree ivar_type = TREE_TYPE (ivar); |
| |
| /* The most rapidly changing iteration variable is the innermost |
| one. For every loop but the outermost, IVAR is split by that loop's |
| iteration count: the remainder is this loop's index and the quotient |
| becomes IVAR for the enclosing loops. */ |
| for (int ix = fd->collapse; ix--;) |
| { |
| const omp_for_data_loop *loop = &fd->loops[ix]; |
| const oacc_collapse *collapse = &counts[ix]; |
| tree v = inner ? loop->v : collapse->outer; |
| tree iter_type = TREE_TYPE (v); |
| tree plus_type = iter_type; |
| enum tree_code plus_code = PLUS_EXPR; |
| tree expr; |
| |
| if (POINTER_TYPE_P (iter_type)) |
| { |
| plus_code = POINTER_PLUS_EXPR; |
| plus_type = sizetype; |
| } |
| |
| expr = ivar; |
| if (ix) |
| { |
| tree mod = fold_convert (ivar_type, collapse->iters); |
| ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod); |
| expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod); |
| ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| } |
| |
| expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr), |
| fold_convert (diff_type, collapse->step)); |
| expr = fold_build2 (plus_code, iter_type, |
| inner ? collapse->outer : collapse->base, |
| fold_convert (plus_type, expr)); |
| expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE, |
| true, GSI_SAME_STMT); |
| gassign *ass = gimple_build_assign (v, expr); |
| gsi_insert_before (gsi, ass, GSI_SAME_STMT); |
| } |
| } |
| |
| /* Helper function for expand_omp_{for_*,simd}. If this is the outermost |
| of the combined collapse > 1 loop constructs, generate code like: |
| if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB; |
| if (cond3 is <) |
| adj = STEP3 - 1; |
| else |
| adj = STEP3 + 1; |
| count3 = (adj + N32 - N31) / STEP3; |
| if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB; |
| if (cond2 is <) |
| adj = STEP2 - 1; |
| else |
| adj = STEP2 + 1; |
| count2 = (adj + N22 - N21) / STEP2; |
| if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB; |
| if (cond1 is <) |
| adj = STEP1 - 1; |
| else |
| adj = STEP1 + 1; |
| count1 = (adj + N12 - N11) / STEP1; |
| count = count1 * count2 * count3; |
| Furthermore, if ZERO_ITER_BB is NULL, create a BB which does: |
| count = 0; |
| and set ZERO_ITER_BB to that bb. If this isn't the outermost |
| of the combined loop constructs, just initialize COUNTS array |
| from the _looptemp_ clauses. For loop nests with non-rectangular |
| loops, do this only for the rectangular loops. Then pick |
| the loops which reference outer vars in their bound expressions |
| and the loops which they refer to and for this sub-nest compute |
| number of iterations. For triangular loops use Faulhaber's formula, |
| otherwise as a fallback, compute by iterating the loops. |
| If e.g. the sub-nest is |
| for (I = N11; I COND1 N12; I += STEP1) |
| for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2) |
| for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3) |
| do: |
| COUNT = 0; |
| for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1) |
| for (tmpj = M21 * tmpi + N21; |
| tmpj COND2 M22 * tmpi + N22; tmpj += STEP2) |
| { |
| int tmpk1 = M31 * tmpj + N31; |
| int tmpk2 = M32 * tmpj + N32; |
| if (tmpk1 COND3 tmpk2) |
| { |
| if (COND3 is <) |
| adj = STEP3 - 1; |
| else |
| adj = STEP3 + 1; |
| COUNT += (adj + tmpk2 - tmpk1) / STEP3; |
| } |
| } |
| and finally multiply the counts of the rectangular loops not |
| in the sub-nest with COUNT. Also, as counts[fd->last_nonrect] |
| store number of iterations of the loops from fd->first_nonrect |
| to fd->last_nonrect inclusive, i.e. the above COUNT multiplied |
| by the counts of rectangular loops not referenced in any non-rectangular |
| loops sandwiched in between those. */ |
| |
| /* NOTE: It *could* be better to moosh all of the BBs together, |
| creating one larger BB with all the computation and the unexpected |
| jump at the end. I.e. |
| |
| bool zero3, zero2, zero1, zero; |
| |
| zero3 = N32 c3 N31; |
| count3 = (N32 - N31) /[cl] STEP3; |
| zero2 = N22 c2 N21; |
| count2 = (N22 - N21) /[cl] STEP2; |
| zero1 = N12 c1 N11; |
| count1 = (N12 - N11) /[cl] STEP1; |
| zero = zero3 || zero2 || zero1; |
| count = count1 * count2 * count3; |
| if (__builtin_expect(zero, false)) goto zero_iter_bb; |
| |
| After all, we expect the zero=false, and thus we expect to have to |
| evaluate all of the comparison expressions, so short-circuiting |
| oughtn't be a win. Since the condition isn't protecting a |
| denominator, we're not concerned about divide-by-zero, so we can |
| fully evaluate count even if a numerator turned out to be wrong. |
| |
| It seems like putting this all together would create much better |
| scheduling opportunities, and less pressure on the chip's branch |
| predictor. */ |
| |
| static void |
| expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi, |
| basic_block &entry_bb, tree *counts, |
| basic_block &zero_iter1_bb, int &first_zero_iter1, |
| basic_block &zero_iter2_bb, int &first_zero_iter2, |
| basic_block &l2_dom_bb) |
| { |
| tree t, type = TREE_TYPE (fd->loop.v); |
| edge e, ne; |
| int i; |
| |
| /* Collapsed loops need work for expansion into SSA form. */ |
| gcc_assert (!gimple_in_ssa_p (cfun)); |
| |
| if (gimple_omp_for_combined_into_p (fd->for_stmt) |
| && TREE_CODE (fd->loop.n2) != INTEGER_CST) |
| { |
| gcc_assert (fd->ordered == 0); |
| /* First two _looptemp_ clauses are for istart/iend, counts[0] |
| isn't supposed to be handled, as the inner loop doesn't |
| use it. */ |
| tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt), |
| OMP_CLAUSE__LOOPTEMP_); |
| gcc_assert (innerc); |
| for (i = 0; i < fd->collapse; i++) |
| { |
| innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), |
| OMP_CLAUSE__LOOPTEMP_); |
| gcc_assert (innerc); |
| if (i) |
| counts[i] = OMP_CLAUSE_DECL (innerc); |
| else |
| counts[0] = NULL_TREE; |
| } |
| if (fd->non_rect |
| && fd->last_nonrect == fd->first_nonrect + 1 |
| && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v))) |
| { |
| tree c[4]; |
| for (i = 0; i < 4; i++) |
| { |
| innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), |
| OMP_CLAUSE__LOOPTEMP_); |
| gcc_assert (innerc); |
| c[i] = OMP_CLAUSE_DECL (innerc); |
| } |
| counts[0] = c[0]; |
| fd->first_inner_iterations = c[1]; |
| fd->factor = c[2]; |
| fd->adjn1 = c[3]; |
| } |
| return; |
| } |
| |
| for (i = fd->collapse; i < fd->ordered; i++) |
| { |
| tree itype = TREE_TYPE (fd->loops[i].v); |
| counts[i] = NULL_TREE; |
| t = fold_binary (fd->loops[i].cond_code, boolean_type_node, |
| fold_convert (itype, fd->loops[i].n1), |
| fold_convert (itype, fd->loops[i].n2)); |
| if (t && integer_zerop (t)) |
| { |
| for (i = fd->collapse; i < fd->ordered; i++) |
| counts[i] = build_int_cst (type, 0); |
| break; |
| } |
| } |
| bool rect_count_seen = false; |
| for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++) |
| { |
| tree itype = TREE_TYPE (fd->loops[i].v); |
| |
| if (i >= fd->collapse && counts[i]) |
| continue; |
| if (fd->non_rect) |
| { |
| /* Skip loops that use outer iterators in their expressions |
| during this phase. */ |
| if (fd->loops[i].m1 || fd->loops[i].m2) |
| { |
| counts[i] = build_zero_cst (type); |
| continue; |
| } |
| } |
| if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse) |
| && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node, |
| fold_convert (itype, fd->loops[i].n1), |
| fold_convert (itype, fd->loops[i].n2))) |
| == NULL_TREE || !integer_onep (t))) |
| { |
| gcond *cond_stmt; |
| tree n1, n2; |
| n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1)); |
| n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2)); |
| n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code, |
| n1, n2); |
| e = split_block (entry_bb, cond_stmt); |
| basic_block &zero_iter_bb |
| = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb; |
| int &first_zero_iter |
| = i < fd->collapse ? first_zero_iter1 : first_zero_iter2; |
| if (zero_iter_bb == NULL) |
| { |
| gassign *assign_stmt; |
| first_zero_iter = i; |
| zero_iter_bb = create_empty_bb (entry_bb); |
| add_bb_to_loop (zero_iter_bb, entry_bb->loop_father); |
| *gsi = gsi_after_labels (zero_iter_bb); |
| if (i < fd->collapse) |
| assign_stmt = gimple_build_assign (fd->loop.n2, |
| build_zero_cst (type)); |
| else |
| { |
| counts[i] = create_tmp_reg (type, ".count"); |
| assign_stmt |
| = gimple_build_assign (counts[i], build_zero_cst (type)); |
| } |
| gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT); |
| set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb, |
| entry_bb); |
| } |
| ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE); |
| ne->probability = profile_probability::very_unlikely (); |
| e->flags = EDGE_TRUE_VALUE; |
| e->probability = ne->probability.invert (); |
| if (l2_dom_bb == NULL) |
| l2_dom_bb = entry_bb; |
| entry_bb = e->dest; |
| *gsi = gsi_last_nondebug_bb (entry_bb); |
| } |
| |
| if (POINTER_TYPE_P (itype)) |
| itype = signed_type_for (itype); |
| t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR |
| ? -1 : 1)); |
| t = fold_build2 (PLUS_EXPR, itype, |
| fold_convert (itype, fd->loops[i].step), t); |
| t = fold_build2 (PLUS_EXPR, itype, t, |
| fold_convert (itype, fd->loops[i].n2)); |
| t = fold_build2 (MINUS_EXPR, itype, t, |
| fold_convert (itype, fd->loops[i].n1)); |
| /* ?? We could probably use CEIL_DIV_EXPR instead of |
| TRUNC_DIV_EXPR and adjusting by hand. Unless we can't |
| generate the same code in the end because generically we |
| don't know that the values involved must be negative for |
| GT?? */ |
| if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR) |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, |
| fold_build1 (NEGATE_EXPR, itype, t), |
| fold_build1 (NEGATE_EXPR, itype, |
| fold_convert (itype, |
| fd->loops[i].step))); |
| else |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, t, |
| fold_convert (itype, fd->loops[i].step)); |
| t = fold_convert (type, t); |
| if (TREE_CODE (t) == INTEGER_CST) |
| counts[i] = t; |
| else |
| { |
| if (i < fd->collapse || i != first_zero_iter2) |
| counts[i] = create_tmp_reg (type, ".count"); |
| expand_omp_build_assign (gsi, counts[i], t); |
| } |
| if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse) |
| { |
| if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect) |
| continue; |
| if (!rect_count_seen) |
| { |
| t = counts[i]; |
| rect_count_seen = true; |
| } |
| else |
| t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]); |
| expand_omp_build_assign (gsi, fd->loop.n2, t); |
| } |
| } |
| if (fd->non_rect && SSA_VAR_P (fd->loop.n2)) |
| { |
| gcc_assert (fd->last_nonrect != -1); |
| |
| counts[fd->last_nonrect] = create_tmp_reg (type, ".count"); |
| expand_omp_build_assign (gsi, counts[fd->last_nonrect], |
| build_zero_cst (type)); |
| for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++) |
| if (fd->loops[i].m1 |
| || fd->loops[i].m2 |
| || fd->loops[i].non_rect_referenced) |
| break; |
| if (i == fd->last_nonrect |
| && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect |
| && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)) |
| && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v))) |
| { |
| int o = fd->first_nonrect; |
| tree itype = TREE_TYPE (fd->loops[o].v); |
| tree n1o = create_tmp_reg (itype, ".n1o"); |
| t = fold_convert (itype, unshare_expr (fd->loops[o].n1)); |
| expand_omp_build_assign (gsi, n1o, t); |
| tree n2o = create_tmp_reg (itype, ".n2o"); |
| t = fold_convert (itype, unshare_expr (fd->loops[o].n2)); |
| expand_omp_build_assign (gsi, n2o, t); |
| if (fd->loops[i].m1 && fd->loops[i].m2) |
| t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2), |
| unshare_expr (fd->loops[i].m1)); |
| else if (fd->loops[i].m1) |
| t = fold_unary (NEGATE_EXPR, itype, |
| unshare_expr (fd->loops[i].m1)); |
| else |
| t = unshare_expr (fd->loops[i].m2); |
| tree m2minusm1 |
| = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| |
| gimple_stmt_iterator gsi2 = *gsi; |
| gsi_prev (&gsi2); |
| e = split_block (entry_bb, gsi_stmt (gsi2)); |
| e = split_block (e->dest, (gimple *) NULL); |
| basic_block bb1 = e->src; |
| entry_bb = e->dest; |
| *gsi = gsi_after_labels (entry_bb); |
| |
| gsi2 = gsi_after_labels (bb1); |
| tree ostep = fold_convert (itype, fd->loops[o].step); |
| t = build_int_cst (itype, (fd->loops[o].cond_code |
| == LT_EXPR ? -1 : 1)); |
| t = fold_build2 (PLUS_EXPR, itype, ostep, t); |
| t = fold_build2 (PLUS_EXPR, itype, t, n2o); |
| t = fold_build2 (MINUS_EXPR, itype, t, n1o); |
| if (TYPE_UNSIGNED (itype) |
| && fd->loops[o].cond_code == GT_EXPR) |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, |
| fold_build1 (NEGATE_EXPR, itype, t), |
| fold_build1 (NEGATE_EXPR, itype, ostep)); |
| else |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep); |
| tree outer_niters |
| = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| t = fold_build2 (MINUS_EXPR, itype, outer_niters, |
| build_one_cst (itype)); |
| t = fold_build2 (MULT_EXPR, itype, t, ostep); |
| t = fold_build2 (PLUS_EXPR, itype, n1o, t); |
| tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| tree n1, n2, n1e, n2e; |
| t = fold_convert (itype, unshare_expr (fd->loops[i].n1)); |
| if (fd->loops[i].m1) |
| { |
| n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1)); |
| n1 = fold_build2 (MULT_EXPR, itype, n1o, n1); |
| n1 = fold_build2 (PLUS_EXPR, itype, n1, t); |
| } |
| else |
| n1 = t; |
| n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); |
| if (fd->loops[i].m2) |
| { |
| n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2)); |
| n2 = fold_build2 (MULT_EXPR, itype, n1o, n2); |
| n2 = fold_build2 (PLUS_EXPR, itype, n2, t); |
| } |
| else |
| n2 = t; |
| n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| t = fold_convert (itype, unshare_expr (fd->loops[i].n1)); |
| if (fd->loops[i].m1) |
| { |
| n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1)); |
| n1e = fold_build2 (MULT_EXPR, itype, last, n1e); |
| n1e = fold_build2 (PLUS_EXPR, itype, n1e, t); |
| } |
| else |
| n1e = t; |
| n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); |
| if (fd->loops[i].m2) |
| { |
| n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2)); |
| n2e = fold_build2 (MULT_EXPR, itype, last, n2e); |
| n2e = fold_build2 (PLUS_EXPR, itype, n2e, t); |
| } |
| else |
| n2e = t; |
| n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| gcond *cond_stmt |
| = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, |
| n1, n2); |
| e = split_block (bb1, cond_stmt); |
| e->flags = EDGE_TRUE_VALUE; |
| e->probability = profile_probability::likely ().guessed (); |
| basic_block bb2 = e->dest; |
| gsi2 = gsi_after_labels (bb2); |
| |
| cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, |
| n1e, n2e); |
| e = split_block (bb2, cond_stmt); |
| e->flags = EDGE_TRUE_VALUE; |
| e->probability = profile_probability::likely ().guessed (); |
| gsi2 = gsi_after_labels (e->dest); |
| |
| tree step = fold_convert (itype, fd->loops[i].step); |
| t = build_int_cst (itype, (fd->loops[i].cond_code |
| == LT_EXPR ? -1 : 1)); |
| t = fold_build2 (PLUS_EXPR, itype, step, t); |
| t = fold_build2 (PLUS_EXPR, itype, t, n2); |
| t = fold_build2 (MINUS_EXPR, itype, t, n1); |
| if (TYPE_UNSIGNED (itype) |
| && fd->loops[i].cond_code == GT_EXPR) |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, |
| fold_build1 (NEGATE_EXPR, itype, t), |
| fold_build1 (NEGATE_EXPR, itype, step)); |
| else |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); |
| tree first_inner_iterations |
| = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep); |
| if (TYPE_UNSIGNED (itype) |
| && fd->loops[i].cond_code == GT_EXPR) |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, |
| fold_build1 (NEGATE_EXPR, itype, t), |
| fold_build1 (NEGATE_EXPR, itype, step)); |
| else |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); |
| tree factor |
| = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| t = fold_build2 (MINUS_EXPR, itype, outer_niters, |
| build_one_cst (itype)); |
| t = fold_build2 (MULT_EXPR, itype, t, outer_niters); |
| t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node); |
| t = fold_build2 (MULT_EXPR, itype, factor, t); |
| t = fold_build2 (PLUS_EXPR, itype, |
| fold_build2 (MULT_EXPR, itype, outer_niters, |
| first_inner_iterations), t); |
| expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], |
| fold_convert (type, t)); |
| |
| basic_block bb3 = create_empty_bb (bb1); |
| add_bb_to_loop (bb3, bb1->loop_father); |
| |
| e = make_edge (bb1, bb3, EDGE_FALSE_VALUE); |
| e->probability = profile_probability::unlikely ().guessed (); |
| |
| gsi2 = gsi_after_labels (bb3); |
| cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, |
| n1e, n2e); |
| e = split_block (bb3, cond_stmt); |
| e->flags = EDGE_TRUE_VALUE; |
| e->probability = profile_probability::likely ().guessed (); |
| basic_block bb4 = e->dest; |
| |
| ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE); |
| ne->probability = e->probability.invert (); |
| |
| basic_block bb5 = create_empty_bb (bb2); |
| add_bb_to_loop (bb5, bb2->loop_father); |
| |
| ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE); |
| ne->probability = profile_probability::unlikely ().guessed (); |
| |
| for (int j = 0; j < 2; j++) |
| { |
| gsi2 = gsi_after_labels (j ? bb5 : bb4); |
| t = fold_build2 (MINUS_EXPR, itype, |
| unshare_expr (fd->loops[i].n1), |
| unshare_expr (fd->loops[i].n2)); |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1); |
| tree tem |
| = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| t = fold_build2 (MINUS_EXPR, itype, tem, n1o); |
| t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep); |
| t = fold_build2 (MINUS_EXPR, itype, tem, t); |
| tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| t = fold_convert (itype, unshare_expr (fd->loops[i].n1)); |
| if (fd->loops[i].m1) |
| { |
| n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1)); |
| n1 = fold_build2 (MULT_EXPR, itype, tem, n1); |
| n1 = fold_build2 (PLUS_EXPR, itype, n1, t); |
| } |
| else |
| n1 = t; |
| n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); |
| if (fd->loops[i].m2) |
| { |
| n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2)); |
| n2 = fold_build2 (MULT_EXPR, itype, tem, n2); |
| n2 = fold_build2 (PLUS_EXPR, itype, n2, t); |
| } |
| else |
| n2 = t; |
| n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem); |
| |
| cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, |
| n1, n2); |
| e = split_block (gsi_bb (gsi2), cond_stmt); |
| e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE; |
| e->probability = profile_probability::unlikely ().guessed (); |
| ne = make_edge (e->src, bb1, |
| j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE); |
| ne->probability = e->probability.invert (); |
| gsi2 = gsi_after_labels (e->dest); |
| |
| t = fold_build2 (PLUS_EXPR, itype, tem, ostep); |
| expand_omp_build_assign (&gsi2, j ? n2o : n1o, t); |
| |
| make_edge (e->dest, bb1, EDGE_FALLTHRU); |
| } |
| |
| set_immediate_dominator (CDI_DOMINATORS, bb3, bb1); |
| set_immediate_dominator (CDI_DOMINATORS, bb5, bb2); |
| set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1); |
| |
| if (fd->first_nonrect + 1 == fd->last_nonrect) |
| { |
| fd->first_inner_iterations = first_inner_iterations; |
| fd->factor = factor; |
| fd->adjn1 = n1o; |
| } |
| } |
| else |
| { |
| /* Fallback implementation. Evaluate the loops with m1/m2 |
| non-NULL as well as their outer loops at runtime using temporaries |
| instead of the original iteration variables, and in the |
| body just bump the counter. */ |
| gimple_stmt_iterator gsi2 = *gsi; |
| gsi_prev (&gsi2); |
| e = split_block (entry_bb, gsi_stmt (gsi2)); |
| e = split_block (e->dest, (gimple *) NULL); |
| basic_block cur_bb = e->src; |
| basic_block next_bb = e->dest; |
| entry_bb = e->dest; |
| *gsi = gsi_after_labels (entry_bb); |
| |
| tree *vs = XALLOCAVEC (tree, fd->last_nonrect); |
| memset (vs, 0, fd->last_nonrect * sizeof (tree)); |
| |
| for (i = 0; i <= fd->last_nonrect; i++) |
| { |
| if (fd->loops[i].m1 == NULL_TREE |
| && fd->loops[i].m2 == NULL_TREE |
| && !fd->loops[i].non_rect_referenced) |
| continue; |
| |
| tree itype = TREE_TYPE (fd->loops[i].v); |
| |
| gsi2 = gsi_after_labels (cur_bb); |
| tree n1, n2; |
| t = fold_convert (itype, unshare_expr (fd->loops[i].n1)); |
| if (fd->loops[i].m1 == NULL_TREE) |
| n1 = t; |
| else if (POINTER_TYPE_P (itype)) |
| { |
| gcc_assert (integer_onep (fd->loops[i].m1)); |
| t = unshare_expr (fd->loops[i].n1); |
| n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t); |
| } |
| else |
| { |
| n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1)); |
| n1 = fold_build2 (MULT_EXPR, itype, |
| vs[i - fd->loops[i].outer], n1); |
| n1 = fold_build2 (PLUS_EXPR, itype, n1, t); |
| } |
| n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| if (i < fd->last_nonrect) |
| { |
| vs[i] = create_tmp_reg (itype, ".it"); |
| expand_omp_build_assign (&gsi2, vs[i], n1); |
| } |
| t = fold_convert (itype, unshare_expr (fd->loops[i].n2)); |
| if (fd->loops[i].m2 == NULL_TREE) |
| n2 = t; |
| else if (POINTER_TYPE_P (itype)) |
| { |
| gcc_assert (integer_onep (fd->loops[i].m2)); |
| t = unshare_expr (fd->loops[i].n2); |
| n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t); |
| } |
| else |
| { |
| n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2)); |
| n2 = fold_build2 (MULT_EXPR, itype, |
| vs[i - fd->loops[i].outer], n2); |
| n2 = fold_build2 (PLUS_EXPR, itype, n2, t); |
| } |
| n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| if (POINTER_TYPE_P (itype)) |
| itype = signed_type_for (itype); |
| if (i == fd->last_nonrect) |
| { |
| gcond *cond_stmt |
| = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, |
| n1, n2); |
| e = split_block (cur_bb, cond_stmt); |
| e->flags = EDGE_TRUE_VALUE; |
| ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); |
| e->probability = profile_probability::likely ().guessed (); |
| ne->probability = e->probability.invert (); |
| gsi2 = gsi_after_labels (e->dest); |
| |
| t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR |
| ? -1 : 1)); |
| t = fold_build2 (PLUS_EXPR, itype, |
| fold_convert (itype, fd->loops[i].step), t); |
| t = fold_build2 (PLUS_EXPR, itype, t, |
| fold_convert (itype, n2)); |
| t = fold_build2 (MINUS_EXPR, itype, t, |
| fold_convert (itype, n1)); |
| tree step = fold_convert (itype, fd->loops[i].step); |
| if (TYPE_UNSIGNED (itype) |
| && fd->loops[i].cond_code == GT_EXPR) |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, |
| fold_build1 (NEGATE_EXPR, itype, t), |
| fold_build1 (NEGATE_EXPR, itype, step)); |
| else |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); |
| t = fold_convert (type, t); |
| t = fold_build2 (PLUS_EXPR, type, |
| counts[fd->last_nonrect], t); |
| t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t); |
| e = make_edge (e->dest, next_bb, EDGE_FALLTHRU); |
| set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb); |
| break; |
| } |
| e = split_block (cur_bb, last_stmt (cur_bb)); |
| |
| basic_block new_cur_bb = create_empty_bb (cur_bb); |
| add_bb_to_loop (new_cur_bb, cur_bb->loop_father); |
| |
| gsi2 = gsi_after_labels (e->dest); |
| tree step = fold_convert (itype, |
| unshare_expr (fd->loops[i].step)); |
| if (POINTER_TYPE_P (TREE_TYPE (vs[i]))) |
| t = fold_build_pointer_plus (vs[i], step); |
| else |
| t = fold_build2 (PLUS_EXPR, itype, vs[i], step); |
| t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| expand_omp_build_assign (&gsi2, vs[i], t); |
| |
| ne = split_block (e->dest, last_stmt (e->dest)); |
| gsi2 = gsi_after_labels (ne->dest); |
| |
| expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2); |
| edge e3, e4; |
| if (next_bb == entry_bb) |
| { |
| e3 = find_edge (ne->dest, next_bb); |
| e3->flags = EDGE_FALSE_VALUE; |
| } |
| else |
| e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE); |
| e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE); |
| e4->probability = profile_probability::likely ().guessed (); |
| e3->probability = e4->probability.invert (); |
| basic_block esrc = e->src; |
| make_edge (e->src, ne->dest, EDGE_FALLTHRU); |
| cur_bb = new_cur_bb; |
| basic_block latch_bb = next_bb; |
| next_bb = e->dest; |
| remove_edge (e); |
| set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc); |
| set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest); |
| set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest); |
| } |
| } |
| t = NULL_TREE; |
| for (i = fd->first_nonrect; i < fd->last_nonrect; i++) |
| if (!fd->loops[i].non_rect_referenced |
| && fd->loops[i].m1 == NULL_TREE |
| && fd->loops[i].m2 == NULL_TREE) |
| { |
| if (t == NULL_TREE) |
| t = counts[i]; |
| else |
| t = fold_build2 (MULT_EXPR, type, t, counts[i]); |
| } |
| if (t) |
| { |
| t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t); |
| expand_omp_build_assign (gsi, counts[fd->last_nonrect], t); |
| } |
| if (!rect_count_seen) |
| t = counts[fd->last_nonrect]; |
| else |
| t = fold_build2 (MULT_EXPR, type, fd->loop.n2, |
| counts[fd->last_nonrect]); |
| expand_omp_build_assign (gsi, fd->loop.n2, t); |
| } |
| else if (fd->non_rect) |
| { |
| tree t = fd->loop.n2; |
| gcc_assert (TREE_CODE (t) == INTEGER_CST); |
| int non_rect_referenced = 0, non_rect = 0; |
| for (i = 0; i < fd->collapse; i++) |
| { |
| if ((i < fd->first_nonrect || i > fd->last_nonrect) |
| && !integer_zerop (counts[i])) |
| t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]); |
| if (fd->loops[i].non_rect_referenced) |
| non_rect_referenced++; |
| if (fd->loops[i].m1 || fd->loops[i].m2) |
| non_rect++; |
| } |
| gcc_assert (non_rect == 1 && non_rect_referenced == 1); |
| counts[fd->last_nonrect] = t; |
| } |
| } |
| |
| /* Helper function for expand_omp_{for_*,simd}. Generate code like: |
| T = V; |
| V3 = N31 + (T % count3) * STEP3; |
| T = T / count3; |
| V2 = N21 + (T % count2) * STEP2; |
| T = T / count2; |
| V1 = N11 + T * STEP1; |
| if this loop doesn't have an inner loop construct combined with it. |
| If it does have an inner loop construct combined with it and the |
| iteration count isn't a known constant, store values from counts array |
| into its _looptemp_ temporaries instead. |
| For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect |
| inclusive), use the count of all those loops together, and either |
| find quadratic etc. equation roots, or as a fallback, do: |
| COUNT = 0; |
| for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1) |
| for (tmpj = M21 * tmpi + N21; |
| tmpj COND2 M22 * tmpi + N22; tmpj += STEP2) |
| { |
| int tmpk1 = M31 * tmpj + N31; |
| int tmpk2 = M32 * tmpj + N32; |
| if (tmpk1 COND3 tmpk2) |
| { |
| if (COND3 is <) |
| adj = STEP3 - 1; |
| else |
| adj = STEP3 + 1; |
| int temp = (adj + tmpk2 - tmpk1) / STEP3; |
| if (COUNT + temp > T) |
| { |
| V1 = tmpi; |
| V2 = tmpj; |
| V3 = tmpk1 + (T - COUNT) * STEP3; |
| goto done; |
| } |
| else |
| COUNT += temp; |
| } |
| } |
| done:; |
| but for optional innermost or outermost rectangular loops that aren't |
| referenced by other loop expressions keep doing the division/modulo. */ |
| |
| static void |
| expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi, |
| tree *counts, tree *nonrect_bounds, |
| gimple *inner_stmt, tree startvar) |
| { |
| int i; |
| if (gimple_omp_for_combined_p (fd->for_stmt)) |
| { |
| /* If fd->loop.n2 is constant, then no propagation of the counts |
| is needed, they are constant. */ |
| if (TREE_CODE (fd->loop.n2) == INTEGER_CST) |
| return; |
| |
| tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR |
| ? gimple_omp_taskreg_clauses (inner_stmt) |
| : gimple_omp_for_clauses (inner_stmt); |
| /* First two _looptemp_ clauses are for istart/iend, counts[0] |
| isn't supposed to be handled, as the inner loop doesn't |
| use it. */ |
| tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_); |
| gcc_assert (innerc); |
| int count = 0; |
| if (fd->non_rect |
| && fd->last_nonrect == fd->first_nonrect + 1 |
| && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v))) |
| count = 4; |
| for (i = 0; i < fd->collapse + count; i++) |
| { |
| innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), |
| OMP_CLAUSE__LOOPTEMP_); |
| gcc_assert (innerc); |
| if (i) |
| { |
| tree tem = OMP_CLAUSE_DECL (innerc); |
| tree t; |
| if (i < fd->collapse) |
| t = counts[i]; |
| else |
| switch (i - fd->collapse) |
| { |
| case 0: t = counts[0]; break; |
| case 1: t = fd->first_inner_iterations; break; |
| case 2: t = fd->factor; break; |
| case 3: t = fd->adjn1; break; |
| default: gcc_unreachable (); |
| } |
| t = fold_convert (TREE_TYPE (tem), t); |
| t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| gassign *stmt = gimple_build_assign (tem, t); |
| gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); |
| } |
| } |
| return; |
| } |
| |
| tree type = TREE_TYPE (fd->loop.v); |
| tree tem = create_tmp_reg (type, ".tem"); |
| gassign *stmt = gimple_build_assign (tem, startvar); |
| gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING); |
| |
| for (i = fd->collapse - 1; i >= 0; i--) |
| { |
| tree vtype = TREE_TYPE (fd->loops[i].v), itype, t; |
| itype = vtype; |
| if (POINTER_TYPE_P (vtype)) |
| itype = signed_type_for (vtype); |
| if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect)) |
| t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]); |
| else |
| t = tem; |
| if (i == fd->last_nonrect) |
| { |
| t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| tree stopval = t; |
| tree idx = create_tmp_reg (type, ".count"); |
| expand_omp_build_assign (gsi, idx, |
| build_zero_cst (type), true); |
| basic_block bb_triang = NULL, bb_triang_dom = NULL; |
| if (fd->first_nonrect + 1 == fd->last_nonrect |
| && (TREE_CODE (fd->loop.n2) == INTEGER_CST |
| || fd->first_inner_iterations) |
| && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node)) |
| != CODE_FOR_nothing) |
| && !integer_zerop (fd->loop.n2)) |
| { |
| tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1; |
| tree itype = TREE_TYPE (fd->loops[i].v); |
| tree first_inner_iterations = fd->first_inner_iterations; |
| tree factor = fd->factor; |
| gcond *cond_stmt |
| = expand_omp_build_cond (gsi, NE_EXPR, factor, |
| build_zero_cst (TREE_TYPE (factor))); |
| edge e = split_block (gsi_bb (*gsi), cond_stmt); |
| basic_block bb0 = e->src; |
| e->flags = EDGE_TRUE_VALUE; |
| e->probability = profile_probability::likely (); |
| bb_triang_dom = bb0; |
| *gsi = gsi_after_labels (e->dest); |
| tree slltype = long_long_integer_type_node; |
| tree ulltype = long_long_unsigned_type_node; |
| tree stopvalull = fold_convert (ulltype, stopval); |
| stopvalull |
| = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| first_inner_iterations |
| = fold_convert (slltype, first_inner_iterations); |
| first_inner_iterations |
| = force_gimple_operand_gsi (gsi, first_inner_iterations, true, |
| NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| factor = fold_convert (slltype, factor); |
| factor |
| = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| tree first_inner_iterationsd |
| = fold_build1 (FLOAT_EXPR, double_type_node, |
| first_inner_iterations); |
| first_inner_iterationsd |
| = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true, |
| NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| tree factord = fold_build1 (FLOAT_EXPR, double_type_node, |
| factor); |
| factord = force_gimple_operand_gsi (gsi, factord, true, |
| NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node, |
| stopvalull); |
| stopvald = force_gimple_operand_gsi (gsi, stopvald, true, |
| NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| /* Temporarily disable flag_rounding_math, values will be |
| decimal numbers divided by 2 and worst case imprecisions |
| due to too large values ought to be caught later by the |
| checks for fallback. */ |
| int save_flag_rounding_math = flag_rounding_math; |
| flag_rounding_math = 0; |
| t = fold_build2 (RDIV_EXPR, double_type_node, factord, |
| build_real (double_type_node, dconst2)); |
| tree t3 = fold_build2 (MINUS_EXPR, double_type_node, |
| first_inner_iterationsd, t); |
| t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| t = fold_build2 (MULT_EXPR, double_type_node, factord, |
| build_real (double_type_node, dconst2)); |
| t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald); |
| t = fold_build2 (PLUS_EXPR, double_type_node, t, |
| fold_build2 (MULT_EXPR, double_type_node, |
| t3, t3)); |
| flag_rounding_math = save_flag_rounding_math; |
| t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| if (flag_exceptions |
| && cfun->can_throw_non_call_exceptions |
| && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE)) |
| { |
| tree tem = fold_build2 (LT_EXPR, boolean_type_node, t, |
| build_zero_cst (double_type_node)); |
| tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| cond_stmt = gimple_build_cond (NE_EXPR, tem, |
| boolean_false_node, |
| NULL_TREE, NULL_TREE); |
| } |
| else |
| cond_stmt |
| = gimple_build_cond (LT_EXPR, t, |
| build_zero_cst (double_type_node), |
| NULL_TREE, NULL_TREE); |
| gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING); |
| e = split_block (gsi_bb (*gsi), cond_stmt); |
| basic_block bb1 = e->src; |
| e->flags = EDGE_FALSE_VALUE; |
| e->probability = profile_probability::very_likely (); |
| *gsi = gsi_after_labels (e->dest); |
| gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t); |
| tree sqrtr = create_tmp_var (double_type_node); |
| gimple_call_set_lhs (call, sqrtr); |
| gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING); |
| t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3); |
| t = fold_build2 (RDIV_EXPR, double_type_node, t, factord); |
| t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t); |
| tree c = create_tmp_var (ulltype); |
| tree d = create_tmp_var (ulltype); |
| expand_omp_build_assign (gsi, c, t, true); |
| t = fold_build2 (MINUS_EXPR, ulltype, c, |
| build_one_cst (ulltype)); |
| t = fold_build2 (MULT_EXPR, ulltype, c, t); |
| t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node); |
| t = fold_build2 (MULT_EXPR, ulltype, |
| fold_convert (ulltype, fd->factor), t); |
| tree t2 |
| = fold_build2 (MULT_EXPR, ulltype, c, |
| fold_convert (ulltype, |
| fd->first_inner_iterations)); |
| t = fold_build2 (PLUS_EXPR, ulltype, t, t2); |
| expand_omp_build_assign (gsi, d, t, true); |
| t = fold_build2 (MULT_EXPR, ulltype, |
| fold_convert (ulltype, fd->factor), c); |
| t = fold_build2 (PLUS_EXPR, ulltype, |
| t, fold_convert (ulltype, |
| fd->first_inner_iterations)); |
| t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d, |
| NULL_TREE, NULL_TREE); |
| gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING); |
| e = split_block (gsi_bb (*gsi), cond_stmt); |
| basic_block bb2 = e->src; |
| e->flags = EDGE_TRUE_VALUE; |
| e->probability = profile_probability::very_likely (); |
| *gsi = gsi_after_labels (e->dest); |
| t = fold_build2 (PLUS_EXPR, ulltype, d, t2); |
| t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t, |
| NULL_TREE, NULL_TREE); |
| gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING); |
| e = split_block (gsi_bb (*gsi), cond_stmt); |
| basic_block bb3 = e->src; |
| e->flags = EDGE_FALSE_VALUE; |
| e->probability = profile_probability::very_likely (); |
| *gsi = gsi_after_labels (e->dest); |
| t = fold_convert (itype, c); |
| t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step); |
| t = fold_build2 (PLUS_EXPR, itype, outer_n1, t); |
| t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true); |
| t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d); |
| t2 = fold_convert (itype, t2); |
| t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step); |
| t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1); |
| if (fd->loops[i].m1) |
| { |
| t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1); |
| t2 = fold_build2 (PLUS_EXPR, itype, t2, t); |
| } |
| expand_omp_build_assign (gsi, fd->loops[i].v, t2, true); |
| e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi)); |
| bb_triang = e->src; |
| *gsi = gsi_after_labels (e->dest); |
| remove_edge (e); |
| e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE); |
| e->probability = profile_probability::very_unlikely (); |
| e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE); |
| e->probability = profile_probability::very_unlikely (); |
| e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE); |
| e->probability = profile_probability::very_unlikely (); |
| |
| basic_block bb4 = create_empty_bb (bb0); |
| add_bb_to_loop (bb4, bb0->loop_father); |
| e = make_edge (bb0, bb4, EDGE_FALSE_VALUE); |
| e->probability = profile_probability::unlikely (); |
| make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU); |
| set_immediate_dominator (CDI_DOMINATORS, bb4, bb0); |
| set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0); |
| gimple_stmt_iterator gsi2 = gsi_after_labels (bb4); |
| t2 = fold_build2 (TRUNC_DIV_EXPR, type, |
| counts[i], counts[i - 1]); |
| t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false, |
| GSI_CONTINUE_LINKING); |
| t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2); |
| t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2); |
| t = fold_convert (itype, t); |
| t2 = fold_convert (itype, t2); |
| t = fold_build2 (MULT_EXPR, itype, t, |
| fold_convert (itype, fd->loops[i].step)); |
| t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t); |
| t2 = fold_build2 (MULT_EXPR, itype, t2, |
| fold_convert (itype, fd->loops[i - 1].step)); |
| t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2); |
| t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| stmt = gimple_build_assign (fd->loops[i - 1].v, t2); |
| gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING); |
| if (fd->loops[i].m1) |
| { |
| t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1, |
| fd->loops[i - 1].v); |
| t = fold_build2 (PLUS_EXPR, itype, t, t2); |
| } |
| t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE, |
| false, GSI_CONTINUE_LINKING); |
| stmt = gimple_build_assign (fd->loops[i].v, t); |
| gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING); |
| } |
| /* Fallback implementation. Evaluate the loops in between |
| (inclusive) fd->first_nonrect and fd->last_nonrect at |
| runtime using temporaries instead of the original iteration |
| variables, in the body just bump the counter and compare |
| with the desired value. */ |
| gimple_stmt_iterator gsi2 = *gsi; |
| basic_block entry_bb = gsi_bb (gsi2); |
| edge e = split_block (entry_bb, gsi_stmt (gsi2)); |
| e = split_block (e->dest, (gimple *) NULL); |
| basic_block dom_bb = NULL; |
| basic_block cur_bb = e->src; |
| basic_block next_bb = e->dest; |
| entry_bb = e->dest; |
| *gsi = gsi_after_labels (entry_bb); |
| |
| tree *vs = XALLOCAVEC (tree, fd->last_nonrect); |
| tree n1 = NULL_TREE, n2 = NULL_TREE; |
| memset (vs, 0, fd->last_nonrect * sizeof (tree)); |
| |
| for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++) |
| { |
| tree itype = TREE_TYPE (fd->loops[j].v); |
| bool rect_p = (fd->loops[j].m1 == NULL_TREE |
| && fd->loops[j].m2 == NULL_TREE |
| && !fd->loops[j].non_rect_referenced); |
| gsi2 = gsi_after_labels (cur_bb); |
| t = fold_convert (itype, unshare_expr (fd->loops[j].n1)); |
| if (fd->loops[j].m1 == NULL_TREE) |
| n1 = rect_p ? build_zero_cst (type) : t; |
| else if (POINTER_TYPE_P (itype)) |
| { |
| gcc_assert (integer_onep (fd->loops[j].m1)); |
| t = unshare_expr (fd->loops[j].n1); |
| n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t); |
| } |
| else |
| { |
| n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1)); |
| n1 = fold_build2 (MULT_EXPR, itype, |
| vs[j - fd->loops[j].outer], n1); |
| n1 = fold_build2 (PLUS_EXPR, itype, n1, t); |
| } |
| n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| if (j < fd->last_nonrect) |
| { |
| vs[j] = create_tmp_reg (rect_p ? type : itype, ".it"); |
| expand_omp_build_assign (&gsi2, vs[j], n1); |
| } |
| t = fold_convert (itype, unshare_expr (fd->loops[j].n2)); |
| if (fd->loops[j].m2 == NULL_TREE) |
| n2 = rect_p ? counts[j] : t; |
| else if (POINTER_TYPE_P (itype)) |
| { |
| gcc_assert (integer_onep (fd->loops[j].m2)); |
| t = unshare_expr (fd->loops[j].n2); |
| n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t); |
| } |
| else |
| { |
| n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2)); |
| n2 = fold_build2 (MULT_EXPR, itype, |
| vs[j - fd->loops[j].outer], n2); |
| n2 = fold_build2 (PLUS_EXPR, itype, n2, t); |
| } |
| n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| if (POINTER_TYPE_P (itype)) |
| itype = signed_type_for (itype); |
| if (j == fd->last_nonrect) |
| { |
| gcond *cond_stmt |
| = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, |
| n1, n2); |
| e = split_block (cur_bb, cond_stmt); |
| e->flags = EDGE_TRUE_VALUE; |
| edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE); |
| e->probability = profile_probability::likely ().guessed (); |
| ne->probability = e->probability.invert (); |
| gsi2 = gsi_after_labels (e->dest); |
| |
| t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR |
| ? -1 : 1)); |
| t = fold_build2 (PLUS_EXPR, itype, |
| fold_convert (itype, fd->loops[j].step), t); |
| t = fold_build2 (PLUS_EXPR, itype, t, |
| fold_convert (itype, n2)); |
| t = fold_build2 (MINUS_EXPR, itype, t, |
| fold_convert (itype, n1)); |
| tree step = fold_convert (itype, fd->loops[j].step); |
| if (TYPE_UNSIGNED (itype) |
| && fd->loops[j].cond_code == GT_EXPR) |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, |
| fold_build1 (NEGATE_EXPR, itype, t), |
| fold_build1 (NEGATE_EXPR, itype, step)); |
| else |
| t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step); |
| t = fold_convert (type, t); |
| t = fold_build2 (PLUS_EXPR, type, idx, t); |
| t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE, |
| true, GSI_SAME_STMT); |
| e = make_edge (e->dest, next_bb, EDGE_FALLTHRU); |
| set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb); |
| cond_stmt |
| = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE, |
| NULL_TREE); |
| gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT); |
| e = split_block (gsi_bb (gsi2), cond_stmt); |
| e->flags = EDGE_TRUE_VALUE; |
| e->probability = profile_probability::likely ().guessed (); |
| ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE); |
| ne->probability = e->probability.invert (); |
| gsi2 = gsi_after_labels (e->dest); |
| expand_omp_build_assign (&gsi2, idx, t); |
| set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb); |
| break; |
| } |
| e = split_block (cur_bb, last_stmt (cur_bb)); |
| |
| basic_block new_cur_bb = create_empty_bb (cur_bb); |
| add_bb_to_loop (new_cur_bb, cur_bb->loop_father); |
| |
| gsi2 = gsi_after_labels (e->dest); |
| if (rect_p) |
| t = fold_build2 (PLUS_EXPR, type, vs[j], |
| build_one_cst (type)); |
| else |
| { |
| tree step |
| = fold_convert (itype, unshare_expr (fd->loops[j].step)); |
| if (POINTER_TYPE_P (vtype)) |
| t = fold_build_pointer_plus (vs[j], step); |
| else |
|