blob: 5a47309b8e996b3406aaa35768baea2f128caf3d [file] [log] [blame]
/* Perform various loop optimizations, including strength reduction.
Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA. */
/* This is the loop optimization pass of the compiler.
It finds invariant computations within loops and moves them
to the beginning of the loop. Then it identifies basic and
general induction variables.
Basic induction variables (BIVs) are pseudo registers which are set within
a loop only by incrementing or decrementing their value. General induction
variables (GIVs) are pseudo registers with a value which is a linear function
of a basic induction variable. BIVs are recognized by `basic_induction_var';
GIVs by `general_induction_var'.
Once induction variables are identified, strength reduction is applied to the
general induction variables, and induction variable elimination is applied to
the basic induction variables.
It also finds cases where
a register is set within the loop by zero-extending a narrower value
and changes these to zero the entire register once before the loop
and merely copy the low part within the loop.
Most of the complexity is in heuristics to decide when it is worth
while to do these things. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "function.h"
#include "expr.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "recog.h"
#include "flags.h"
#include "real.h"
#include "loop.h"
#include "cselib.h"
#include "except.h"
#include "toplev.h"
#include "predict.h"
#include "insn-flags.h"
#include "optabs.h"
#include "cfgloop.h"
#include "ggc.h"
/* Fallback values used when the target does not define these itself.
Not really meaningful values, but at least something. */
#ifndef SIMULTANEOUS_PREFETCHES
#define SIMULTANEOUS_PREFETCHES 3
#endif
#ifndef PREFETCH_BLOCK
#define PREFETCH_BLOCK 32
#endif
/* When HAVE_prefetch is not defined, supply stand-ins so that code which
mentions the prefetch pattern still compiles: HAVE_prefetch and
CODE_FOR_prefetch are 0, and gen_prefetch aborts if it is ever evaluated. */
#ifndef HAVE_prefetch
#define HAVE_prefetch 0
#define CODE_FOR_prefetch 0
#define gen_prefetch(a,b,c) (abort(), NULL_RTX)
#endif
/* Give up the prefetch optimizations once we exceed a given threshold.
It is unlikely that we would be able to optimize something in a loop
with so many detected prefetches. */
#define MAX_PREFETCHES 100
/* The number of prefetch blocks that are beneficial to fetch at once before
a loop with a known (and low) iteration count. */
#define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
/* For very tiny loops it is not worthwhile to prefetch even before the loop,
since it is likely that the data are already in the cache. */
#define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2
/* Parameterize some prefetch heuristics so they can be turned on and off
easily for performance testing on new architectures. These can be
defined in target-dependent files; every definition below is guarded by
#ifndef so that such a definition takes precedence. */
/* Prefetch is worthwhile only when loads/stores are dense. */
#ifndef PREFETCH_ONLY_DENSE_MEM
#define PREFETCH_ONLY_DENSE_MEM 1
#endif
/* Define what we mean by "dense" loads and stores; this value divided by 256
is the minimum percentage of memory references that are worth prefetching. */
#ifndef PREFETCH_DENSE_MEM
#define PREFETCH_DENSE_MEM 220
#endif
/* Do not prefetch for a loop whose iteration count is known to be low. */
#ifndef PREFETCH_NO_LOW_LOOPCNT
#define PREFETCH_NO_LOW_LOOPCNT 1
#endif
/* Define what we mean by a "low" iteration count. */
#ifndef PREFETCH_LOW_LOOPCNT
#define PREFETCH_LOW_LOOPCNT 32
#endif
/* Do not prefetch for a loop that contains a function call; such a loop is
probably not an internal loop. */
#ifndef PREFETCH_NO_CALL
#define PREFETCH_NO_CALL 1
#endif
/* Do not prefetch accesses with an extreme stride. */
#ifndef PREFETCH_NO_EXTREME_STRIDE
#define PREFETCH_NO_EXTREME_STRIDE 1
#endif
/* Define what we mean by an "extreme" stride. */
#ifndef PREFETCH_EXTREME_STRIDE
#define PREFETCH_EXTREME_STRIDE 4096
#endif
/* Define a limit to how far apart indices can be and still be merged
into a single prefetch. */
#ifndef PREFETCH_EXTREME_DIFFERENCE
#define PREFETCH_EXTREME_DIFFERENCE 4096
#endif
/* Issue prefetch instructions before the loop to fetch data to be used
in the first few loop iterations. */
#ifndef PREFETCH_BEFORE_LOOP
#define PREFETCH_BEFORE_LOOP 1
#endif
/* Do not handle reversed order prefetches (negative stride). */
#ifndef PREFETCH_NO_REVERSE_ORDER
#define PREFETCH_NO_REVERSE_ORDER 1
#endif
/* Prefetch even if the GIV is in conditional code. */
#ifndef PREFETCH_CONDITIONAL
#define PREFETCH_CONDITIONAL 1
#endif
/* Lifetime of register REGNO measured in luids: the difference between the
last and the first luid recorded for it. The LOOP argument is not used. */
#define LOOP_REG_LIFETIME(LOOP, REGNO) \
((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))
/* Nonzero if the luid range recorded for REGNO reaches past the end of LOOP
or begins before the start of LOOP. */
#define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
|| REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))
/* Number of registers covered by REGNO when it is set through SET_DEST:
HARD_REGNO_NREGS for the mode of SET_DEST when REGNO is below
FIRST_PSEUDO_REGISTER, and 1 otherwise. */
#define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
((REGNO) < FIRST_PSEUDO_REGISTER \
? (int) HARD_REGNO_NREGS ((REGNO), GET_MODE (SET_DEST)) : 1)
/* Vector mapping INSN_UIDs to luids.
The luids are like uids but increase monotonically always.
We use them to see whether a jump comes from outside a given loop.
Allocated and freed by loop_optimize; filled in by compute_luids. */
int *uid_luid;
/* Indexed by INSN_UID, contains a pointer to the loop the insn is
contained in (the older wording of this comment says the innermost one).
Allocated and freed by loop_optimize. */
struct loop **uid_loop;
/* 1 + largest uid of any insn. loop_optimize first sets this to a larger
value, to leave room for insns created by find_and_verify_loops, and then
resets it to get_max_uid (). */
int max_uid_for_loop;
/* Number of loops detected in current function. Used as index to the
next few tables. */
static int max_loop_num;
/* Bound on pseudo register number before loop optimization.
A pseudo has valid regscan info if its number is < max_reg_before_loop. */
unsigned int max_reg_before_loop;
/* The value to pass to the next call of reg_scan_update. loop_optimize
initializes it to max_reg_before_loop. */
static int loop_max_reg;
/* During the analysis of a loop, a chain of `struct movable's
is made to record all the movable insns found.
Then the entire chain can be scanned to decide which to move.
One entry describes one candidate insn; the single-bit fields at the end
record how, and whether, that insn may be moved. */
struct movable
{
rtx insn; /* A movable insn */
rtx set_src; /* The expression this reg is set from. */
rtx set_dest; /* The destination of this SET. */
rtx dependencies; /* When INSN is libcall, this is an EXPR_LIST
of any registers used within the LIBCALL. */
int consec; /* Number of consecutive following insns
that must be moved with this one. */
unsigned int regno; /* The register it sets */
short lifetime; /* lifetime of that register;
may be adjusted when matching movables
that load the same value are found. */
short savings; /* Number of insns we can move for this reg,
including other movables that force this
or match this one. */
ENUM_BITFIELD(machine_mode) savemode : 8; /* Nonzero means it is a mode for
a low part that we should avoid changing when
clearing the rest of the reg. */
unsigned int cond : 1; /* 1 if only conditionally movable */
unsigned int force : 1; /* 1 means MUST move this insn */
unsigned int global : 1; /* 1 means reg is live outside this loop */
/* If PARTIAL is 1, GLOBAL means something different:
that the reg is live outside the range from where it is set
to the following label. */
unsigned int done : 1; /* 1 inhibits further processing of this */
unsigned int partial : 1; /* 1 means this reg is used for zero-extending.
In particular, moving it does not make it
invariant. */
unsigned int move_insn : 1; /* 1 means that we call emit_move_insn to
load SRC, rather than copying INSN. */
unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
first insn of a consecutive sets group. */
unsigned int is_equiv : 1; /* 1 means a REG_EQUIV is present on INSN. */
unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace
the original insn with a copy from that
pseudo, rather than deleting it. */
struct movable *match; /* First entry for same value */
struct movable *forces; /* An insn that must be moved if this is */
struct movable *next; /* Link to another entry of the chain.
NOTE(review): presumably the following entry,
0 at the end of the chain -- confirm against
loop_movables_add. */
};
/* Stream for the trace of actions taken by this pass, or 0 if no trace
should be output. Set by loop_optimize from its DUMPFILE argument. */
FILE *loop_dump_stream;
/* Forward declarations of the file-local (static) functions. */
static void invalidate_loops_containing_label (rtx);
static void find_and_verify_loops (rtx, struct loops *);
static void mark_loop_jump (rtx, struct loop *);
static void prescan_loop (struct loop *);
static int reg_in_basic_block_p (rtx, rtx);
static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx);
static int labels_in_range_p (rtx, int);
static void count_one_set (struct loop_regs *, rtx, rtx, rtx *);
static void note_addr_stored (rtx, rtx, void *);
static void note_set_pseudo_multiple_uses (rtx, rtx, void *);
static int loop_reg_used_before_p (const struct loop *, rtx, rtx);
static rtx find_regs_nested (rtx, rtx);
static void scan_loop (struct loop*, int);
/* This prototype is compiled out. */
#if 0
static void replace_call_address (rtx, rtx, rtx);
#endif
static rtx skip_consec_insns (rtx, int);
static int libcall_benefit (rtx);
static void ignore_some_movables (struct loop_movables *);
static void force_movables (struct loop_movables *);
static void combine_movables (struct loop_movables *, struct loop_regs *);
static int num_unmoved_movables (const struct loop *);
static int regs_match_p (rtx, rtx, struct loop_movables *);
static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *,
struct loop_regs *);
static void add_label_notes (rtx, rtx);
static void move_movables (struct loop *loop, struct loop_movables *, int,
int);
static void loop_movables_add (struct loop_movables *, struct movable *);
static void loop_movables_free (struct loop_movables *);
static int count_nonfixed_reads (const struct loop *, rtx);
static void loop_bivs_find (struct loop *);
static void loop_bivs_init_find (struct loop *);
static void loop_bivs_check (struct loop *);
static void loop_givs_find (struct loop *);
static void loop_givs_check (struct loop *);
static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int);
static int loop_giv_reduce_benefit (struct loop *, struct iv_class *,
struct induction *, rtx);
static void loop_givs_dead_check (struct loop *, struct iv_class *);
static void loop_givs_reduce (struct loop *, struct iv_class *);
static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *);
static void loop_ivs_free (struct loop *);
static void strength_reduce (struct loop *, int);
static void find_single_use_in_loop (struct loop_regs *, rtx, rtx);
static int valid_initial_value_p (rtx, rtx, int, rtx);
static void find_mem_givs (const struct loop *, rtx, rtx, int, int);
static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx,
rtx, rtx *, int, int);
static void check_final_value (const struct loop *, struct induction *);
static void loop_ivs_dump (const struct loop *, FILE *, int);
static void loop_iv_class_dump (const struct iv_class *, FILE *, int);
static void loop_biv_dump (const struct induction *, FILE *, int);
static void loop_giv_dump (const struct induction *, FILE *, int);
static void record_giv (const struct loop *, struct induction *, rtx, rtx,
rtx, rtx, rtx, rtx, int, enum g_types, int, int,
rtx *);
static void update_giv_derive (const struct loop *, rtx);
static void check_ext_dependent_givs (const struct loop *, struct iv_class *);
static int basic_induction_var (const struct loop *, rtx, enum machine_mode,
rtx, rtx, rtx *, rtx *, rtx **);
static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *);
static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *,
rtx *, rtx *, int, int *, enum machine_mode);
static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *,
rtx *, rtx *, rtx *);
static int check_dbra_loop (struct loop *, int);
static rtx express_from_1 (rtx, rtx, rtx);
static rtx combine_givs_p (struct induction *, struct induction *);
static int cmp_combine_givs_stats (const void *, const void *);
static void combine_givs (struct loop_regs *, struct iv_class *);
static int product_cheap_p (rtx, rtx);
static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int,
int, int);
static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx,
struct iv_class *, int, basic_block, rtx);
static int last_use_this_basic_block (rtx, rtx);
static void record_initial (rtx, rtx, void *);
static void update_reg_last_use (rtx, rtx);
static rtx next_insn_in_loop (const struct loop *, rtx);
static void loop_regs_scan (const struct loop *, int);
static int count_insns_in_loop (const struct loop *);
static int find_mem_in_note_1 (rtx *, void *);
static rtx find_mem_in_note (rtx);
static void load_mems (const struct loop *);
static int insert_loop_mem (rtx *, void *);
static int replace_loop_mem (rtx *, void *);
static void replace_loop_mems (rtx, rtx, rtx, int);
static int replace_loop_reg (rtx *, void *);
static void replace_loop_regs (rtx insn, rtx, rtx);
static void note_reg_stored (rtx, rtx, void *);
static void try_copy_prop (const struct loop *, rtx, unsigned int);
static void try_swap_copy_prop (const struct loop *, rtx, unsigned int);
static rtx check_insn_for_givs (struct loop *, rtx, int, int);
static rtx check_insn_for_bivs (struct loop *, rtx, int, int);
static rtx gen_add_mult (rtx, rtx, rtx, rtx);
static void loop_regs_update (const struct loop *, rtx);
static int iv_add_mult_cost (rtx, rtx, rtx, rtx);
static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx);
static rtx loop_call_insn_emit_before (const struct loop *, basic_block,
rtx, rtx);
static rtx loop_call_insn_hoist (const struct loop *, rtx);
static rtx loop_insn_sink_or_swim (const struct loop *, rtx);
static void loop_dump_aux (const struct loop *, FILE *, int);
static void loop_delete_insns (rtx, rtx);
static HOST_WIDE_INT remove_constant_addition (rtx *);
static rtx gen_load_of_final_value (rtx, rtx);
/* Debugging routines. Unlike the prototypes above these are declared
without `static'. */
void debug_ivs (const struct loop *);
void debug_iv_class (const struct iv_class *);
void debug_biv (const struct induction *);
void debug_giv (const struct induction *);
void debug_loop (const struct loop *);
void debug_loops (const struct loops *);
/* Bundle of three rtx values: MATCH, REPLACEMENT and INSN.
NOTE(review): presumably the `void *' argument handed to the
replace_loop_mem / replace_loop_reg callbacks declared above -- confirm
against their definitions. */
typedef struct loop_replace_args
{
rtx match;
rtx replacement;
rtx insn;
} loop_replace_args;
/* Nonzero iff INSN is between START and END, inclusive, judged by luid.
INSN must also have a uid below max_uid_for_loop; for an insn with a
larger uid the result is zero and its luid is never looked up. */
#define INSN_IN_RANGE_P(INSN, START, END) \
(INSN_UID (INSN) < max_uid_for_loop \
&& INSN_LUID (INSN) >= INSN_LUID (START) \
&& INSN_LUID (INSN) <= INSN_LUID (END))
/* Indirect_jump_in_function is computed once per function; loop_optimize
sets it from indirect_jump_in_function_p. */
static int indirect_jump_in_function;
static int indirect_jump_in_function_p (rtx);
static int compute_luids (rtx, rtx, int);
static int biv_elimination_giv_has_0_offset (struct induction *,
struct induction *, rtx);
/* Benefit penalty, if a giv is not replaceable, i.e. must emit an insn to
copy the value of the strength reduced giv to its original register.
Set by init_loop. */
static int copy_cost;
/* Cost of using a register, to normalize the benefits of a giv.
Set by init_loop. */
static int reg_address_cost;
/* Set up the two cost parameters of this pass.  COPY_COST becomes the
   cost of a single insn.  REG_ADDRESS_COST becomes the address cost, in
   SImode, of a bare word_mode register whose number is the one just past
   LAST_VIRTUAL_REGISTER.  */
void
init_loop (void)
{
  copy_cost = COSTS_N_INSNS (1);
  reg_address_cost
    = address_cost (gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1),
                    SImode);
}
/* Fill in uid_luid for the insns from START up to, but not including, END.

   A luid is a number assigned to an insn, like a uid, except that luids
   grow monotonically through the insn stream.  Numbering continues from
   PREV_LUID: the first insn that receives a fresh number gets
   PREV_LUID + 1.  Insns whose uid is not below max_uid_for_loop have no
   slot in the table and are skipped.

   The value returned is the last luid handed out, plus one.  */
static int
compute_luids (rtx start, rtx end, int prev_luid)
{
  int luid = prev_luid;
  rtx cur;

  for (cur = start; cur != end; cur = NEXT_INSN (cur))
    {
      int uid = INSN_UID (cur);
      int is_line_note;

      if (uid >= max_uid_for_loop)
        continue;

      /* A line-number NOTE shares the luid of the insn before it, so that
         the luid distance between two insns does not depend on -g.  Every
         other insn takes the next number.  */
      is_line_note = (GET_CODE (cur) == NOTE && NOTE_LINE_NUMBER (cur) > 0);
      if (! is_line_note)
        luid++;

      uid_luid[uid] = luid;
    }

  return luid + 1;
}
/* Entry point of this file.  Perform loop optimization on the current
   function.

   F is the first insn of the function.
   DUMPFILE is a stream for output of a trace of actions taken (or 0 if
   none should be output); it is stored in loop_dump_stream.
   FLAGS is handed unchanged to scan_loop for every loop that is scanned.

   If F contains no NOTE_INSN_LOOP_BEG note the function returns before
   allocating any table.  Otherwise the uid-indexed tables, the loop array
   and the per-loop auxiliary data are allocated here and released again
   before returning.  */
void
loop_optimize (rtx f, FILE *dumpfile, int flags)
{
  rtx insn;
  int i;
  struct loops loops_data;
  struct loops *loops = &loops_data;
  struct loop_info *loops_info;

  loop_dump_stream = dumpfile;

  init_recog_no_volatile ();

  max_reg_before_loop = max_reg_num ();
  loop_max_reg = max_reg_before_loop;

  regs_may_share = 0;

  /* Count the number of loops.  */
  max_loop_num = 0;
  for (insn = f; insn; insn = NEXT_INSN (insn))
    {
      if (GET_CODE (insn) == NOTE
          && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
        max_loop_num++;
    }

  /* Don't waste time if no loops.  */
  if (max_loop_num == 0)
    return;

  loops->num = max_loop_num;

  /* Get size to use for tables indexed by uids.
     Leave some space for labels allocated by find_and_verify_loops.  */
  max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;

  uid_luid = xcalloc (max_uid_for_loop, sizeof (int));
  uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *));

  /* Allocate storage for array of loops.  */
  loops->array = xcalloc (loops->num, sizeof (struct loop));

  /* Find and process each loop.
     First, find them, and record them in order of their beginnings.  */
  find_and_verify_loops (f, loops);

  /* Allocate and initialize auxiliary loop information.  */
  loops_info = xcalloc (loops->num, sizeof (struct loop_info));
  for (i = 0; i < (int) loops->num; i++)
    loops->array[i].aux = loops_info + i;

  /* Now find all register lifetimes.  This must be done after
     find_and_verify_loops, because it might reorder the insns in the
     function.  */
  reg_scan (f, max_reg_before_loop, 1);

  /* This must occur after reg_scan so that registers created by gcse
     will have entries in the register tables.

     We could have added a call to reg_scan after gcse_main in toplev.c,
     but moving this call to init_alias_analysis is more efficient.  */
  init_alias_analysis ();

  /* See if we went too far.  Note that get_max_uid already returns
     one more than the maximum uid of all insns.  */
  if (get_max_uid () > max_uid_for_loop)
    abort ();
  /* Now reset it to the actual size we need.  See above.  */
  max_uid_for_loop = get_max_uid ();

  /* find_and_verify_loops has already called compute_luids, but it
     might have rearranged code afterwards, so we need to recompute
     the luids now.  */
  compute_luids (f, NULL_RTX, 0);

  /* Don't leave gaps in uid_luid for insns that have been
     deleted.  It is possible that the first or last insn
     using some register has been deleted by cross-jumping.
     Make sure that uid_luid for that former insn's uid
     points to the general area where that insn used to be.  */

  /* First give slot 0 the first nonzero luid found in the table.  */
  for (i = 0; i < max_uid_for_loop; i++)
    {
      uid_luid[0] = uid_luid[i];
      if (uid_luid[0] != 0)
        break;
    }

  /* Then let every remaining empty slot inherit the luid of the slot
     before it.  Slot 0 was handled above and has no predecessor, so the
     walk starts at 1; starting at 0 would read uid_luid[-1] if slot 0
     were still zero.  */
  for (i = 1; i < max_uid_for_loop; i++)
    if (uid_luid[i] == 0)
      uid_luid[i] = uid_luid[i - 1];

  /* Determine if the function has indirect jump.  On some systems
     this prevents low overhead loop instructions from being used.  */
  indirect_jump_in_function = indirect_jump_in_function_p (f);

  /* Now scan the loops, last ones first, since this means inner ones are done
     before outer ones.  */
  for (i = max_loop_num - 1; i >= 0; i--)
    {
      struct loop *loop = &loops->array[i];

      /* Skip loops marked invalid and loops with no recorded end.  */
      if (! loop->invalid && loop->end)
        {
          scan_loop (loop, flags);
          ggc_collect ();
        }
    }

  end_alias_analysis ();

  /* Clean up.  */
  for (i = 0; i < (int) loops->num; i++)
    free (loops_info[i].mems);

  free (uid_luid);
  free (uid_loop);
  free (loops_info);
  free (loops->array);
}
/* Return the insn to visit after INSN when walking LOOP in execution
   order, or NULL_RTX once the walk is complete.

   LOOP->END is the point at which the insn stream leaves the loop.
   LOOP->TOP, if non-null, is the top of the loop in the insn stream; it
   is used with loops that are entered near the bottom, where the walk
   has to wrap around from the end to the top.  The walk is finished as
   soon as it gets back to LOOP->SCAN_START.  */
static rtx
next_insn_in_loop (const struct loop *loop, rtx insn)
{
  rtx next = NEXT_INSN (insn);

  /* At the end of the loop, continue from the top if there is one;
     without a top there is nothing left to visit.  */
  if (next == loop->end)
    next = loop->top ? loop->top : NULL_RTX;

  /* Being back where the scan started means every insn has been seen.  */
  return next == loop->scan_start ? NULL_RTX : next;
}
/* Collect every REG found anywhere inside X and return the dependency
   list DEPS with those registers pushed onto its front as EXPR_LIST
   nodes.  scan_loop uses this to look inside the address of a
   (clobber (mem ...)) when deciding whether a PARALLEL can be pulled out
   of a loop.

   Operands are visited from the last one to the first, and the elements
   of a vector operand from first to last; the order of the resulting
   list follows from that.  */
static rtx
find_regs_nested (rtx deps, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  const char *format;
  int op;

  /* A register is itself the dependency; it has nothing to recurse into.  */
  if (code == REG)
    return gen_rtx_EXPR_LIST (VOIDmode, x, deps);

  format = GET_RTX_FORMAT (code);
  op = GET_RTX_LENGTH (code);
  while (op-- > 0)
    switch (format[op])
      {
      case 'e':
        /* A single sub-expression.  */
        deps = find_regs_nested (deps, XEXP (x, op));
        break;

      case 'E':
        {
          /* A vector of sub-expressions.  */
          int elt;

          for (elt = 0; elt < XVECLEN (x, op); elt++)
            deps = find_regs_nested (deps, XVECEXP (x, op, elt));
        }
        break;

      default:
        /* Operands of any other kind are not walked.  */
        break;
      }

  return deps;
}
/* Optimize one loop described by LOOP. */
/* ??? Could also move memory writes out of loops if the destination address
is invariant, the source is invariant, the memory write is not volatile,
and if we can prove that no read inside the loop can read this address
before the write occurs. If there is a read of this address after the
write, then we can also mark the memory read as invariant. */
static void
scan_loop (struct loop *loop, int flags)
{
struct loop_info *loop_info = LOOP_INFO (loop);
struct loop_regs *regs = LOOP_REGS (loop);
int i;
rtx loop_start = loop->start;
rtx loop_end = loop->end;
rtx p;
/* 1 if we are scanning insns that could be executed zero times. */
int maybe_never = 0;
/* 1 if we are scanning insns that might never be executed
due to a subroutine call which might exit before they are reached. */
int call_passed = 0;
/* Number of insns in the loop. */
int insn_count;
int tem;
rtx temp, update_start, update_end;
/* The SET from an insn, if it is the only SET in the insn. */
rtx set, set1;
/* Chain describing insns movable in current loop. */
struct loop_movables *movables = LOOP_MOVABLES (loop);
/* Ratio of extra register life span we can justify
for saving an instruction. More if loop doesn't call subroutines
since in that case saving an insn makes more difference
and more registers are available. */
int threshold;
/* Nonzero if we are scanning instructions in a sub-loop. */
int loop_depth = 0;
int in_libcall;
loop->top = 0;
movables->head = 0;
movables->last = 0;
/* Determine whether this loop starts with a jump down to a test at
the end. This will occur for a small number of loops with a test
that is too complex to duplicate in front of the loop.
We search for the first insn or label in the loop, skipping NOTEs.
However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
(because we might have a loop executed only once that contains a
loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
(in case we have a degenerate loop).
Note that if we mistakenly think that a loop is entered at the top
when, in fact, it is entered at the exit test, the only effect will be
slightly poorer optimization. Making the opposite error can generate
incorrect code. Since very few loops now start with a jump to the
exit test, the code here to detect that case is very conservative. */
for (p = NEXT_INSN (loop_start);
p != loop_end
&& GET_CODE (p) != CODE_LABEL && ! INSN_P (p)
&& (GET_CODE (p) != NOTE
|| (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
&& NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
p = NEXT_INSN (p))
;
loop->scan_start = p;
/* If loop end is the end of the current function, then emit a
NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
note insn. This is the position we use when sinking insns out of
the loop. */
if (NEXT_INSN (loop->end) != 0)
loop->sink = NEXT_INSN (loop->end);
else
loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);
/* Set up variables describing this loop. */
prescan_loop (loop);
threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);
/* If loop has a jump before the first label,
the true entry is the target of that jump.
Start scan from there.
But record in LOOP->TOP the place where the end-test jumps
back to so we can scan that after the end of the loop. */
if (GET_CODE (p) == JUMP_INSN
/* Loop entry must be unconditional jump (and not a RETURN) */
&& any_uncondjump_p (p)
&& JUMP_LABEL (p) != 0
/* Check to see whether the jump actually
jumps out of the loop (meaning it's no loop).
This case can happen for things like
do {..} while (0). If this label was generated previously
by loop, we can't tell anything about it and have to reject
the loop. */
&& INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
{
loop->top = next_label (loop->scan_start);
loop->scan_start = JUMP_LABEL (p);
}
/* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
as required by loop_reg_used_before_p. So skip such loops. (This
test may never be true, but it's best to play it safe.)
Also, skip loops where we do not start scanning at a label. This
test also rejects loops starting with a JUMP_INSN that failed the
test above. */
if (INSN_UID (loop->scan_start) >= max_uid_for_loop
|| GET_CODE (loop->scan_start) != CODE_LABEL)
{
if (loop_dump_stream)
fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
INSN_UID (loop_start), INSN_UID (loop_end));
return;
}
/* Allocate extra space for REGs that might be created by load_mems.
We allocate a little extra slop as well, in the hopes that we
won't have to reallocate the regs array. */
loop_regs_scan (loop, loop_info->mems_idx + 16);
insn_count = count_insns_in_loop (loop);
if (loop_dump_stream)
{
fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
INSN_UID (loop_start), INSN_UID (loop_end), insn_count);
if (loop->cont)
fprintf (loop_dump_stream, "Continue at insn %d.\n",
INSN_UID (loop->cont));
}
/* Scan through the loop finding insns that are safe to move.
Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
this reg will be considered invariant for subsequent insns.
We consider whether subsequent insns use the reg
in deciding whether it is worth actually moving.
MAYBE_NEVER is nonzero if we have passed a conditional jump insn
and therefore it is possible that the insns we are scanning
would never be executed. At such times, we must make sure
that it is safe to execute the insn once instead of zero times.
When MAYBE_NEVER is 0, all insns will be executed at least once
so that is not a problem. */
for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
p != NULL_RTX;
p = next_insn_in_loop (loop, p))
{
if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
in_libcall--;
if (GET_CODE (p) == INSN)
{
temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
if (temp)
in_libcall++;
if (! in_libcall
&& (set = single_set (p))
&& GET_CODE (SET_DEST (set)) == REG
#ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
&& SET_DEST (set) != pic_offset_table_rtx
#endif
&& ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
{
int tem1 = 0;
int tem2 = 0;
int move_insn = 0;
int insert_temp = 0;
rtx src = SET_SRC (set);
rtx dependencies = 0;
/* Figure out what to use as a source of this insn. If a
REG_EQUIV note is given or if a REG_EQUAL note with a
constant operand is specified, use it as the source and
mark that we should move this insn by calling
emit_move_insn rather that duplicating the insn.
Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
note is present. */
temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
if (temp)
src = XEXP (temp, 0), move_insn = 1;
else
{
temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
if (temp && CONSTANT_P (XEXP (temp, 0)))
src = XEXP (temp, 0), move_insn = 1;
if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
{
src = XEXP (temp, 0);
/* A libcall block can use regs that don't appear in
the equivalent expression. To move the libcall,
we must move those regs too. */
dependencies = libcall_other_reg (p, src);
}
}
/* For parallels, add any possible uses to the dependencies, as
we can't move the insn without resolving them first.
MEMs inside CLOBBERs may also reference registers; these
count as implicit uses. */
if (GET_CODE (PATTERN (p)) == PARALLEL)
{
for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
{
rtx x = XVECEXP (PATTERN (p), 0, i);
if (GET_CODE (x) == USE)
dependencies
= gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
dependencies);
else if (GET_CODE (x) == CLOBBER
&& GET_CODE (XEXP (x, 0)) == MEM)
dependencies = find_regs_nested (dependencies,
XEXP (XEXP (x, 0), 0));
}
}
if (/* The register is used in basic blocks other
than the one where it is set (meaning that
something after this point in the loop might
depend on its value before the set). */
! reg_in_basic_block_p (p, SET_DEST (set))
/* And the set is not guaranteed to be executed once
the loop starts, or the value before the set is
needed before the set occurs...
??? Note we have quadratic behavior here, mitigated
by the fact that the previous test will often fail for
large loops. Rather than re-scanning the entire loop
each time for register usage, we should build tables
of the register usage and use them here instead. */
&& (maybe_never
|| loop_reg_used_before_p (loop, set, p)))
/* It is unsafe to move the set. However, it may be OK to
move the source into a new pseudo, and substitute a
reg-to-reg copy for the original insn.
This code used to consider it OK to move a set of a variable
which was not created by the user and not used in an exit
test.
That behavior is incorrect and was removed. */
insert_temp = 1;
/* Don't try to optimize a MODE_CC set with a constant
source. It probably will be combined with a conditional
jump. */
if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC
&& CONSTANT_P (src))
;
/* Don't try to optimize a register that was made
by loop-optimization for an inner loop.
We don't know its life-span, so we can't compute
the benefit. */
else if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
;
/* Don't move the source and add a reg-to-reg copy:
- with -Os (this certainly increases size),
- if the mode doesn't support copy operations (obviously),
- if the source is already a reg (the motion will gain nothing),
- if the source is a legitimate constant (likewise). */
else if (insert_temp
&& (optimize_size
|| ! can_copy_p (GET_MODE (SET_SRC (set)))
|| GET_CODE (SET_SRC (set)) == REG
|| (CONSTANT_P (SET_SRC (set))
&& LEGITIMATE_CONSTANT_P (SET_SRC (set)))))
;
else if ((tem = loop_invariant_p (loop, src))
&& (dependencies == 0
|| (tem2
= loop_invariant_p (loop, dependencies)) != 0)
&& (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
|| (tem1
= consec_sets_invariant_p
(loop, SET_DEST (set),
regs->array[REGNO (SET_DEST (set))].set_in_loop,
p)))
/* If the insn can cause a trap (such as divide by zero),
can't move it unless it's guaranteed to be executed
once loop is entered. Even a function call might
prevent the trap insn from being reached
(since it might exit!) */
&& ! ((maybe_never || call_passed)
&& may_trap_p (src)))
{
struct movable *m;
int regno = REGNO (SET_DEST (set));
/* A potential lossage is where we have a case where two insns
can be combined as long as they are both in the loop, but
we move one of them outside the loop. For large loops,
this can lose. The most common case of this is the address
of a function being called.
Therefore, if this register is marked as being used
exactly once if we are in a loop with calls
(a "large loop"), see if we can replace the usage of
this register with the source of this SET. If we can,
delete this insn.
Don't do this if P has a REG_RETVAL note or if we have
SMALL_REGISTER_CLASSES and SET_SRC is a hard register. */
if (loop_info->has_call
&& regs->array[regno].single_usage != 0
&& regs->array[regno].single_usage != const0_rtx
&& REGNO_FIRST_UID (regno) == INSN_UID (p)
&& (REGNO_LAST_UID (regno)
== INSN_UID (regs->array[regno].single_usage))
&& regs->array[regno].set_in_loop == 1
&& GET_CODE (SET_SRC (set)) != ASM_OPERANDS
&& ! side_effects_p (SET_SRC (set))
&& ! find_reg_note (p, REG_RETVAL, NULL_RTX)
&& (! SMALL_REGISTER_CLASSES
|| (! (GET_CODE (SET_SRC (set)) == REG
&& (REGNO (SET_SRC (set))
< FIRST_PSEUDO_REGISTER))))
&& regno >= FIRST_PSEUDO_REGISTER
/* This test is not redundant; SET_SRC (set) might be
a call-clobbered register and the life of REGNO
might span a call. */
&& ! modified_between_p (SET_SRC (set), p,
regs->array[regno].single_usage)
&& no_labels_between_p (p,
regs->array[regno].single_usage)
&& validate_replace_rtx (SET_DEST (set), SET_SRC (set),
regs->array[regno].single_usage))
{
/* Replace any usage in a REG_EQUAL note. Must copy
the new source, so that we don't get rtx sharing
between the SET_SOURCE and REG_NOTES of insn p. */
REG_NOTES (regs->array[regno].single_usage)
= (replace_rtx
(REG_NOTES (regs->array[regno].single_usage),
SET_DEST (set), copy_rtx (SET_SRC (set))));
delete_insn (p);
for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
i++)
regs->array[regno+i].set_in_loop = 0;
continue;
}
m = xmalloc (sizeof (struct movable));
m->next = 0;
m->insn = p;
m->set_src = src;
m->dependencies = dependencies;
m->set_dest = SET_DEST (set);
m->force = 0;
m->consec
= regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
m->done = 0;
m->forces = 0;
m->partial = 0;
m->move_insn = move_insn;
m->move_insn_first = 0;
m->insert_temp = insert_temp;
m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
m->savemode = VOIDmode;
m->regno = regno;
/* Set M->cond if either loop_invariant_p
or consec_sets_invariant_p returned 2
(only conditionally invariant). */
m->cond = ((tem | tem1 | tem2) > 1);
m->global = LOOP_REG_GLOBAL_P (loop, regno);
m->match = 0;
m->lifetime = LOOP_REG_LIFETIME (loop, regno);
m->savings = regs->array[regno].n_times_set;
if (find_reg_note (p, REG_RETVAL, NULL_RTX))
m->savings += libcall_benefit (p);
for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
/* Add M to the end of the chain MOVABLES. */
loop_movables_add (movables, m);
if (m->consec > 0)
{
/* It is possible for the first instruction to have a
REG_EQUAL note but a non-invariant SET_SRC, so we must
remember the status of the first instruction in case
the last instruction doesn't have a REG_EQUAL note. */
m->move_insn_first = m->move_insn;
/* Skip this insn, not checking REG_LIBCALL notes. */
p = next_nonnote_insn (p);
/* Skip the consecutive insns, if there are any. */
p = skip_consec_insns (p, m->consec);
/* Back up to the last insn of the consecutive group. */
p = prev_nonnote_insn (p);
/* We must now reset m->move_insn, m->is_equiv, and
possibly m->set_src to correspond to the effects of
all the insns. */
temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
if (temp)
m->set_src = XEXP (temp, 0), m->move_insn = 1;
else
{
temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
if (temp && CONSTANT_P (XEXP (temp, 0)))
m->set_src = XEXP (temp, 0), m->move_insn = 1;
else
m->move_insn = 0;
}
m->is_equiv
= (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
}
}
/* If this register is always set within a STRICT_LOW_PART
or set to zero, then its high bytes are constant.
So clear them outside the loop and within the loop
just load the low bytes.
We must check that the machine has an instruction to do so.
Also, if the value loaded into the register
depends on the same register, this cannot be done. */
else if (SET_SRC (set) == const0_rtx
&& GET_CODE (NEXT_INSN (p)) == INSN
&& (set1 = single_set (NEXT_INSN (p)))
&& GET_CODE (set1) == SET
&& (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
&& (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
&& (SUBREG_REG (XEXP (SET_DEST (set1), 0))
== SET_DEST (set))
&& !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
{
int regno = REGNO (SET_DEST (set));
if (regs->array[regno].set_in_loop == 2)
{
struct movable *m;
m = xmalloc (sizeof (struct movable));
m->next = 0;
m->insn = p;
m->set_dest = SET_DEST (set);
m->dependencies = 0;
m->force = 0;
m->consec = 0;
m->done = 0;
m->forces = 0;
m->move_insn = 0;
m->move_insn_first = 0;
m->insert_temp = insert_temp;
m->partial = 1;
/* If the insn may not be executed on some cycles,
we can't clear the whole reg; clear just high part.
Not even if the reg is used only within this loop.
Consider this:
while (1)
while (s != t) {
if (foo ()) x = *s;
use (x);
}
Clearing x before the inner loop could clobber a value
being saved from the last time around the outer loop.
However, if the reg is not used outside this loop
and all uses of the register are in the same
basic block as the store, there is no problem.
If this insn was made by loop, we don't know its
INSN_LUID and hence must make a conservative
assumption. */
m->global = (INSN_UID (p) >= max_uid_for_loop
|| LOOP_REG_GLOBAL_P (loop, regno)
|| (labels_in_range_p
(p, REGNO_FIRST_LUID (regno))));
if (maybe_never && m->global)
m->savemode = GET_MODE (SET_SRC (set1));
else
m->savemode = VOIDmode;
m->regno = regno;
m->cond = 0;
m->match = 0;
m->lifetime = LOOP_REG_LIFETIME (loop, regno);
m->savings = 1;
for (i = 0;
i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
i++)
regs->array[regno+i].set_in_loop = -1;
/* Add M to the end of the chain MOVABLES. */
loop_movables_add (movables, m);
}
}
}
}
/* Past a call insn, we get to insns which might not be executed
because the call might exit. This matters for insns that trap.
Constant and pure call insns always return, so they don't count. */
else if (GET_CODE (p) == CALL_INSN && ! CONST_OR_PURE_CALL_P (p))
call_passed = 1;
/* Past a label or a jump, we get to insns for which we
can't count on whether or how many times they will be
executed during each iteration. Therefore, we can
only move out sets of trivial variables
(those not used after the loop). */
/* Similar code appears twice in strength_reduce. */
else if ((GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN)
/* If we enter the loop in the middle, and scan around to the
beginning, don't set maybe_never for that. This must be an
unconditional jump, otherwise the code at the top of the
loop might never be executed. Unconditional jumps are
followed by a barrier then the loop_end. */
&& ! (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == loop->top
&& NEXT_INSN (NEXT_INSN (p)) == loop_end
&& any_uncondjump_p (p)))
maybe_never = 1;
else if (GET_CODE (p) == NOTE)
{
/* At the virtual top of a converted loop, insns are again known to
be executed: logically, the loop begins here even though the exit
code has been duplicated. */
if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP && loop_depth == 0)
maybe_never = call_passed = 0;
else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
loop_depth++;
else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
loop_depth--;
}
}
/* If one movable subsumes another, ignore that other. */
ignore_some_movables (movables);
/* For each movable insn, see if the reg that it loads
leads when it dies right into another conditionally movable insn.
If so, record that the second insn "forces" the first one,
since the second can be moved only if the first is. */
force_movables (movables);
/* See if there are multiple movable insns that load the same value.
If there are, make all but the first point at the first one
through the `match' field, and add the priorities of them
all together as the priority of the first. */
combine_movables (movables, regs);
/* Now consider each movable insn to decide whether it is worth moving.
Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
For machines with few registers this increases code size, so do not
move movables when optimizing for code size on such machines.
(The 18 below is the value for i386.) */
if (!optimize_size
|| (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
{
move_movables (loop, movables, threshold, insn_count);
/* Recalculate regs->array if move_movables has created new
registers. */
if (max_reg_num () > regs->num)
{
loop_regs_scan (loop, 0);
for (update_start = loop_start;
PREV_INSN (update_start)
&& GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
update_start = PREV_INSN (update_start))
;
update_end = NEXT_INSN (loop_end);
reg_scan_update (update_start, update_end, loop_max_reg);
loop_max_reg = max_reg_num ();
}
}
/* Now candidates that still are negative are those not moved.
Change regs->array[I].set_in_loop to indicate that those are not actually
invariant. */
for (i = 0; i < regs->num; i++)
if (regs->array[i].set_in_loop < 0)
regs->array[i].set_in_loop = regs->array[i].n_times_set;
/* Now that we've moved some things out of the loop, we might be able to
hoist even more memory references. */
load_mems (loop);
/* Recalculate regs->array if load_mems has created new registers. */
if (max_reg_num () > regs->num)
loop_regs_scan (loop, 0);
for (update_start = loop_start;
PREV_INSN (update_start)
&& GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
update_start = PREV_INSN (update_start))
;
update_end = NEXT_INSN (loop_end);
reg_scan_update (update_start, update_end, loop_max_reg);
loop_max_reg = max_reg_num ();
if (flag_strength_reduce)
{
if (update_end && GET_CODE (update_end) == CODE_LABEL)
/* Ensure our label doesn't go away. */
LABEL_NUSES (update_end)++;
strength_reduce (loop, flags);
reg_scan_update (update_start, update_end, loop_max_reg);
loop_max_reg = max_reg_num ();
if (update_end && GET_CODE (update_end) == CODE_LABEL
&& --LABEL_NUSES (update_end) == 0)
delete_related_insns (update_end);
}
/* The movable information is required for strength reduction. */
loop_movables_free (movables);
free (regs->array);
regs->array = 0;
regs->num = 0;
}
/* Walk the rtx IN_THIS and push onto the EXPR_LIST *OUTPUT every
   pseudo-register found there that is not mentioned in NOT_IN_THIS.
   Each recorded register is added at the front of *OUTPUT.  */

void
record_excess_regs (rtx in_this, rtx not_in_this, rtx *output)
{
  const enum rtx_code code = GET_CODE (in_this);
  const char *fmt;
  int opno;

  if (code == REG)
    {
      /* Only pseudos are of interest; hard registers are never recorded.  */
      if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
	  && ! reg_mentioned_p (in_this, not_in_this))
	*output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
      return;
    }

  /* These codes are not searched for registers.  */
  if (code == PC
      || code == CC0
      || code == CONST_INT
      || code == CONST_DOUBLE
      || code == CONST
      || code == SYMBOL_REF
      || code == LABEL_REF)
    return;

  /* Recurse into every rtx operand ('e') and every rtx vector ('E'),
     visiting operands from last to first.  */
  fmt = GET_RTX_FORMAT (code);
  for (opno = GET_RTX_LENGTH (code); opno-- > 0; )
    {
      if (fmt[opno] == 'e')
	record_excess_regs (XEXP (in_this, opno), not_in_this, output);
      else if (fmt[opno] == 'E')
	{
	  int elt;

	  for (elt = 0; elt < XVECLEN (in_this, opno); elt++)
	    record_excess_regs (XVECEXP (in_this, opno, elt),
				not_in_this, output);
	}
    }
}
/* INSN is the last insn of a libcall block (it carries the REG_RETVAL
   note) and EQUIV is the equivalent value of that block.  Collect the
   pseudo-registers referenced by the insns of the block before INSN
   that are not mentioned in EQUIV.

   Return 0 if there are none, otherwise an EXPR_LIST holding them.  */

rtx
libcall_other_reg (rtx insn, rtx equiv)
{
  rtx retval_note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
  rtx excess = 0;
  rtx p;

  /* The REG_RETVAL note points at the first insn of the block; scan
     from there up to, but not including, INSN itself.  */
  for (p = XEXP (retval_note, 0); p != insn; p = NEXT_INSN (p))
    switch (GET_CODE (p))
      {
      case INSN:
      case JUMP_INSN:
      case CALL_INSN:
	record_excess_regs (PATTERN (p), equiv, &excess);
	break;

      default:
	break;
      }

  return excess;
}
/* Return 1 if every use of REG lies between INSN and the end of the
   basic block containing INSN, 0 otherwise.  INSN must be the recorded
   first use of REG for the answer to be 1.  */

static int
reg_in_basic_block_p (rtx insn, rtx reg)
{
  const int regno = REGNO (reg);
  rtx p;

  if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
    return 0;

  /* Walk forward through the block looking for the recorded last use.  */
  for (p = insn; p; p = NEXT_INSN (p))
    {
      const enum rtx_code code = GET_CODE (p);

      /* A label or barrier ends the basic block before the last use
	 was seen.  */
      if (code == CODE_LABEL || code == BARRIER)
	return 0;

      if (code == INSN || code == CALL_INSN || code == JUMP_INSN)
	{
	  /* Reaching the last use inside the block means success.  */
	  if (REGNO_LAST_UID (regno) == INSN_UID (p))
	    return 1;

	  /* A jump that is not the last use ends the basic block.  */
	  if (code == JUMP_INSN)
	    return 0;
	}

      /* Notes and anything else are simply skipped.  */
    }

  /* The "last use" that was recorded can't be found after the first
     use.  This can happen when the last use was deleted while
     processing an inner loop, this inner loop was then completely
     unrolled, and the outer loop is always exited after the inner loop,
     so that everything after the first use becomes a single basic
     block.  */
  return 1;
}
/* Estimate the benefit of eliminating the block of insns that ends
   with LAST, which must carry a REG_RETVAL note pointing at the first
   insn of the block.  The block may compute a value directly or may
   contain a library call.  LAST itself is not counted.  */

static int
libcall_benefit (rtx last)
{
  rtx insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
  int benefit = 0;

  while (insn != last)
    {
      if (GET_CODE (insn) == CALL_INSN)
	/* Assume at least this many insns in a library routine.  */
	benefit += 10;
      else if (GET_CODE (insn) == INSN)
	{
	  const enum rtx_code pat_code = GET_CODE (PATTERN (insn));

	  /* USE and CLOBBER insns contribute nothing.  */
	  if (pat_code != USE && pat_code != CLOBBER)
	    benefit++;
	}

      insn = NEXT_INSN (insn);
    }

  return benefit;
}
/* Starting at INSN, step forward over COUNT insns and return the insn
   reached.  A whole library call sequence counts as a single insn, and
   NOTEs are not counted.  */

static rtx
skip_consec_insns (rtx insn, int count)
{
  while (count > 0)
    {
      /* INSN is guaranteed to be an insn at this point, so check here
	 whether it opens a libcall sequence; if so, jump to the end of
	 that sequence first.  */
      if (GET_CODE (insn) != NOTE)
	{
	  rtx libcall_note = find_reg_note (insn, REG_LIBCALL, NULL_RTX);

	  if (libcall_note)
	    insn = XEXP (libcall_note, 0);
	}

      /* Advance at least once, then past any NOTEs.  */
      insn = NEXT_INSN (insn);
      while (GET_CODE (insn) == NOTE)
	insn = NEXT_INSN (insn);

      count--;
    }

  return insn;
}
/* Mark as done (and hence ignored) every movable whose insn lies
   inside a libcall sequence that is itself represented by another
   movable.

   This relies on the movable for the libcall value having been made
   later, so that it appears later on the chain than the movables it
   subsumes.  */

static void
ignore_some_movables (struct loop_movables *movables)
{
  struct movable *libcall_m;

  for (libcall_m = movables->head; libcall_m; libcall_m = libcall_m->next)
    {
      rtx retval_note = find_reg_note (libcall_m->insn, REG_RETVAL, NULL_RTX);
      rtx insn;

      /* Only movables for the value of a libcall are of interest.  */
      if (! retval_note)
	continue;

      /* Look for earlier movables inside the libcall's range and
	 invalidate them.  LUIDs cannot be used for this because insns
	 created by loop.c for prior loops don't have LUIDs.  Rather
	 than reject all such insns from movables, each insn of the
	 libcall is checked explicitly (invariant libcalls aren't that
	 common).  */
      for (insn = XEXP (retval_note, 0);
	   insn != libcall_m->insn;
	   insn = NEXT_INSN (insn))
	{
	  struct movable *earlier;

	  for (earlier = movables->head;
	       earlier != libcall_m;
	       earlier = earlier->next)
	    if (earlier->insn == insn)
	      earlier->done = 1;
	}
    }
}
/* For every movable, check whether the register it loads dies in a
   later movable insn.  If it does, record in the later movable's
   `forces' field that it depends on the earlier one, since the later
   insn can be moved only if the earlier one is, and credit the earlier
   movable (and everything on its `forces' chain) with the later one's
   lifetime and savings.  */

static void
force_movables (struct loop_movables *movables)
{
  struct movable *earlier;

  for (earlier = movables->head; earlier; earlier = earlier->next)
    {
      struct movable *later;
      struct movable *chain;
      int regno;

      /* Omit this if moving just the (SET (REG) 0) of a zero-extend,
	 or if the movable has already been discarded.  */
      if (earlier->partial || earlier->done)
	continue;

      regno = earlier->regno;

      /* Find the first live movable after EARLIER whose insn is the
	 recorded last use of EARLIER's register.

	 ??? Could this be a bug?  What if CSE caused the register of
	 EARLIER to be used after this insn?  Since CSE does not update
	 regno_last_uid, LATER->insn might not be where it dies.  But
	 very likely this doesn't matter; what matters is that LATER's
	 reg is computed from EARLIER's reg.  */
      later = earlier->next;
      while (later
	     && (INSN_UID (later->insn) != REGNO_LAST_UID (regno)
		 || later->done))
	later = later->next;

      if (! later)
	continue;

      /* A plain copy of EARLIER's destination does not count.  When
	 LATER->consec is nonzero, LATER->set_src isn't valid, so the
	 copy test applies only to single-insn movables.  */
      if (later->set_src == earlier->set_dest && later->consec == 0)
	continue;

      /* Raise the priority of moving the first insn, since doing so
	 permits the second to be moved as well.  Likewise for insns
	 already forced by the first insn.  */
      later->forces = earlier;
      for (chain = earlier; chain; chain = chain->forces)
	{
	  chain->lifetime += later->lifetime;
	  chain->savings += later->savings;
	}
    }
}
/* Look through MOVABLES for invariant expressions that are equal and
   can therefore share one register.  A movable that is folded into
   another is marked done and points at the survivor through its
   `match' field; the survivor accumulates the lifetime and savings of
   everything matched to it.  REGS supplies the per-register loop
   information.  */

static void
combine_movables (struct loop_movables *movables, struct loop_regs *regs)
{
  struct movable *m;
  char *matched_regs = xmalloc (regs->num);
  enum machine_mode mode;

  /* Regs that are set more than once are not allowed to match
     or be matched.  I'm no longer sure why not.  */
  /* Only pseudo registers are allowed to match or be matched,
     since move_movables does not validate the change.  */
  /* Perhaps testing m->consec_sets would be more appropriate here?  */

  for (m = movables->head; m; m = m->next)
    {
      struct movable *m1;

      if (m->match != 0
	  || regs->array[m->regno].n_times_set != 1
	  || m->regno < FIRST_PSEUDO_REGISTER
	  || m->insert_temp
	  || m->partial)
	continue;

      /* Start a fresh set of registers known to match M.  */
      memset (matched_regs, 0, regs->num);
      matched_regs[m->regno] = 1;

      /* We want later insns to match the first one.  Don't make the
	 first one match any later ones.  So start this loop at
	 m->next.  */
      for (m1 = m->next; m1; m1 = m1->next)
	{
	  if (m1 == m || m1->match != 0 || m1->insert_temp)
	    continue;

	  if (regs->array[m1->regno].n_times_set != 1
	      || m1->regno < FIRST_PSEUDO_REGISTER)
	    continue;

	  /* A reg used outside the loop mustn't be eliminated.  */
	  if (m1->global)
	    continue;

	  /* A reg used for zero-extending mustn't be eliminated.  */
	  if (m1->partial)
	    continue;

	  if (! matched_regs[m1->regno])
	    {
	      /* Can combine regs with different modes loaded from the
		 same constant only if the modes are the same or
		 if both are integer modes with M wider or the same
		 width as M1.  The check for integer is redundant, but
		 safe, since the only case of differing destination
		 modes with equal sources is when both sources are
		 VOIDmode, i.e., CONST_INT.  */
	      int modes_ok
		= (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
		   || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT
		       && (GET_MODE_CLASS (GET_MODE (m1->set_dest))
			   == MODE_INT)
		       && (GET_MODE_BITSIZE (GET_MODE (m->set_dest))
			   >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest)))));

	      if (! modes_ok)
		continue;

	      /* See if the source of M1 says it matches M.  */
	      if (! ((GET_CODE (m1->set_src) == REG
		      && matched_regs[REGNO (m1->set_src)])
		     || rtx_equal_for_loop_p (m->set_src, m1->set_src,
					      movables, regs)))
		continue;
	    }

	  /* The dependencies must agree as well.  */
	  if (m->dependencies != m1->dependencies
	      && ! rtx_equal_p (m->dependencies, m1->dependencies))
	    continue;

	  m->lifetime += m1->lifetime;
	  m->savings += m1->savings;
	  m1->done = 1;
	  m1->match = m;
	  matched_regs[m1->regno] = 1;
	}
    }

  /* Now combine the regs used for zero-extension.
     This can be done for those not marked `global'
     provided their lives don't overlap.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      /* The movable that the others extending from MODE combine into.  */
      struct movable *m0 = 0;

      /* Combine all the registers for extension from mode MODE.
	 Don't combine any that are used outside this loop.  */
      for (m = movables->head; m; m = m->next)
	{
	  struct movable *m1;
	  int first, last;
	  int overlaps;

	  if (! m->partial
	      || m->global
	      || mode != GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
	    continue;

	  first = REGNO_FIRST_LUID (m->regno);
	  last = REGNO_LAST_LUID (m->regno);

	  if (m0 == 0)
	    {
	      /* First one: don't check for overlap, just record it.  */
	      m0 = m;
	      continue;
	    }

	  /* Make sure they extend to the same mode.
	     (Almost always true.)  */
	  if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
	    continue;

	  /* We already have one: check for overlap with those
	     already combined together.  */
	  overlaps = 0;
	  for (m1 = movables->head; m1 != m; m1 = m1->next)
	    if ((m1 == m0 || (m1->partial && m1->match == m0))
		&& REGNO_FIRST_LUID (m1->regno) <= last
		&& REGNO_LAST_LUID (m1->regno) >= first)
	      {
		overlaps = 1;
		break;
	      }

	  if (overlaps)
	    continue;

	  /* No overlap: we can combine this with the others.  */
	  m0->lifetime += m->lifetime;
	  m0->savings += m->savings;
	  m->done = 1;
	  m->match = m0;
	}
    }

  /* Clean up.  */
  free (matched_regs);
}
/* Count the movables recorded for LOOP that are not marked done,
   i.e. the movable instructions that were not moved out of the loop,
   and return that count.  */

static int
num_unmoved_movables (const struct loop *loop)
{
  struct movable *m = LOOP_MOVABLES (loop)->head;
  int remaining = 0;

  while (m)
    {
      if (! m->done)
	remaining++;
      m = m->next;
    }

  return remaining;
}
/* Return 1 if registers X and Y will become the same register once
   their movables are moved, 0 otherwise.  Both registers must have a
   movable on the MOVABLES chain, and those movables must be linked
   through their `match' fields.  */

static int
regs_match_p (rtx x, rtx y, struct loop_movables *movables)
{
  unsigned int xn = REGNO (x);
  unsigned int yn = REGNO (y);
  struct movable *mx = movables->head;
  struct movable *my = movables->head;

  /* Find the first movable for each register.  */
  while (mx && mx->regno != xn)
    mx = mx->next;
  while (my && my->regno != yn)
    my = my->next;

  if (! mx || ! my)
    return 0;

  /* Both match the same third movable.  */
  if (mx->match != 0 && mx->match == my->match)
    return 1;

  /* One of them matches the other directly.  */
  return mx->match == my || mx == my->match;
}
/* Structural equality test for the rtx's X and Y (the Lisp function
   EQUAL for rtx arguments), relaxed for loop optimization: two
   registers that are matching movables, or a movable register and the
   constant it is equivalent to, are also considered equal.

   MOVABLES is the chain of movables and REGS the per-register loop
   information.  Return 1 if X and Y are equal in this sense, else 0.  */

static int
rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables,
		      struct loop_regs *regs)
{
  enum rtx_code code;
  const char *fmt;
  struct movable *m;
  int opno;

  if (x == y)
    return 1;
  if (x == 0 || y == 0)
    return 0;

  code = GET_CODE (x);

  /* If we have a register and a constant, they may sometimes be
     equal: a register whose set_in_loop is -2 is equal to a constant
     when one of its movables has move_insn set and loads that very
     constant.  */
  if (code == REG
      && regs->array[REGNO (x)].set_in_loop == -2
      && CONSTANT_P (y))
    {
      for (m = movables->head; m; m = m->next)
	if (m->move_insn
	    && m->regno == REGNO (x)
	    && rtx_equal_p (m->set_src, y))
	  return 1;
    }
  else if (GET_CODE (y) == REG
	   && regs->array[REGNO (y)].set_in_loop == -2
	   && CONSTANT_P (x))
    {
      for (m = movables->head; m; m = m->next)
	if (m->move_insn
	    && m->regno == REGNO (y)
	    && rtx_equal_p (m->set_src, x))
	  return 1;
    }

  /* Otherwise, rtx's of different codes cannot be equal.  */
  if (GET_CODE (y) != code)
    return 0;

  /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
     (REG:SI x) and (REG:HI x) are NOT equivalent.  */
  if (GET_MODE (x) != GET_MODE (y))
    return 0;

  /* These three types of rtx's can be compared nonrecursively.  */
  switch (code)
    {
    case REG:
      return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));

    case LABEL_REF:
      return XEXP (x, 0) == XEXP (y, 0);

    case SYMBOL_REF:
      return XSTR (x, 0) == XSTR (y, 0);

    default:
      break;
    }

  /* Compare the operands pairwise, from last to first.  As soon as any
     pair fails to match, the whole comparison fails.  */
  fmt = GET_RTX_FORMAT (code);
  for (opno = GET_RTX_LENGTH (code); opno-- > 0; )
    {
      const char kind = fmt[opno];

      if (kind == 'w')
	{
	  if (XWINT (x, opno) != XWINT (y, opno))
	    return 0;
	}
      else if (kind == 'i')
	{
	  if (XINT (x, opno) != XINT (y, opno))
	    return 0;
	}
      else if (kind == 'E')
	{
	  int elt;

	  /* Two vectors must have the same length.  */
	  if (XVECLEN (x, opno) != XVECLEN (y, opno))
	    return 0;

	  /* And the corresponding elements must match.  */
	  for (elt = 0; elt < XVECLEN (x, opno); elt++)
	    if (! rtx_equal_for_loop_p (XVECEXP (x, opno, elt),
					XVECEXP (y, opno, elt),
					movables, regs))
	      return 0;
	}
      else if (kind == 'e')
	{
	  if (! rtx_equal_for_loop_p (XEXP (x, opno), XEXP (y, opno),
				      movables, regs))
	    return 0;
	}
      else if (kind == 's')
	{
	  if (strcmp (XSTR (x, opno), XSTR (y, opno)) != 0)
	    return 0;
	}
      else if (kind == 'u')
	{
	  /* These are just backpointers, so they don't matter.  */
	}
      else if (kind == '0')
	{
	  /* Nothing to compare.  */
	}
      else
	{
	  /* It is believed that rtx's at this level will never
	     contain anything but integers and other rtx's,
	     except for within LABEL_REFs and SYMBOL_REFs.  */
	  abort ();
	}
    }

  return 1;
}
/* Search X for LABEL_REFs.  For each one that is not nonlocal, attach
   a REG_LABEL note naming the referenced label to every insn in the
   chain INSNS that mentions that label.  LABEL_NUSES of a referenced
   CODE_LABEL is incremented once for each note added.  */

static void
add_label_notes (rtx x, rtx insns)
{
  const enum rtx_code code = GET_CODE (x);
  const char *fmt;
  int opno;

  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
      rtx label = XEXP (x, 0);
      rtx insn;

      /* This code used to ignore labels that referred to dispatch
	 tables to avoid flow generating (slightly) worse code.

	 We no longer ignore such label references (see LABEL_REF
	 handling in mark_jump_label for additional information).  */
      for (insn = insns; insn; insn = NEXT_INSN (insn))
	{
	  if (! reg_mentioned_p (label, insn))
	    continue;

	  REG_NOTES (insn)
	    = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (insn));
	  if (LABEL_P (label))
	    LABEL_NUSES (label)++;
	}
    }

  /* Recurse into all rtx operands and rtx vectors of X, last operand
     first and last vector element first.  */
  fmt = GET_RTX_FORMAT (code);
  for (opno = GET_RTX_LENGTH (code); opno-- > 0; )
    {
      if (fmt[opno] == 'e')
	add_label_notes (XEXP (x, opno), insns);
      else if (fmt[opno] == 'E')
	{
	  int elt;

	  for (elt = XVECLEN (x, opno); elt-- > 0; )
	    add_label_notes (XVECEXP (x, opno, elt), insns);
	}
    }
}
/* Scan MOVABLES, and move the insns that deserve to be moved.
If two matching movables are combined, replace one reg with the
other throughout. */
static void
move_movables (struct loop *loop, struct loop_movables *movables,
int threshold, int insn_count)
{
struct loop_regs *regs = LOOP_REGS (loop);
int nregs = regs->num;
rtx new_start = 0;
struct movable *m;
rtx p;
rtx loop_start = loop->start;
rtx loop_end = loop->end;
/* Map of pseudo-register replacements to handle combining
when we move several insns that load the same value
into different pseudo-registers. */
rtx *reg_map = xcalloc (nregs, sizeof (rtx));
char *already_moved = xcalloc (nregs, sizeof (char));
for (m = movables->head; m; m = m->next)
{
/* Describe this movable insn. */
if (loop_dump_stream)
{
fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
INSN_UID (m->insn), m->regno, m->lifetime);
if (m->consec > 0)
fprintf (loop_dump_stream, "consec %d, ", m->consec);
if (m->cond)
fprintf (loop_dump_stream, "cond ");
if (m->force)
fprintf (loop_dump_stream, "force ");
if (m->global)
fprintf (loop_dump_stream, "global ");
if (m->done)
fprintf (loop_dump_stream, "done ");
if (m->move_insn)
fprintf (loop_dump_stream, "move-insn ");
if (m->match)
fprintf (loop_dump_stream, "matches %d ",
INSN_UID (m->match->insn));
if (m->forces)
fprintf (loop_dump_stream, "forces %d ",
INSN_UID (m->forces->insn));
}
/* Ignore the insn if it's already done (it matched something else).
Otherwise, see if it is now safe to move. */
if (!m->done
&& (! m->cond
|| (1 == loop_invariant_p (loop, m->set_src)
&& (m->dependencies == 0
|| 1 == loop_invariant_p (loop, m->dependencies))
&& (m->consec == 0
|| 1 == consec_sets_invariant_p (loop, m->set_dest,
m->consec + 1,
m->insn))))
&& (! m->forces || m->forces->done))
{
int regno;
rtx p;
int savings = m->savings;
/* We have an insn that is safe to move.
Compute its desirability. */
p = m->insn;
regno = m->regno;
if (loop_dump_stream)
fprintf (loop_dump_stream, "savings %d ", savings);
if (regs->array[regno].moved_once && loop_dump_stream)
fprintf (loop_dump_stream, "halved since already moved ");
/* An insn MUST be moved if we already moved something else
which is safe only if this one is moved too: that is,
if already_moved[REGNO] is nonzero. */
/* An insn is desirable to move if the new lifetime of the
register is no more than THRESHOLD times the old lifetime.
If it's not desirable, it means the loop is so big
that moving won't speed things up much,
and it is liable to make register usage worse. */
/* It is also desirable to move if it can be moved at no
extra cost because something else was already moved. */
if (already_moved[regno]
|| flag_move_all_movables
|| (threshold * savings * m->lifetime) >=
(regs->array[regno].moved_once ? insn_count * 2 : insn_count)
|| (m->forces && m->forces->done
&& regs->array[m->forces->regno].n_times_set == 1))
{
int count;
struct movable *m1;
rtx first = NULL_RTX;
rtx newreg = NULL_RTX;
if (m->insert_temp)
newreg = gen_reg_rtx (GET_MODE (m->set_dest));
/* Now move the insns that set the reg. */
if (m->partial && m->match)
{
rtx newpat, i1;
rtx r1, r2;
/* Find the end of this chain of matching regs.
Thus, we load each reg in the chain from that one reg.
And that reg is loaded with 0 directly,
since it has ->match == 0. */
for (m1 = m; m1->match; m1 = m1->match);
newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
SET_DEST (PATTERN (m1->insn)));
i1 = loop_insn_hoist (loop, newpat);
/* Mark the moved, invariant reg as being allowed to
share a hard reg with the other matching invariant. */
REG_NOTES (i1) = REG_NOTES (m->insn);
r1 = SET_DEST (PATTERN (m->insn));
r2 = SET_DEST (PATTERN (m1->insn));
regs_may_share
= gen_rtx_EXPR_LIST (VOIDmode, r1,
gen_rtx_EXPR_LIST (VOIDmode, r2,
regs_may_share));
delete_insn (m->insn);
if (new_start == 0)
new_start = i1;
if (loop_dump_stream)
fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
}
/* If we are to re-generate the item being moved with a
new move insn, first delete what we have and then emit
the move insn before the loop. */
else if (m->move_insn)
{
rtx i1, temp, seq;
for (count = m->consec; count >= 0; count--)
{
/* If this is the first insn of a library call sequence,
something is very wrong. */
if (GET_CODE (p) != NOTE
&& (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
abort ();
/* If this is the last insn of a libcall sequence, then
delete every insn in the sequence except the last.
The last insn is handled in the normal manner. */
if (GET_CODE (p) != NOTE
&& (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
{
temp = XEXP (temp, 0);
while (temp != p)
temp = delete_insn (temp);
}
temp = p;
p = delete_insn (p);
/* simplify_giv_expr expects that it can walk the insns
at m->insn forwards and see this old sequence we are
tossing here. delete_insn does preserve the next
pointers, but when we skip over a NOTE we must fix
it up. Otherwise that code walks into the non-deleted
insn stream. */
while (p && GET_CODE (p) == NOTE)
p = NEXT_INSN (temp) = NEXT_INSN (p);
if (m->insert_temp)
{
/* Replace the original insn with a move from
our newly created temp. */
start_sequence ();
emit_move_insn (m->set_dest, newreg);
seq = get_insns ();
end_sequence ();
emit_insn_before (seq, p);
}
}
start_sequence ();
emit_move_insn (m->insert_temp ? newreg : m->set_dest,
m->set_src);
seq = get_insns ();
end_sequence ();
add_label_notes (m->set_src, seq);
i1 = loop_insn_hoist (loop, seq);
if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
set_unique_reg_note (i1,
m->is_equiv ? REG_EQUIV : REG_EQUAL,
m->set_src);
if (loop_dump_stream)
fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
/* The more regs we move, the less we like moving them. */
threshold -= 3;
}
else
{
for (count = m->consec; count >= 0; count--)
{
rtx i1, temp;
/* If first insn of libcall sequence, skip to end. */
/* Do this at start of loop, since p is guaranteed to
be an insn here. */
if (GET_CODE (p) != NOTE
&& (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
p = XEXP (temp, 0);
/* If last insn of libcall sequence, move all
insns except the last before the loop. The last
insn is handled in the normal manner. */
if (GET_CODE (p) != NOTE
&& (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
{
rtx fn_address = 0;
rtx fn_reg = 0;
rtx fn_address_insn = 0;
first = 0;
for (temp = XEXP (temp, 0); temp != p;
temp = NEXT_INSN (temp))
{
rtx body;
rtx n;
rtx next;
if (GET_CODE (temp) == NOTE)
continue;
body = PATTERN (temp);
/* Find the next insn after TEMP,
not counting USE or NOTE insns. */
for (next = NEXT_INSN (temp); next != p;
next = NEXT_INSN (next))
if (! (GET_CODE (next) == INSN
&& GET_CODE (PATTERN (next)) == USE)
&& GET_CODE (next) != NOTE)
break;
/* If that is the call, this may be the insn
that loads the function address.
Extract the function address from the insn
that loads it into a register.
If this insn was cse'd, we get incorrect code.
So emit a new move insn that copies the
function address into the register that the
call insn will use. flow.c will delete any
redundant stores that we have created. */
if (GET_CODE (next) == CALL_INSN
&& GET_CODE (body) == SET
&& GET_CODE (SET_DEST (body)) == REG
&& (n = find_reg_note (temp, REG_EQUAL,
NULL_RTX)))
{
fn_reg = SET_SRC (body);
if (GET_CODE (fn_reg) != REG)
fn_reg = SET_DEST (body);
fn_address = XEXP (n, 0);
fn_address_insn = temp;
}
/* We have the call insn.
If it uses the register we suspect it might,
load it with the correct address directly. */
if (GET_CODE (temp) == CALL_INSN
&& fn_address != 0
&& reg_referenced_p (fn_reg, body))
loop_insn_emit_after (loop, 0, fn_address_insn,
gen_move_insn
(fn_reg, fn_address));
if (GET_CODE (temp) == CALL_INSN)
{
i1 = loop_call_insn_hoist (loop, body);
/* Because the USAGE information potentially
contains objects other than hard registers
we need to copy it. */
if (CALL_INSN_FUNCTION_USAGE (temp))
CALL_INSN_FUNCTION_USAGE (i1)
= copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
}
else
i1 = loop_insn_hoist (loop, body);
if (first == 0)
first = i1;
if (temp == fn_address_insn)
fn_address_insn = i1;
REG_NOTES (i1) = REG_NOTES (temp);
REG_NOTES (temp) = NULL;
delete_insn (temp);
}
if (new_start == 0)
new_start = first;
}
if (m->savemode != VOIDmode)
{
/* P sets REG to zero; but we should clear only
the bits that are not covered by the mode
m->savemode. */
rtx reg = m->set_dest;
rtx sequence;
rtx tem;
start_sequence ();
tem = expand_simple_binop
(GET_MODE (reg), AND, reg,
GEN_INT ((((HOST_WIDE_INT) 1
<< GET_MODE_BITSIZE (m->savemode)))
- 1),
reg, 1, OPTAB_LIB_WIDEN);
if (tem == 0)
abort ();
if (tem != reg)
emit_move_insn (reg, tem);
sequence = get_insns ();
end_sequence ();
i1 = loop_insn_hoist (loop, sequence);
}
else if (GET_CODE (p) == CALL_INSN)
{
i1 = loop_call_insn_hoist (loop, PATTERN (p));
/* Because the USAGE information potentially
contains objects other than hard registers
we need to copy it. */
if (CALL_INSN_FUNCTION_USAGE (p))
CALL_INSN_FUNCTION_USAGE (i1)
= copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
}
else if (count == m->consec && m->move_insn_first)
{
rtx seq;
/* The SET_SRC might not be invariant, so we must
use the REG_EQUAL note. */
start_sequence ();
emit_move_insn (m->insert_temp ? newreg : m->set_dest,
m->set_src);
seq = get_insns ();
end_sequence ();
add_label_notes (m->set_src, seq);
i1 = loop_insn_hoist (loop, seq);
if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
: REG_EQUAL, m->set_src);
}
else if (m->insert_temp)
{
rtx *reg_map2 = xcalloc (REGNO (newreg),
sizeof(rtx));
reg_map2 [m->regno] = newreg;
i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
replace_regs (i1, reg_map2, REGNO (newreg), 1);
free (reg_map2);
}
else
i1 = loop_insn_hoist (loop, PATTERN (p));
if (REG_NOTES (i1) == 0)
{
REG_NOTES (i1) = REG_NOTES (p);
REG_NOTES (p) = NULL;
/* If there is a REG_EQUAL note present whose value
is not loop invariant, then delete it, since it
may cause problems with later optimization passes.
It is possible for cse to create such notes
like this as a result of record_jump_cond. */
if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
&& ! loop_invariant_p (loop, XEXP (temp, 0)))
remove_note (i1, temp);
}
if (new_start == 0)
new_start = i1;
if (loop_dump_stream)
fprintf (loop_dump_stream, " moved to %d",
INSN_UID (i1));
/* If library call, now fix the REG_NOTES that contain
insn pointers, namely REG_LIBCALL on FIRST
and REG_RETVAL on I1. */
if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
{
XEXP (temp, 0) = first;
temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
XEXP (temp, 0) = i1;
}
temp = p;
delete_insn (p);
p = NEXT_INSN (p);
/* simplify_giv_expr expects that it can walk the insns
at m->insn forwards and see this old sequence we are
tossing here. delete_insn does preserve the next
pointers, but when we skip over a NOTE we must fix
it up. Otherwise that code walks into the non-deleted
insn stream. */
while (p && GET_CODE (p) == NOTE)
p = NEXT_INSN (temp) = NEXT_INSN (p);
if (m->insert_temp)
{
rtx seq;
/* Replace the original insn with a move from
our newly created temp. */
start_sequence ();
emit_move_insn (m->set_dest, newreg);
seq = get_insns ();
end_sequence ();
emit_insn_before (seq, p);
}
}
/* The more regs we move, the less we like moving them. */
threshold -= 3;
}
m->done = 1;
if (!m->insert_temp)
{
/* Any other movable that loads the same register
MUST be moved. */
already_moved[regno] = 1;
/* This reg has been moved out of one loop. */
regs->array[regno].moved_once = 1;
/* The reg set here is now invariant. */
if (! m->partial)
{
int i;
for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
regs->array[regno+i].set_in_loop = 0;
}
/* Change the length-of-life info for the register
to say it lives at least the full length of this loop.
This will help guide optimizations in outer loops. */
if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
/* This is the old insn before all the moved insns.
We can't use the moved insn because it is out of range
in uid_luid. Only the old insns have luids. */
REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
REGNO_LAST_UID (regno) = INSN_UID (loop_end);
}
/* Combine with this moved insn any other matching movables. */
if (! m->partial)
for (m1 = movables->head; m1; m1 = m1->next)
if (m1->match == m)
{
rtx temp;
/* Schedule the reg loaded by M1
for replacement so that it shares the reg of M.
If the modes differ (only possible in restricted
circumstances), make a SUBREG.
Note this assumes that the target dependent files
treat REG and SUBREG equally, including within
GO_IF_LEGITIMATE_ADDRESS and in all the
predicates, since we never verify that replacing the
original register with a SUBREG results in a
recognizable insn. */
if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest))
reg_map[m1->regno] = m->set_dest;
else
reg_map[m1->regno]
= gen_lowpart_common (GET_MODE (m1->set_dest),
m->set_dest);
/* Get rid of the matching insn
and prevent further processing of it. */
m1->done = 1;
/* If library call, delete all insns. */
if ((temp = find_reg_note (m1->insn, REG_RETVAL,
NULL_RTX)))
delete_insn_chain (XEXP (temp, 0), m1->insn);
else
delete_insn (m1->insn);
/* Any other movable that loads the same register
MUST be moved. */
already_moved[m1->regno] = 1;
/* The reg merged here is now invariant,
if the reg it matches is invariant. */
if (! m->partial)
{
int i;
for (i = 0;
i < LOOP_REGNO_NREGS (regno, m1->set_dest);
i++)
regs->array[m1->regno+i].set_in_loop = 0;
}
}
}
else if (loop_dump_stream)
fprintf (loop_dump_stream, "not desirable");
}
else if (loop_dump_stream && !m->match)
fprintf (loop_dump_stream, "not safe");
if (loop_dump_stream)
fprintf (loop_dump_stream, "\n");
}
if (new_start == 0)
new_start = loop_start;
/* Go through all the instructions in the loop, making
all the register substitutions scheduled in REG_MAP. */
for (p = new_start; p != loop_end; p = NEXT_INSN (p))
if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
|| GET_CODE (p) == CALL_INSN)
{
replace_regs (PATTERN (p), reg_map, nregs, 0);
replace_regs (REG_NOTES (p), reg_map, nregs, 0);
INSN_CODE (p) = -1;
}
/* Clean up. */
free (reg_map);
free (already_moved);
}
/* Append the movable M to the end of the list MOVABLES.

   Only the list's head/last pointers and the previous tail's `next'
   field are written; M's own `next' field is left as the caller
   set it.  */

static void
loop_movables_add (struct loop_movables *movables, struct movable *m)
{
  struct movable **link;

  /* An empty list is entered through its head; otherwise M is hung
     off the element that is currently last.  */
  link = movables->head ? &movables->last->next : &movables->head;
  *link = m;

  movables->last = m;
}
/* Release every movable on the list MOVABLES and leave the list empty.

   Only the list elements are freed; the MOVABLES structure itself is
   not.  The head and last pointers are cleared afterwards so that they
   do not dangle into freed storage: loop_movables_add decides whether
   the list is empty by testing the head, and would otherwise write
   through the stale last pointer.  */

static void
loop_movables_free (struct loop_movables *movables)
{
  struct movable *m;
  struct movable *m_next;

  for (m = movables->head; m; m = m_next)
    {
      /* Fetch the successor before M is released.  */
      m_next = m->next;
      free (m);
    }

  movables->head = 0;
  movables->last = 0;
}
#if 0
/* Walk X and install ADDR as the address of every MEM reached in it.
   REG is the address each such MEM is expected to hold beforehand;
   a MEM holding any other address aborts.  This code is compiled
   out.  */

static void
replace_call_address (rtx x, rtx reg, rtx addr)
{
  enum rtx_code code;
  const char *fmt;
  int i;

  if (x == 0)
    return;

  code = GET_CODE (x);

  /* These codes are not scanned any further.  */
  if (code == PC || code == CC0 || code == CONST_INT
      || code == CONST_DOUBLE || code == CONST || code == SYMBOL_REF
      || code == LABEL_REF || code == REG)
    return;

  /* Short cuts for the very common cases: only operand 1 of a SET
     and operand 0 of a CALL are scanned.  */
  if (code == SET)
    {
      replace_call_address (XEXP (x, 1), reg, addr);
      return;
    }
  if (code == CALL)
    {
      replace_call_address (XEXP (x, 0), reg, addr);
      return;
    }

  if (code == MEM)
    {
      /* If this MEM uses a reg other than the one we expected,
         something is wrong.  */
      if (XEXP (x, 0) != reg)
        abort ();
      XEXP (x, 0) = addr;
      return;
    }

  /* Anything else: recurse into every expression and expression-vector
     operand, last operand first.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code); --i >= 0;)
    {
      if (fmt[i] == 'e')
        {
          replace_call_address (XEXP (x, i), reg, addr);
          continue;
        }

      if (fmt[i] == 'E')
        {
          int elt = 0;

          while (elt < XVECLEN (x, i))
            {
              replace_call_address (XVECEXP (x, i, elt), reg, addr);
              elt++;
            }
        }
    }
}
#endif
/* Count the memory references inside the rtx X whose address varies
   within LOOP.

   A MEM contributes one unless loop_invariant_p returns exactly 1 for
   its address, and the address itself is then scanned for further
   MEMs.  A null X counts as zero.  */

static int
count_nonfixed_reads (const struct loop *loop, rtx x)
{
  enum rtx_code code;
  const char *fmt;
  int total = 0;
  int i;

  if (x == 0)
    return 0;

  code = GET_CODE (x);
  switch (code)
    {
    case MEM:
      return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
              + count_nonfixed_reads (loop, XEXP (x, 0)));

    /* None of these is scanned for memory references.  */
    case REG:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST:
    case CONST_DOUBLE:
    case CONST_INT:
    case CC0:
    case PC:
      return 0;

    default:
      break;
    }

  /* Sum over every expression and expression-vector operand, last
     operand first.  */
  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code); --i >= 0;)
    switch (fmt[i])
      {
      case 'e':
        total += count_nonfixed_reads (loop, XEXP (x, i));
        break;

      case 'E':
        {
          int elt;

          for (elt = 0; elt < XVECLEN (x, i); elt++)
            total += count_nonfixed_reads (loop, XVECEXP (x, i, elt));
        }
        break;

      default:
        break;
      }

  return total;
}
/* Scan LOOP once and record summary facts about it.

   The summary fields of LOOP_INFO (LOOP) are reset, and then the insns
   from the one after LOOP->start through LOOP->end are walked, setting
   `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
   `has_multiple_exit_targets', `unknown_address_altered' and
   `first_loop_store_insn' as the corresponding insns are seen.
   `pre_header_has_call' is set if a CALL_INSN is met while walking
   backwards from LOOP->start to the nearest CODE_LABEL.  LOOP->level
   becomes one plus the number of NOTE_INSN_LOOP_BEG notes found inside
   the loop.

   Stores are reported to note_addr_stored and, when it is safe to do
   so, every insn is handed to insert_loop_mem; those callbacks are
   defined elsewhere and presumably maintain `store_mems',
   `num_mem_sets', `unknown_constant_address_altered' and the `mems'
   array.  */

static void
prescan_loop (struct loop *loop)
{
  rtx insn;
  struct loop_info *loop_info = LOOP_INFO (loop);
  rtx start = loop->start;
  rtx end = loop->end;
  /* The label after END.  Jumping here is just like falling off the
     end of the loop.  We use next_nonnote_insn instead of next_label
     as a hedge against the (pathological) case where some actual insn
     might end up between the two.  */
  rtx exit_target = next_nonnote_insn (end);

  loop_info->has_indirect_jump = indirect_jump_in_function;
  loop_info->pre_header_has_call = 0;
  loop_info->has_call = 0;
  loop_info->has_nonconst_call = 0;
  loop_info->has_prefetch = 0;
  loop_info->has_volatile = 0;
  loop_info->has_tablejump = 0;
  loop_info->has_multiple_exit_targets = 0;
  loop->level = 1;

  loop_info->unknown_address_altered = 0;
  loop_info->unknown_constant_address_altered = 0;
  loop_info->store_mems = NULL_RTX;
  loop_info->first_loop_store_insn = NULL_RTX;
  loop_info->mems_idx = 0;
  loop_info->num_mem_sets = 0;
  /* If loop opts run twice, this was set on 1st pass for 2nd.  */
  loop_info->preconditioned = NOTE_PRECONDITIONED (end);

  /* Look backwards from the loop start, stopping at the first
     CODE_LABEL, for a call in the code leading into the loop.  */
  for (insn = start; insn && GET_CODE (insn) != CODE_LABEL;
       insn = PREV_INSN (insn))
    {
      if (GET_CODE (insn) == CALL_INSN)
        {
          loop_info->pre_header_has_call = 1;
          break;
        }
    }

  for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
       insn = NEXT_INSN (insn))
    {
      switch (GET_CODE (insn))
        {
        case NOTE:
          /* Count number of loops contained in this one.  */
          if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
            loop->level++;
          break;

        case CALL_INSN:
          if (! CONST_OR_PURE_CALL_P (insn))
            {
              loop_info->unknown_address_altered = 1;
              loop_info->has_nonconst_call = 1;
            }
          else if (pure_call_p (insn))
            loop_info->has_nonconst_call = 1;
          loop_info->has_call = 1;
          if (can_throw_internal (insn))
            loop_info->has_multiple_exit_targets = 1;

          /* Calls initializing constant objects have CLOBBER of MEM /u in
             the attached FUNCTION_USAGE expression list, not accounted for
             by the code above.  We should note these to avoid missing
             dependencies in later references.  */
          {
            rtx fusage_entry;

            for (fusage_entry = CALL_INSN_FUNCTION_USAGE (insn);
                 fusage_entry; fusage_entry = XEXP (fusage_entry, 1))
              {
                rtx fusage = XEXP (fusage_entry, 0);

                if (GET_CODE (fusage) == CLOBBER
                    && GET_CODE (XEXP (fusage, 0)) == MEM
                    && RTX_UNCHANGING_P (XEXP (fusage, 0)))
                  {
                    note_stores (fusage, note_addr_stored, loop_info);
                    if (! loop_info->first_loop_store_insn
                        && loop_info->store_mems)
                      loop_info->first_loop_store_insn = insn;
                  }
              }
          }
          break;

        case JUMP_INSN:
          if (! loop_info->has_multiple_exit_targets)
            {
              rtx set = pc_set (insn);

              if (set)
                {
                  rtx src = SET_SRC (set);
                  rtx label1, label2;

                  /* A conditional jump has two possible targets;
                     anything else is treated as having one.  */
                  if (GET_CODE (src) == IF_THEN_ELSE)
                    {
                      label1 = XEXP (src, 1);
                      label2 = XEXP (src, 2);
                    }
                  else
                    {
                      label1 = src;
                      label2 = NULL_RTX;
                    }

                  /* Examine LABEL1, then LABEL2 if there is one.  */
                  do
                    {
                      if (label1 && label1 != pc_rtx)
                        {
                          if (GET_CODE (label1) != LABEL_REF)
                            {
                              /* Something tricky.  */
                              loop_info->has_multiple_exit_targets = 1;
                              break;
                            }
                          else if (XEXP (label1, 0) != exit_target
                                   && LABEL_OUTSIDE_LOOP_P (label1))
                            {
                              /* A jump outside the current loop.  */
                              loop_info->has_multiple_exit_targets = 1;
                              break;
                            }
                        }

                      label1 = label2;
                      label2 = NULL_RTX;
                    }
                  while (label1);
                }
              else
                {
                  /* A return, or something tricky.  */
                  loop_info->has_multiple_exit_targets = 1;
                }
            }
          /* Fall through.  */

        case INSN:
          if (volatile_refs_p (PATTERN (insn)))
            loop_info->has_volatile = 1;

          if (GET_CODE (insn) == JUMP_INSN
              && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
                  || GET_CODE (PATTERN (insn)) == ADDR_VEC))
            loop_info->has_tablejump = 1;

          note_stores (PATTERN (insn), note_addr_stored, loop_info);
          if (! loop_info->first_loop_store_insn && loop_info->store_mems)
            loop_info->first_loop_store_insn = insn;

          if (flag_non_call_exceptions && can_throw_internal (insn))
            loop_info->has_multiple_exit_targets = 1;
          break;

        default:
          break;
        }
    }

  /* Now, rescan the loop, setting up the LOOP_MEMS array.  */
  if (/* An exception thrown by a called function might land us
         anywhere.  */
      ! loop_info->has_nonconst_call
      /* We don't want loads for MEMs moved to a location before the
         one at which their stack memory becomes allocated.  (Note
         that this is not a problem for malloc, etc., since those
         require actual function calls.)  */
      && ! current_function_calls_alloca
      /* There are ways to leave the loop other than falling off the
         end.  */
      && ! loop_info->has_multiple_exit_targets)
    for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
         insn = NEXT_INSN (insn))
      for_each_rtx (&insn, insert_loop_mem, loop_info);

  /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
     that loop_invariant_p and load_mems can use true_dependence
     to determine what is really clobbered.  */
  if (loop_info->unknown_address_altered)
    {
      rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);

      loop_info->store_mems
        = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
    }
  if (loop_info->unknown_constant_address_altered)
    {
      rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);

      /* Same as above, but the placeholder MEM is marked unchanging.  */
      RTX_UNCHANGING_P (mem) = 1;
      loop_info->store_mems
        = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
    }
}
/* Mark as invalid the loop that uid_loop records for LABEL, together
   with every loop enclosing it.  */

static void
invalidate_loops_containing_label (rtx label)
{
  struct loop *enclosing = uid_loop[INSN_UID (label)];

  /* Walk outwards through the chain of `outer' links.  */
  while (enclosing)
    {
      enclosing->invalid = 1;
      enclosing = enclosing->outer;
    }
}
/* Scan the insn chain starting at F looking for loops. Record the start and
end of each loop in LOOPS->array: an entry is filled in for every
NOTE_INSN_LOOP_BEG with the loop's number, its start note and its enclosing
loop, and the cont, vtop and end notes are stored as they are met. uid_loop
is set for every insn to the innermost loop open at that insn.
Also mark as invalid any loops that contain a setjmp, a label from
forced_labels, an exception handler label, or a label named by a REG_LABEL
note of an INSN or CALL_INSN. Every JUMP_INSN pattern is passed to
mark_loop_jump so that loops branched to from outside can be dealt with
there.
Finally, blocks that end in an unconditional branch out of the loop and are
skipped by a conditional branch are moved out of the loop (see below). */
static void
find_and_verify_loops (rtx f, struct loops *loops)
{
rtx insn;
rtx label;
int num_loops;
struct loop *current_loop;
struct loop *next_loop;
struct loop *loop;
/* NOTE(review): this value is overwritten with 0 below before it is
ever read. */
num_loops = loops->num;
compute_luids (f, NULL_RTX, 0);
/* If there are jumps to undefined labels,
treat them as jumps out of any/all loops.
This also avoids writing past end of tables when there are no loops. */
uid_loop[0] = NULL;
/* Find boundaries of loops, mark which loops are contained within
loops, and invalidate loops that have setjmp. */
num_loops = 0;
current_loop = NULL;
for (insn = f; insn; insn = NEXT_INSN (insn))
{
if (GET_CODE (insn) == NOTE)
switch (NOTE_LINE_NUMBER (insn))
{
case NOTE_INSN_LOOP_BEG:
/* Open a new loop nested inside the current one. */
next_loop = loops->array + num_loops;
next_loop->num = num_loops;
num_loops++;
next_loop->start = insn;
next_loop->outer = current_loop;
current_loop = next_loop;
break;
case NOTE_INSN_LOOP_CONT:
/* NOTE(review): unlike the LOOP_END case below, current_loop is not
checked for null here or in the LOOP_VTOP case. */
current_loop->cont = insn;
break;
case NOTE_INSN_LOOP_VTOP:
current_loop->vtop = insn;
break;
case NOTE_INSN_LOOP_END:
if (! current_loop)
abort ();
/* Close the current loop and return to the enclosing one. */
current_loop->end = insn;
current_loop = current_loop->outer;
break;
default:
break;
}
if (GET_CODE (insn) == CALL_INSN
&& find_reg_note (insn, REG_SETJMP, NULL))
{
/* In this case, we must invalidate our current loop and any
enclosing loop. */
for (loop = current_loop; loop; loop = loop->outer)
{
loop->invalid = 1;
if (loop_dump_stream)
fprintf (loop_dump_stream,
"\nLoop at %d ignored due to setjmp.\n",
INSN_UID (loop->start));
}
}
/* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
enclosing loop, but this doesn't matter. */
uid_loop[INSN_UID (insn)] = current_loop;
}
/* Any loop containing a label used in an initializer must be invalidated,
because it can be jumped into from anywhere. */
for (label = forced_labels; label; label = XEXP (label, 1))
invalidate_loops_containing_label (XEXP (label, 0));
/* Any loop containing a label used for an exception handler must be
invalidated, because it can be jumped into from anywhere. */
for_each_eh_label (invalidate_loops_containing_label);
/* Now scan all insn's in the function. If any JUMP_INSN branches into a
loop that it is not contained within, that loop is marked invalid.
If any INSN or CALL_INSN uses a label's address, then the loop containing
that label is marked invalid, because it could be jumped into from
anywhere.
Also look for blocks of code ending in an unconditional branch that
exits the loop. If such a block is surrounded by a conditional
branch around the block, move the block elsewhere (see below) and
invert the jump to point to the code block. This may eliminate a
label in our loop and will simplify processing by both us and a
possible second cse pass. */
for (insn = f; insn; insn = NEXT_INSN (insn))
if (INSN_P (insn))
{
/* The innermost loop recorded for INSN by the first scan above. */
struct loop *this_loop = uid_loop[INSN_UID (insn)];
if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
{
rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
if (note)
invalidate_loops_containing_label (XEXP (note, 0));
}
/* Everything below applies to jumps only. */
if (GET_CODE (insn) != JUMP_INSN)
continue;
mark_loop_jump (PATTERN (insn), this_loop);
/* See if this is an unconditional branch outside the loop. */
if (this_loop
&& (GET_CODE (PATTERN (insn)) == RETURN
|| (any_uncondjump_p (insn)
&& onlyjump_p (insn)
&& (uid_loop[INSN_UID (JUMP_LABEL (insn))]
!= this_loop)))
&& get_max_uid () < max_uid_for_loop)
{
rtx p;
rtx our_next = next_real_insn (insn);
rtx last_insn_to_move = NEXT_INSN (insn);
struct loop *dest_loop;
/* Left non-null below whenever the block must not be moved. */
struct loop *outer_loop = NULL;
/* Go backwards until we reach the start of the loop, a label,
or a JUMP_INSN. */
for (p = PREV_INSN (insn);
GET_CODE (p) != CODE_LABEL
&& ! (GET_CODE (p) == NOTE
&& NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
&& GET_CODE (p) != JUMP_INSN;
p = PREV_INSN (p))
;
/* Check for the case where we have a jump to an inner nested
loop, and do not perform the optimization in that case. */
if (JUMP_LABEL (insn))
{
dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
if (dest_loop)
{
/* OUTER_LOOP ends up non-null exactly when THIS_LOOP is DEST_LOOP
or one of the loops enclosing it. */
for (outer_loop = dest_loop; outer_loop;
outer_loop = outer_loop->outer)
if (outer_loop == this_loop)
break;
}
}
/* Make sure that the target of P is within the current loop. */
if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
&& uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
outer_loop = this_loop;
/* If we stopped on a JUMP_INSN to the next insn after INSN,
we have a block of code to try to move.
We look backward and then forward from the target of INSN
to find a BARRIER at the same loop depth as the target.
If we find such a BARRIER, we make a new label for the start
of the block, invert the jump in P and point it to that label,
and move the block of code to the spot we found. */
if (! outer_loop
&& GET_CODE (p) == JUMP_INSN
&& JUMP_LABEL (p) != 0
/* Just ignore jumps to labels that were never emitted.
These always indicate compilation errors. */
&& INSN_UID (JUMP_LABEL (p)) != 0
&& any_condjump_p (p) && onlyjump_p (p)
&& next_real_insn (JUMP_LABEL (p)) == our_next
/* If it's not safe to move the sequence, then we
mustn't try. */
&& insns_safe_to_move_p (p, NEXT_INSN (insn),
&last_insn_to_move))
{
/* With no JUMP_LABEL (e.g. a RETURN), the last insn of the function
stands in as the target. */
rtx target
= JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
struct loop *target_loop = uid_loop[INSN_UID (target)];
rtx loc, loc2;
rtx tmp;
/* Search for possible garbage past the conditional jumps
and look for the last barrier. */
for (tmp = last_insn_to_move;
tmp && GET_CODE (tmp) != CODE_LABEL; tmp = NEXT_INSN (tmp))
if (GET_CODE (tmp) == BARRIER)
last_insn_to_move = tmp;
/* First search backwards from TARGET for a usable BARRIER. */
for (loc = target; loc; loc = PREV_INSN (loc))
if (GET_CODE (loc) == BARRIER
/* Don't move things inside a tablejump. */
&& ((loc2 = next_nonnote_insn (loc)) == 0
|| GET_CODE (loc2) != CODE_LABEL
|| (loc2 = next_nonnote_insn (loc2)) == 0
|| GET_CODE (loc2) != JUMP_INSN
|| (GET_CODE (PATTERN (loc2)) != ADDR_VEC
&& GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
&& uid_loop[INSN_UID (loc)] == target_loop)
break;
/* Failing that, search forwards from TARGET with the same test. */
if (loc == 0)
for (loc = target; loc; loc = NEXT_INSN (loc))
if (GET_CODE (loc) == BARRIER
/* Don't move things inside a tablejump. */
&& ((loc2 = next_nonnote_insn (loc)) == 0
|| GET_CODE (loc2) != CODE_LABEL
|| (loc2 = next_nonnote_insn (loc2)) == 0
|| GET_CODE (loc2) != JUMP_INSN
|| (GET_CODE (PATTERN (loc2)) != ADDR_VEC
&& GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
&& uid_loop[INSN_UID (loc)] == target_loop)
break;
if (loc)
{
rtx cond_label = JUMP_LABEL (p);
rtx new_label = get_label_after (p);
/* Ensure our label doesn't go away. */
LABEL_NUSES (cond_label)++;
/* Verify that uid_loop is large enough and that
we can invert P. */
if (invert_jump (p, new_label, 1))
{
rtx q, r;
/* If no suitable BARRIER was found, create a suitable
one before TARGET. Since TARGET is a fall through
path, we'll need to insert a jump around our block
and add a BARRIER before TARGET.
This creates an extra unconditional jump outside
the loop. However, the benefits of removing rarely
executed instructions from inside the loop usually
outweighs the cost of the extra unconditional jump
outside the loop.
NOTE(review): this test sits inside the enclosing `if (loc)' and LOC is
not changed in between, so as written this branch cannot execute. */
if (loc == 0)
{
rtx temp;
temp = gen_jump (JUMP_LABEL (insn));
temp = emit_jump_insn_before (temp, target);
JUMP_LABEL (temp) = JUMP_LABEL (insn);
LABEL_NUSES (JUMP_LABEL (insn))++;
loc = emit_barrier_before (target);
}
/* Include the BARRIER after INSN and copy the
block after LOC. */
if (squeeze_notes (&new_label, &last_insn_to_move))
abort ();
reorder_insns (new_label, last_insn_to_move, loc);
/* All those insns are now in TARGET_LOOP. */
for (q = new_label;
q != NEXT_INSN (last_insn_to_move);
q = NEXT_INSN (q))
uid_loop[INSN_UID (q)] = target_loop;
/* The label jumped to by INSN is no longer a loop
exit. Unless INSN does not have a label (e.g.,
it is a RETURN insn), search loop->exit_labels
to find its label_ref, and remove it. Also turn
off LABEL_OUTSIDE_LOOP_P bit. */
if (JUMP_LABEL (insn))
{
/* Q trails R so that the matching entry can be unlinked. */
for (q = 0, r = this_loop->exit_labels;
r;
q = r, r = LABEL_NEXTREF (r))
if (XEXP (r, 0) == JUMP_LABEL (insn))
{
LABEL_OUTSIDE_LOOP_P (r) = 0;
if (q)
LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
else
this_loop->exit_labels = LABEL_NEXTREF (r);
break;
}
/* One exit fewer for THIS_LOOP and each enclosing loop up to, but
not including, TARGET_LOOP. */
for (loop = this_loop; loop && loop != target_loop;
loop = loop->outer)
loop->exit_count--;
/* If we didn't find it, then something is
wrong. */
if (! r)
abort ();
}
/* P is now a jump outside the loop, so it must be put
in loop->exit_labels, and marked as such.
The easiest way to do this is to just call
mark_loop_jump again for P. */
mark_loop_jump (PATTERN (p), this_loop);
/* If INSN now jumps to the insn after it,
delete INSN. */
if (JUMP_LABEL (insn) != 0
&& (next_real_insn (JUMP_LABEL (insn))
== next_real_insn (insn)))
delete_related_insns (insn);
}
/* Continue the loop after where the conditional
branch used to jump, since the only branch insn
in the block (if it still remains) is an inter-loop
branch and hence needs no processing. */
insn = NEXT_INSN (cond_label);
/* Drop the extra use taken above; delete the label if that was the
last one. */
if (--LABEL_NUSES (cond_label) == 0)
delete_related_insns (cond_label);
/* This loop will be continued with NEXT_INSN (insn). */
insn = PREV_INSN (insn);
}
}
}
}
}
/* If any label in X jumps to a loop different from LOOP and any of the
loops it is contained in, mark the target loop invalid.
For speed, we assume that X is part of a pattern of a JUMP_INSN. */
static void
mark_loop_jump (rtx x, struct loop *loop)
{
struct loop