/* Classes for modeling the state of memory.
Copyright (C) 2019-2022 Free Software Foundation, Inc.
Contributed by David Malcolm <dmalcolm@redhat.com>.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#define INCLUDE_MEMORY
#include "system.h"
#include "coretypes.h"
#include "make-unique.h"
#include "tree.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "diagnostic-core.h"
#include "graphviz.h"
#include "options.h"
#include "cgraph.h"
#include "tree-dfa.h"
#include "stringpool.h"
#include "convert.h"
#include "target.h"
#include "fold-const.h"
#include "tree-pretty-print.h"
#include "diagnostic-color.h"
#include "diagnostic-metadata.h"
#include "bitmap.h"
#include "selftest.h"
#include "analyzer/analyzer.h"
#include "analyzer/analyzer-logging.h"
#include "ordered-hash-map.h"
#include "options.h"
#include "cgraph.h"
#include "cfg.h"
#include "analyzer/supergraph.h"
#include "sbitmap.h"
#include "analyzer/call-string.h"
#include "analyzer/program-point.h"
#include "analyzer/store.h"
#include "analyzer/region-model.h"
#include "analyzer/constraint-manager.h"
#include "diagnostic-event-id.h"
#include "analyzer/sm.h"
#include "diagnostic-event-id.h"
#include "analyzer/sm.h"
#include "analyzer/pending-diagnostic.h"
#include "analyzer/region-model-reachability.h"
#include "analyzer/analyzer-selftests.h"
#include "analyzer/program-state.h"
#include "analyzer/call-summary.h"
#include "stor-layout.h"
#include "attribs.h"
#include "tree-object-size.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-ssa-operands.h"
#include "ssa-iterators.h"
#include "calls.h"
#include "is-a.h"
#include "gcc-rich-location.h"
#if ENABLE_ANALYZER
namespace ana {
/* Dump T to PP in language-independent form, for debugging/logging/dumping
purposes. */
void
dump_tree (pretty_printer *pp, tree t)
{
dump_generic_node (pp, t, 0, TDF_SLIM, 0);
}
/* Dump T to PP in language-independent form in quotes, for
debugging/logging/dumping purposes. */
void
dump_quoted_tree (pretty_printer *pp, tree t)
{
pp_begin_quote (pp, pp_show_color (pp));
dump_tree (pp, t);
pp_end_quote (pp, pp_show_color (pp));
}
/* Equivalent to pp_printf (pp, "%qT", t), to avoid nesting pp_printf
calls within other pp_printf calls.
default_tree_printer handles 'T' and some other codes by calling
dump_generic_node (pp, t, 0, TDF_SLIM, 0);
dump_generic_node calls pp_printf in various places, leading to
garbled output.
Ideally pp_printf could be made to be reentrant, but in the meantime
this function provides a workaround. */
void
print_quoted_type (pretty_printer *pp, tree t)
{
pp_begin_quote (pp, pp_show_color (pp));
dump_generic_node (pp, t, 0, TDF_SLIM, 0);
pp_end_quote (pp, pp_show_color (pp));
}
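/* Usage sketch (hypothetical caller): to emit literal text followed by
a quoted type without nesting pp_printf calls, emit the pieces
separately:

pp_string (pp, "invalid cast to ");
print_quoted_type (pp, type);

rather than pp_printf (pp, "invalid cast to %qT", type), which would
re-enter pp_printf via dump_generic_node. */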
/* class region_to_value_map. */
/* Assignment operator for region_to_value_map. */
region_to_value_map &
region_to_value_map::operator= (const region_to_value_map &other)
{
m_hash_map.empty ();
for (auto iter : other.m_hash_map)
{
const region *reg = iter.first;
const svalue *sval = iter.second;
m_hash_map.put (reg, sval);
}
return *this;
}
/* Equality operator for region_to_value_map. */
bool
region_to_value_map::operator== (const region_to_value_map &other) const
{
if (m_hash_map.elements () != other.m_hash_map.elements ())
return false;
for (auto iter : *this)
{
const region *reg = iter.first;
const svalue *sval = iter.second;
const svalue * const *other_slot = other.get (reg);
if (other_slot == NULL)
return false;
if (sval != *other_slot)
return false;
}
return true;
}
/* Dump this object to PP. */
void
region_to_value_map::dump_to_pp (pretty_printer *pp, bool simple,
bool multiline) const
{
auto_vec<const region *> regs;
for (iterator iter = begin (); iter != end (); ++iter)
regs.safe_push ((*iter).first);
regs.qsort (region::cmp_ptr_ptr);
if (multiline)
pp_newline (pp);
else
pp_string (pp, " {");
unsigned i;
const region *reg;
FOR_EACH_VEC_ELT (regs, i, reg)
{
if (multiline)
pp_string (pp, " ");
else if (i > 0)
pp_string (pp, ", ");
reg->dump_to_pp (pp, simple);
pp_string (pp, ": ");
const svalue *sval = *get (reg);
sval->dump_to_pp (pp, true);
if (multiline)
pp_newline (pp);
}
if (!multiline)
pp_string (pp, "}");
}
/* Dump this object to stderr. */
DEBUG_FUNCTION void
region_to_value_map::dump (bool simple) const
{
pretty_printer pp;
pp_format_decoder (&pp) = default_tree_printer;
pp_show_color (&pp) = pp_show_color (global_dc->printer);
pp.buffer->stream = stderr;
dump_to_pp (&pp, simple, true);
pp_newline (&pp);
pp_flush (&pp);
}
/* Attempt to merge THIS with OTHER, writing the result
to OUT.
For now, write (region, value) mappings that are in common between THIS
and OTHER to OUT, effectively taking the intersection, rather than
rejecting differences. */
bool
region_to_value_map::can_merge_with_p (const region_to_value_map &other,
region_to_value_map *out) const
{
for (auto iter : *this)
{
const region *iter_reg = iter.first;
const svalue *iter_sval = iter.second;
const svalue * const * other_slot = other.get (iter_reg);
if (other_slot)
if (iter_sval == *other_slot)
out->put (iter_reg, iter_sval);
}
return true;
}
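/* Worked example (illustrative): merging
this:  {REG_A: SVAL_1, REG_B: SVAL_2}
other: {REG_A: SVAL_1, REG_B: SVAL_3}
writes only {REG_A: SVAL_1} to OUT: the mappings for REG_B disagree
and are dropped, so OUT is the intersection of the agreeing
(region, value) pairs. */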
/* Purge any state involving SVAL. */
void
region_to_value_map::purge_state_involving (const svalue *sval)
{
auto_vec<const region *> to_purge;
for (auto iter : *this)
{
const region *iter_reg = iter.first;
const svalue *iter_sval = iter.second;
if (iter_reg->involves_p (sval) || iter_sval->involves_p (sval))
to_purge.safe_push (iter_reg);
}
for (auto iter : to_purge)
m_hash_map.remove (iter);
}
/* class region_model. */
/* Ctor for region_model: construct an "empty" model. */
region_model::region_model (region_model_manager *mgr)
: m_mgr (mgr), m_store (), m_current_frame (NULL),
m_dynamic_extents ()
{
m_constraints = new constraint_manager (mgr);
}
/* region_model's copy ctor. */
region_model::region_model (const region_model &other)
: m_mgr (other.m_mgr), m_store (other.m_store),
m_constraints (new constraint_manager (*other.m_constraints)),
m_current_frame (other.m_current_frame),
m_dynamic_extents (other.m_dynamic_extents)
{
}
/* region_model's dtor. */
region_model::~region_model ()
{
delete m_constraints;
}
/* region_model's assignment operator. */
region_model &
region_model::operator= (const region_model &other)
{
/* m_mgr is const. */
gcc_assert (m_mgr == other.m_mgr);
m_store = other.m_store;
delete m_constraints;
m_constraints = new constraint_manager (*other.m_constraints);
m_current_frame = other.m_current_frame;
m_dynamic_extents = other.m_dynamic_extents;
return *this;
}
/* Equality operator for region_model.
Amongst other things this directly compares the stores and the constraint
managers, so for this to be meaningful both this and OTHER should
have been canonicalized. */
bool
region_model::operator== (const region_model &other) const
{
/* We can only compare instances that use the same manager. */
gcc_assert (m_mgr == other.m_mgr);
if (m_store != other.m_store)
return false;
if (*m_constraints != *other.m_constraints)
return false;
if (m_current_frame != other.m_current_frame)
return false;
if (m_dynamic_extents != other.m_dynamic_extents)
return false;
gcc_checking_assert (hash () == other.hash ());
return true;
}
/* Generate a hash value for this region_model. */
hashval_t
region_model::hash () const
{
hashval_t result = m_store.hash ();
result ^= m_constraints->hash ();
return result;
}
/* Dump a representation of this model to PP, showing the
stack, the store, and any constraints.
Use SIMPLE to control how svalues and regions are printed. */
void
region_model::dump_to_pp (pretty_printer *pp, bool simple,
bool multiline) const
{
/* Dump stack. */
pp_printf (pp, "stack depth: %i", get_stack_depth ());
if (multiline)
pp_newline (pp);
else
pp_string (pp, " {");
for (const frame_region *iter_frame = m_current_frame; iter_frame;
iter_frame = iter_frame->get_calling_frame ())
{
if (multiline)
pp_string (pp, " ");
else if (iter_frame != m_current_frame)
pp_string (pp, ", ");
pp_printf (pp, "frame (index %i): ", iter_frame->get_index ());
iter_frame->dump_to_pp (pp, simple);
if (multiline)
pp_newline (pp);
}
if (!multiline)
pp_string (pp, "}");
/* Dump store. */
if (!multiline)
pp_string (pp, ", {");
m_store.dump_to_pp (pp, simple, multiline,
m_mgr->get_store_manager ());
if (!multiline)
pp_string (pp, "}");
/* Dump constraints. */
pp_string (pp, "constraint_manager:");
if (multiline)
pp_newline (pp);
else
pp_string (pp, " {");
m_constraints->dump_to_pp (pp, multiline);
if (!multiline)
pp_string (pp, "}");
/* Dump sizes of dynamic regions, if any are known. */
if (!m_dynamic_extents.is_empty ())
{
pp_string (pp, "dynamic_extents:");
m_dynamic_extents.dump_to_pp (pp, simple, multiline);
}
}
/* Dump a representation of this model to FILE. */
void
region_model::dump (FILE *fp, bool simple, bool multiline) const
{
pretty_printer pp;
pp_format_decoder (&pp) = default_tree_printer;
pp_show_color (&pp) = pp_show_color (global_dc->printer);
pp.buffer->stream = fp;
dump_to_pp (&pp, simple, multiline);
pp_newline (&pp);
pp_flush (&pp);
}
/* Dump a multiline representation of this model to stderr. */
DEBUG_FUNCTION void
region_model::dump (bool simple) const
{
dump (stderr, simple, true);
}
/* Dump a multiline representation of this model to stderr. */
DEBUG_FUNCTION void
region_model::debug () const
{
dump (true);
}
/* Assert that this object is valid. */
void
region_model::validate () const
{
m_store.validate ();
}
/* Canonicalize the store and constraints, to maximize the chance of
equality between region_model instances. */
void
region_model::canonicalize ()
{
m_store.canonicalize (m_mgr->get_store_manager ());
m_constraints->canonicalize ();
}
/* Return true if this region_model is in canonical form. */
bool
region_model::canonicalized_p () const
{
region_model copy (*this);
copy.canonicalize ();
return *this == copy;
}
/* See the comment for store::loop_replay_fixup. */
void
region_model::loop_replay_fixup (const region_model *dst_state)
{
m_store.loop_replay_fixup (dst_state->get_store (), m_mgr);
}
/* A subclass of pending_diagnostic for complaining about uses of
poisoned values. */
class poisoned_value_diagnostic
: public pending_diagnostic_subclass<poisoned_value_diagnostic>
{
public:
poisoned_value_diagnostic (tree expr, enum poison_kind pkind,
const region *src_region)
: m_expr (expr), m_pkind (pkind),
m_src_region (src_region)
{}
const char *get_kind () const final override { return "poisoned_value_diagnostic"; }
bool use_of_uninit_p () const final override
{
return m_pkind == POISON_KIND_UNINIT;
}
bool operator== (const poisoned_value_diagnostic &other) const
{
return (m_expr == other.m_expr
&& m_pkind == other.m_pkind
&& m_src_region == other.m_src_region);
}
int get_controlling_option () const final override
{
switch (m_pkind)
{
default:
gcc_unreachable ();
case POISON_KIND_UNINIT:
return OPT_Wanalyzer_use_of_uninitialized_value;
case POISON_KIND_FREED:
return OPT_Wanalyzer_use_after_free;
case POISON_KIND_POPPED_STACK:
return OPT_Wanalyzer_use_of_pointer_in_stale_stack_frame;
}
}
bool emit (rich_location *rich_loc) final override
{
switch (m_pkind)
{
default:
gcc_unreachable ();
case POISON_KIND_UNINIT:
{
diagnostic_metadata m;
m.add_cwe (457); /* "CWE-457: Use of Uninitialized Variable". */
return warning_meta (rich_loc, m, get_controlling_option (),
"use of uninitialized value %qE",
m_expr);
}
break;
case POISON_KIND_FREED:
{
diagnostic_metadata m;
m.add_cwe (416); /* "CWE-416: Use After Free". */
return warning_meta (rich_loc, m, get_controlling_option (),
"use after %<free%> of %qE",
m_expr);
}
break;
case POISON_KIND_POPPED_STACK:
{
/* TODO: which CWE? */
return warning_at
(rich_loc, get_controlling_option (),
"dereferencing pointer %qE to within stale stack frame",
m_expr);
}
break;
}
}
label_text describe_final_event (const evdesc::final_event &ev) final override
{
switch (m_pkind)
{
default:
gcc_unreachable ();
case POISON_KIND_UNINIT:
return ev.formatted_print ("use of uninitialized value %qE here",
m_expr);
case POISON_KIND_FREED:
return ev.formatted_print ("use after %<free%> of %qE here",
m_expr);
case POISON_KIND_POPPED_STACK:
return ev.formatted_print
("dereferencing pointer %qE to within stale stack frame",
m_expr);
}
}
void mark_interesting_stuff (interesting_t *interest) final override
{
if (m_src_region)
interest->add_region_creation (m_src_region);
}
private:
tree m_expr;
enum poison_kind m_pkind;
const region *m_src_region;
};
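/* Example (illustrative) of source that triggers this diagnostic with
POISON_KIND_UNINIT:

int f (void)
{
int i;
return i;   <-- "use of uninitialized value 'i'" (CWE-457)
}

The same class also reports uses after free (POISON_KIND_FREED) and uses
of pointers into popped stack frames (POISON_KIND_POPPED_STACK). */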
/* A subclass of pending_diagnostic for complaining about shifts
by negative counts. */
class shift_count_negative_diagnostic
: public pending_diagnostic_subclass<shift_count_negative_diagnostic>
{
public:
shift_count_negative_diagnostic (const gassign *assign, tree count_cst)
: m_assign (assign), m_count_cst (count_cst)
{}
const char *get_kind () const final override
{
return "shift_count_negative_diagnostic";
}
bool operator== (const shift_count_negative_diagnostic &other) const
{
return (m_assign == other.m_assign
&& same_tree_p (m_count_cst, other.m_count_cst));
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_shift_count_negative;
}
bool emit (rich_location *rich_loc) final override
{
return warning_at (rich_loc, get_controlling_option (),
"shift by negative count (%qE)", m_count_cst);
}
label_text describe_final_event (const evdesc::final_event &ev) final override
{
return ev.formatted_print ("shift by negative amount here (%qE)", m_count_cst);
}
private:
const gassign *m_assign;
tree m_count_cst;
};
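/* Example (illustrative):

int f (int x) { return x << -2; }

yields "shift by negative count (-2)" on the gassign for the shift,
per INT34-C. */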
/* A subclass of pending_diagnostic for complaining about shifts
by counts >= the width of the operand type. */
class shift_count_overflow_diagnostic
: public pending_diagnostic_subclass<shift_count_overflow_diagnostic>
{
public:
shift_count_overflow_diagnostic (const gassign *assign,
int operand_precision,
tree count_cst)
: m_assign (assign), m_operand_precision (operand_precision),
m_count_cst (count_cst)
{}
const char *get_kind () const final override
{
return "shift_count_overflow_diagnostic";
}
bool operator== (const shift_count_overflow_diagnostic &other) const
{
return (m_assign == other.m_assign
&& m_operand_precision == other.m_operand_precision
&& same_tree_p (m_count_cst, other.m_count_cst));
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_shift_count_overflow;
}
bool emit (rich_location *rich_loc) final override
{
return warning_at (rich_loc, get_controlling_option (),
"shift by count (%qE) >= precision of type (%qi)",
m_count_cst, m_operand_precision);
}
label_text describe_final_event (const evdesc::final_event &ev) final override
{
return ev.formatted_print ("shift by count %qE here", m_count_cst);
}
private:
const gassign *m_assign;
int m_operand_precision;
tree m_count_cst;
};
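/* Example (illustrative), assuming a 32-bit int:

int f (int x) { return x << 32; }

yields "shift by count (32) >= precision of type (32)". */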
/* If ASSIGN is a stmt that can be modelled via
set_value (lhs_reg, SVALUE, CTXT)
for some SVALUE, get the SVALUE.
Otherwise return NULL. */
const svalue *
region_model::get_gassign_result (const gassign *assign,
region_model_context *ctxt)
{
tree lhs = gimple_assign_lhs (assign);
tree rhs1 = gimple_assign_rhs1 (assign);
enum tree_code op = gimple_assign_rhs_code (assign);
switch (op)
{
default:
return NULL;
case POINTER_PLUS_EXPR:
{
/* e.g. "_1 = a_10(D) + 12;" */
tree ptr = rhs1;
tree offset = gimple_assign_rhs2 (assign);
const svalue *ptr_sval = get_rvalue (ptr, ctxt);
const svalue *offset_sval = get_rvalue (offset, ctxt);
/* Quoting tree.def, "the second operand [of a POINTER_PLUS_EXPR]
is an integer of type sizetype". */
offset_sval = m_mgr->get_or_create_cast (size_type_node, offset_sval);
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (lhs), op,
ptr_sval, offset_sval);
return sval_binop;
}
break;
case POINTER_DIFF_EXPR:
{
/* e.g. "_1 = p_2(D) - q_3(D);". */
tree rhs2 = gimple_assign_rhs2 (assign);
const svalue *rhs1_sval = get_rvalue (rhs1, ctxt);
const svalue *rhs2_sval = get_rvalue (rhs2, ctxt);
// TODO: perhaps fold to zero if they're known to be equal?
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (lhs), op,
rhs1_sval, rhs2_sval);
return sval_binop;
}
break;
/* Assignments of the form
set_value (lvalue (LHS), rvalue (EXPR))
for various EXPR.
We already have the lvalue for the LHS above, as "lhs_reg". */
case ADDR_EXPR: /* LHS = &RHS; */
case BIT_FIELD_REF:
case COMPONENT_REF: /* LHS = op0.op1; */
case MEM_REF:
case REAL_CST:
case COMPLEX_CST:
case VECTOR_CST:
case INTEGER_CST:
case ARRAY_REF:
case SSA_NAME: /* LHS = VAR; */
case VAR_DECL: /* LHS = VAR; */
case PARM_DECL: /* LHS = VAR; */
case REALPART_EXPR:
case IMAGPART_EXPR:
return get_rvalue (rhs1, ctxt);
case ABS_EXPR:
case ABSU_EXPR:
case CONJ_EXPR:
case BIT_NOT_EXPR:
case FIX_TRUNC_EXPR:
case FLOAT_EXPR:
case NEGATE_EXPR:
case NOP_EXPR:
case VIEW_CONVERT_EXPR:
{
/* Unary ops. */
const svalue *rhs_sval = get_rvalue (rhs1, ctxt);
const svalue *sval_unaryop
= m_mgr->get_or_create_unaryop (TREE_TYPE (lhs), op, rhs_sval);
return sval_unaryop;
}
case EQ_EXPR:
case GE_EXPR:
case LE_EXPR:
case NE_EXPR:
case GT_EXPR:
case LT_EXPR:
case UNORDERED_EXPR:
case ORDERED_EXPR:
{
tree rhs2 = gimple_assign_rhs2 (assign);
const svalue *rhs1_sval = get_rvalue (rhs1, ctxt);
const svalue *rhs2_sval = get_rvalue (rhs2, ctxt);
if (TREE_TYPE (lhs) == boolean_type_node)
{
/* Consider constraints between svalues. */
tristate t = eval_condition (rhs1_sval, op, rhs2_sval);
if (t.is_known ())
return m_mgr->get_or_create_constant_svalue
(t.is_true () ? boolean_true_node : boolean_false_node);
}
/* Otherwise, generate a symbolic binary op. */
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (lhs), op,
rhs1_sval, rhs2_sval);
return sval_binop;
}
break;
case PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
case MULT_HIGHPART_EXPR:
case TRUNC_DIV_EXPR:
case CEIL_DIV_EXPR:
case FLOOR_DIV_EXPR:
case ROUND_DIV_EXPR:
case TRUNC_MOD_EXPR:
case CEIL_MOD_EXPR:
case FLOOR_MOD_EXPR:
case ROUND_MOD_EXPR:
case RDIV_EXPR:
case EXACT_DIV_EXPR:
case LSHIFT_EXPR:
case RSHIFT_EXPR:
case LROTATE_EXPR:
case RROTATE_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
case MIN_EXPR:
case MAX_EXPR:
case COMPLEX_EXPR:
{
/* Binary ops. */
tree rhs2 = gimple_assign_rhs2 (assign);
const svalue *rhs1_sval = get_rvalue (rhs1, ctxt);
const svalue *rhs2_sval = get_rvalue (rhs2, ctxt);
if (ctxt && (op == LSHIFT_EXPR || op == RSHIFT_EXPR))
{
/* "INT34-C. Do not shift an expression by a negative number of bits
or by greater than or equal to the number of bits that exist in
the operand." */
if (const tree rhs2_cst = rhs2_sval->maybe_get_constant ())
if (TREE_CODE (rhs2_cst) == INTEGER_CST)
{
if (tree_int_cst_sgn (rhs2_cst) < 0)
ctxt->warn
(make_unique<shift_count_negative_diagnostic>
(assign, rhs2_cst));
else if (compare_tree_int (rhs2_cst,
TYPE_PRECISION (TREE_TYPE (rhs1)))
>= 0)
ctxt->warn
(make_unique<shift_count_overflow_diagnostic>
(assign,
int (TYPE_PRECISION (TREE_TYPE (rhs1))),
rhs2_cst));
}
}
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (lhs), op,
rhs1_sval, rhs2_sval);
return sval_binop;
}
/* Vector expressions. In theory we could implement these elementwise,
but for now, simply return unknown values. */
case VEC_DUPLICATE_EXPR:
case VEC_SERIES_EXPR:
case VEC_COND_EXPR:
case VEC_PERM_EXPR:
case VEC_WIDEN_MULT_HI_EXPR:
case VEC_WIDEN_MULT_LO_EXPR:
case VEC_WIDEN_MULT_EVEN_EXPR:
case VEC_WIDEN_MULT_ODD_EXPR:
case VEC_UNPACK_HI_EXPR:
case VEC_UNPACK_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
case VEC_PACK_FLOAT_EXPR:
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
return m_mgr->get_or_create_unknown_svalue (TREE_TYPE (lhs));
}
}
/* Workaround for discarding certain false positives from
-Wanalyzer-use-of-uninitialized-value
of the form:
((A OR-IF B) OR-IF C)
and:
((A AND-IF B) AND-IF C)
where evaluating B is redundant, but could involve simple accesses of
uninitialized locals.
When optimization is turned on the FE can immediately fold compound
conditionals. Specifically, c_parser_condition parses this condition:
((A OR-IF B) OR-IF C)
and calls c_fully_fold on the condition.
Within c_fully_fold, fold_truth_andor is called, which bails when
optimization is off, but if any optimization is turned on can convert the
((A OR-IF B) OR-IF C)
into:
((A OR B) OR-IF C)
for sufficiently simple B
i.e. the inner OR-IF becomes an OR.
At gimplification time the inner OR becomes BIT_IOR_EXPR (in gimplify_expr),
giving this for the inner condition:
tmp = A | B;
if (tmp)
thus effectively synthesizing a redundant access of B when optimization
is turned on, when compared to:
if (A) goto L1; else goto L4;
L1: if (B) goto L2; else goto L4;
L2: if (C) goto L3; else goto L4;
for the unoptimized case.
Return true if CTXT appears to be handling such a short-circuitable stmt,
such as the def-stmt for B for the:
tmp = A | B;
case above, for the case where A is true and thus B would have been
short-circuited without optimization, using MODEL for the value of A. */
static bool
within_short_circuited_stmt_p (const region_model *model,
const gassign *assign_stmt)
{
/* We must have an assignment to a temporary of _Bool type. */
tree lhs = gimple_assign_lhs (assign_stmt);
if (TREE_TYPE (lhs) != boolean_type_node)
return false;
if (TREE_CODE (lhs) != SSA_NAME)
return false;
if (SSA_NAME_VAR (lhs) != NULL_TREE)
return false;
/* The temporary bool must be used exactly once: as the second arg of
a BIT_IOR_EXPR or BIT_AND_EXPR. */
use_operand_p use_op;
gimple *use_stmt;
if (!single_imm_use (lhs, &use_op, &use_stmt))
return false;
const gassign *use_assign = dyn_cast <const gassign *> (use_stmt);
if (!use_assign)
return false;
enum tree_code op = gimple_assign_rhs_code (use_assign);
if (!(op == BIT_IOR_EXPR || op == BIT_AND_EXPR))
return false;
if (!(gimple_assign_rhs1 (use_assign) != lhs
&& gimple_assign_rhs2 (use_assign) == lhs))
return false;
/* The first arg of the bitwise stmt must have a known value in MODEL
that implies that the value of the second arg doesn't matter, i.e.
1 for bitwise or, 0 for bitwise and. */
tree other_arg = gimple_assign_rhs1 (use_assign);
/* Use a NULL ctxt here to avoid generating warnings. */
const svalue *other_arg_sval = model->get_rvalue (other_arg, NULL);
tree other_arg_cst = other_arg_sval->maybe_get_constant ();
if (!other_arg_cst)
return false;
switch (op)
{
default:
gcc_unreachable ();
case BIT_IOR_EXPR:
if (zerop (other_arg_cst))
return false;
break;
case BIT_AND_EXPR:
if (!zerop (other_arg_cst))
return false;
break;
}
/* All tests passed. We appear to be in a stmt that generates a boolean
temporary with a value that won't matter. */
return true;
}
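/* Illustrative source-level trigger for the above, with any optimization
enabled:

if ((a || b) || c)
...

The inner "a || b" can be folded to "tmp = a | b;", reading B
unconditionally; when A is already known true in MODEL, that read of B
is the redundant access this predicate recognizes. */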
/* Workaround for discarding certain false positives from
-Wanalyzer-use-of-uninitialized-value
seen with -ftrivial-auto-var-init=.
-ftrivial-auto-var-init= will generate calls to IFN_DEFERRED_INIT.
If the address of the var is taken, gimplification will give us
something like:
_1 = .DEFERRED_INIT (4, 2, &"len"[0]);
len = _1;
The result of DEFERRED_INIT will be an uninit value; we don't
want to emit a false positive for "len = _1;"
Return true if ASSIGN_STMT is such a stmt. */
static bool
due_to_ifn_deferred_init_p (const gassign *assign_stmt)
{
/* We must have an assignment to a decl from an SSA name that's the
result of a IFN_DEFERRED_INIT call. */
if (gimple_assign_rhs_code (assign_stmt) != SSA_NAME)
return false;
tree lhs = gimple_assign_lhs (assign_stmt);
if (TREE_CODE (lhs) != VAR_DECL)
return false;
tree rhs = gimple_assign_rhs1 (assign_stmt);
if (TREE_CODE (rhs) != SSA_NAME)
return false;
const gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
const gcall *call = dyn_cast <const gcall *> (def_stmt);
if (!call)
return false;
if (gimple_call_internal_p (call)
&& gimple_call_internal_fn (call) == IFN_DEFERRED_INIT)
return true;
return false;
}
/* Check for SVAL being poisoned, adding a warning to CTXT.
Return SVAL, or, if a warning is added, another value, to avoid
repeatedly complaining about the same poisoned value in followup code. */
const svalue *
region_model::check_for_poison (const svalue *sval,
tree expr,
region_model_context *ctxt) const
{
if (!ctxt)
return sval;
if (const poisoned_svalue *poisoned_sval = sval->dyn_cast_poisoned_svalue ())
{
enum poison_kind pkind = poisoned_sval->get_poison_kind ();
/* Ignore uninitialized uses of empty types; there's nothing
to initialize. */
if (pkind == POISON_KIND_UNINIT
&& sval->get_type ()
&& is_empty_type (sval->get_type ()))
return sval;
if (pkind == POISON_KIND_UNINIT)
if (const gimple *curr_stmt = ctxt->get_stmt ())
if (const gassign *assign_stmt
= dyn_cast <const gassign *> (curr_stmt))
{
/* Special case to avoid certain false positives. */
if (within_short_circuited_stmt_p (this, assign_stmt))
return sval;
/* Special case to avoid false positive on
-ftrivial-auto-var-init=. */
if (due_to_ifn_deferred_init_p (assign_stmt))
return sval;
}
/* If we have an SSA name for a temporary, we don't want to print
'<unknown>'.
Poisoned values are shared by type, and so we can't reconstruct
the tree other than via the def stmts, using
fixup_tree_for_diagnostic. */
tree diag_arg = fixup_tree_for_diagnostic (expr);
const region *src_region = NULL;
if (pkind == POISON_KIND_UNINIT)
src_region = get_region_for_poisoned_expr (expr);
if (ctxt->warn (make_unique<poisoned_value_diagnostic> (diag_arg,
pkind,
src_region)))
{
/* We only want to report use of a poisoned value at the first
place it gets used; return an unknown value to avoid generating
a chain of followup warnings. */
sval = m_mgr->get_or_create_unknown_svalue (sval->get_type ());
}
return sval;
}
return sval;
}
/* Attempt to get a region for describing EXPR, the source region of
a poisoned_svalue, for use in a poisoned_value_diagnostic.
Return NULL if there is no good region to use. */
const region *
region_model::get_region_for_poisoned_expr (tree expr) const
{
if (TREE_CODE (expr) == SSA_NAME)
{
tree decl = SSA_NAME_VAR (expr);
if (decl && DECL_P (decl))
expr = decl;
else
return NULL;
}
return get_lvalue (expr, NULL);
}
/* Update this model for the ASSIGN stmt, using CTXT to report any
diagnostics. */
void
region_model::on_assignment (const gassign *assign, region_model_context *ctxt)
{
tree lhs = gimple_assign_lhs (assign);
tree rhs1 = gimple_assign_rhs1 (assign);
const region *lhs_reg = get_lvalue (lhs, ctxt);
/* Most assignments are handled by:
set_value (lhs_reg, SVALUE, CTXT)
for some SVALUE. */
if (const svalue *sval = get_gassign_result (assign, ctxt))
{
tree expr = get_diagnostic_tree_for_gassign (assign);
check_for_poison (sval, expr, ctxt);
set_value (lhs_reg, sval, ctxt);
return;
}
enum tree_code op = gimple_assign_rhs_code (assign);
switch (op)
{
default:
{
if (0)
sorry_at (assign->location, "unhandled assignment op: %qs",
get_tree_code_name (op));
const svalue *unknown_sval
= m_mgr->get_or_create_unknown_svalue (TREE_TYPE (lhs));
set_value (lhs_reg, unknown_sval, ctxt);
}
break;
case CONSTRUCTOR:
{
if (TREE_CLOBBER_P (rhs1))
{
/* e.g. "x ={v} {CLOBBER};" */
clobber_region (lhs_reg);
}
else
{
/* Any CONSTRUCTOR that survives to this point is either
just a zero-init of everything, or a vector. */
if (!CONSTRUCTOR_NO_CLEARING (rhs1))
zero_fill_region (lhs_reg);
unsigned ix;
tree index;
tree val;
FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (rhs1), ix, index, val)
{
gcc_assert (TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE);
if (!index)
index = build_int_cst (integer_type_node, ix);
gcc_assert (TREE_CODE (index) == INTEGER_CST);
const svalue *index_sval
= m_mgr->get_or_create_constant_svalue (index);
gcc_assert (index_sval);
const region *sub_reg
= m_mgr->get_element_region (lhs_reg,
TREE_TYPE (val),
index_sval);
const svalue *val_sval = get_rvalue (val, ctxt);
set_value (sub_reg, val_sval, ctxt);
}
}
}
break;
case STRING_CST:
{
/* e.g. "struct s2 x = {{'A', 'B', 'C', 'D'}};". */
const svalue *rhs_sval = get_rvalue (rhs1, ctxt);
m_store.set_value (m_mgr->get_store_manager (), lhs_reg, rhs_sval,
ctxt ? ctxt->get_uncertainty () : NULL);
}
break;
}
}
/* Handle the pre-sm-state part of STMT, modifying this object in-place.
Write true to *OUT_TERMINATE_PATH if the path should be terminated.
Write true to *OUT_UNKNOWN_SIDE_EFFECTS if the stmt has unknown
side effects. */
void
region_model::on_stmt_pre (const gimple *stmt,
bool *out_terminate_path,
bool *out_unknown_side_effects,
region_model_context *ctxt)
{
switch (gimple_code (stmt))
{
default:
/* No-op for now. */
break;
case GIMPLE_ASSIGN:
{
const gassign *assign = as_a <const gassign *> (stmt);
on_assignment (assign, ctxt);
}
break;
case GIMPLE_ASM:
{
const gasm *asm_stmt = as_a <const gasm *> (stmt);
on_asm_stmt (asm_stmt, ctxt);
}
break;
case GIMPLE_CALL:
{
/* Track whether we have a gcall to a function that's not recognized by
anything, for which we don't have a function body, or for which we
don't know the fndecl. */
const gcall *call = as_a <const gcall *> (stmt);
*out_unknown_side_effects
= on_call_pre (call, ctxt, out_terminate_path);
}
break;
case GIMPLE_RETURN:
{
const greturn *return_ = as_a <const greturn *> (stmt);
on_return (return_, ctxt);
}
break;
}
}
/* Abstract base class for all out-of-bounds warnings with concrete values. */
class out_of_bounds : public pending_diagnostic_subclass<out_of_bounds>
{
public:
out_of_bounds (const region *reg, tree diag_arg,
byte_range out_of_bounds_range)
: m_reg (reg), m_diag_arg (diag_arg),
m_out_of_bounds_range (out_of_bounds_range)
{}
const char *get_kind () const final override
{
return "out_of_bounds_diagnostic";
}
bool operator== (const out_of_bounds &other) const
{
return m_reg == other.m_reg
&& m_out_of_bounds_range == other.m_out_of_bounds_range
&& pending_diagnostic::same_tree_p (m_diag_arg, other.m_diag_arg);
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_out_of_bounds;
}
void mark_interesting_stuff (interesting_t *interest) final override
{
interest->add_region_creation (m_reg);
}
protected:
const region *m_reg;
tree m_diag_arg;
byte_range m_out_of_bounds_range;
};
/* Abstract subclass for complaining about out-of-bounds accesses
past the end of the buffer. */
class past_the_end : public out_of_bounds
{
public:
past_the_end (const region *reg, tree diag_arg, byte_range range,
tree byte_bound)
: out_of_bounds (reg, diag_arg, range), m_byte_bound (byte_bound)
{}
bool operator== (const past_the_end &other) const
{
return out_of_bounds::operator== (other)
&& pending_diagnostic::same_tree_p (m_byte_bound,
other.m_byte_bound);
}
label_text
describe_region_creation_event (const evdesc::region_creation &ev) final
override
{
if (m_byte_bound && TREE_CODE (m_byte_bound) == INTEGER_CST)
return ev.formatted_print ("capacity is %E bytes", m_byte_bound);
return label_text ();
}
protected:
tree m_byte_bound;
};
/* Concrete subclass to complain about buffer overflows. */
class buffer_overflow : public past_the_end
{
public:
buffer_overflow (const region *reg, tree diag_arg,
byte_range range, tree byte_bound)
: past_the_end (reg, diag_arg, range, byte_bound)
{}
bool emit (rich_location *rich_loc) final override
{
diagnostic_metadata m;
bool warned;
switch (m_reg->get_memory_space ())
{
default:
m.add_cwe (787);
warned = warning_meta (rich_loc, m, get_controlling_option (),
"buffer overflow");
break;
case MEMSPACE_STACK:
m.add_cwe (121);
warned = warning_meta (rich_loc, m, get_controlling_option (),
"stack-based buffer overflow");
break;
case MEMSPACE_HEAP:
m.add_cwe (122);
warned = warning_meta (rich_loc, m, get_controlling_option (),
"heap-based buffer overflow");
break;
}
if (warned)
{
char num_bytes_past_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (m_out_of_bounds_range.m_size_in_bytes,
num_bytes_past_buf, UNSIGNED);
if (m_diag_arg)
inform (rich_loc->get_loc (), "write is %s bytes past the end"
" of %qE", num_bytes_past_buf,
m_diag_arg);
else
inform (rich_loc->get_loc (), "write is %s bytes past the end"
"of the region",
num_bytes_past_buf);
}
return warned;
}
label_text describe_final_event (const evdesc::final_event &ev)
final override
{
byte_size_t start = m_out_of_bounds_range.get_start_byte_offset ();
byte_size_t end = m_out_of_bounds_range.get_last_byte_offset ();
char start_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (start, start_buf, SIGNED);
char end_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (end, end_buf, SIGNED);
if (start == end)
{
if (m_diag_arg)
return ev.formatted_print ("out-of-bounds write at byte %s but %qE"
" ends at byte %E", start_buf, m_diag_arg,
m_byte_bound);
return ev.formatted_print ("out-of-bounds write at byte %s but region"
" ends at byte %E", start_buf,
m_byte_bound);
}
else
{
if (m_diag_arg)
return ev.formatted_print ("out-of-bounds write from byte %s till"
" byte %s but %qE ends at byte %E",
start_buf, end_buf, m_diag_arg,
m_byte_bound);
return ev.formatted_print ("out-of-bounds write from byte %s till"
" byte %s but region ends at byte %E",
start_buf, end_buf, m_byte_bound);
}
}
};
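/* Example (illustrative):

void f (void)
{
char buf[4];
buf[4] = 'x';   <-- stack-based buffer overflow (CWE-121)
}

The note reports the write as 1 byte past the end of 'buf'. A
heap-allocated buffer is instead reported as CWE-122; other memory
spaces fall back to the generic CWE-787. */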
/* Concrete subclass to complain about buffer overreads. */
class buffer_overread : public past_the_end
{
public:
buffer_overread (const region *reg, tree diag_arg,
byte_range range, tree byte_bound)
: past_the_end (reg, diag_arg, range, byte_bound)
{}
bool emit (rich_location *rich_loc) final override
{
diagnostic_metadata m;
m.add_cwe (126);
bool warned = warning_meta (rich_loc, m, get_controlling_option (),
"buffer overread");
if (warned)
{
char num_bytes_past_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (m_out_of_bounds_range.m_size_in_bytes,
num_bytes_past_buf, UNSIGNED);
if (m_diag_arg)
inform (rich_loc->get_loc (), "read is %s bytes past the end"
" of %qE", num_bytes_past_buf,
m_diag_arg);
else
inform (rich_loc->get_loc (), "read is %s bytes past the end"
"of the region",
num_bytes_past_buf);
}
return warned;
}
label_text describe_final_event (const evdesc::final_event &ev)
final override
{
byte_size_t start = m_out_of_bounds_range.get_start_byte_offset ();
byte_size_t end = m_out_of_bounds_range.get_last_byte_offset ();
char start_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (start, start_buf, SIGNED);
char end_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (end, end_buf, SIGNED);
if (start == end)
{
if (m_diag_arg)
return ev.formatted_print ("out-of-bounds read at byte %s but %qE"
" ends at byte %E", start_buf, m_diag_arg,
m_byte_bound);
return ev.formatted_print ("out-of-bounds read at byte %s but region"
" ends at byte %E", start_buf,
m_byte_bound);
}
else
{
if (m_diag_arg)
return ev.formatted_print ("out-of-bounds read from byte %s till"
" byte %s but %qE ends at byte %E",
start_buf, end_buf, m_diag_arg,
m_byte_bound);
return ev.formatted_print ("out-of-bounds read from byte %s till"
" byte %s but region ends at byte %E",
start_buf, end_buf, m_byte_bound);
}
}
};
/* Concrete subclass to complain about buffer underflows. */
class buffer_underflow : public out_of_bounds
{
public:
buffer_underflow (const region *reg, tree diag_arg, byte_range range)
: out_of_bounds (reg, diag_arg, range)
{}
bool emit (rich_location *rich_loc) final override
{
diagnostic_metadata m;
m.add_cwe (124);
return warning_meta (rich_loc, m, get_controlling_option (),
"buffer underflow");
}
label_text describe_final_event (const evdesc::final_event &ev)
final override
{
byte_size_t start = m_out_of_bounds_range.get_start_byte_offset ();
byte_size_t end = m_out_of_bounds_range.get_last_byte_offset ();
char start_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (start, start_buf, SIGNED);
char end_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (end, end_buf, SIGNED);
if (start == end)
{
if (m_diag_arg)
return ev.formatted_print ("out-of-bounds write at byte %s but %qE"
" starts at byte 0", start_buf,
m_diag_arg);
return ev.formatted_print ("out-of-bounds write at byte %s but region"
" starts at byte 0", start_buf);
}
else
{
if (m_diag_arg)
return ev.formatted_print ("out-of-bounds write from byte %s till"
" byte %s but %qE starts at byte 0",
start_buf, end_buf, m_diag_arg);
return ev.formatted_print ("out-of-bounds write from byte %s till"
" byte %s but region starts at byte 0",
start_buf, end_buf);
}
}
};
/* Concrete subclass to complain about buffer underreads. */
class buffer_underread : public out_of_bounds
{
public:
buffer_underread (const region *reg, tree diag_arg, byte_range range)
: out_of_bounds (reg, diag_arg, range)
{}
bool emit (rich_location *rich_loc) final override
{
diagnostic_metadata m;
m.add_cwe (127);
return warning_meta (rich_loc, m, get_controlling_option (),
"buffer underread");
}
label_text describe_final_event (const evdesc::final_event &ev)
final override
{
byte_size_t start = m_out_of_bounds_range.get_start_byte_offset ();
byte_size_t end = m_out_of_bounds_range.get_last_byte_offset ();
char start_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (start, start_buf, SIGNED);
char end_buf[WIDE_INT_PRINT_BUFFER_SIZE];
print_dec (end, end_buf, SIGNED);
if (start == end)
{
if (m_diag_arg)
return ev.formatted_print ("out-of-bounds read at byte %s but %qE"
" starts at byte 0", start_buf,
m_diag_arg);
return ev.formatted_print ("out-of-bounds read at byte %s but region"
" starts at byte 0", start_buf);
}
else
{
if (m_diag_arg)
return ev.formatted_print ("out-of-bounds read from byte %s till"
" byte %s but %qE starts at byte 0",
start_buf, end_buf, m_diag_arg);
return ev.formatted_print ("out-of-bounds read from byte %s till"
" byte %s but region starts at byte 0",
start_buf, end_buf);
}
}
};
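/* Example (illustrative) covering the underflow/underread cases:

char buf[4];
buf[-1] = 'x';        <-- buffer underflow (CWE-124)
char c = buf[-2];     <-- buffer underread (CWE-127)

In both cases the accessed byte range falls short of byte 0 of the
base region. */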
/* Abstract class to complain about out-of-bounds read/writes where
the values are symbolic. */
class symbolic_past_the_end
: public pending_diagnostic_subclass<symbolic_past_the_end>
{
public:
symbolic_past_the_end (const region *reg, tree diag_arg, tree offset,
tree num_bytes, tree capacity)
: m_reg (reg), m_diag_arg (diag_arg), m_offset (offset),
m_num_bytes (num_bytes), m_capacity (capacity)
{}
const char *get_kind () const final override
{
return "symbolic_past_the_end";
}
bool operator== (const symbolic_past_the_end &other) const
{
return m_reg == other.m_reg
&& pending_diagnostic::same_tree_p (m_diag_arg, other.m_diag_arg)
&& pending_diagnostic::same_tree_p (m_offset, other.m_offset)
&& pending_diagnostic::same_tree_p (m_num_bytes, other.m_num_bytes)
&& pending_diagnostic::same_tree_p (m_capacity, other.m_capacity);
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_out_of_bounds;
}
void mark_interesting_stuff (interesting_t *interest) final override
{
interest->add_region_creation (m_reg);
}
label_text
describe_region_creation_event (const evdesc::region_creation &ev) final
override
{
if (m_capacity)
return ev.formatted_print ("capacity is %qE bytes", m_capacity);
return label_text ();
}
label_text
describe_final_event (const evdesc::final_event &ev) final override
{
const char *byte_str;
if (pending_diagnostic::same_tree_p (m_num_bytes, integer_one_node))
byte_str = "byte";
else
byte_str = "bytes";
if (m_offset)
{
if (m_num_bytes && TREE_CODE (m_num_bytes) == INTEGER_CST)
{
if (m_diag_arg)
return ev.formatted_print ("%s of %E %s at offset %qE"
" exceeds %qE", m_dir_str,
m_num_bytes, byte_str,
m_offset, m_diag_arg);
else
return ev.formatted_print ("%s of %E %s at offset %qE"
" exceeds the buffer", m_dir_str,
m_num_bytes, byte_str, m_offset);
}
else if (m_num_bytes)
{
if (m_diag_arg)
return ev.formatted_print ("%s of %qE %s at offset %qE"
" exceeds %qE", m_dir_str,
m_num_bytes, byte_str,
m_offset, m_diag_arg);
else
return ev.formatted_print ("%s of %qE %s at offset %qE"
" exceeds the buffer", m_dir_str,
m_num_bytes, byte_str, m_offset);
}
else
{
if (m_diag_arg)
return ev.formatted_print ("%s at offset %qE exceeds %qE",
m_dir_str, m_offset, m_diag_arg);
else
return ev.formatted_print ("%s at offset %qE exceeds the"
" buffer", m_dir_str, m_offset);
}
}
if (m_diag_arg)
return ev.formatted_print ("out-of-bounds %s on %qE",
m_dir_str, m_diag_arg);
return ev.formatted_print ("out-of-bounds %s", m_dir_str);
}
protected:
const region *m_reg;
tree m_diag_arg;
tree m_offset;
tree m_num_bytes;
tree m_capacity;
const char *m_dir_str;
};
/* Concrete subclass to complain about overflows with symbolic values. */
class symbolic_buffer_overflow : public symbolic_past_the_end
{
public:
symbolic_buffer_overflow (const region *reg, tree diag_arg, tree offset,
tree num_bytes, tree capacity)
: symbolic_past_the_end (reg, diag_arg, offset, num_bytes, capacity)
{
m_dir_str = "write";
}
bool emit (rich_location *rich_loc) final override
{
diagnostic_metadata m;
switch (m_reg->get_memory_space ())
{
default:
m.add_cwe (787);
return warning_meta (rich_loc, m, get_controlling_option (),
"buffer overflow");
case MEMSPACE_STACK:
m.add_cwe (121);
return warning_meta (rich_loc, m, get_controlling_option (),
"stack-based buffer overflow");
case MEMSPACE_HEAP:
m.add_cwe (122);
return warning_meta (rich_loc, m, get_controlling_option (),
"heap-based buffer overflow");
}
}
};
/* Concrete subclass to complain about overreads with symbolic values. */
class symbolic_buffer_overread : public symbolic_past_the_end
{
public:
symbolic_buffer_overread (const region *reg, tree diag_arg, tree offset,
tree num_bytes, tree capacity)
: symbolic_past_the_end (reg, diag_arg, offset, num_bytes, capacity)
{
m_dir_str = "read";
}
bool emit (rich_location *rich_loc) final override
{
diagnostic_metadata m;
m.add_cwe (126);
return warning_meta (rich_loc, m, get_controlling_option (),
"buffer overread");
}
};
/* Check whether an access is past the end of the BASE_REG. */
void
region_model::check_symbolic_bounds (const region *base_reg,
const svalue *sym_byte_offset,
const svalue *num_bytes_sval,
const svalue *capacity,
enum access_direction dir,
region_model_context *ctxt) const
{
gcc_assert (ctxt);
const svalue *next_byte
= m_mgr->get_or_create_binop (num_bytes_sval->get_type (), PLUS_EXPR,
sym_byte_offset, num_bytes_sval);
if (eval_condition (next_byte, GT_EXPR, capacity).is_true ())
{
tree diag_arg = get_representative_tree (base_reg);
tree offset_tree = get_representative_tree (sym_byte_offset);
tree num_bytes_tree = get_representative_tree (num_bytes_sval);
tree capacity_tree = get_representative_tree (capacity);
switch (dir)
{
default:
gcc_unreachable ();
break;
case DIR_READ:
ctxt->warn (make_unique<symbolic_buffer_overread> (base_reg,
diag_arg,
offset_tree,
num_bytes_tree,
capacity_tree));
break;
case DIR_WRITE:
ctxt->warn (make_unique<symbolic_buffer_overflow> (base_reg,
diag_arg,
offset_tree,
num_bytes_tree,
capacity_tree));
break;
}
}
}
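/* Example (illustrative):

void f (char *p, unsigned long n)
{
char *buf = __builtin_malloc (n);
__builtin_memcpy (buf, p, n + 1);
}

Here the size is symbolic, so NEXT_BYTE is 0 + (n + 1); if the
constraint manager can prove NEXT_BYTE greater than the capacity n, a
symbolic_buffer_overflow is reported against the heap allocation. */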
/* Return SVAL's constant value as an INTEGER_CST tree, if it has one;
otherwise return NULL_TREE. */
static tree
maybe_get_integer_cst_tree (const svalue *sval)
{
tree cst_tree = sval->maybe_get_constant ();
if (cst_tree && TREE_CODE (cst_tree) == INTEGER_CST)
return cst_tree;
return NULL_TREE;
}
/* May complain when the access on REG is out-of-bounds. */
void
region_model::check_region_bounds (const region *reg,
enum access_direction dir,
region_model_context *ctxt) const
{
gcc_assert (ctxt);
/* Get the offset. */
region_offset reg_offset = reg->get_offset (m_mgr);
const region *base_reg = reg_offset.get_base_region ();
/* Bail out on symbolic base regions: e.g. because the analyzer has not
seen the offsets previously applied to such a region, it might wrongly
conclude that a negative access lands before the buffer. */
if (base_reg->symbolic_p ())
return;
/* Find out how many bytes were accessed. */
const svalue *num_bytes_sval = reg->get_byte_size_sval (m_mgr);
tree num_bytes_tree = maybe_get_integer_cst_tree (num_bytes_sval);
/* Bail out if 0 bytes are accessed. */
if (num_bytes_tree && zerop (num_bytes_tree))
return;
/* Get the capacity of the buffer. */
const svalue *capacity = get_capacity (base_reg);
tree cst_capacity_tree = maybe_get_integer_cst_tree (capacity);
/* The constant offset from a pointer is represented internally as a sizetype
but should be interpreted as a signed value here. The statement below
converts the offset from bits to bytes and then to a signed integer with
the same precision the sizetype has on the target system.
For example, this is needed for out-of-bounds-3.c test1 to pass when
compiled with a 64-bit gcc build targeting 32-bit systems. */
byte_offset_t offset;
if (!reg_offset.symbolic_p ())
offset = wi::sext (reg_offset.get_bit_offset () >> LOG2_BITS_PER_UNIT,
TYPE_PRECISION (size_type_node));
/* If either the offset or the number of bytes accessed are symbolic,
we have to reason about symbolic values. */
if (reg_offset.symbolic_p () || !num_bytes_tree)
{
const svalue *byte_offset_sval;
if (!reg_offset.symbolic_p ())
{
tree offset_tree = wide_int_to_tree (integer_type_node, offset);
byte_offset_sval
= m_mgr->get_or_create_constant_svalue (offset_tree);
}
else
byte_offset_sval = reg_offset.get_symbolic_byte_offset ();
check_symbolic_bounds (base_reg, byte_offset_sval, num_bytes_sval,
capacity, dir, ctxt);
return;
}
/* Otherwise continue to check with concrete values. */
byte_range out (0, 0);
/* NUM_BYTES_TREE should always be interpreted as unsigned. */
byte_offset_t num_bytes_unsigned = wi::to_offset (num_bytes_tree);
byte_range read_bytes (offset, num_bytes_unsigned);
/* If any part of READ_BYTES falls below byte 0, we have an underflow. */
if (read_bytes.falls_short_of_p (0, &out))
{
tree diag_arg = get_representative_tree (base_reg);
switch (dir)
{
default:
gcc_unreachable ();
break;
case DIR_READ:
ctxt->warn (make_unique<buffer_underread> (reg, diag_arg, out));
break;
case DIR_WRITE:
ctxt->warn (make_unique<buffer_underflow> (reg, diag_arg, out));
break;
}
}
/* For accesses past the end, we do need a concrete capacity. No need to
do a symbolic check here, because the inequality check cannot reason
about whether a constant is greater than a symbolic value. */
if (!cst_capacity_tree)
return;
byte_range buffer (0, wi::to_offset (cst_capacity_tree));
/* If READ_BYTES exceeds BUFFER, we do have an overflow. */
if (read_bytes.exceeds_p (buffer, &out))
{
tree byte_bound = wide_int_to_tree (size_type_node,
buffer.get_next_byte_offset ());
tree diag_arg = get_representative_tree (base_reg);
switch (dir)
{
default:
gcc_unreachable ();
break;
case DIR_READ:
ctxt->warn (make_unique<buffer_overread> (reg, diag_arg,
out, byte_bound));
break;
case DIR_WRITE:
ctxt->warn (make_unique<buffer_overflow> (reg, diag_arg,
out, byte_bound));
break;
}
}
}
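/* Worked example (illustrative): for

char buf[4];
buf[4] = 'x';

the offset is 4 and one byte is written, so read_bytes is the byte
range [4, 4]; the buffer is [0, 3], so read_bytes.exceeds_p sets OUT
to [4, 4] and byte_bound to 4, and a buffer_overflow diagnostic is
reported. */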
/* Ensure that all arguments at the call described by CD are checked
for poisoned values, by calling get_rvalue on each argument. */
void
region_model::check_call_args (const call_details &cd) const
{
for (unsigned arg_idx = 0; arg_idx < cd.num_args (); arg_idx++)
cd.get_arg_svalue (arg_idx);
}
/* Return true if CD is known to be a call to a function with
__attribute__((const)). */
static bool
const_fn_p (const call_details &cd)
{
tree fndecl = cd.get_fndecl_for_call ();
if (!fndecl)
return false;
gcc_assert (DECL_P (fndecl));
return TREE_READONLY (fndecl);
}
/* If this CD is known to be a call to a function with
__attribute__((const)), attempt to get a const_fn_result_svalue
based on the arguments, or return NULL otherwise. */
static const svalue *
maybe_get_const_fn_result (const call_details &cd)
{
if (!const_fn_p (cd))
return NULL;
unsigned num_args = cd.num_args ();
if (num_args > const_fn_result_svalue::MAX_INPUTS)
/* Too many arguments. */
return NULL;
auto_vec<const svalue *> inputs (num_args);
for (unsigned arg_idx = 0; arg_idx < num_args; arg_idx++)
{
const svalue *arg_sval = cd.get_arg_svalue (arg_idx);
if (!arg_sval->can_have_associated_state_p ())
return NULL;
inputs.quick_push (arg_sval);
}
region_model_manager *mgr = cd.get_manager ();
const svalue *sval
= mgr->get_or_create_const_fn_result_svalue (cd.get_lhs_type (),
cd.get_fndecl_for_call (),
inputs);
return sval;
}
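/* Example (illustrative): given

extern int sq (int) __attribute__((const));

two calls "sq (x)" on the same argument svalue map to the same
const_fn_result_svalue, so the analyzer can treat their results as
equal instead of conjuring a fresh value for each call. */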
/* Update this model for an outcome of a call that returns a specific
integer constant.
If UNMERGEABLE, then make the result unmergeable, e.g. to prevent
the state-merger code from merging success and failure outcomes. */
void
region_model::update_for_int_cst_return (const call_details &cd,
int retval,
bool unmergeable)
{
if (!cd.get_lhs_type ())
return;
if (TREE_CODE (cd.get_lhs_type ()) != INTEGER_TYPE)
return;
const svalue *result
= m_mgr->get_or_create_int_cst (cd.get_lhs_type (), retval);
if (unmergeable)
result = m_mgr->get_or_create_unmergeable (result);
set_value (cd.get_lhs_region (), result, cd.get_ctxt ());
}
/* Update this model for an outcome of a call that returns zero.
If UNMERGEABLE, then make the result unmergeable, e.g. to prevent
the state-merger code from merging success and failure outcomes. */
void
region_model::update_for_zero_return (const call_details &cd,
bool unmergeable)
{
update_for_int_cst_return (cd, 0, unmergeable);
}
/* Update this model for an outcome of a call that returns non-zero. */
void
region_model::update_for_nonzero_return (const call_details &cd)
{
if (!cd.get_lhs_type ())
return;
if (TREE_CODE (cd.get_lhs_type ()) != INTEGER_TYPE)
return;
const svalue *zero
= m_mgr->get_or_create_int_cst (cd.get_lhs_type (), 0);
const svalue *result
= get_store_value (cd.get_lhs_region (), cd.get_ctxt ());
add_constraint (result, NE_EXPR, zero, cd.get_ctxt ());
}
/* Subroutine of region_model::maybe_get_copy_bounds.
The Linux kernel commonly uses
min_t([unsigned] long, VAR, sizeof(T));
to set an upper bound on the size of a copy_to_user.
Attempt to simplify such sizes by trying to get the upper bound as a
constant.
Return the simplified svalue if possible, or NULL otherwise. */
static const svalue *
maybe_simplify_upper_bound (const svalue *num_bytes_sval,
region_model_manager *mgr)
{
tree type = num_bytes_sval->get_type ();
while (const svalue *raw = num_bytes_sval->maybe_undo_cast ())
num_bytes_sval = raw;
if (const binop_svalue *binop_sval = num_bytes_sval->dyn_cast_binop_svalue ())
if (binop_sval->get_op () == MIN_EXPR)
if (binop_sval->get_arg1 ()->get_kind () == SK_CONSTANT)
{
return mgr->get_or_create_cast (type, binop_sval->get_arg1 ());
/* TODO: we might want to also capture the constraint
when recording the diagnostic, or note that we're using
the upper bound. */
}
return NULL;
}
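/* Example (illustrative): for the kernel-style

n = min_t(unsigned long, len, sizeof (buf));

NUM_BYTES_SVAL is a cast of MIN_EXPR (len, sizeof (buf)); undoing the
cast and taking the constant second argument yields sizeof (buf) as a
usable constant upper bound on the copy size. */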
/* Attempt to get an upper bound for the size of a copy when simulating a
copy function.
NUM_BYTES_SVAL is the symbolic value for the size of the copy.
Use it if it's constant, otherwise try to simplify it. Failing
that, use the size of SRC_REG if constant.
Return a symbolic value for an upper limit on the number of bytes
copied, or NULL if no such value could be determined. */
const svalue *
region_model::maybe_get_copy_bounds (const region *src_reg,
const svalue *num_bytes_sval)
{
if (num_bytes_sval->maybe_get_constant ())
return num_bytes_sval;
if (const svalue *simplified
= maybe_simplify_upper_bound (num_bytes_sval, m_mgr))
num_bytes_sval = simplified;
if (num_bytes_sval->maybe_get_constant ())
return num_bytes_sval;
/* For now, try just guessing the size as the capacity of the
base region of the src.
This is a hack; we might get too large a value. */
const region *src_base_reg = src_reg->get_base_region ();
num_bytes_sval = get_capacity (src_base_reg);
if (num_bytes_sval->maybe_get_constant ())
return num_bytes_sval;
/* Non-constant: give up. */
return NULL;
}
/* Get any known_function for FNDECL, or NULL if there is none. */
const known_function *
region_model::get_known_function (tree fndecl) const
{
known_function_manager *known_fn_mgr = m_mgr->get_known_function_manager ();
return known_fn_mgr->get_by_fndecl (fndecl);
}
/* Update this model for the CALL stmt, using CTXT to report any
diagnostics - the first half.
Updates to the region_model that should be made *before* sm-states
are updated are done here; other updates to the region_model are done
in region_model::on_call_post.
Return true if the function call has unknown side effects (it wasn't
recognized and we don't have a body for it, or are unable to tell which
fndecl it is).
Write true to *OUT_TERMINATE_PATH if this execution path should be
terminated (e.g. the function call terminates the process). */
bool
region_model::on_call_pre (const gcall *call, region_model_context *ctxt,
bool *out_terminate_path)
{
call_details cd (call, this, ctxt);
bool unknown_side_effects = false;
/* Special-case for IFN_DEFERRED_INIT.
We want to report uninitialized variables with -fanalyzer (treating
-ftrivial-auto-var-init= as purely a mitigation feature).
Handle IFN_DEFERRED_INIT by treating it as no-op: don't touch the
lhs of the call, so that it is still uninitialized from the point of
view of the analyzer. */
if (gimple_call_internal_p (call)
&& gimple_call_internal_fn (call) == IFN_DEFERRED_INIT)
return false;
/* Get svalues for all of the arguments at the callsite, to ensure that we
complain about any uninitialized arguments. This might lead to
duplicates if any of the handling below also looks up the svalues,
but the deduplication code should deal with that. */
if (ctxt)
check_call_args (cd);
/* Some of the cases below update the lhs of the call based on the
return value, but not all. Provide a default value, which may
get overwritten below. */
if (tree lhs = gimple_call_lhs (call))
{
const region *lhs_region = get_lvalue (lhs, ctxt);
const svalue *sval = maybe_get_const_fn_result (cd);
if (!sval)
{
/* For the common case of functions without __attribute__((const)),
use a conjured value, and purge any prior state involving that
value (in case this is in a loop). */
sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (lhs), call,
lhs_region,
conjured_purge (this,
ctxt));
}
set_value (lhs_region, sval, ctxt);
}
if (gimple_call_internal_p (call))
{
switch (gimple_call_internal_fn (call))
{
default:
break;
case IFN_BUILTIN_EXPECT:
impl_call_builtin_expect (cd);
return false;
case IFN_UBSAN_BOUNDS:
return false;
case IFN_VA_ARG:
impl_call_va_arg (cd);
return false;
}
}
if (tree callee_fndecl = get_fndecl_for_call (call, ctxt))
{
/* The various impl_call_* member functions are implemented
in region-model-impl-calls.cc.
Having them split out into separate functions makes it easier
to put breakpoints on the handling of specific functions. */
int callee_fndecl_flags = flags_from_decl_or_type (callee_fndecl);
if (fndecl_built_in_p (callee_fndecl, BUILT_IN_NORMAL)
&& gimple_builtin_call_types_compatible_p (call, callee_fndecl))
switch (DECL_UNCHECKED_FUNCTION_CODE (callee_fndecl))
{
default:
if (!(callee_fndecl_flags & (ECF_CONST | ECF_PURE)))
unknown_side_effects = true;
break;
case BUILT_IN_ALLOCA:
case BUILT_IN_ALLOCA_WITH_ALIGN:
impl_call_alloca (cd);
return false;
case BUILT_IN_CALLOC:
impl_call_calloc (cd);
return false;
case BUILT_IN_EXPECT:
case BUILT_IN_EXPECT_WITH_PROBABILITY:
impl_call_builtin_expect (cd);
return false;
case BUILT_IN_FREE:
/* Handle in "on_call_post". */
break;
case BUILT_IN_MALLOC:
impl_call_malloc (cd);
return false;
case BUILT_IN_MEMCPY:
case BUILT_IN_MEMCPY_CHK:
impl_call_memcpy (cd);
return false;
case BUILT_IN_MEMSET:
case BUILT_IN_MEMSET_CHK:
impl_call_memset (cd);
return false;
case BUILT_IN_REALLOC:
return false;
case BUILT_IN_STRCHR:
/* Handle in "on_call_post". */
return false;
case BUILT_IN_STRCPY:
case BUILT_IN_STRCPY_CHK:
impl_call_strcpy (cd);
return false;
case BUILT_IN_STRLEN:
impl_call_strlen (cd);
return false;
case BUILT_IN_STACK_SAVE:
case BUILT_IN_STACK_RESTORE:
return false;
/* Stdio builtins. */
case BUILT_IN_FPRINTF:
case BUILT_IN_FPRINTF_UNLOCKED:
case BUILT_IN_PUTC:
case BUILT_IN_PUTC_UNLOCKED:
case BUILT_IN_FPUTC:
case BUILT_IN_FPUTC_UNLOCKED:
case BUILT_IN_FPUTS:
case BUILT_IN_FPUTS_UNLOCKED:
case BUILT_IN_FWRITE:
case BUILT_IN_FWRITE_UNLOCKED:
case BUILT_IN_PRINTF:
case BUILT_IN_PRINTF_UNLOCKED:
case BUILT_IN_PUTCHAR:
case BUILT_IN_PUTCHAR_UNLOCKED:
case BUILT_IN_PUTS:
case BUILT_IN_PUTS_UNLOCKED:
case BUILT_IN_VFPRINTF:
case BUILT_IN_VPRINTF:
/* These stdio builtins have external effects that are out
of scope for the analyzer: we only want to model the effects
on the return value. */
break;
case BUILT_IN_VA_START:
impl_call_va_start (cd);
return false;
case BUILT_IN_VA_COPY:
impl_call_va_copy (cd);
return false;
}
else if (is_named_call_p (callee_fndecl, "malloc", call, 1))
{
impl_call_malloc (cd);
return false;
}
else if (is_named_call_p (callee_fndecl, "calloc", call, 2))
{
impl_call_calloc (cd);
return false;
}
else if (is_named_call_p (callee_fndecl, "alloca", call, 1))
{
impl_call_alloca (cd);
return false;
}
else if (is_named_call_p (callee_fndecl, "realloc", call, 2))
{
impl_call_realloc (cd);
return false;
}
else if (is_named_call_p (callee_fndecl, "error"))
{
if (impl_call_error (cd, 3, out_terminate_path))
return false;
else
unknown_side_effects = true;
}
else if (is_named_call_p (callee_fndecl, "error_at_line"))
{
if (impl_call_error (cd, 5, out_terminate_path))
return false;
else
unknown_side_effects = true;
}
else if (is_named_call_p (callee_fndecl, "fgets", call, 3)
|| is_named_call_p (callee_fndecl, "fgets_unlocked", call, 3))
{
impl_call_fgets (cd);
return false;
}
else if (is_named_call_p (callee_fndecl, "fread", call, 4))
{
impl_call_fread (cd);
return false;
}
else if (is_named_call_p (callee_fndecl, "getchar", call, 0))
{
/* No side-effects (tracking stream state is out-of-scope
for the analyzer). */
}
else if (is_named_call_p (callee_fndecl, "memset", call, 3)
&& POINTER_TYPE_P (cd.get_arg_type (0)))
{
impl_call_memset (cd);
return false;
}
else if (is_named_call_p (callee_fndecl, "strchr", call, 2)
&& POINTER_TYPE_P (cd.get_arg_type (0)))
{
/* Handle in "on_call_post". */
return false;
}
else if (is_named_call_p (callee_fndecl, "strlen", call, 1)
&& POINTER_TYPE_P (cd.get_arg_type (0)))
{
impl_call_strlen (cd);
return false;
}
else if (const known_function *kf = get_known_function (callee_fndecl))
{
if (kf->matches_call_types_p (cd))
{
kf->impl_call_pre (cd);
return false;
}
}
else if (!fndecl_has_gimple_body_p (callee_fndecl)
&& (!(callee_fndecl_flags & (ECF_CONST | ECF_PURE)))
&& !fndecl_built_in_p (callee_fndecl))
unknown_side_effects = true;
}
else
unknown_side_effects = true;
return unknown_side_effects;
}
/* Update this model for the CALL stmt, using CTXT to report any
diagnostics - the second half.
Updates to the region_model that should be made *after* sm-states
are updated are done here; other updates to the region_model are done
in region_model::on_call_pre.
If UNKNOWN_SIDE_EFFECTS is true, also call handle_unrecognized_call
to purge state. */
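/* For instance (a sketch of the ordering, not a full list of affected
calls): a call to "free" is handled here rather than in on_call_pre,
so that the malloc state machine can transition the pointer's sm-state
to "freed" before this model updates the pointee's bindings. */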
void
region_model::on_call_post (const gcall *call,
bool unknown_side_effects,
region_model_context *ctxt)
{
if (tree callee_fndecl = get_fndecl_for_call (call, ctxt))
{
call_details cd (call, this, ctxt);
if (is_named_call_p (callee_fndecl, "free", call, 1))
{
impl_call_free (cd);
return;
}
else if (is_named_call_p (callee_fndecl, "strchr", call, 2)
&& POINTER_TYPE_P (cd.get_arg_type (0)))
{
impl_call_strchr (cd);
return;
}
else if (const known_function *kf = get_known_function (callee_fndecl))
{
if (kf->matches_call_types_p (cd))
{
kf->impl_call_post (cd);
return;
}
}
/* Was this fndecl referenced by
__attribute__((malloc(FOO)))? */
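/* E.g. (a sketch; "my_free" and "my_alloc" are hypothetical):
void my_free (void *p);
void *my_alloc (size_t sz) __attribute__((malloc (my_free)));
attaches the internal "*dealloc" attribute to "my_free", so that
calls to it are modeled as deallocations. */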
if (lookup_attribute ("*dealloc", DECL_ATTRIBUTES (callee_fndecl)))
{
impl_deallocation_call (cd);
return;
}
if (fndecl_built_in_p (callee_fndecl, BUILT_IN_NORMAL)
&& gimple_builtin_call_types_compatible_p (call, callee_fndecl))
switch (DECL_UNCHECKED_FUNCTION_CODE (callee_fndecl))
{
default:
break;
case BUILT_IN_REALLOC:
impl_call_realloc (cd);
return;
case BUILT_IN_STRCHR:
impl_call_strchr (cd);
return;
case BUILT_IN_VA_END:
impl_call_va_end (cd);
return;
}
}
if (unknown_side_effects)
handle_unrecognized_call (call, ctxt);
}
/* Purge state involving SVAL from this region_model, using CTXT
(if non-NULL) to purge other state in a program_state.
For example, if we're at the def-stmt of an SSA name, then we need to
purge any state for svalues that involve that SSA name. This avoids
false positives in loops, since a symbolic value referring to the
SSA name will be referring to the previous value of that SSA name.
For example, in:
while ((e = hashmap_iter_next(&iter))) {
struct oid2strbuf *e_strbuf = (struct oid2strbuf *)e;
free (e_strbuf->value);
}
at the def-stmt of e_8:
e_8 = hashmap_iter_next (&iter);
we should purge the "freed" state of:
INIT_VAL(CAST_REG(‘struct oid2strbuf’, (*INIT_VAL(e_8))).value)
which is the "e_strbuf->value" value from the previous iteration,
or we will erroneously report a double-free - the "e_8" within it
refers to the previous value. */
void
region_model::purge_state_involving (const svalue *sval,
region_model_context *ctxt)
{
if (!sval->can_have_associated_state_p ())
return;
m_store.purge_state_involving (sval, m_mgr);
m_constraints->purge_state_involving (sval);
m_dynamic_extents.purge_state_involving (sval);
if (ctxt)
ctxt->purge_state_involving (sval);
}
/* A pending_note subclass for adding a note about an
__attribute__((access, ...)) to a diagnostic. */
class reason_attr_access : public pending_note_subclass<reason_attr_access>
{
public:
reason_attr_access (tree callee_fndecl, const attr_access &access)
: m_callee_fndecl (callee_fndecl),
m_ptr_argno (access.ptrarg),
m_access_str (TREE_STRING_POINTER (access.to_external_string ()))
{
}
const char *get_kind () const final override { return "reason_attr_access"; }
void emit () const final override
{
inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
"parameter %i of %qD marked with attribute %qs",
m_ptr_argno + 1, m_callee_fndecl, m_access_str);
}
bool operator== (const reason_attr_access &other) const
{
return (m_callee_fndecl == other.m_callee_fndecl
&& m_ptr_argno == other.m_ptr_argno
&& !strcmp (m_access_str, other.m_access_str));
}
private:
tree m_callee_fndecl;
unsigned m_ptr_argno;
const char *m_access_str;
};
/* Check CALL, a call to external function CALLEE_FNDECL, based on
any __attribute__ ((access, ...)) on the latter, complaining to
CTXT about any issues.
Currently we merely call check_region_for_write on any regions
pointed to by arguments marked with a "write_only" or "read_write"
attribute. */
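/* E.g. (a hypothetical sketch):
void fill (char *buf, size_t n)
__attribute__ ((access (write_only, 1, 2)));
leads us to check the region pointed to by the first argument of
any call to "fill" for writability. */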
void
region_model::
check_external_function_for_access_attr (const gcall *call,
tree callee_fndecl,
region_model_context *ctxt) const
{
gcc_assert (call);
gcc_assert (callee_fndecl);
gcc_assert (ctxt);
tree fntype = TREE_TYPE (callee_fndecl);
if (!fntype)
return;
if (!TYPE_ATTRIBUTES (fntype))
return;
/* Initialize a map of attribute access specifications for arguments
to the function call. */
rdwr_map rdwr_idx;
init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));
unsigned argno = 0;
for (tree iter = TYPE_ARG_TYPES (fntype); iter;
iter = TREE_CHAIN (iter), ++argno)
{
const attr_access* access = rdwr_idx.get (argno);
if (!access)
continue;
/* Ignore any duplicate entry in the map for the size argument. */
if (access->ptrarg != argno)
continue;
if (access->mode == access_write_only
|| access->mode == access_read_write)
{
/* Subclass of decorated_region_model_context that
adds a note about the attr access to any saved diagnostics. */
class annotating_ctxt : public note_adding_context
{
public:
annotating_ctxt (tree callee_fndecl,
const attr_access &access,
region_model_context *ctxt)
: note_adding_context (ctxt),
m_callee_fndecl (callee_fndecl),
m_access (access)
{
}
std::unique_ptr<pending_note> make_note () final override
{
return make_unique<reason_attr_access>
(m_callee_fndecl, m_access);
}
private:
tree m_callee_fndecl;
const attr_access &m_access;
};
/* Use this ctxt below so that any diagnostics get the
note added to them. */
annotating_ctxt my_ctxt (callee_fndecl, *access, ctxt);
tree ptr_tree = gimple_call_arg (call, access->ptrarg);
const svalue *ptr_sval = get_rvalue (ptr_tree, &my_ctxt);
const region *reg = deref_rvalue (ptr_sval, ptr_tree, &my_ctxt);
check_region_for_write (reg, &my_ctxt);
/* We don't use the size arg for now. */
}
}
}
/* Handle a call CALL to a function with unknown behavior.
Traverse the regions in this model, determining what regions are
reachable from pointer arguments to CALL and from global variables,
recursively.
Set all reachable regions to new unknown values and purge sm-state
from their values, and from values that point to them. */
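/* E.g. (a hypothetical sketch):
int local = 42;
unknown_fn (&local);
makes "local" reachable, so its cluster is marked as escaped and
its binding is replaced with a new unknown value. */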
void
region_model::handle_unrecognized_call (const gcall *call,
region_model_context *ctxt)
{
tree fndecl = get_fndecl_for_call (call, ctxt);
if (fndecl && ctxt)
check_external_function_for_access_attr (call, fndecl, ctxt);
reachable_regions reachable_regs (this);
/* Determine the reachable regions and their mutability. */
{
/* Add globals and regions that already escaped in previous
unknown calls. */
m_store.for_each_cluster (reachable_regions::init_cluster_cb,
&reachable_regs);
/* Params that are pointers. */
tree iter_param_types = NULL_TREE;
if (fndecl)
iter_param_types = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
for (unsigned arg_idx = 0; arg_idx < gimple_call_num_args (call); arg_idx++)
{
/* Track expected param type, where available. */
tree param_type = NULL_TREE;
if (iter_param_types)
{
param_type = TREE_VALUE (iter_param_types);
gcc_assert (param_type);
iter_param_types = TREE_CHAIN (iter_param_types);
}
tree parm = gimple_call_arg (call, arg_idx);
const svalue *parm_sval = get_rvalue (parm, ctxt);
reachable_regs.handle_parm (parm_sval, param_type);
}
}
uncertainty_t *uncertainty = ctxt ? ctxt->get_uncertainty () : NULL;
/* Purge sm-state for the svalues that were reachable,
both in non-mutable and mutable form. */
for (svalue_set::iterator iter
= reachable_regs.begin_reachable_svals ();
iter != reachable_regs.end_reachable_svals (); ++iter)
{
const svalue *sval = (*iter);
if (ctxt)
ctxt->on_unknown_change (sval, false);
}
for (svalue_set::iterator iter
= reachable_regs.begin_mutable_svals ();
iter != reachable_regs.end_mutable_svals (); ++iter)
{
const svalue *sval = (*iter);
if (ctxt)
ctxt->on_unknown_change (sval, true);
if (uncertainty)
uncertainty->on_mutable_sval_at_unknown_call (sval);
}
/* Mark any clusters that have escaped. */
reachable_regs.mark_escaped_clusters (ctxt);
/* Update bindings for all clusters that have escaped, whether above,
or previously. */
m_store.on_unknown_fncall (call, m_mgr->get_store_manager (),
conjured_purge (this, ctxt));
/* Purge dynamic extents from any regions that have escaped mutably:
realloc could have been called on them. */
for (hash_set<const region *>::iterator
iter = reachable_regs.begin_mutable_base_regs ();
iter != reachable_regs.end_mutable_base_regs ();
++iter)
{
const region *base_reg = (*iter);
unset_dynamic_extents (base_reg);
}
}
/* Traverse the regions in this model, determining what regions are
reachable from the store and populating *OUT.
If EXTRA_SVAL is non-NULL, treat it as an additional "root"
for reachability (for handling return values from functions when
analyzing the return from the outermost function on the stack).
If UNCERTAINTY is non-NULL, treat any svalues that were recorded
within it as being maybe-bound as additional "roots" for reachability.
Find svalues that haven't leaked. */
void
region_model::get_reachable_svalues (svalue_set *out,
const svalue *extra_sval,
const uncertainty_t *uncertainty)
{
reachable_regions reachable_regs (this);
/* Add globals and regions that already escaped in previous
unknown calls. */
m_store.for_each_cluster (reachable_regions::init_cluster_cb,
&reachable_regs);
if (extra_sval)
reachable_regs.handle_sval (extra_sval);
if (uncertainty)
for (uncertainty_t::iterator iter
= uncertainty->begin_maybe_bound_svals ();
iter != uncertainty->end_maybe_bound_svals (); ++iter)
reachable_regs.handle_sval (*iter);
/* Get regions for locals that have explicitly bound values. */
for (store::cluster_map_t::iterator iter = m_store.begin ();
iter != m_store.end (); ++iter)
{
const region *base_reg = (*iter).first;
if (const region *parent = base_reg->get_parent_region ())
if (parent->get_kind () == RK_FRAME)
reachable_regs.add (base_reg, false);
}
/* Populate *OUT based on the values that were reachable. */
for (svalue_set::iterator iter
= reachable_regs.begin_reachable_svals ();
iter != reachable_regs.end_reachable_svals (); ++iter)
out->add (*iter);
}
/* Update this model for the RETURN_STMT, using CTXT to report any
diagnostics. */
void
region_model::on_return (const greturn *return_stmt, region_model_context *ctxt)
{
tree callee = get_current_function ()->decl;
tree lhs = DECL_RESULT (callee);
tree rhs = gimple_return_retval (return_stmt);
if (lhs && rhs)
{
const svalue *sval = get_rvalue (rhs, ctxt);
const region *ret_reg = get_lvalue (lhs, ctxt);
set_value (ret_reg, sval, ctxt);
}
}
/* Update this model for a call and return of setjmp/sigsetjmp at CALL within
ENODE, using CTXT to report any diagnostics.
This is for the initial direct invocation of setjmp/sigsetjmp (which returns
0), as opposed to any second return due to longjmp/siglongjmp. */
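/* E.g. (a hypothetical sketch):
jmp_buf env;
if (setjmp (env) == 0)
...
stores a setjmp_svalue recording this enode into "env", and sets the
LHS of the call to 0. */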
void
region_model::on_setjmp (const gcall *call, const exploded_node *enode,
region_model_context *ctxt)
{
const svalue *buf_ptr = get_rvalue (gimple_call_arg (call, 0), ctxt);
const region *buf_reg = deref_rvalue (buf_ptr, gimple_call_arg (call, 0),
ctxt);
/* Create a setjmp_svalue for this call and store it in BUF_REG's
region. */
if (buf_reg)
{
setjmp_record r (enode, call);
const svalue *sval
= m_mgr->get_or_create_setjmp_svalue (r, buf_reg->get_type ());
set_value (buf_reg, sval, ctxt);
}
/* Direct calls to setjmp return 0. */
if (tree lhs = gimple_call_lhs (call))
{
const svalue *new_sval
= m_mgr->get_or_create_int_cst (TREE_TYPE (lhs), 0);
const region *lhs_reg = get_lvalue (lhs, ctxt);
set_value (lhs_reg, new_sval, ctxt);
}
}
/* Update this region_model for rewinding from a "longjmp" at LONGJMP_CALL
to a "setjmp" at SETJMP_CALL where the final stack depth should be
SETJMP_STACK_DEPTH. Pop any stack frames. Leak detection is *not*
done, and should be done by the caller. */
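/* E.g. (a hypothetical sketch): rewinding from "longjmp (env, 0)" in
a callee back to "setjmp (env)" in "main" pops the intervening
frames, and assigns 1 (not 0) to the LHS of the "setjmp", per the C
standard's rule that setjmp never returns 0 on the second return. */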
void
region_model::on_longjmp (const gcall *longjmp_call, const gcall *setjmp_call,
int setjmp_stack_depth, region_model_context *ctxt)
{
/* Evaluate the val, using the frame of the "longjmp". */
tree fake_retval = gimple_call_arg (longjmp_call, 1);
const svalue *fake_retval_sval = get_rvalue (fake_retval, ctxt);
/* Pop any frames until we reach the stack depth of the function where
setjmp was called. */
gcc_assert (get_stack_depth () >= setjmp_stack_depth);
while (get_stack_depth () > setjmp_stack_depth)
pop_frame (NULL, NULL, ctxt);
gcc_assert (get_stack_depth () == setjmp_stack_depth);
/* Assign to LHS of "setjmp" in new_state. */
if (tree lhs = gimple_call_lhs (setjmp_call))
{
/* Passing 0 as the val to longjmp leads to setjmp returning 1. */
const svalue *zero_sval
= m_mgr->get_or_create_int_cst (TREE_TYPE (fake_retval), 0);
tristate eq_zero = eval_condition (fake_retval_sval, EQ_EXPR, zero_sval);
/* If we have 0, use 1. */
if (eq_zero.is_true ())
{
const svalue *one_sval
= m_mgr->get_or_create_int_cst (TREE_TYPE (fake_retval), 1);
fake_retval_sval = one_sval;
}
else
{
/* Otherwise note that the value is nonzero. */
m_constraints->add_constraint (fake_retval_sval, NE_EXPR, zero_sval);
}
/* Decorate the return value from setjmp as being unmergeable,
so that we don't attempt to merge states with it as zero
with states in which it's nonzero, leading to a clean distinction
in the exploded_graph between the first return and the second
return. */
fake_retval_sval = m_mgr->get_or_create_unmergeable (fake_retval_sval);
const region *lhs_reg = get_lvalue (lhs, ctxt);
set_value (lhs_reg, fake_retval_sval, ctxt);
}
}
/* Update this region_model for a phi stmt of the form
LHS = PHI <...RHS...>.
where RHS is for the appropriate edge.
Get state from OLD_STATE so that all of the phi stmts for a basic block
are effectively handled simultaneously. */
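/* E.g. (a hypothetical sketch): given the phis
x_5 = PHI <x_3(2), x_4(3)>
y_6 = PHI <x_5(2), y_4(3)>
for the edge from basic block 2, "y_6" must be set from the value of
"x_5" in OLD_STATE, not from the value just assigned to "x_5" by the
phi above it. */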
void
region_model::handle_phi (const gphi *phi,
tree lhs, tree rhs,
const region_model &old_state,
region_model_context *ctxt)
{
/* For now, don't bother tracking the .MEM SSA names. */
if (tree var = SSA_NAME_VAR (lhs))
if (TREE_CODE (var) == VAR_DECL)
if (VAR_DECL_IS_VIRTUAL_OPERAND (var))
return;
const svalue *src_sval = old_state.get_rvalue (rhs, ctxt);
const region *dst_reg = old_state.get_lvalue (lhs, ctxt);
set_value (dst_reg, src_sval, ctxt);
if (ctxt)
ctxt->on_phi (phi, rhs);
}
/* Implementation of region_model::get_lvalue; the latter adds type-checking.
Get the id of the region for PV within this region_model,
emitting any diagnostics to CTXT. */
const region *
region_model::get_lvalue_1 (path_var pv, region_model_context *ctxt) const
{
tree expr = pv.m_tree;
gcc_assert (expr);
switch (TREE_CODE (expr))
{
default:
return m_mgr->get_region_for_unexpected_tree_code (ctxt, expr,
dump_location_t ());
case ARRAY_REF:
{
tree array = TREE_OPERAND (expr, 0);
tree index = TREE_OPERAND (expr, 1);
const region *array_reg = get_lvalue (array, ctxt);
const svalue *index_sval = get_rvalue (index, ctxt);
return m_mgr->get_element_region (array_reg,
TREE_TYPE (TREE_TYPE (array)),
index_sval);
}
break;
case BIT_FIELD_REF:
{
tree inner_expr = TREE_OPERAND (expr, 0);
const region *inner_reg = get_lvalue (inner_expr, ctxt);
tree num_bits = TREE_OPERAND (expr, 1);
tree first_bit_offset = TREE_OPERAND (expr, 2);
gcc_assert (TREE_CODE (num_bits) == INTEGER_CST);
gcc_assert (TREE_CODE (first_bit_offset) == INTEGER_CST);
bit_range bits (TREE_INT_CST_LOW (first_bit_offset),
TREE_INT_CST_LOW (num_bits));
return m_mgr->get_bit_range (inner_reg, TREE_TYPE (expr), bits);
}
break;
case MEM_REF:
{
tree ptr = TREE_OPERAND (expr, 0);
tree offset = TREE_OPERAND (expr, 1);
const svalue *ptr_sval = get_rvalue (ptr, ctxt);
const svalue *offset_sval = get_rvalue (offset, ctxt);
const region *star_ptr = deref_rvalue (ptr_sval, ptr, ctxt);
return m_mgr->get_offset_region (star_ptr,
TREE_TYPE (expr),
offset_sval);
}
break;
case FUNCTION_DECL:
return m_mgr->get_region_for_fndecl (expr);
case LABEL_DECL:
return m_mgr->get_region_for_label (expr);
case VAR_DECL:
/* Handle globals. */
if (is_global_var (expr))
return m_mgr->get_region_for_global (expr);
/* Fall through. */
case SSA_NAME:
case PARM_DECL:
case RESULT_DECL:
{
gcc_assert (TREE_CODE (expr) == SSA_NAME
|| TREE_CODE (expr) == PARM_DECL
|| TREE_CODE (expr) == VAR_DECL
|| TREE_CODE (expr) == RESULT_DECL);
int stack_index = pv.m_stack_depth;
const frame_region *frame = get_frame_at_index (stack_index);
gcc_assert (frame);
return frame->get_region_for_local (m_mgr, expr, ctxt);
}
case COMPONENT_REF:
{
/* obj.field */
tree obj = TREE_OPERAND (expr, 0);
tree field = TREE_OPERAND (expr, 1);
const region *obj_reg = get_lvalue (obj, ctxt);
return m_mgr->get_field_region (obj_reg, field);
}
break;
case STRING_CST:
return m_mgr->get_region_for_string (expr);
}
}
/* Assert that SRC_TYPE can be converted to DST_TYPE as a no-op. */
static void
assert_compat_types (tree src_type, tree dst_type)
{
if (src_type && dst_type && !VOID_TYPE_P (dst_type))
{
#if CHECKING_P
if (!(useless_type_conversion_p (src_type, dst_type)))
internal_error ("incompatible types: %qT and %qT", src_type, dst_type);
#endif
}
}
/* Return true if SRC_TYPE can be converted to DST_TYPE as a no-op. */
bool
compat_types_p (tree src_type, tree dst_type)
{
if (src_type && dst_type && !VOID_TYPE_P (dst_type))
if (!(useless_type_conversion_p (src_type, dst_type)))
return false;
return true;
}
/* Get the region for PV within this region_model,
emitting any diagnostics to CTXT. */
const region *
region_model::get_lvalue (path_var pv, region_model_context *ctxt) const
{
if (pv.m_tree == NULL_TREE)
return NULL;
const region *result_reg = get_lvalue_1 (pv, ctxt);
assert_compat_types (result_reg->get_type (), TREE_TYPE (pv.m_tree));
return result_reg;
}
/* Get the region for EXPR within this region_model (assuming the most
recent stack frame if it's a local). */
const region *
region_model::get_lvalue (tree expr, region_model_context *ctxt) const
{
return get_lvalue (path_var (expr, get_stack_depth () - 1), ctxt);
}
/* Implementation of region_model::get_rvalue; the latter adds type-checking.
Get the value of PV within this region_model,
emitting any diagnostics to CTXT. */
const svalue *
region_model::get_rvalue_1 (path_var pv, region_model_context *ctxt) const
{
gcc_assert (pv.m_tree);
switch (TREE_CODE (pv.m_tree))
{
default:
return m_mgr->get_or_create_unknown_svalue (TREE_TYPE (pv.m_tree));
case ADDR_EXPR:
{
/* "&EXPR". */
tree expr = pv.m_tree;
tree op0 = TREE_OPERAND (expr, 0);
const region *expr_reg = get_lvalue (op0, ctxt);
return m_mgr->get_ptr_svalue (TREE_TYPE (expr), expr_reg);
}
break;
case BIT_FIELD_REF:
{
tree expr = pv.m_tree;
tree op0 = TREE_OPERAND (expr, 0);
const region *reg = get_lvalue (op0, ctxt);
tree num_bits = TREE_OPERAND (expr, 1);
tree first_bit_offset = TREE_OPERAND (expr, 2);
gcc_assert (TREE_CODE (num_bits) == INTEGER_CST);
gcc_assert (TREE_CODE (first_bit_offset) == INTEGER_CST);
bit_range bits (TREE_INT_CST_LOW (first_bit_offset),
TREE_INT_CST_LOW (num_bits));
return get_rvalue_for_bits (TREE_TYPE (expr), reg, bits, ctxt);
}
case SSA_NAME:
case VAR_DECL:
case PARM_DECL:
case RESULT_DECL:
case ARRAY_REF:
{
const region *reg = get_lvalue (pv, ctxt);
return get_store_value (reg, ctxt);
}
case REALPART_EXPR:
case IMAGPART_EXPR:
case VIEW_CONVERT_EXPR:
{
tree expr = pv.m_tree;
tree arg = TREE_OPERAND (expr, 0);
const svalue *arg_sval = get_rvalue (arg, ctxt);
const svalue *sval_unaryop
= m_mgr->get_or_create_unaryop (TREE_TYPE (expr), TREE_CODE (expr),
arg_sval);
return sval_unaryop;
}
case INTEGER_CST:
case REAL_CST:
case COMPLEX_CST:
case VECTOR_CST:
case STRING_CST:
return m_mgr->get_or_create_constant_svalue (pv.m_tree);
case POINTER_PLUS_EXPR:
{
tree expr = pv.m_tree;
tree ptr = TREE_OPERAND (expr, 0);
tree offset = TREE_OPERAND (expr, 1);
const svalue *ptr_sval = get_rvalue (ptr, ctxt);
const svalue *offset_sval = get_rvalue (offset, ctxt);
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (expr), POINTER_PLUS_EXPR,
ptr_sval, offset_sval);
return sval_binop;
}
/* Binary ops. */
case PLUS_EXPR:
case MULT_EXPR:
{
tree expr = pv.m_tree;
tree arg0 = TREE_OPERAND (expr, 0);
tree arg1 = TREE_OPERAND (expr, 1);
const svalue *arg0_sval = get_rvalue (arg0, ctxt);
const svalue *arg1_sval = get_rvalue (arg1, ctxt);
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (expr), TREE_CODE (expr),
arg0_sval, arg1_sval);
return sval_binop;
}
case COMPONENT_REF:
case MEM_REF:
{
const region *ref_reg = get_lvalue (pv, ctxt);
return get_store_value (ref_reg, ctxt);
}
case OBJ_TYPE_REF:
{
tree expr = OBJ_TYPE_REF_EXPR (pv.m_tree);
return get_rvalue (expr, ctxt);
}
}
}
/* Get the value of PV within this region_model,
emitting any diagnostics to CTXT. */
const svalue *
region_model::get_rvalue (path_var pv, region_model_context *ctxt) const
{
if (pv.m_tree == NULL_TREE)
return NULL;
const svalue *result_sval = get_rvalue_1 (pv, ctxt);
assert_compat_types (result_sval->get_type (), TREE_TYPE (pv.m_tree));
result_sval = check_for_poison (result_sval, pv.m_tree, ctxt);
return result_sval;
}
/* Get the value of EXPR within this region_model (assuming the most
recent stack frame if it's a local). */
const svalue *
region_model::get_rvalue (tree expr, region_model_context *ctxt) const
{
return get_rvalue (path_var (expr, get_stack_depth () - 1), ctxt);
}
/* Return true if this model is on a path with "main" as the entrypoint
(as opposed to one in which we're merely analyzing a subset of the
path through the code). */
bool
region_model::called_from_main_p () const
{
if (!m_current_frame)
return false;
/* Determine if the oldest stack frame in this model is for "main". */
const frame_region *frame0 = get_frame_at_index (0);
gcc_assert (frame0);
return id_equal (DECL_NAME (frame0->get_function ()->decl), "main");
}
/* Subroutine of region_model::get_store_value for when REG is (or is within)
a global variable that hasn't been touched since the start of this path
(or was implicitly touched due to a call to an unknown function). */
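/* E.g. (a hypothetical sketch): given
static int g = 42;
the initial value of "g" is 42 on paths starting at "main" (and for
read-only decls), but merely the symbolic INIT_VAL(g) when analyzing
an arbitrary subset of paths through the code. */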
const svalue *
region_model::get_initial_value_for_global (const region *reg) const
{
/* Get the decl that REG is for (or is within). */
const decl_region *base_reg
= reg->get_base_region ()->dyn_cast_decl_region ();
gcc_assert (base_reg);
tree decl = base_reg->get_decl ();
/* Special-case: to avoid having to explicitly update all previously
untracked globals when calling an unknown fn, they implicitly have
an unknown value if an unknown call has occurred, unless the decl
is read-only, or is static to this TU and hasn't escaped. Globals
that have escaped are explicitly tracked, so we shouldn't hit this
case for them. */
if (m_store.called_unknown_fn_p ()
&& TREE_PUBLIC (decl)
&& !TREE_READONLY (decl))
return m_mgr->get_or_create_unknown_svalue (reg->get_type ());
/* If we are on a path from the entrypoint of "main" (or the decl is
read-only) and we have a global decl defined in this TU that hasn't
been touched yet, then the initial value of REG can be taken from
the initialization value of the decl. */
if (called_from_main_p () || TREE_READONLY (decl))
{
/* Attempt to get the initializer value for base_reg. */
if (const svalue *base_reg_init
= base_reg->get_svalue_for_initializer (m_mgr))
{
if (reg == base_reg)
return base_reg_init;
else
{
/* Get the value for REG within base_reg_init. */
binding_cluster c (base_reg);
c.bind (m_mgr->get_store_manager (), base_reg, base_reg_init);
const svalue *sval
= c.get_any_binding (m_mgr->get_store_manager (), reg);
if (sval)
{
if (reg->get_type ())
sval = m_mgr->get_or_create_cast (reg->get_type (),
sval);
return sval;
}
}
}
}
/* Otherwise, return INIT_VAL(REG). */
return m_mgr->get_or_create_initial_value (reg);
}
/* Get a value for REG, looking it up in the store, or otherwise falling
back to "initial" or "unknown" values.
Use CTXT to report any warnings associated with reading from REG. */
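/* E.g. (a sketch of the STRING_CST special cases below): reading
"foo"[1] at a constant index yields the char 'o', rather than a
symbolic value. */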
const svalue *
region_model::get_store_value (const region *reg,
region_model_context *ctxt) const
{
check_region_for_read (reg, ctxt);
/* Special-case: handle var_decls in the constant pool. */
if (const decl_region *decl_reg = reg->dyn_cast_decl_region ())
if (const svalue *sval = decl_reg->maybe_get_constant_value (m_mgr))
return sval;
const svalue *sval
= m_store.get_any_binding (m_mgr->get_store_manager (), reg);
if (sval)
{
if (reg->get_type ())
sval = m_mgr->get_or_create_cast (reg->get_type (), sval);
return sval;
}
/* Special-case: read at a constant index within a STRING_CST. */
if (const offset_region *offset_reg = reg->dyn_cast_offset_region ())
if (tree byte_offset_cst
= offset_reg->get_byte_offset ()->maybe_get_constant ())
if (const string_region *str_reg
= reg->get_parent_region ()->dyn_cast_string_region ())
{
tree string_cst = str_reg->get_string_cst ();
if (const svalue *char_sval
= m_mgr->maybe_get_char_from_string_cst (string_cst,
byte_offset_cst))
return m_mgr->get_or_create_cast (reg->get_type (), char_sval);
}
/* Special-case: read the initial char of a STRING_CST. */
if (const cast_region *cast_reg = reg->dyn_cast_cast_region ())
if (const string_region *str_reg
= cast_reg->get_original_region ()->dyn_cast_string_region ())
{
tree string_cst = str_reg->get_string_cst ();
tree byte_offset_cst = build_int_cst (integer_type_node, 0);
if (const svalue *char_sval
= m_mgr->maybe_get_char_from_string_cst (string_cst,
byte_offset_cst))
return m_mgr->get_or_create_cast (reg->get_type (), char_sval);
}
/* Otherwise we implicitly have the initial value of the region
(if the cluster had been touched, binding_cluster::get_any_binding
would have returned UNKNOWN, and we would already have returned
that above). */
/* Handle globals. */
if (reg->get_base_region ()->get_parent_region ()->get_kind ()
== RK_GLOBALS)
return get_initial_value_for_global (reg);
return m_mgr->get_or_create_initial_value (reg);
}
/* Return false if REG does not exist, true if it may do.
This is for detecting regions within the stack that don't exist anymore
after frames are popped. */
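/* E.g. (a hypothetical sketch):
int *p;
void callee (void) { int i; p = &i; }
once "callee"'s frame has been popped, the region that "p" points to
is no longer within a live frame, and so no longer exists. */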
bool
region_model::region_exists_p (const region *reg) const
{
/* If within a stack frame, check that the stack frame is live. */
if (const frame_region *enclosing_frame = reg->maybe_get_frame_region ())
{
/* Check that the current frame is the enclosing frame, or is called
by it. */
for (const frame_region *iter_frame = get_current_frame (); iter_frame;
iter_frame = iter_frame->get_calling_frame ())
if (iter_frame == enclosing_frame)
return true;
return false;
}
return true;
}
/* Get a region for referencing PTR_SVAL, creating a region if need be, and
potentially generating warnings via CTXT.
PTR_SVAL must be of pointer type.
PTR_TREE if non-NULL can be used when emitting diagnostics. */
const region *
region_model::deref_rvalue (const svalue *ptr_sval, tree ptr_tree,
region_model_context *ctxt) const
{
gcc_assert (ptr_sval);
gcc_assert (POINTER_TYPE_P (ptr_sval->get_type ()));
/* If we're dereferencing PTR_SVAL, assume that it is non-NULL; add this
as a constraint. This suppresses false positives from
-Wanalyzer-null-dereference for the case where we later have an
if (PTR_SVAL) that would occur if we considered the false branch
and transitioned the malloc state machine from start->null. */
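/* E.g. (a hypothetical sketch):
void test (char *p)
{
char c = *p;   <-- implies p != NULL from here on
if (p)         <-- so the false branch is not explored
...
}
without the constraint, following the false branch would transition
"p" from "start" to "null" and could lead to a false
-Wanalyzer-null-dereference report. */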
tree null_ptr_cst = build_int_cst (ptr_sval->get_type (), 0);
const svalue *null_ptr = m_mgr->get_or_create_constant_svalue (null_ptr_cst);
m_constraints->add_constraint (ptr_sval, NE_EXPR, null_ptr);
switch (ptr_sval->get_kind ())
{
default:
break;
case SK_REGION:
{
const region_svalue *region_sval
= as_a <const region_svalue *> (ptr_sval);
return region_sval->get_pointee ();
}
case SK_BINOP:
{
const binop_svalue *binop_sval
= as_a <const binop_svalue