/* Classes for modeling the state of memory.
Copyright (C) 2019-2023 Free Software Foundation, Inc.
Contributed by David Malcolm <dmalcolm@redhat.com>.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#define INCLUDE_MEMORY
#define INCLUDE_ALGORITHM
#include "system.h"
#include "coretypes.h"
#include "make-unique.h"
#include "tree.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "diagnostic-core.h"
#include "graphviz.h"
#include "options.h"
#include "cgraph.h"
#include "tree-dfa.h"
#include "stringpool.h"
#include "convert.h"
#include "target.h"
#include "fold-const.h"
#include "tree-pretty-print.h"
#include "diagnostic-color.h"
#include "bitmap.h"
#include "selftest.h"
#include "analyzer/analyzer.h"
#include "analyzer/analyzer-logging.h"
#include "ordered-hash-map.h"
#include "options.h"
#include "cgraph.h"
#include "cfg.h"
#include "analyzer/supergraph.h"
#include "sbitmap.h"
#include "analyzer/call-string.h"
#include "analyzer/program-point.h"
#include "analyzer/store.h"
#include "analyzer/region-model.h"
#include "analyzer/constraint-manager.h"
#include "diagnostic-event-id.h"
#include "analyzer/sm.h"
#include "diagnostic-event-id.h"
#include "analyzer/sm.h"
#include "analyzer/pending-diagnostic.h"
#include "analyzer/region-model-reachability.h"
#include "analyzer/analyzer-selftests.h"
#include "analyzer/program-state.h"
#include "analyzer/call-summary.h"
#include "stor-layout.h"
#include "attribs.h"
#include "tree-object-size.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-ssa-operands.h"
#include "ssa-iterators.h"
#include "calls.h"
#include "is-a.h"
#include "gcc-rich-location.h"
#include "analyzer/checker-event.h"
#include "analyzer/checker-path.h"
#include "analyzer/feasible-graph.h"
#include "analyzer/record-layout.h"
#include "diagnostic-format-sarif.h"
#if ENABLE_ANALYZER
namespace ana {
auto_vec<pop_frame_callback> region_model::pop_frame_callbacks;
/* Dump T to PP in language-independent form, for debugging/logging/dumping
purposes. */
void
dump_tree (pretty_printer *pp, tree t)
{
dump_generic_node (pp, t, 0, TDF_SLIM, 0);
}
/* Dump T to PP in language-independent form in quotes, for
debugging/logging/dumping purposes. */
void
dump_quoted_tree (pretty_printer *pp, tree t)
{
pp_begin_quote (pp, pp_show_color (pp));
dump_tree (pp, t);
pp_end_quote (pp, pp_show_color (pp));
}
/* Equivalent to pp_printf (pp, "%qT", t), to avoid nesting pp_printf
calls within other pp_printf calls.
default_tree_printer handles 'T' and some other codes by calling
dump_generic_node (pp, t, 0, TDF_SLIM, 0);
dump_generic_node calls pp_printf in various places, leading to
garbled output.
Ideally pp_printf could be made to be reentrant, but in the meantime
this function provides a workaround. */
void
print_quoted_type (pretty_printer *pp, tree t)
{
pp_begin_quote (pp, pp_show_color (pp));
dump_generic_node (pp, t, 0, TDF_SLIM, 0);
pp_end_quote (pp, pp_show_color (pp));
}
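/* For example, rather than the reentrancy-prone
     pp_printf (pp, "invalid use of %qT", t);
   a caller can write (an illustrative usage of the above):
     pp_string (pp, "invalid use of ");
     print_quoted_type (pp, t);
   keeping dump_generic_node's internal pp_printf calls out of an
   enclosing pp_printf.  */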
/* class region_to_value_map. */
/* Assignment operator for region_to_value_map. */
region_to_value_map &
region_to_value_map::operator= (const region_to_value_map &other)
{
m_hash_map.empty ();
for (auto iter : other.m_hash_map)
{
const region *reg = iter.first;
const svalue *sval = iter.second;
m_hash_map.put (reg, sval);
}
return *this;
}
/* Equality operator for region_to_value_map. */
bool
region_to_value_map::operator== (const region_to_value_map &other) const
{
if (m_hash_map.elements () != other.m_hash_map.elements ())
return false;
for (auto iter : *this)
{
const region *reg = iter.first;
const svalue *sval = iter.second;
const svalue * const *other_slot = other.get (reg);
if (other_slot == NULL)
return false;
if (sval != *other_slot)
return false;
}
return true;
}
/* Dump this object to PP. */
void
region_to_value_map::dump_to_pp (pretty_printer *pp, bool simple,
bool multiline) const
{
auto_vec<const region *> regs;
for (iterator iter = begin (); iter != end (); ++iter)
regs.safe_push ((*iter).first);
regs.qsort (region::cmp_ptr_ptr);
if (multiline)
pp_newline (pp);
else
pp_string (pp, " {");
unsigned i;
const region *reg;
FOR_EACH_VEC_ELT (regs, i, reg)
{
if (multiline)
pp_string (pp, " ");
else if (i > 0)
pp_string (pp, ", ");
reg->dump_to_pp (pp, simple);
pp_string (pp, ": ");
const svalue *sval = *get (reg);
sval->dump_to_pp (pp, true);
if (multiline)
pp_newline (pp);
}
if (!multiline)
pp_string (pp, "}");
}
/* Dump this object to stderr. */
DEBUG_FUNCTION void
region_to_value_map::dump (bool simple) const
{
pretty_printer pp;
pp_format_decoder (&pp) = default_tree_printer;
pp_show_color (&pp) = pp_show_color (global_dc->printer);
pp.buffer->stream = stderr;
dump_to_pp (&pp, simple, true);
pp_newline (&pp);
pp_flush (&pp);
}
/* Attempt to merge THIS with OTHER, writing the result
to OUT.
For now, write (region, value) mappings that are in common between THIS
and OTHER to OUT, effectively taking the intersection.
Reject merger of different values. */
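/* For example (an illustrative pair of maps):
     {r0: &buf, r1: 42} and {r0: &buf, r1: 17}
   cannot be merged, since r1 has conflicting values, whereas
     {r0: &buf, r1: 42} and {r0: &buf}
   merge to {r0: &buf}, dropping the mapping for r1.  */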
bool
region_to_value_map::can_merge_with_p (const region_to_value_map &other,
region_to_value_map *out) const
{
for (auto iter : *this)
{
const region *iter_reg = iter.first;
const svalue *iter_sval = iter.second;
const svalue * const * other_slot = other.get (iter_reg);
if (other_slot)
{
if (iter_sval == *other_slot)
out->put (iter_reg, iter_sval);
else
return false;
}
}
return true;
}
/* Purge any state involving SVAL. */
void
region_to_value_map::purge_state_involving (const svalue *sval)
{
auto_vec<const region *> to_purge;
for (auto iter : *this)
{
const region *iter_reg = iter.first;
const svalue *iter_sval = iter.second;
if (iter_reg->involves_p (sval) || iter_sval->involves_p (sval))
to_purge.safe_push (iter_reg);
}
for (auto iter : to_purge)
m_hash_map.remove (iter);
}
/* class region_model. */
/* Ctor for region_model: construct an "empty" model. */
region_model::region_model (region_model_manager *mgr)
: m_mgr (mgr), m_store (), m_current_frame (NULL),
m_dynamic_extents ()
{
m_constraints = new constraint_manager (mgr);
}
/* region_model's copy ctor. */
region_model::region_model (const region_model &other)
: m_mgr (other.m_mgr), m_store (other.m_store),
m_constraints (new constraint_manager (*other.m_constraints)),
m_current_frame (other.m_current_frame),
m_dynamic_extents (other.m_dynamic_extents)
{
}
/* region_model's dtor. */
region_model::~region_model ()
{
delete m_constraints;
}
/* region_model's assignment operator. */
region_model &
region_model::operator= (const region_model &other)
{
/* m_mgr is const. */
gcc_assert (m_mgr == other.m_mgr);
m_store = other.m_store;
delete m_constraints;
m_constraints = new constraint_manager (*other.m_constraints);
m_current_frame = other.m_current_frame;
m_dynamic_extents = other.m_dynamic_extents;
return *this;
}
/* Equality operator for region_model.
Amongst other things this directly compares the stores and the constraint
managers, so for this to be meaningful both this and OTHER should
have been canonicalized. */
bool
region_model::operator== (const region_model &other) const
{
/* We can only compare instances that use the same manager. */
gcc_assert (m_mgr == other.m_mgr);
if (m_store != other.m_store)
return false;
if (*m_constraints != *other.m_constraints)
return false;
if (m_current_frame != other.m_current_frame)
return false;
if (m_dynamic_extents != other.m_dynamic_extents)
return false;
gcc_checking_assert (hash () == other.hash ());
return true;
}
/* Generate a hash value for this region_model. */
hashval_t
region_model::hash () const
{
hashval_t result = m_store.hash ();
result ^= m_constraints->hash ();
return result;
}
/* Dump a representation of this model to PP, showing the
stack, the store, and any constraints.
Use SIMPLE to control how svalues and regions are printed. */
void
region_model::dump_to_pp (pretty_printer *pp, bool simple,
bool multiline) const
{
/* Dump stack. */
pp_printf (pp, "stack depth: %i", get_stack_depth ());
if (multiline)
pp_newline (pp);
else
pp_string (pp, " {");
for (const frame_region *iter_frame = m_current_frame; iter_frame;
iter_frame = iter_frame->get_calling_frame ())
{
if (multiline)
pp_string (pp, " ");
else if (iter_frame != m_current_frame)
pp_string (pp, ", ");
pp_printf (pp, "frame (index %i): ", iter_frame->get_index ());
iter_frame->dump_to_pp (pp, simple);
if (multiline)
pp_newline (pp);
}
if (!multiline)
pp_string (pp, "}");
/* Dump store. */
if (!multiline)
pp_string (pp, ", {");
m_store.dump_to_pp (pp, simple, multiline,
m_mgr->get_store_manager ());
if (!multiline)
pp_string (pp, "}");
/* Dump constraints. */
pp_string (pp, "constraint_manager:");
if (multiline)
pp_newline (pp);
else
pp_string (pp, " {");
m_constraints->dump_to_pp (pp, multiline);
if (!multiline)
pp_string (pp, "}");
/* Dump sizes of dynamic regions, if any are known. */
if (!m_dynamic_extents.is_empty ())
{
pp_string (pp, "dynamic_extents:");
m_dynamic_extents.dump_to_pp (pp, simple, multiline);
}
}
/* Dump a representation of this model to FILE. */
void
region_model::dump (FILE *fp, bool simple, bool multiline) const
{
pretty_printer pp;
pp_format_decoder (&pp) = default_tree_printer;
pp_show_color (&pp) = pp_show_color (global_dc->printer);
pp.buffer->stream = fp;
dump_to_pp (&pp, simple, multiline);
pp_newline (&pp);
pp_flush (&pp);
}
/* Dump a multiline representation of this model to stderr. */
DEBUG_FUNCTION void
region_model::dump (bool simple) const
{
dump (stderr, simple, true);
}
/* Dump a multiline representation of this model to stderr. */
DEBUG_FUNCTION void
region_model::debug () const
{
dump (true);
}
/* Assert that this object is valid. */
void
region_model::validate () const
{
m_store.validate ();
}
/* Canonicalize the store and constraints, to maximize the chance of
equality between region_model instances. */
void
region_model::canonicalize ()
{
m_store.canonicalize (m_mgr->get_store_manager ());
m_constraints->canonicalize ();
}
/* Return true if this region_model is in canonical form. */
bool
region_model::canonicalized_p () const
{
region_model copy (*this);
copy.canonicalize ();
return *this == copy;
}
/* See the comment for store::loop_replay_fixup. */
void
region_model::loop_replay_fixup (const region_model *dst_state)
{
m_store.loop_replay_fixup (dst_state->get_store (), m_mgr);
}
/* A subclass of pending_diagnostic for complaining about uses of
poisoned values. */
class poisoned_value_diagnostic
: public pending_diagnostic_subclass<poisoned_value_diagnostic>
{
public:
poisoned_value_diagnostic (tree expr, enum poison_kind pkind,
const region *src_region,
tree check_expr)
: m_expr (expr), m_pkind (pkind),
m_src_region (src_region),
m_check_expr (check_expr)
{}
const char *get_kind () const final override { return "poisoned_value_diagnostic"; }
bool use_of_uninit_p () const final override
{
return m_pkind == POISON_KIND_UNINIT;
}
bool operator== (const poisoned_value_diagnostic &other) const
{
return (m_expr == other.m_expr
&& m_pkind == other.m_pkind
&& m_src_region == other.m_src_region);
}
int get_controlling_option () const final override
{
switch (m_pkind)
{
default:
gcc_unreachable ();
case POISON_KIND_UNINIT:
return OPT_Wanalyzer_use_of_uninitialized_value;
case POISON_KIND_FREED:
case POISON_KIND_DELETED:
return OPT_Wanalyzer_use_after_free;
case POISON_KIND_POPPED_STACK:
return OPT_Wanalyzer_use_of_pointer_in_stale_stack_frame;
}
}
bool terminate_path_p () const final override { return true; }
bool emit (diagnostic_emission_context &ctxt) final override
{
switch (m_pkind)
{
default:
gcc_unreachable ();
case POISON_KIND_UNINIT:
{
ctxt.add_cwe (457); /* "CWE-457: Use of Uninitialized Variable". */
return ctxt.warn ("use of uninitialized value %qE",
m_expr);
}
break;
case POISON_KIND_FREED:
{
ctxt.add_cwe (416); /* "CWE-416: Use After Free". */
return ctxt.warn ("use after %<free%> of %qE",
m_expr);
}
break;
case POISON_KIND_DELETED:
{
ctxt.add_cwe (416); /* "CWE-416: Use After Free". */
return ctxt.warn ("use after %<delete%> of %qE",
m_expr);
}
break;
case POISON_KIND_POPPED_STACK:
{
/* TODO: which CWE? */
return ctxt.warn
("dereferencing pointer %qE to within stale stack frame",
m_expr);
}
break;
}
}
label_text describe_final_event (const evdesc::final_event &ev) final override
{
switch (m_pkind)
{
default:
gcc_unreachable ();
case POISON_KIND_UNINIT:
return ev.formatted_print ("use of uninitialized value %qE here",
m_expr);
case POISON_KIND_FREED:
return ev.formatted_print ("use after %<free%> of %qE here",
m_expr);
case POISON_KIND_DELETED:
return ev.formatted_print ("use after %<delete%> of %qE here",
m_expr);
case POISON_KIND_POPPED_STACK:
return ev.formatted_print
("dereferencing pointer %qE to within stale stack frame",
m_expr);
}
}
void mark_interesting_stuff (interesting_t *interest) final override
{
if (m_src_region)
interest->add_region_creation (m_src_region);
}
/* Attempt to suppress false positives.
Reject paths where the value of the underlying region isn't poisoned.
This can happen due to state merging when exploring the exploded graph,
where the more precise analysis during feasibility analysis finds that
the region is in fact valid.
To do this we need to get the value from the fgraph. Unfortunately
we can't simply query the state of m_src_region (from the enode),
since it might be a different region in the fnode state (e.g. with
heap-allocated regions, the numbering could be different).
Hence we access m_check_expr, if available. */
bool check_valid_fpath_p (const feasible_node &fnode,
const gimple *emission_stmt)
const final override
{
if (!m_check_expr)
return true;
/* We've reached the enode, but not necessarily the right function_point.
Try to get the state at the correct stmt. */
region_model emission_model (fnode.get_model ().get_manager ());
if (!fnode.get_state_at_stmt (emission_stmt, &emission_model))
/* Couldn't get state; accept this diagnostic. */
return true;
const svalue *fsval = emission_model.get_rvalue (m_check_expr, NULL);
/* Check to see if the expr is also poisoned in FNODE (and in the
same way). */
const poisoned_svalue * fspval = fsval->dyn_cast_poisoned_svalue ();
if (!fspval)
return false;
if (fspval->get_poison_kind () != m_pkind)
return false;
return true;
}
private:
tree m_expr;
enum poison_kind m_pkind;
const region *m_src_region;
tree m_check_expr;
};
/* A subclass of pending_diagnostic for complaining about shifts
by negative counts. */
class shift_count_negative_diagnostic
: public pending_diagnostic_subclass<shift_count_negative_diagnostic>
{
public:
shift_count_negative_diagnostic (const gassign *assign, tree count_cst)
: m_assign (assign), m_count_cst (count_cst)
{}
const char *get_kind () const final override
{
return "shift_count_negative_diagnostic";
}
bool operator== (const shift_count_negative_diagnostic &other) const
{
return (m_assign == other.m_assign
&& same_tree_p (m_count_cst, other.m_count_cst));
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_shift_count_negative;
}
bool emit (diagnostic_emission_context &ctxt) final override
{
return ctxt.warn ("shift by negative count (%qE)", m_count_cst);
}
label_text describe_final_event (const evdesc::final_event &ev) final override
{
return ev.formatted_print ("shift by negative amount here (%qE)", m_count_cst);
}
private:
const gassign *m_assign;
tree m_count_cst;
};
/* A subclass of pending_diagnostic for complaining about shifts
by counts >= the width of the operand type. */
class shift_count_overflow_diagnostic
: public pending_diagnostic_subclass<shift_count_overflow_diagnostic>
{
public:
shift_count_overflow_diagnostic (const gassign *assign,
int operand_precision,
tree count_cst)
: m_assign (assign), m_operand_precision (operand_precision),
m_count_cst (count_cst)
{}
const char *get_kind () const final override
{
return "shift_count_overflow_diagnostic";
}
bool operator== (const shift_count_overflow_diagnostic &other) const
{
return (m_assign == other.m_assign
&& m_operand_precision == other.m_operand_precision
&& same_tree_p (m_count_cst, other.m_count_cst));
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_shift_count_overflow;
}
bool emit (diagnostic_emission_context &ctxt) final override
{
return ctxt.warn ("shift by count (%qE) >= precision of type (%qi)",
m_count_cst, m_operand_precision);
}
label_text describe_final_event (const evdesc::final_event &ev) final override
{
return ev.formatted_print ("shift by count %qE here", m_count_cst);
}
private:
const gassign *m_assign;
int m_operand_precision;
tree m_count_cst;
};
/* If ASSIGN is a stmt that can be modelled via
set_value (lhs_reg, SVALUE, CTXT)
for some SVALUE, get the SVALUE.
Otherwise return NULL. */
const svalue *
region_model::get_gassign_result (const gassign *assign,
region_model_context *ctxt)
{
tree lhs = gimple_assign_lhs (assign);
if (gimple_has_volatile_ops (assign)
&& !gimple_clobber_p (assign))
{
conjured_purge p (this, ctxt);
return m_mgr->get_or_create_conjured_svalue (TREE_TYPE (lhs),
assign,
get_lvalue (lhs, ctxt),
p);
}
tree rhs1 = gimple_assign_rhs1 (assign);
enum tree_code op = gimple_assign_rhs_code (assign);
switch (op)
{
default:
return NULL;
case POINTER_PLUS_EXPR:
{
/* e.g. "_1 = a_10(D) + 12;" */
tree ptr = rhs1;
tree offset = gimple_assign_rhs2 (assign);
const svalue *ptr_sval = get_rvalue (ptr, ctxt);
const svalue *offset_sval = get_rvalue (offset, ctxt);
/* Quoting tree.def, "the second operand [of a POINTER_PLUS_EXPR]
is an integer of type sizetype". */
offset_sval = m_mgr->get_or_create_cast (size_type_node, offset_sval);
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (lhs), op,
ptr_sval, offset_sval);
return sval_binop;
}
break;
case POINTER_DIFF_EXPR:
{
/* e.g. "_1 = p_2(D) - q_3(D);". */
tree rhs2 = gimple_assign_rhs2 (assign);
const svalue *rhs1_sval = get_rvalue (rhs1, ctxt);
const svalue *rhs2_sval = get_rvalue (rhs2, ctxt);
// TODO: perhaps fold to zero if they're known to be equal?
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (lhs), op,
rhs1_sval, rhs2_sval);
return sval_binop;
}
break;
/* Assignments of the form
set_value (lvalue (LHS), rvalue (EXPR))
for various EXPR.
We already have the lvalue for the LHS above, as "lhs_reg". */
case ADDR_EXPR: /* LHS = &RHS; */
case BIT_FIELD_REF:
case COMPONENT_REF: /* LHS = op0.op1; */
case MEM_REF:
case REAL_CST:
case COMPLEX_CST:
case VECTOR_CST:
case INTEGER_CST:
case ARRAY_REF:
case SSA_NAME: /* LHS = VAR; */
case VAR_DECL: /* LHS = VAR; */
case PARM_DECL:/* LHS = VAR; */
case REALPART_EXPR:
case IMAGPART_EXPR:
return get_rvalue (rhs1, ctxt);
case ABS_EXPR:
case ABSU_EXPR:
case CONJ_EXPR:
case BIT_NOT_EXPR:
case FIX_TRUNC_EXPR:
case FLOAT_EXPR:
case NEGATE_EXPR:
case NOP_EXPR:
case VIEW_CONVERT_EXPR:
{
/* Unary ops. */
const svalue *rhs_sval = get_rvalue (rhs1, ctxt);
const svalue *sval_unaryop
= m_mgr->get_or_create_unaryop (TREE_TYPE (lhs), op, rhs_sval);
return sval_unaryop;
}
case EQ_EXPR:
case GE_EXPR:
case LE_EXPR:
case NE_EXPR:
case GT_EXPR:
case LT_EXPR:
case UNORDERED_EXPR:
case ORDERED_EXPR:
{
tree rhs2 = gimple_assign_rhs2 (assign);
const svalue *rhs1_sval = get_rvalue (rhs1, ctxt);
const svalue *rhs2_sval = get_rvalue (rhs2, ctxt);
if (TREE_TYPE (lhs) == boolean_type_node)
{
/* Consider constraints between svalues. */
tristate t = eval_condition (rhs1_sval, op, rhs2_sval);
if (t.is_known ())
return m_mgr->get_or_create_constant_svalue
(t.is_true () ? boolean_true_node : boolean_false_node);
}
/* Otherwise, generate a symbolic binary op. */
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (lhs), op,
rhs1_sval, rhs2_sval);
return sval_binop;
}
break;
case PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
case MULT_HIGHPART_EXPR:
case TRUNC_DIV_EXPR:
case CEIL_DIV_EXPR:
case FLOOR_DIV_EXPR:
case ROUND_DIV_EXPR:
case TRUNC_MOD_EXPR:
case CEIL_MOD_EXPR:
case FLOOR_MOD_EXPR:
case ROUND_MOD_EXPR:
case RDIV_EXPR:
case EXACT_DIV_EXPR:
case LSHIFT_EXPR:
case RSHIFT_EXPR:
case LROTATE_EXPR:
case RROTATE_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
case MIN_EXPR:
case MAX_EXPR:
case COMPLEX_EXPR:
{
/* Binary ops. */
tree rhs2 = gimple_assign_rhs2 (assign);
const svalue *rhs1_sval = get_rvalue (rhs1, ctxt);
const svalue *rhs2_sval = get_rvalue (rhs2, ctxt);
if (ctxt && (op == LSHIFT_EXPR || op == RSHIFT_EXPR))
{
/* "INT34-C. Do not shift an expression by a negative number of bits
or by greater than or equal to the number of bits that exist in
the operand." */
if (const tree rhs2_cst = rhs2_sval->maybe_get_constant ())
if (TREE_CODE (rhs2_cst) == INTEGER_CST
&& INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
{
if (tree_int_cst_sgn (rhs2_cst) < 0)
ctxt->warn
(make_unique<shift_count_negative_diagnostic>
(assign, rhs2_cst));
else if (compare_tree_int (rhs2_cst,
TYPE_PRECISION (TREE_TYPE (rhs1)))
>= 0)
ctxt->warn
(make_unique<shift_count_overflow_diagnostic>
(assign,
int (TYPE_PRECISION (TREE_TYPE (rhs1))),
rhs2_cst));
}
}
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (lhs), op,
rhs1_sval, rhs2_sval);
return sval_binop;
}
/* Vector expressions. In theory we could implement these elementwise,
but for now, simply return unknown values. */
case VEC_DUPLICATE_EXPR:
case VEC_SERIES_EXPR:
case VEC_COND_EXPR:
case VEC_PERM_EXPR:
case VEC_WIDEN_MULT_HI_EXPR:
case VEC_WIDEN_MULT_LO_EXPR:
case VEC_WIDEN_MULT_EVEN_EXPR:
case VEC_WIDEN_MULT_ODD_EXPR:
case VEC_UNPACK_HI_EXPR:
case VEC_UNPACK_LO_EXPR:
case VEC_UNPACK_FLOAT_HI_EXPR:
case VEC_UNPACK_FLOAT_LO_EXPR:
case VEC_UNPACK_FIX_TRUNC_HI_EXPR:
case VEC_UNPACK_FIX_TRUNC_LO_EXPR:
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
case VEC_PACK_FLOAT_EXPR:
case VEC_WIDEN_LSHIFT_HI_EXPR:
case VEC_WIDEN_LSHIFT_LO_EXPR:
return m_mgr->get_or_create_unknown_svalue (TREE_TYPE (lhs));
}
}
/* Workaround for discarding certain false positives from
-Wanalyzer-use-of-uninitialized-value
of the form:
((A OR-IF B) OR-IF C)
and:
((A AND-IF B) AND-IF C)
where evaluating B is redundant, but could involve simple accesses of
uninitialized locals.
When optimization is turned on the FE can immediately fold compound
conditionals. Specifically, c_parser_condition parses this condition:
((A OR-IF B) OR-IF C)
and calls c_fully_fold on the condition.
Within c_fully_fold, fold_truth_andor is called, which bails when
optimization is off, but if any optimization is turned on can convert the
((A OR-IF B) OR-IF C)
into:
((A OR B) OR-IF C)
for sufficiently simple B,
i.e. the inner OR-IF becomes an OR.
At gimplification time the inner OR becomes BIT_IOR_EXPR (in gimplify_expr),
giving this for the inner condition:
tmp = A | B;
if (tmp)
thus effectively synthesizing a redundant access of B when optimization
is turned on, when compared to:
if (A) goto L1; else goto L4;
L1: if (B) goto L2; else goto L4;
L2: if (C) goto L3; else goto L4;
for the unoptimized case.
Return true if CTXT appears to be handling such a short-circuitable stmt,
such as the def-stmt for B for the:
tmp = A | B;
case above, for the case where A is true and thus B would have been
short-circuited without optimization, using MODEL for the value of A. */
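/* A concrete illustration of the above (hypothetical source):
     int f (int a, int c)
     {
       int b;
       return (a || b) || c;
     }
   With optimization enabled the inner "a || b" can be folded to
   "a | b", reading the uninitialized "b" even on paths where the
   analyzer knows "a" is nonzero and "b"'s value cannot matter.  */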
static bool
within_short_circuited_stmt_p (const region_model *model,
const gassign *assign_stmt)
{
/* We must have an assignment to a temporary of _Bool type. */
tree lhs = gimple_assign_lhs (assign_stmt);
if (TREE_TYPE (lhs) != boolean_type_node)
return false;
if (TREE_CODE (lhs) != SSA_NAME)
return false;
if (SSA_NAME_VAR (lhs) != NULL_TREE)
return false;
/* The temporary bool must be used exactly once: as the second arg of
a BIT_IOR_EXPR or BIT_AND_EXPR. */
use_operand_p use_op;
gimple *use_stmt;
if (!single_imm_use (lhs, &use_op, &use_stmt))
return false;
const gassign *use_assign = dyn_cast <const gassign *> (use_stmt);
if (!use_assign)
return false;
enum tree_code op = gimple_assign_rhs_code (use_assign);
if (!(op == BIT_IOR_EXPR || op == BIT_AND_EXPR))
return false;
if (!(gimple_assign_rhs1 (use_assign) != lhs
&& gimple_assign_rhs2 (use_assign) == lhs))
return false;
/* The first arg of the bitwise stmt must have a known value in MODEL
that implies that the value of the second arg doesn't matter, i.e.
1 for bitwise or, 0 for bitwise and. */
tree other_arg = gimple_assign_rhs1 (use_assign);
/* Use a NULL ctxt here to avoid generating warnings. */
const svalue *other_arg_sval = model->get_rvalue (other_arg, NULL);
tree other_arg_cst = other_arg_sval->maybe_get_constant ();
if (!other_arg_cst)
return false;
switch (op)
{
default:
gcc_unreachable ();
case BIT_IOR_EXPR:
if (zerop (other_arg_cst))
return false;
break;
case BIT_AND_EXPR:
if (!zerop (other_arg_cst))
return false;
break;
}
/* All tests passed. We appear to be in a stmt that generates a boolean
temporary with a value that won't matter. */
return true;
}
/* Workaround for discarding certain false positives from
-Wanalyzer-use-of-uninitialized-value
seen with -ftrivial-auto-var-init=.
-ftrivial-auto-var-init= will generate calls to IFN_DEFERRED_INIT.
If the address of the var is taken, gimplification will give us
something like:
_1 = .DEFERRED_INIT (4, 2, &"len"[0]);
len = _1;
The result of DEFERRED_INIT will be an uninit value; we don't
want to emit a false positive for "len = _1;"
Return true if ASSIGN_STMT is such a stmt. */
static bool
due_to_ifn_deferred_init_p (const gassign *assign_stmt)
{
/* We must have an assignment to a decl from an SSA name that's the
result of an IFN_DEFERRED_INIT call. */
if (gimple_assign_rhs_code (assign_stmt) != SSA_NAME)
return false;
tree lhs = gimple_assign_lhs (assign_stmt);
if (TREE_CODE (lhs) != VAR_DECL)
return false;
tree rhs = gimple_assign_rhs1 (assign_stmt);
if (TREE_CODE (rhs) != SSA_NAME)
return false;
const gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
const gcall *call = dyn_cast <const gcall *> (def_stmt);
if (!call)
return false;
if (gimple_call_internal_p (call)
&& gimple_call_internal_fn (call) == IFN_DEFERRED_INIT)
return true;
return false;
}
/* Check for SVAL being poisoned, adding a warning to CTXT.
Return SVAL, or, if a warning is added, another value, to avoid
repeatedly complaining about the same poisoned value in followup code.
SRC_REGION is a hint about where SVAL came from, and can be NULL. */
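/* For example (an illustrative case), given:
     int i;
     int j = i;
     int k = j + 1;
   we want a single -Wanalyzer-use-of-uninitialized-value warning at
   "j = i"; substituting an unknown svalue there avoids a cascade of
   followup warnings at "k = j + 1".  */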
const svalue *
region_model::check_for_poison (const svalue *sval,
tree expr,
const region *src_region,
region_model_context *ctxt) const
{
if (!ctxt)
return sval;
if (const poisoned_svalue *poisoned_sval = sval->dyn_cast_poisoned_svalue ())
{
enum poison_kind pkind = poisoned_sval->get_poison_kind ();
/* Ignore uninitialized uses of empty types; there's nothing
to initialize. */
if (pkind == POISON_KIND_UNINIT
&& sval->get_type ()
&& is_empty_type (sval->get_type ()))
return sval;
if (pkind == POISON_KIND_UNINIT)
if (const gimple *curr_stmt = ctxt->get_stmt ())
if (const gassign *assign_stmt
= dyn_cast <const gassign *> (curr_stmt))
{
/* Special case to avoid certain false positives. */
if (within_short_circuited_stmt_p (this, assign_stmt))
return sval;
/* Special case to avoid false positive on
-ftrivial-auto-var-init=. */
if (due_to_ifn_deferred_init_p (assign_stmt))
return sval;
}
/* If we have an SSA name for a temporary, we don't want to print
'<unknown>'.
Poisoned values are shared by type, and so we can't reconstruct
the tree other than via the def stmts, using
fixup_tree_for_diagnostic. */
tree diag_arg = fixup_tree_for_diagnostic (expr);
if (src_region == NULL && pkind == POISON_KIND_UNINIT)
src_region = get_region_for_poisoned_expr (expr);
/* Can we reliably get the poisoned value from "expr"?
This is for use by poisoned_value_diagnostic::check_valid_fpath_p.
Unfortunately, we might not have a reliable value for EXPR.
Hence we only query its value now, and only use it if we get the
poisoned value back again. */
tree check_expr = expr;
const svalue *foo_sval = get_rvalue (expr, NULL);
if (foo_sval == sval)
check_expr = expr;
else
check_expr = NULL;
if (ctxt->warn (make_unique<poisoned_value_diagnostic> (diag_arg,
pkind,
src_region,
check_expr)))
{
/* We only want to report use of a poisoned value at the first
place it gets used; return an unknown value to avoid generating
a chain of followup warnings. */
sval = m_mgr->get_or_create_unknown_svalue (sval->get_type ());
}
return sval;
}
return sval;
}
/* Attempt to get a region for describing EXPR, the source region of
a poisoned_svalue, for use in a poisoned_value_diagnostic.
Return NULL if there is no good region to use. */
const region *
region_model::get_region_for_poisoned_expr (tree expr) const
{
if (TREE_CODE (expr) == SSA_NAME)
{
tree decl = SSA_NAME_VAR (expr);
if (decl && DECL_P (decl))
expr = decl;
else
return NULL;
}
return get_lvalue (expr, NULL);
}
/* Update this model for the ASSIGN stmt, using CTXT to report any
diagnostics. */
void
region_model::on_assignment (const gassign *assign, region_model_context *ctxt)
{
tree lhs = gimple_assign_lhs (assign);
tree rhs1 = gimple_assign_rhs1 (assign);
const region *lhs_reg = get_lvalue (lhs, ctxt);
/* Any writes other than to the stack are treated
as externally visible. */
if (ctxt)
{
enum memory_space memspace = lhs_reg->get_memory_space ();
if (memspace != MEMSPACE_STACK)
ctxt->maybe_did_work ();
}
/* Most assignments are handled by:
set_value (lhs_reg, SVALUE, CTXT)
for some SVALUE. */
if (const svalue *sval = get_gassign_result (assign, ctxt))
{
tree expr = get_diagnostic_tree_for_gassign (assign);
check_for_poison (sval, expr, NULL, ctxt);
set_value (lhs_reg, sval, ctxt);
return;
}
enum tree_code op = gimple_assign_rhs_code (assign);
switch (op)
{
default:
{
if (0)
sorry_at (assign->location, "unhandled assignment op: %qs",
get_tree_code_name (op));
const svalue *unknown_sval
= m_mgr->get_or_create_unknown_svalue (TREE_TYPE (lhs));
set_value (lhs_reg, unknown_sval, ctxt);
}
break;
case CONSTRUCTOR:
{
if (TREE_CLOBBER_P (rhs1))
{
/* e.g. "x ={v} {CLOBBER};" */
clobber_region (lhs_reg);
}
else
{
/* Any CONSTRUCTOR that survives to this point is either
just a zero-init of everything, or a vector. */
if (!CONSTRUCTOR_NO_CLEARING (rhs1))
zero_fill_region (lhs_reg, ctxt);
unsigned ix;
tree index;
tree val;
FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (rhs1), ix, index, val)
{
gcc_assert (TREE_CODE (TREE_TYPE (rhs1)) == VECTOR_TYPE);
if (!index)
index = build_int_cst (integer_type_node, ix);
gcc_assert (TREE_CODE (index) == INTEGER_CST);
const svalue *index_sval
= m_mgr->get_or_create_constant_svalue (index);
gcc_assert (index_sval);
const region *sub_reg
= m_mgr->get_element_region (lhs_reg,
TREE_TYPE (val),
index_sval);
const svalue *val_sval = get_rvalue (val, ctxt);
set_value (sub_reg, val_sval, ctxt);
}
}
}
break;
case STRING_CST:
{
/* e.g. "struct s2 x = {{'A', 'B', 'C', 'D'}};". */
const svalue *rhs_sval = get_rvalue (rhs1, ctxt);
m_store.set_value (m_mgr->get_store_manager (), lhs_reg, rhs_sval,
ctxt ? ctxt->get_uncertainty () : NULL);
}
break;
}
}
/* Handle the pre-sm-state part of STMT, modifying this object in-place.
Write true to *OUT_UNKNOWN_SIDE_EFFECTS if the stmt has unknown
side effects. */
void
region_model::on_stmt_pre (const gimple *stmt,
bool *out_unknown_side_effects,
region_model_context *ctxt)
{
switch (gimple_code (stmt))
{
default:
/* No-op for now. */
break;
case GIMPLE_ASSIGN:
{
const gassign *assign = as_a <const gassign *> (stmt);
on_assignment (assign, ctxt);
}
break;
case GIMPLE_ASM:
{
const gasm *asm_stmt = as_a <const gasm *> (stmt);
on_asm_stmt (asm_stmt, ctxt);
if (ctxt)
ctxt->maybe_did_work ();
}
break;
case GIMPLE_CALL:
{
/* Track whether we have a gcall to a function that's not recognized by
anything, for which we don't have a function body, or for which we
don't know the fndecl. */
const gcall *call = as_a <const gcall *> (stmt);
*out_unknown_side_effects = on_call_pre (call, ctxt);
}
break;
case GIMPLE_RETURN:
{
const greturn *return_ = as_a <const greturn *> (stmt);
on_return (return_, ctxt);
}
break;
}
}
/* Given a call CD with function attribute FORMAT_ATTR, check that the
format arg to the call is a valid null-terminated string. */
void
region_model::check_call_format_attr (const call_details &cd,
tree format_attr) const
{
/* We assume that FORMAT_ATTR has already been validated. */
/* arg0 of the attribute should be the kind of format string
that this function expects (e.g. "printf").
const tree arg0_tree_list = TREE_VALUE (format_attr);
if (!arg0_tree_list)
return;
/* arg1 of the attribute should be the 1-based parameter index
to treat as the format string. */
const tree arg1_tree_list = TREE_CHAIN (arg0_tree_list);
if (!arg1_tree_list)
return;
const tree arg1_value = TREE_VALUE (arg1_tree_list);
if (!arg1_value)
return;
unsigned format_arg_idx = TREE_INT_CST_LOW (arg1_value) - 1;
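/* E.g. for a declaration such as (an illustrative example):
     extern int my_log (int level, const char *fmt, ...)
       __attribute__ ((format (printf, 2, 3)));
   arg0 is "printf" and arg1 is 2, giving format_arg_idx == 1.  */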
if (cd.num_args () <= format_arg_idx)
return;
/* Subclass of annotating_context that
adds a note about the format attr to any saved diagnostics. */
class annotating_ctxt : public annotating_context
{
public:
annotating_ctxt (const call_details &cd,
unsigned fmt_param_idx)
: annotating_context (cd.get_ctxt ()),
m_cd (cd),
m_fmt_param_idx (fmt_param_idx)
{
}
void add_annotations () final override
{
class reason_format_attr
: public pending_note_subclass<reason_format_attr>
{
public:
reason_format_attr (const call_arg_details &arg_details)
: m_arg_details (arg_details)
{
}
const char *get_kind () const final override
{
return "reason_format_attr";
}
void emit () const final override
{
inform (DECL_SOURCE_LOCATION (m_arg_details.m_called_fndecl),
"parameter %i of %qD marked as a format string"
" via %qs attribute",
m_arg_details.m_arg_idx + 1, m_arg_details.m_called_fndecl,
"format");
}
bool operator== (const reason_format_attr &other) const
{
return m_arg_details == other.m_arg_details;
}
private:
call_arg_details m_arg_details;
};
call_arg_details arg_details (m_cd, m_fmt_param_idx);
add_note (make_unique<reason_format_attr> (arg_details));
}
private:
const call_details &m_cd;
unsigned m_fmt_param_idx;
};
annotating_ctxt my_ctxt (cd, format_arg_idx);
call_details my_cd (cd, &my_ctxt);
my_cd.check_for_null_terminated_string_arg (format_arg_idx);
}
/* Ensure that all arguments at the call described by CD are checked
for poisoned values, by calling get_rvalue on each argument.
Check that calls to functions with "format" attribute have valid
null-terminated strings for their format argument. */
void
region_model::check_call_args (const call_details &cd) const
{
for (unsigned arg_idx = 0; arg_idx < cd.num_args (); arg_idx++)
cd.get_arg_svalue (arg_idx);
/* Handle attribute "format". */
if (tree format_attr = cd.lookup_function_attribute ("format"))
check_call_format_attr (cd, format_attr);
}
/* Update this model for an outcome of a call that returns a specific
integer constant.
If UNMERGEABLE, then make the result unmergeable, e.g. to prevent
the state-merger code from merging success and failure outcomes. */
void
region_model::update_for_int_cst_return (const call_details &cd,
int retval,
bool unmergeable)
{
if (!cd.get_lhs_type ())
return;
if (TREE_CODE (cd.get_lhs_type ()) != INTEGER_TYPE)
return;
const svalue *result
= m_mgr->get_or_create_int_cst (cd.get_lhs_type (), retval);
if (unmergeable)
result = m_mgr->get_or_create_unmergeable (result);
set_value (cd.get_lhs_region (), result, cd.get_ctxt ());
}
/* Update this model for an outcome of a call that returns zero.
If UNMERGEABLE, then make the result unmergeable, e.g. to prevent
the state-merger code from merging success and failure outcomes. */
void
region_model::update_for_zero_return (const call_details &cd,
bool unmergeable)
{
update_for_int_cst_return (cd, 0, unmergeable);
}
/* Update this model for an outcome of a call that returns non-zero.
Specifically, assign an svalue to the LHS, and add a constraint that
that svalue is non-zero. */
void
region_model::update_for_nonzero_return (const call_details &cd)
{
if (!cd.get_lhs_type ())
return;
if (TREE_CODE (cd.get_lhs_type ()) != INTEGER_TYPE)
return;
cd.set_any_lhs_with_defaults ();
const svalue *zero
= m_mgr->get_or_create_int_cst (cd.get_lhs_type (), 0);
const svalue *result
= get_store_value (cd.get_lhs_region (), cd.get_ctxt ());
add_constraint (result, NE_EXPR, zero, cd.get_ctxt ());
}
/* Subroutine of region_model::maybe_get_copy_bounds.
The Linux kernel commonly uses
min_t([unsigned] long, VAR, sizeof(T));
to set an upper bound on the size of a copy_to_user.
Attempt to simplify such sizes by trying to get the upper bound as a
constant.
Return the simplified svalue if possible, or NULL otherwise. */
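/* For instance (an illustrative walk-through), given
     n = min_t(unsigned long, len, sizeof(buf));
   with an 8-byte "buf", NUM_BYTES_SVAL is something like
     CAST(unsigned long, MIN_EXPR(INIT_VAL(len), 8))
   and undoing the cast exposes the MIN_EXPR, whose constant second
   argument (8) is returned, cast back to the original type.  */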
static const svalue *
maybe_simplify_upper_bound (const svalue *num_bytes_sval,
region_model_manager *mgr)
{
tree type = num_bytes_sval->get_type ();
while (const svalue *raw = num_bytes_sval->maybe_undo_cast ())
num_bytes_sval = raw;
if (const binop_svalue *binop_sval = num_bytes_sval->dyn_cast_binop_svalue ())
if (binop_sval->get_op () == MIN_EXPR)
if (binop_sval->get_arg1 ()->get_kind () == SK_CONSTANT)
{
return mgr->get_or_create_cast (type, binop_sval->get_arg1 ());
/* TODO: we might want to also capture the constraint
when recording the diagnostic, or note that we're using
the upper bound. */
}
return NULL;
}
/* Attempt to get an upper bound for the size of a copy when simulating a
copy function.
NUM_BYTES_SVAL is the symbolic value for the size of the copy.
Use it if it's constant, otherwise try to simplify it. Failing
that, use the size of SRC_REG if constant.
Return a symbolic value for an upper limit on the number of bytes
copied, or NULL if no such value could be determined. */
const svalue *
region_model::maybe_get_copy_bounds (const region *src_reg,
const svalue *num_bytes_sval)
{
if (num_bytes_sval->maybe_get_constant ())
return num_bytes_sval;
if (const svalue *simplified
= maybe_simplify_upper_bound (num_bytes_sval, m_mgr))
num_bytes_sval = simplified;
if (num_bytes_sval->maybe_get_constant ())
return num_bytes_sval;
/* For now, try just guessing the size as the capacity of the
base region of the src.
This is a hack; we might get too large a value. */
const region *src_base_reg = src_reg->get_base_region ();
num_bytes_sval = get_capacity (src_base_reg);
if (num_bytes_sval->maybe_get_constant ())
return num_bytes_sval;
/* Non-constant: give up. */
return NULL;
}
/* Get any known_function for FNDECL for call CD.
The call must match all assumptions made by the known_function (such as
e.g. "argument 1's type must be a pointer type").
Return NULL if no known_function is found, or it does not match the
assumption(s). */
const known_function *
region_model::get_known_function (tree fndecl, const call_details &cd) const
{
known_function_manager *known_fn_mgr = m_mgr->get_known_function_manager ();
return known_fn_mgr->get_match (fndecl, cd);
}
/* Get any known_function for IFN, or NULL. */
const known_function *
region_model::get_known_function (enum internal_fn ifn) const
{
known_function_manager *known_fn_mgr = m_mgr->get_known_function_manager ();
return known_fn_mgr->get_internal_fn (ifn);
}
/* Get any builtin_known_function for CALL and emit any warning to CTXT
if not NULL.
The call must match all assumptions made by the known_function (such as
e.g. "argument 1's type must be a pointer type").
Return NULL if no builtin_known_function is found, or it does
not match the assumption(s).
Internally calls get_known_function to find a known_function and cast it
to a builtin_known_function.
For instance, calloc is a C builtin, defined in gcc/builtins.def
by the DEF_LIB_BUILTIN macro. Such builtins are recognized by the
analyzer by their name, so that they are still recognized as builtins
even in C++, or if the user redeclares them with a mismatched signature.
Cases when a supposed builtin is not flagged as one by the FE:
The C++ FE does not recognize calloc as a builtin if it has not been
included from a standard header, but the C FE does. Hence in C++ if
CALL comes from a calloc and stdlib is not included,
gcc/tree.h:fndecl_built_in_p (CALL) would be false.
In C code, a __SIZE_TYPE__ calloc (__SIZE_TYPE__, __SIZE_TYPE__) user
declaration obviously has a signature mismatching the standard's, and
its function_decl tree won't be unified by
gcc/c-decl.cc:match_builtin_function_types.
Yet in both cases the analyzer should treat the calls as a builtin calloc
so that extra attributes unspecified by the standard but added by GCC
(e.g. sprintf attributes in gcc/builtins.def), useful for the detection of
dangerous behavior, are indeed processed.
Therefore for those cases when a "builtin flag" is not added by the FE,
builtins' kf are derived from builtin_known_function, whose method
builtin_known_function::builtin_decl returns the builtin's
function_decl tree as defined in gcc/builtins.def, with all the extra
attributes. */
const builtin_known_function *
region_model::get_builtin_kf (const gcall *call,
region_model_context *ctxt /* = NULL */) const
{
region_model *mut_this = const_cast <region_model *> (this);
tree callee_fndecl = mut_this->get_fndecl_for_call (call, ctxt);
if (! callee_fndecl)
return NULL;
call_details cd (call, mut_this, ctxt);
if (const known_function *kf = get_known_function (callee_fndecl, cd))
return kf->dyn_cast_builtin_kf ();
return NULL;
}
/* Update this model for the CALL stmt, using CTXT to report any
diagnostics - the first half.
Updates to the region_model that should be made *before* sm-states
are updated are done here; other updates to the region_model are done
in region_model::on_call_post.
Return true if the function call has unknown side effects (it wasn't
recognized and we don't have a body for it, or are unable to tell which
fndecl it is). */
bool
region_model::on_call_pre (const gcall *call, region_model_context *ctxt)
{
call_details cd (call, this, ctxt);
/* Special-case for IFN_DEFERRED_INIT.
We want to report uninitialized variables with -fanalyzer (treating
-ftrivial-auto-var-init= as purely a mitigation feature).
Handle IFN_DEFERRED_INIT by treating it as no-op: don't touch the
lhs of the call, so that it is still uninitialized from the point of
view of the analyzer. */
if (gimple_call_internal_p (call)
&& gimple_call_internal_fn (call) == IFN_DEFERRED_INIT)
return false; /* No side effects. */
/* Get svalues for all of the arguments at the callsite, to ensure that we
complain about any uninitialized arguments. This might lead to
duplicates if any of the handling below also looks up the svalues,
but the deduplication code should deal with that. */
if (ctxt)
check_call_args (cd);
tree callee_fndecl = get_fndecl_for_call (call, ctxt);
if (gimple_call_internal_p (call))
if (const known_function *kf
= get_known_function (gimple_call_internal_fn (call)))
{
kf->impl_call_pre (cd);
return false; /* No further side effects. */
}
if (!callee_fndecl)
{
cd.set_any_lhs_with_defaults ();
return true; /* Unknown side effects. */
}
if (const known_function *kf = get_known_function (callee_fndecl, cd))
{
kf->impl_call_pre (cd);
return false; /* No further side effects. */
}
cd.set_any_lhs_with_defaults ();
const int callee_fndecl_flags = flags_from_decl_or_type (callee_fndecl);
if (callee_fndecl_flags & (ECF_CONST | ECF_PURE))
return false; /* No side effects. */
if (fndecl_built_in_p (callee_fndecl))
return true; /* Unknown side effects. */
if (!fndecl_has_gimple_body_p (callee_fndecl))
return true; /* Unknown side effects. */
return false; /* No side effects. */
}
/* Update this model for the CALL stmt, using CTXT to report any
diagnostics - the second half.
Updates to the region_model that should be made *after* sm-states
are updated are done here; other updates to the region_model are done
in region_model::on_call_pre.
If UNKNOWN_SIDE_EFFECTS is true, also call handle_unrecognized_call
to purge state. */
void
region_model::on_call_post (const gcall *call,
bool unknown_side_effects,
region_model_context *ctxt)
{
if (tree callee_fndecl = get_fndecl_for_call (call, ctxt))
{
call_details cd (call, this, ctxt);
if (const known_function *kf = get_known_function (callee_fndecl, cd))
{
kf->impl_call_post (cd);
return;
}
/* Was this fndecl referenced by
__attribute__((malloc(FOO)))? */
if (lookup_attribute ("*dealloc", DECL_ATTRIBUTES (callee_fndecl)))
{
impl_deallocation_call (cd);
return;
}
}
if (unknown_side_effects)
{
handle_unrecognized_call (call, ctxt);
if (ctxt)
ctxt->maybe_did_work ();
}
}
/* Purge state involving SVAL from this region_model, using CTXT
(if non-NULL) to purge other state in a program_state.
For example, if we're at the def-stmt of an SSA name, then we need to
purge any state for svalues that involve that SSA name. This avoids
false positives in loops, since a symbolic value referring to the
SSA name will be referring to the previous value of that SSA name.
For example, in:
while ((e = hashmap_iter_next(&iter))) {
struct oid2strbuf *e_strbuf = (struct oid2strbuf *)e;
free (e_strbuf->value);
}
at the def-stmt of e_8:
e_8 = hashmap_iter_next (&iter);
we should purge the "freed" state of:
INIT_VAL(CAST_REG(‘struct oid2strbuf’, (*INIT_VAL(e_8))).value)
which is the "e_strbuf->value" value from the previous iteration,
or we will erroneously report a double-free - the "e_8" within it
refers to the previous value. */
void
region_model::purge_state_involving (const svalue *sval,
region_model_context *ctxt)
{
if (!sval->can_have_associated_state_p ())
return;
m_store.purge_state_involving (sval, m_mgr);
m_constraints->purge_state_involving (sval);
m_dynamic_extents.purge_state_involving (sval);
if (ctxt)
ctxt->purge_state_involving (sval);
}
/* A pending_note subclass for adding a note about an
__attribute__((access, ...)) to a diagnostic. */
class reason_attr_access : public pending_note_subclass<reason_attr_access>
{
public:
reason_attr_access (tree callee_fndecl, const attr_access &access)
: m_callee_fndecl (callee_fndecl),
m_ptr_argno (access.ptrarg),
m_access_str (TREE_STRING_POINTER (access.to_external_string ()))
{
}
const char *get_kind () const final override { return "reason_attr_access"; }
void emit () const final override
{
inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
"parameter %i of %qD marked with attribute %qs",
m_ptr_argno + 1, m_callee_fndecl, m_access_str);
}
bool operator== (const reason_attr_access &other) const
{
return (m_callee_fndecl == other.m_callee_fndecl
&& m_ptr_argno == other.m_ptr_argno
&& !strcmp (m_access_str, other.m_access_str));
}
private:
tree m_callee_fndecl;
unsigned m_ptr_argno;
const char *m_access_str;
};
/* Check CALL, a call to external function CALLEE_FNDECL, based on
any __attribute__ ((access, ...)) on the latter, complaining to
CTXT about any issues.
Currently we merely call check_region_for_write on any regions
pointed to by arguments marked with a "write_only" or "read_write"
attribute. */
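/* For example (an illustrative declaration):
     extern void fill (int *buf, size_t n)
       __attribute__ ((access (write_only, 1, 2)));
   marks parameter 1 as write-only, with its size in parameter 2, so
   below we check the region pointed to by "buf" for writability.  */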
void
region_model::check_function_attr_access (const gcall *call,
tree callee_fndecl,
region_model_context *ctxt,
rdwr_map &rdwr_idx) const
{
gcc_assert (call);
gcc_assert (callee_fndecl);
gcc_assert (ctxt);
tree fntype = TREE_TYPE (callee_fndecl);
gcc_assert (fntype);
unsigned argno = 0;
for (tree iter = TYPE_ARG_TYPES (fntype); iter;
iter = TREE_CHAIN (iter), ++argno)
{
const attr_access* access = rdwr_idx.get (argno);
if (!access)
continue;
/* Ignore any duplicate entry in the map for the size argument. */
if (access->ptrarg != argno)
continue;
if (access->mode == access_write_only
|| access->mode == access_read_write)
{
/* Subclass of annotating_context that
adds a note about the attr access to any saved diagnostics. */
class annotating_ctxt : public annotating_context
{
public:
annotating_ctxt (tree callee_fndecl,
const attr_access &access,
region_model_context *ctxt)
: annotating_context (ctxt),
m_callee_fndecl (callee_fndecl),
m_access (access)
{
}
void add_annotations () final override
{
add_note (make_unique<reason_attr_access>
(m_callee_fndecl, m_access));
}
private:
tree m_callee_fndecl;
const attr_access &m_access;
};
/* Use this ctxt below so that any diagnostics get the
note added to them. */
annotating_ctxt my_ctxt (callee_fndecl, *access, ctxt);
tree ptr_tree = gimple_call_arg (call, access->ptrarg);
const svalue *ptr_sval = get_rvalue (ptr_tree, &my_ctxt);
const region *reg = deref_rvalue (ptr_sval, ptr_tree, &my_ctxt);
check_region_for_write (reg, nullptr, &my_ctxt);
/* We don't use the size arg for now. */
}
}
}
/* Subroutine of region_model::check_function_attr_null_terminated_string_arg,
checking one instance of __attribute__((null_terminated_string_arg)). */
void
region_model::
check_one_function_attr_null_terminated_string_arg (const gcall *call,
tree callee_fndecl,
region_model_context *ctxt,
rdwr_map &rdwr_idx,
tree attr)
{
gcc_assert (call);
gcc_assert (callee_fndecl);
gcc_assert (ctxt);
gcc_assert (attr);
tree arg = TREE_VALUE (attr);
if (!arg)
return;
/* Convert from 1-based to 0-based index. */
unsigned int arg_idx = TREE_INT_CST_LOW (TREE_VALUE (arg)) - 1;
/* If there's also an "access" attribute on the ptr param
for reading with a size param specified, then that size
limits the size of the possible read from the pointer. */
if (const attr_access* access = rdwr_idx.get (arg_idx))
if ((access->mode == access_read_only
|| access->mode == access_read_write)
&& access->sizarg != UINT_MAX)
{
call_details cd_checked (call, this, ctxt);
const svalue *limit_sval
= cd_checked.get_arg_svalue (access->sizarg);
const svalue *ptr_sval
= cd_checked.get_arg_svalue (arg_idx);
/* Try reading all of the bytes expressed by the size param,
but without emitting warnings (via a null context). */
const svalue *limited_sval
= read_bytes (deref_rvalue (ptr_sval, NULL_TREE, nullptr),
NULL_TREE,
limit_sval,
nullptr);
if (limited_sval->get_kind () == SK_POISONED)
{
/* Reading up to the truncation limit caused issues.
Assume that the string is meant to be terminated
before then, so perform a *checked* check for the
terminator. */
check_for_null_terminated_string_arg (cd_checked,
arg_idx);
}
else
{
/* Reading up to the truncation limit seems OK; repeat
the read, but with checking enabled. */
read_bytes (deref_rvalue (ptr_sval, NULL_TREE, ctxt),
NULL_TREE,
limit_sval,
ctxt);
}
return;
}
/* Otherwise, we don't have an access-attribute limiting the read.
Simulate a read up to the null terminator (if any). */
call_details cd (call, this, ctxt);
check_for_null_terminated_string_arg (cd, arg_idx);
}
/* Check CALL, a call to external function CALLEE_FNDECL, for any uses
of __attribute__ ((null_terminated_string_arg)), complaining
to CTXT about any issues.
Use RDWR_IDX for tracking uses of __attribute__ ((access, ...)). */
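/* For example (an illustrative declaration):
     extern size_t my_strlen (const char *p)
       __attribute__ ((null_terminated_string_arg (1)));
   requests that argument 1 of each call be checked for being a valid
   null-terminated string.  */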
void
region_model::
check_function_attr_null_terminated_string_arg (const gcall *call,
tree callee_fndecl,
region_model_context *ctxt,
rdwr_map &rdwr_idx)
{
gcc_assert (call);
gcc_assert (callee_fndecl);
gcc_assert (ctxt);
tree fntype = TREE_TYPE (callee_fndecl);
gcc_assert (fntype);
/* A function declaration can specify attribute
null_terminated_string_arg multiple times, each with one argument. */
for (tree attr = TYPE_ATTRIBUTES (fntype); attr; attr = TREE_CHAIN (attr))
{
attr = lookup_attribute ("null_terminated_string_arg", attr);
if (!attr)
return;
check_one_function_attr_null_terminated_string_arg (call, callee_fndecl,
ctxt, rdwr_idx,
attr);
}
}
/* Check CALL, a call to external function CALLEE_FNDECL, for any
function attributes, complaining to CTXT about any issues. */
void
region_model::check_function_attrs (const gcall *call,
tree callee_fndecl,
region_model_context *ctxt)
{
gcc_assert (call);
gcc_assert (callee_fndecl);
gcc_assert (ctxt);
tree fntype = TREE_TYPE (callee_fndecl);
if (!fntype)
return;
if (!TYPE_ATTRIBUTES (fntype))
return;
/* Initialize a map of attribute access specifications for arguments
to the function call. */
rdwr_map rdwr_idx;
init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));
check_function_attr_access (call, callee_fndecl, ctxt, rdwr_idx);
check_function_attr_null_terminated_string_arg (call, callee_fndecl,
ctxt, rdwr_idx);
}
/* Handle a call CALL to a function with unknown behavior.
Traverse the regions in this model, determining what regions are
reachable from pointer arguments to CALL and from global variables,
recursively.
Set all reachable regions to new unknown values and purge sm-state
from their values, and from values that point to them. */
void
region_model::handle_unrecognized_call (const gcall *call,
region_model_context *ctxt)
{
tree fndecl = get_fndecl_for_call (call, ctxt);
if (fndecl && ctxt)
check_function_attrs (call, fndecl, ctxt);
reachable_regions reachable_regs (this);
/* Determine the reachable regions and their mutability. */
{
/* Add globals and regions that already escaped in previous
unknown calls. */
m_store.for_each_cluster (reachable_regions::init_cluster_cb,
&reachable_regs);
/* Params that are pointers. */
tree iter_param_types = NULL_TREE;
if (fndecl)
iter_param_types = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
for (unsigned arg_idx = 0; arg_idx < gimple_call_num_args (call); arg_idx++)
{
/* Track expected param type, where available. */
tree param_type = NULL_TREE;
if (iter_param_types)
{
param_type = TREE_VALUE (iter_param_types);
gcc_assert (param_type);
iter_param_types = TREE_CHAIN (iter_param_types);
}
tree parm = gimple_call_arg (call, arg_idx);
const svalue *parm_sval = get_rvalue (parm, ctxt);
reachable_regs.handle_parm (parm_sval, param_type);
}
}
uncertainty_t *uncertainty = ctxt ? ctxt->get_uncertainty () : NULL;
/* Purge sm-state for the svalues that were reachable,
both in non-mutable and mutable form. */
for (svalue_set::iterator iter
= reachable_regs.begin_reachable_svals ();
iter != reachable_regs.end_reachable_svals (); ++iter)
{
const svalue *sval = (*iter);
if (ctxt)
ctxt->on_unknown_change (sval, false);
}
for (svalue_set::iterator iter
= reachable_regs.begin_mutable_svals ();
iter != reachable_regs.end_mutable_svals (); ++iter)
{
const svalue *sval = (*iter);
if (ctxt)
ctxt->on_unknown_change (sval, true);
if (uncertainty)
uncertainty->on_mutable_sval_at_unknown_call (sval);
}
/* Mark any clusters that have escaped. */
reachable_regs.mark_escaped_clusters (ctxt);
/* Update bindings for all clusters that have escaped, whether above,
or previously. */
m_store.on_unknown_fncall (call, m_mgr->get_store_manager (),
conjured_purge (this, ctxt));
/* Purge dynamic extents from any regions that have escaped mutably:
realloc could have been called on them. */
for (hash_set<const region *>::iterator
iter = reachable_regs.begin_mutable_base_regs ();
iter != reachable_regs.end_mutable_base_regs ();
++iter)
{
const region *base_reg = (*iter);
unset_dynamic_extents (base_reg);
}
}
/* Traverse the regions in this model, determining what regions are
reachable from the store and populating *OUT.
If EXTRA_SVAL is non-NULL, treat it as an additional "root"
for reachability (for handling return values from functions when
analyzing return of the only function on the stack).
If UNCERTAINTY is non-NULL, treat any svalues that were recorded
within it as being maybe-bound as additional "roots" for reachability.
Find svalues that haven't leaked. */
void
region_model::get_reachable_svalues (svalue_set *out,
const svalue *extra_sval,
const uncertainty_t *uncertainty)
{
reachable_regions reachable_regs (this);
/* Add globals and regions that already escaped in previous
unknown calls. */
m_store.for_each_cluster (reachable_regions::init_cluster_cb,
&reachable_regs);
if (extra_sval)
reachable_regs.handle_sval (extra_sval);
if (uncertainty)
for (uncertainty_t::iterator iter
= uncertainty->begin_maybe_bound_svals ();
iter != uncertainty->end_maybe_bound_svals (); ++iter)
reachable_regs.handle_sval (*iter);
/* Get regions for locals that have explicitly bound values. */
for (store::cluster_map_t::iterator iter = m_store.begin ();
iter != m_store.end (); ++iter)
{
const region *base_reg = (*iter).first;
if (const region *parent = base_reg->get_parent_region ())
if (parent->get_kind () == RK_FRAME)
reachable_regs.add (base_reg, false);
}
/* Populate *OUT based on the values that were reachable. */
for (svalue_set::iterator iter
= reachable_regs.begin_reachable_svals ();
iter != reachable_regs.end_reachable_svals (); ++iter)
out->add (*iter);
}
/* Update this model for the RETURN_STMT, using CTXT to report any
diagnostics. */
void
region_model::on_return (const greturn *return_stmt, region_model_context *ctxt)
{
tree callee = get_current_function ()->decl;
tree lhs = DECL_RESULT (callee);
tree rhs = gimple_return_retval (return_stmt);
if (lhs && rhs)
{
const svalue *sval = get_rvalue (rhs, ctxt);
const region *ret_reg = get_lvalue (lhs, ctxt);
set_value (ret_reg, sval, ctxt);
}
}
/* Update this model for a call and return of setjmp/sigsetjmp at CALL within
ENODE, using CTXT to report any diagnostics.
This is for the initial direct invocation of setjmp/sigsetjmp (which returns
0), as opposed to any second return due to longjmp/siglongjmp. */
void
region_model::on_setjmp (const gcall *call, const exploded_node *enode,
region_model_context *ctxt)
{
const svalue *buf_ptr = get_rvalue (gimple_call_arg (call, 0), ctxt);
const region *buf_reg = deref_rvalue (buf_ptr, gimple_call_arg (call, 0),
ctxt);
/* Create a setjmp_svalue for this call and store it in BUF_REG's
region. */
if (buf_reg)
{
setjmp_record r (enode, call);
const svalue *sval
= m_mgr->get_or_create_setjmp_svalue (r, buf_reg->get_type ());
set_value (buf_reg, sval, ctxt);
}
/* Direct calls to setjmp return 0. */
if (tree lhs = gimple_call_lhs (call))
{
const svalue *new_sval
= m_mgr->get_or_create_int_cst (TREE_TYPE (lhs), 0);
const region *lhs_reg = get_lvalue (lhs, ctxt);
set_value (lhs_reg, new_sval, ctxt);
}
}
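/* As an illustrative sketch (example user code, not part of this file),
   for:

     jmp_buf env;
     int i = setjmp (env);

   the direct call is modeled by binding a setjmp_svalue (recording the
   exploded_node and call site) into the region that "env" decays to,
   and by binding the constant 0 to the region for "i". */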
/* Update this region_model for rewinding from a "longjmp" at LONGJMP_CALL
to a "setjmp" at SETJMP_CALL where the final stack depth should be
SETJMP_STACK_DEPTH. Pop any stack frames. Leak detection is *not*
done, and should be done by the caller. */
void
region_model::on_longjmp (const gcall *longjmp_call, const gcall *setjmp_call,
int setjmp_stack_depth, region_model_context *ctxt)
{
/* Evaluate the "val" argument, using the frame of the "longjmp". */
tree fake_retval = gimple_call_arg (longjmp_call, 1);
const svalue *fake_retval_sval = get_rvalue (fake_retval, ctxt);
/* Pop any frames until we reach the stack depth of the function where
setjmp was called. */
gcc_assert (get_stack_depth () >= setjmp_stack_depth);
while (get_stack_depth () > setjmp_stack_depth)
pop_frame (NULL, NULL, ctxt, false);
gcc_assert (get_stack_depth () == setjmp_stack_depth);
/* Assign to the LHS of the "setjmp" call in this model. */
if (tree lhs = gimple_call_lhs (setjmp_call))
{
/* Passing 0 as the val to longjmp leads to setjmp returning 1. */
const svalue *zero_sval
= m_mgr->get_or_create_int_cst (TREE_TYPE (fake_retval), 0);
tristate eq_zero = eval_condition (fake_retval_sval, EQ_EXPR, zero_sval);
/* If we have 0, use 1. */
if (eq_zero.is_true ())
{
const svalue *one_sval
= m_mgr->get_or_create_int_cst (TREE_TYPE (fake_retval), 1);
fake_retval_sval = one_sval;
}
else
{
/* Otherwise note that the value is nonzero. */
m_constraints->add_constraint (fake_retval_sval, NE_EXPR, zero_sval);
}
/* Decorate the return value from setjmp as being unmergeable,
so that we don't attempt to merge states in which it's zero
with states in which it's nonzero, leading to a clean distinction
in the exploded_graph between the first return and the second
return. */
fake_retval_sval = m_mgr->get_or_create_unmergeable (fake_retval_sval);
const region *lhs_reg = get_lvalue (lhs, ctxt);
set_value (lhs_reg, fake_retval_sval, ctxt);
}
}
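/* A minimal sketch of the rewind this models, assuming standard
   setjmp/longjmp semantics (example code, not part of this file):

     jmp_buf env;

     void inner (void)
     {
       longjmp (env, 0);
     }

     void outer (void)
     {
       if (setjmp (env) == 0)
         inner ();
     }

   On the rewind, the frame for "inner" is popped, and since longjmp
   was passed the value 0, the "setjmp" in "outer" appears to return 1,
   wrapped as an unmergeable svalue to keep the two returns distinct. */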
/* Update this region_model for a phi stmt of the form
LHS = PHI <...RHS...>.
where RHS is for the appropriate edge.
Get state from OLD_STATE so that all of the phi stmts for a basic block
are effectively handled simultaneously. */
void
region_model::handle_phi (const gphi *phi,
tree lhs, tree rhs,
const region_model &old_state,
hash_set<const svalue *> &svals_changing_meaning,
region_model_context *ctxt)
{
/* For now, don't bother tracking the .MEM SSA names. */
if (tree var = SSA_NAME_VAR (lhs))
if (TREE_CODE (var) == VAR_DECL)
if (VAR_DECL_IS_VIRTUAL_OPERAND (var))
return;
const svalue *src_sval = old_state.get_rvalue (rhs, ctxt);
const region *dst_reg = old_state.get_lvalue (lhs, ctxt);
const svalue *sval = old_state.get_rvalue (lhs, nullptr);
if (sval->get_kind () == SK_WIDENING)
svals_changing_meaning.add (sval);
set_value (dst_reg, src_sval, ctxt);
if (ctxt)
ctxt->on_phi (phi, rhs);
}
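/* For example (a hypothetical sketch), for a loop such as:

     int x = 0;
     while (x < n)
       x = x + 1;

   the loop header has a phi of the form

     x_1 = PHI <x_init (preheader), x_next (latch)>

   and handle_phi copies the svalue of the RHS matching the incoming
   edge (x_init or x_next) from OLD_STATE into the region for x_1, so
   that all of the block's phis observe the same pre-phi state. */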
/* Implementation of region_model::get_lvalue; the latter adds type-checking.
Get the id of the region for PV within this region_model,
emitting any diagnostics to CTXT. */
const region *
region_model::get_lvalue_1 (path_var pv, region_model_context *ctxt) const
{
tree expr = pv.m_tree;
gcc_assert (expr);
switch (TREE_CODE (expr))
{
default:
return m_mgr->get_region_for_unexpected_tree_code (ctxt, expr,
dump_location_t ());
case ARRAY_REF:
{
tree array = TREE_OPERAND (expr, 0);
tree index = TREE_OPERAND (expr, 1);
const region *array_reg = get_lvalue (array, ctxt);
const svalue *index_sval = get_rvalue (index, ctxt);
return m_mgr->get_element_region (array_reg,
TREE_TYPE (TREE_TYPE (array)),
index_sval);
}
break;
case BIT_FIELD_REF:
{
tree inner_expr = TREE_OPERAND (expr, 0);
const region *inner_reg = get_lvalue (inner_expr, ctxt);
tree num_bits = TREE_OPERAND (expr, 1);
tree first_bit_offset = TREE_OPERAND (expr, 2);
gcc_assert (TREE_CODE (num_bits) == INTEGER_CST);
gcc_assert (TREE_CODE (first_bit_offset) == INTEGER_CST);
bit_range bits (TREE_INT_CST_LOW (first_bit_offset),
TREE_INT_CST_LOW (num_bits));
return m_mgr->get_bit_range (inner_reg, TREE_TYPE (expr), bits);
}
break;
case MEM_REF:
{
tree ptr = TREE_OPERAND (expr, 0);
tree offset = TREE_OPERAND (expr, 1);
const svalue *ptr_sval = get_rvalue (ptr, ctxt);
const svalue *offset_sval = get_rvalue (offset, ctxt);
const region *star_ptr = deref_rvalue (ptr_sval, ptr, ctxt);
return m_mgr->get_offset_region (star_ptr,
TREE_TYPE (expr),
offset_sval);
}
break;
case FUNCTION_DECL:
return m_mgr->get_region_for_fndecl (expr);
case LABEL_DECL:
return m_mgr->get_region_for_label (expr);
case VAR_DECL:
/* Handle globals. */
if (is_global_var (expr))
return m_mgr->get_region_for_global (expr);
/* Fall through. */
case SSA_NAME:
case PARM_DECL:
case RESULT_DECL:
{
gcc_assert (TREE_CODE (expr) == SSA_NAME
|| TREE_CODE (expr) == PARM_DECL
|| VAR_P (expr)
|| TREE_CODE (expr) == RESULT_DECL);
int stack_index = pv.m_stack_depth;
const frame_region *frame = get_frame_at_index (stack_index);
gcc_assert (frame);
return frame->get_region_for_local (m_mgr, expr, ctxt);
}
case COMPONENT_REF:
{
/* obj.field */
tree obj = TREE_OPERAND (expr, 0);
tree field = TREE_OPERAND (expr, 1);
const region *obj_reg = get_lvalue (obj, ctxt);
return m_mgr->get_field_region (obj_reg, field);
}
break;
case STRING_CST:
return m_mgr->get_region_for_string (expr);
}
}
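/* Illustrative examples of the mapping above (a sketch, not from this
   file): given C code such as

     int a[10];
     struct s { int f; } obj;
     int *p;

   "a[i]" yields an element_region within the region for "a";
   "obj.f" yields a field_region within the region for "obj"; and a
   MEM_REF of "p" with constant offset 4 yields an offset_region within
   the region that "p" points to. */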
/* Assert that SRC_TYPE can be converted to DST_TYPE as a no-op. */
static void
assert_compat_types (tree src_type, tree dst_type)
{
if (src_type && dst_type && !VOID_TYPE_P (dst_type))
{
#if CHECKING_P
if (!(useless_type_conversion_p (src_type, dst_type)))
internal_error ("incompatible types: %qT and %qT", src_type, dst_type);
#endif
}
}
/* Return true if SRC_TYPE can be converted to DST_TYPE as a no-op. */
bool
compat_types_p (tree src_type, tree dst_type)
{
if (src_type && dst_type && !VOID_TYPE_P (dst_type))
if (!(useless_type_conversion_p (src_type, dst_type)))
return false;
return true;
}
/* Get the region for PV within this region_model,
emitting any diagnostics to CTXT. */
const region *
region_model::get_lvalue (path_var pv, region_model_context *ctxt) const
{
if (pv.m_tree == NULL_TREE)
return NULL;
const region *result_reg = get_lvalue_1 (pv, ctxt);
assert_compat_types (result_reg->get_type (), TREE_TYPE (pv.m_tree));
return result_reg;
}
/* Get the region for EXPR within this region_model (assuming the most
recent stack frame if it's a local). */
const region *
region_model::get_lvalue (tree expr, region_model_context *ctxt) const
{
return get_lvalue (path_var (expr, get_stack_depth () - 1), ctxt);
}
/* Implementation of region_model::get_rvalue; the latter adds type-checking.
Get the value of PV within this region_model,
emitting any diagnostics to CTXT. */
const svalue *
region_model::get_rvalue_1 (path_var pv, region_model_context *ctxt) const
{
gcc_assert (pv.m_tree);
switch (TREE_CODE (pv.m_tree))
{
default:
return m_mgr->get_or_create_unknown_svalue (TREE_TYPE (pv.m_tree));
case ADDR_EXPR:
{
/* "&EXPR". */
tree expr = pv.m_tree;
tree op0 = TREE_OPERAND (expr, 0);
const region *expr_reg = get_lvalue (op0, ctxt);
return m_mgr->get_ptr_svalue (TREE_TYPE (expr), expr_reg);
}
break;
case BIT_FIELD_REF:
{
tree expr = pv.m_tree;
tree op0 = TREE_OPERAND (expr, 0);
const region *reg = get_lvalue (op0, ctxt);
tree num_bits = TREE_OPERAND (expr, 1);
tree first_bit_offset = TREE_OPERAND (expr, 2);
gcc_assert (TREE_CODE (num_bits) == INTEGER_CST);
gcc_assert (TREE_CODE (first_bit_offset) == INTEGER_CST);
bit_range bits (TREE_INT_CST_LOW (first_bit_offset),
TREE_INT_CST_LOW (num_bits));
return get_rvalue_for_bits (TREE_TYPE (expr), reg, bits, ctxt);
}
case VAR_DECL:
if (DECL_HARD_REGISTER (pv.m_tree))
{
/* If it has a hard register, it doesn't have a memory region
and can't be referred to as an lvalue. */
return m_mgr->get_or_create_unknown_svalue (TREE_TYPE (pv.m_tree));
}
/* Fall through. */
case PARM_DECL:
case SSA_NAME:
case RESULT_DECL:
case ARRAY_REF:
{
const region *reg = get_lvalue (pv, ctxt);
return get_store_value (reg, ctxt);
}
case REALPART_EXPR:
case IMAGPART_EXPR:
case VIEW_CONVERT_EXPR:
{
tree expr = pv.m_tree;
tree arg = TREE_OPERAND (expr, 0);
const svalue *arg_sval = get_rvalue (arg, ctxt);
const svalue *sval_unaryop
= m_mgr->get_or_create_unaryop (TREE_TYPE (expr), TREE_CODE (expr),
arg_sval);
return sval_unaryop;
}
case INTEGER_CST:
case REAL_CST:
case COMPLEX_CST:
case VECTOR_CST:
case STRING_CST:
return m_mgr->get_or_create_constant_svalue (pv.m_tree);
case POINTER_PLUS_EXPR:
{
tree expr = pv.m_tree;
tree ptr = TREE_OPERAND (expr, 0);
tree offset = TREE_OPERAND (expr, 1);
const svalue *ptr_sval = get_rvalue (ptr, ctxt);
const svalue *offset_sval = get_rvalue (offset, ctxt);
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (expr), POINTER_PLUS_EXPR,
ptr_sval, offset_sval);
return sval_binop;
}
/* Binary ops. */
case PLUS_EXPR:
case MULT_EXPR:
case BIT_AND_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
{
tree expr = pv.m_tree;
tree arg0 = TREE_OPERAND (expr, 0);
tree arg1 = TREE_OPERAND (expr, 1);
const svalue *arg0_sval = get_rvalue (arg0, ctxt);
const svalue *arg1_sval = get_rvalue (arg1, ctxt);
const svalue *sval_binop
= m_mgr->get_or_create_binop (TREE_TYPE (expr), TREE_CODE (expr),
arg0_sval, arg1_sval);
return sval_binop;
}
case COMPONENT_REF:
case MEM_REF:
{
const region *ref_reg = get_lvalue (pv, ctxt);
return get_store_value (ref_reg, ctxt);
}
case OBJ_TYPE_REF:
{
tree expr = OBJ_TYPE_REF_EXPR (pv.m_tree);
return get_rvalue (expr, ctxt);
}
}
}
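/* Illustrative examples of the mapping above (a sketch, not from this
   file): "&x" becomes a pointer svalue to the region for "x"; "a + b"
   becomes a binop_svalue over the svalues for "a" and "b"; constants
   such as 42 become constant_svalues; and loads such as "obj.f" are
   looked up in the store via get_store_value. */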
/* Get the value of PV within this region_model,
emitting any diagnostics to CTXT. */
const svalue *
region_model::get_rvalue (path_var pv, region_model_context *ctxt) const
{
if (pv.m_tree == NULL_TREE)
return NULL;
const svalue *result_sval = get_rvalue_1 (pv, ctxt);
assert_compat_types (result_sval->get_type (), TREE_TYPE (pv.m_tree));
result_sval = check_for_poison (result_sval, pv.m_tree, NULL, ctxt);
return result_sval;
}
/* Get the value of EXPR within this region_model (assuming the most
recent stack frame if it's a local). */
const svalue *
region_model::get_rvalue (tree expr, region_model_context *ctxt) const
{
return get_rvalue (path_var (expr, get_stack_depth () - 1), ctxt);
}
/* Return true if this model is on a path with "main" as the entrypoint
(as opposed to one in which we're merely analyzing a subset of the
path through the code). */
bool
region_model::called_from_main_p () const
{
if (!m_current_frame)
return false;
/* Determine if the oldest stack frame in this model is for "main". */
const frame_region *frame0 = get_frame_at_index (0);
gcc_assert (frame0);
return id_equal (DECL_NAME (frame0->get_function ()->decl), "main");
}
/* Subroutine of region_model::get_store_value for when REG is (or is within)
a global variable that hasn't been touched since the start of this path
(or was implicitly touched due to a call to an unknown function). */
const svalue *
region_model::get_initial_value_for_global (const region *reg) const
{
/* Get the decl that REG is for (or is within). */
const decl_region *base_reg
= reg->get_base_region ()->dyn_cast_decl_region ();
gcc_assert (base_reg);
tree decl = base_reg->get_decl ();
/* Special-case: to avoid having to explicitly update all previously
untracked globals when calling an unknown fn, they implicitly have
an unknown value if an unknown call has occurred, unless this is
static to-this-TU and hasn't escaped. Globals that have escaped
are explicitly tracked, so we shouldn't hit this case for them. */
if (m_store.called_unknown_fn_p ()
&& TREE_PUBLIC (decl)
&& !TREE_READONLY (decl))
return m_mgr->get_or_create_unknown_svalue (reg->get_type ());
/* If we are on a path from the entrypoint of "main" and we have a
global decl defined in this TU that hasn't been touched yet, then
the initial value of REG can be taken from the initialization value
of the decl (likewise if the decl is read-only). */
if (called_from_main_p () || TREE_READONLY (decl))
return reg->get_initial_value_at_main (m_mgr);
/* Otherwise, return INIT_VAL(REG). */
return m_mgr->get_or_create_initial_value (reg);
}
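/* For example (a hedged sketch), given the global:

     int g = 42;

   on a path whose entrypoint is "main", a first read of "g" yields the
   constant 42 from its initializer, whereas after a call to an unknown
   function a read of "g" yields an unknown svalue, since the callee
   could have modified any non-const public global. */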
/* Get a value for REG, looking it up in the store, or otherwise falling
back to "initial" or "unknown" values.
Use CTXT to report any warnings associated with reading from REG. */
const svalue *
region_model::get_store_value (const region *reg,
region_model_context *ctxt) const
{
/* Getting the value of an empty region gives an unknown_svalue. */
if (reg->empty_p ())
return m_mgr->get_or_create_unknown_svalue (reg->get_type ());
bool check_poisoned = true;
if (check_region_for_read (reg, ctxt))
check_poisoned = false;
/* Special-case: handle var_decls in the constant pool. */
if (const decl_region *decl_reg = reg->dyn_cast_decl_region ())
if (const svalue *sval = decl_reg->maybe_get_constant_value (m_mgr))
return sval;
const svalue *sval
= m_store.get_any_binding (m_mgr->get_store_manager (), reg);
if (sval)
{
if (reg->get_type ())
sval = m_mgr->get_or_create_cast (reg->get_type (), sval);
return sval;
}
/* Special-case: read at a constant index within a STRING_CST. */
if (const offset_region *offset_reg = reg->dyn_cast_offset_region ())
if (tree byte_offset_cst
= offset_reg->get_byte_offset ()->maybe_get_constant ())
if (const string_region *str_reg
= reg->get_parent_region ()->dyn_cast_string_region ())
{
tree string_cst = str_reg->get_string_cst ();
if (const svalue *char_sval
= m_mgr->maybe_get_char_from_string_cst (string_cst,
byte_offset_cst))
return m_mgr->get_or_create_cast (reg->get_type (), char_sval);
}
/* Special-case: read the initial char of a STRING_CST. */
if (const cast_region *cast_reg = reg->dyn_cast_cast_region ())
if (const string_region *str_reg
= cast_reg->get_original_region ()->dyn_cast_string_region ())
{
tree string_cst = str_reg->get_string_cst ();
tree byte_offset_cst = build_int_cst (integer_type_node, 0);
if (const svalue *char_sval
= m_mgr->maybe_get_char_from_string_cst (string_cst,
byte_offset_cst))
return m_mgr->get_or_create_cast (reg->get_type (), char_sval);
}
/* Otherwise we implicitly have the initial value of the region
(if the cluster had been touched, binding_cluster::get_any_binding
would have returned UNKNOWN, and we would already have returned
that above). */
/* Handle globals. */
if (reg->get_base_region ()->get_parent_region ()->get_kind ()
== RK_GLOBALS)
return get_initial_value_for_global (reg);
return m_mgr->get_or_create_initial_value (reg, check_poisoned);
}
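/* For instance (an illustrative sketch), reading "foo"[1] hits the
   STRING_CST special case above: the offset_region has constant byte
   offset 1 within the string_region for "foo", so the result is the
   character constant 'o', cast to the type of the read. */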
/* Return false if REG definitely does not exist, true if it may exist.
This is for detecting regions within the stack that don't exist anymore
after frames are popped. */
bool
region_model::region_exists_p (const region *reg) const
{
/* If within a stack frame, check that the stack frame is live. */
if (const frame_region *enclosing_frame = reg->maybe_get_frame_region ())
{
/* Check that the current frame is the enclosing frame, or is called
by it. */
for (const frame_region *iter_frame = get_current_frame (); iter_frame;
iter_frame = iter_frame->get_calling_frame ())
if (iter_frame == enclosing_frame)
return true;
return false;
}
return true;
}
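/* For example (a hypothetical sketch):

     int *get_dangling (void)
     {
       int local;
       return &local;
     }

   after the frame for "get_dangling" is popped, the region for "local"
   has a frame_region that is no longer on the stack, so
   region_exists_p returns false for it. */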
/* Get a region for referencing PTR_SVAL, creating a region if need be, and
potentially generating warnings via CTXT.
PTR_SVAL must be of pointer type.
PTR_TREE if non-NULL can be used when emitting diagnostics. */
const region *
region_model::deref_rvalue (const svalue *ptr_sval, tree ptr_tree,
region_model_context *ctxt,
bool add_nonnull_constraint) const
{
gcc_assert (ptr_sval);
gcc_assert (POINTER_TYPE_P (ptr_sval->get_type ()));
/* If we're dereferencing PTR_SVAL, assume that it is non-NULL; add this
as a constraint. This suppresses false positives from
-Wanalyzer-null-dereference for the case where we later have an
"if (PTR_SVAL)": such a warning would occur if we considered the false
branch and transitioned the malloc state machine from start to null. */
if (add_nonnull_constraint)
{
tree null_ptr_cst = build_int_cst (ptr_sval->get_type (), 0);
const svalue *null_ptr
= m_mgr->get_or_create_constant_svalue (null_ptr_cst);
m_constraints->add_constraint (ptr_sval, NE_EXPR, null_ptr);
}
switch (ptr_sval->get_kind ())
{
default:
break;
case SK_REGION:
{
const region_svalue *region_sval
= as_a <const region_svalue *> (ptr_sval);
return region_sval->get_pointee ();
}
case SK_BINOP:
{
const binop_svalue *binop_sval
= as_a <const binop_svalue *> (ptr_sval);
switch (binop_sval->get_op ())
{
case POINTER_PLUS_EXPR:
{
/* If we have a symbolic value expressing pointer arithmetic,
try to convert it to a suitable region. */
const region *parent_region
= deref_rvalue (binop_sval->get_arg0 (), NULL_TREE, ctxt);
const svalue *offset = binop_sval->get_arg1 ();
tree type = TREE_TYPE (ptr_sval->get_type ());
return m_mgr->get_offset_region (parent_region, type, offset);
}
default:
break;
}
}
break;
case SK_POISONED:
{
if (ctxt)
{
tree ptr = get_representative_tree (ptr_sval);
/* If we can't get a representative tree for PTR_SVAL
(e.g. if it hasn't been bound into the store), then
fall back on PTR_TREE, if non-NULL. */
if (!ptr)
ptr = ptr_tree;
if (ptr)
{
const poisoned_svalue *poisoned_sval
= as_a <const poisoned_svalue *> (ptr_sval);
enum poison_kind pkind = poisoned_sval->get_poison_kind ();
ctxt->warn (::make_unique<poisoned_value_diagnostic>
(ptr, pkind, nullptr, nullptr));
}
}
}
break;
}
return m_mgr->get_symbolic_region (ptr_sval);
}
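/* Illustrative cases for the above (a sketch, not from this file):
   dereferencing "&x" (a region_svalue) yields the region for "x"
   directly; dereferencing "p + i" (a POINTER_PLUS_EXPR binop_svalue)
   yields an offset_region within whatever "p" points to; and
   dereferencing an uninitialized pointer (a poisoned_svalue) triggers
   a poisoned_value_diagnostic before falling back to a
   symbolic_region. */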
/* Attempt to get BITS within any value of REG, as TYPE.
In particular, extract values from compound_svalues for the case
where there's a concrete binding at BITS.
Return an unknown svalue if we can't handle the given case.
Use CTXT to report any warnings associated with reading from REG. */
const svalue *
region_model::get_rvalue_for_bits (tree type,
const region *reg,
const bit_range &bits,
region_model_context *ctxt) const
{
const svalue *sval = get_store_value (reg, ctxt);
return m_mgr->get_or_create_bits_within (type, bits, sval);
}
/* A subclass of pending_diagnostic for complaining about writes to
constant regions of memory. */
class write_to_const_diagnostic
: public pending_diagnostic_subclass<write_to_const_diagnostic>
{
public:
write_to_const_diagnostic (const region *reg, tree decl)
: m_reg (reg), m_decl (decl)
{}
const char *get_kind () const final override
{
return "write_to_const_diagnostic";
}
bool operator== (const write_to_const_diagnostic &other) const
{
return (m_reg == other.m_reg
&& m_decl == other.m_decl);
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_write_to_const;
}
bool emit (diagnostic_emission_context &ctxt) final override
{
auto_diagnostic_group d;
bool warned;
switch (m_reg->get_kind ())
{
default:
warned = ctxt.warn ("write to %<const%> object %qE", m_decl);
break;
case RK_FUNCTION:
warned = ctxt.warn ("write to function %qE", m_decl);
break;
case RK_LABEL:
warned = ctxt.warn ("write to label %qE", m_decl);
break;
}
if (warned)
inform (DECL_SOURCE_LOCATION (m_decl), "declared here");
return warned;
}
label_text describe_final_event (const evdesc::final_event &ev) final override
{
switch (m_reg->get_kind ())
{
default:
return ev.formatted_print ("write to %<const%> object %qE here", m_decl);
case RK_FUNCTION:
return ev.formatted_print ("write to function %qE here", m_decl);
case RK_LABEL:
return ev.formatted_print ("write to label %qE here", m_decl);
}
}
private:
const region *m_reg;
tree m_decl;
};
/* A subclass of pending_diagnostic for complaining about writes to
string literals. */
class write_to_string_literal_diagnostic
: public pending_diagnostic_subclass<write_to_string_literal_diagnostic>
{
public:
write_to_string_literal_diagnostic (const region *reg)
: m_reg (reg)
{}
const char *get_kind () const final override
{
return "write_to_string_literal_diagnostic";
}
bool operator== (const write_to_string_literal_diagnostic &other) const
{
return m_reg == other.m_reg;
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_write_to_string_literal;
}
bool emit (diagnostic_emission_context &ctxt) final override
{
/* Ideally we would show the location of the STRING_CST as well,
but it is not available at this point. */
return ctxt.warn ("write to string literal");
}
label_text describe_final_event (const evdesc::final_event &ev) final override
{
return ev.formatted_print ("write to string literal here");
}
private:
const region *m_reg;
};
/* Use CTXT to warn if DEST_REG is a region that shouldn't be written to. */
void
region_model::check_for_writable_region (const region* dest_reg,
region_model_context *ctxt) const
{
/* Fail gracefully if CTXT is NULL. */
if (!ctxt)
return;
const region *base_reg = dest_reg->get_base_region ();
switch (base_reg->get_kind ())
{
default:
break;
case RK_FUNCTION:
{
const function_region *func_reg = as_a <const function_region *> (base_reg);
tree fndecl = func_reg->get_fndecl ();
ctxt->warn (make_unique<write_to_const_diagnostic>
(func_reg, fndecl));
}
break;
case RK_LABEL:
{
const label_region *label_reg = as_a <const label_region *> (base_reg);
tree label = label_reg->get_label ();
ctxt->warn (make_unique<write_to_const_diagnostic>
(label_reg, label));
}
break;
case RK_DECL:
{
const decl_region *decl_reg = as_a <const decl_region *> (base_reg);
tree decl = decl_reg->get_decl ();
/* Warn about writes to const globals.
Don't warn for writes to const locals, and params in particular,
since we would warn in push_frame when setting them up (e.g. the
"this" param is "T* const"). */
if (TREE_READONLY (decl)
&& is_global_var (decl))
ctxt->warn (make_unique<write_to_const_diagnostic> (dest_reg, decl));
}
break;
case RK_STRING:
ctxt->warn (make_unique<write_to_string_literal_diagnostic> (dest_reg));
break;
}
}
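/* Examples of writes this catches (a hedged sketch), assuming the
   decls below are globals:

     const int limit = 10;
     char *s = "text";

     void bad (void)
     {
       *(int *) &limit = 0;
       s[0] = 'T';
     }

   The first write hits the RK_DECL case (a write to a const global);
   the second hits the RK_STRING case, since the base region of the
   destination is the string_region for "text". */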
/* Get the capacity of REG in bytes. */
const svalue *
region_model::get_capacity (const region *reg) const
{
switch (reg->get_kind ())
{
default:
break;
case RK_DECL:
{
const decl_region *decl_reg = as_a <const decl_region *> (reg);
tree decl = decl_reg->get_decl ();
if (TREE_CODE (decl) == SSA_NAME)
{
tree type = TREE_TYPE (decl);
tree size = TYPE_SIZE (type);
return get_rvalue (size, NULL);
}
else
{
tree size = decl_init_size (decl, false);
if (size)
return get_rvalue (size, NULL);
}
}
break;
case RK_SIZED:
/* Look through sized regions to get at the capacity
of the underlying regions. */
return get_capacity (reg->get_parent_region ());
case RK_STRING:
{
/* "Capacity" here means "size". */
const string_region *string_reg = as_a <const string_region *> (reg);
tree string_cst = string_reg->get_string_cst ();
return m_mgr->get_or_create_int_cst (size_type_node,
TREE_STRING_LENGTH (string_cst));
}
break;
}
if (const svalue *recorded = get_dynamic_extents (reg))
return recorded;
return m_mgr->get_or_create_unknown_svalue (sizetype);
}
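/* For example (an illustrative sketch): for

     char buf[10];

   the RK_DECL case yields the constant 10 from the decl's size; for
   the string literal "foo" the RK_STRING case yields 4 (including the
   terminating NUL); and for a heap allocation such as malloc (n) the
   capacity comes from the dynamic extents recorded at the allocation
   site, falling back to an unknown svalue otherwise. */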
/* If CTXT is non-NULL, use it to warn about any problems accessing REG,
using DIR to determine if this access is a read or write.
Return TRUE if an OOB access was detected.
If SVAL_HINT is non-NULL, use it as a hint in diagnostics
about the value that would be written to REG. */
bool
region_model::check_region_access (const region *reg,
enum access_direction dir,
const svalue *sval_hint,
region_model_context *ctxt) const
{
/* Fail gracefully if CTXT is NULL. */
if (!ctxt)
return false;
bool oob_access_detected = false;
check_region_for_taint (reg, dir, ctxt);
if (!check_region_bounds (reg, dir, sval_hint, ctxt))
oob_access_detected = true;
switch (dir)
{
default:
gcc_unreachable ();
case DIR_READ:
/* Currently a no-op. */
break;
case DIR_WRITE:
check_for_writable_region (reg, ctxt);
break;
}
return oob_access_detected;
}
/* If CTXT is non-NULL, use it to warn about any problems writing to REG. */
void
region_model::check_region_for_write (const region *dest_reg,
const svalue *sval_hint,
region_model_context *ctxt) const
{
check_region_access (dest_reg, DIR_WRITE, sval_hint, ctxt);
}
/* If CTXT is non-NULL, use it to warn about any problems reading from REG.
Returns TRUE if an OOB read was detected. */
bool
region_model::check_region_for_read (const region *src_reg,
region_model_context *ctxt) const
{
return check_region_access (src_reg, DIR_READ, NULL, ctxt);
}
/* Concrete subclass of pending_diagnostic for casts of pointers to
allocations whose size is not a multiple of the pointee's size,
leaving trailing bytes. */
class dubious_allocation_size
: public pending_diagnostic_subclass<dubious_allocation_size>
{
public:
dubious_allocation_size (const region *lhs, const region *rhs,
const gimple *stmt)
: m_lhs (lhs), m_rhs (rhs), m_expr (NULL_TREE), m_stmt (stmt),
m_has_allocation_event (false)
{}
dubious_allocation_size (const region *lhs, const region *rhs,
tree expr, const gimple *stmt)
: m_lhs (lhs), m_rhs (rhs), m_expr (expr), m_stmt (stmt),
m_has_allocation_event (false)
{}
const char *get_kind () const final override
{
return "dubious_allocation_size";
}
bool operator== (const dubious_allocation_size &other) const
{
return (m_stmt == other.m_stmt
&& pending_diagnostic::same_tree_p (m_expr, other.m_expr));
}
int get_controlling_option () const final override
{
return OPT_Wanalyzer_allocation_size;
}
bool emit (diagnostic_emission_context &ctxt) final override
{
ctxt.add_cwe (131);
return ctxt.warn ("allocated buffer size is not a multiple"
" of the pointee's size");
}
label_text describe_final_event (const evdesc::final_event &ev) final
override
{
tree pointee_type = TREE_TYPE (m_lhs->get_type ());
if (m_has_allocation_event)
return ev.formatted_print ("assigned to %qT here;"
" %<sizeof (%T)%> is %qE",
m_lhs->get_type (), pointee_type,
size_in_bytes (pointee_type));
/* Fallback: typically we should have seen an allocation_event
before this. */
if (m_expr)
{
if (TREE_CODE (m_expr) == INTEGER_CST)
return ev.formatted_print ("allocated %E bytes and assigned to"
" %qT here; %<sizeof (%T)%> is %qE",
m_expr, m_lhs->get_type (), pointee_type,
size_in_bytes (pointee_type));
else
return ev.formatted_print ("allocated %qE bytes and assigned to"
" %qT here; %<sizeof (%T)%> is %qE",
m_expr, m_lhs->get_type (), pointee_type,
size_in_bytes (pointee_type));
}
return ev.formatted_print ("allocated and assigned to %qT here;"
" %<sizeof (%T)%> is %qE",
m_lhs->get_type (), pointee_type,
size_in_bytes (pointee_type));
}
void
add_region_creation_events (const region *,
tree capacity,
const event_loc_info &loc_info,
checker_path &emission_path) final override
{
emission_path.add_event
(make_unique<region_creation_event_allocation_size> (capacity, loc_info));
m_has_allocation_event = true;
}
void mark_interesting_stuff (interesting_t *interest) final override
{
interest->add_region_creation (m_rhs);
}
private:
const region *m_lhs;
const region *m_rhs;
const tree m_expr;
const gimple *m_stmt;
bool m_has_allocation_event;
};
/* Return true if constant size CST is compatible with the pointee size
POINTEE_SIZE_TREE: for IS_STRUCT, when CST is zero or at least as large
as the pointee; otherwise, when CST is a multiple of the pointee size.
Return false for dubious allocation sizes. */
static bool
capacity_compatible_with_type (tree cst, tree pointee_size_tree,
bool is_struct)
{
gcc_assert (TREE_CODE (cst) == INTEGER_CST);
gcc_assert (TREE_CODE (pointee_size_tree) == INTEGER_CST);
unsigned HOST_WIDE_INT pointee_size = TREE_INT_CST_LOW (pointee_size_tree);
unsigned HOST_WIDE_INT alloc_size = TREE_INT_CST_LOW (cst);
if (is_struct)
return alloc_size == 0 || alloc_size >= pointee_size;
return alloc_size % pointee_size == 0;
}
static bool
capacity_compatible_with_type (tree cst, tree pointee_size_tree)
{
return capacity_compatible_with_type (cst, pointee_size_tree, false);
}
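/* Worked examples for the above (a sketch): with a pointee size of 4
   bytes (e.g. a 32-bit int), an allocation of 12 bytes is compatible
   (12 % 4 == 0), whereas 10 bytes is dubious (10 % 4 == 2, leaving
   trailing bytes). With IS_STRUCT, an allocation of 0 bytes, or of at
   least the struct's size, is accepted, since over-allocating a struct
   (e.g. for a trailing flexible array member) is a common pattern. */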
/* Visitor for checking whether SVAL could be a multiple of SIZE_CST.
It works by visiting all svalues inside SVAL until it reaches
atomic nodes. From those, it goes back up again, adding each
node that is not known to be a multiple of SIZE_CST to RESULT_SET. */
class size_visitor : public visitor
{
public:
size_visitor (tree size_cst, const svalue *root_sval, constraint_manager *cm)
: m_size_cst (size_cst), m_root_sval (root_sval), m_cm (cm)
{
m_root_sval->accept (this);
}
bool is_dubious_capacity ()
{
return result_set.contains (m_root_sval);
}
void visit_constant_svalue (const constant_svalue *sval) final override
{
check_constant (sval->get_constant (), sval);
}
void visit_unaryop_svalue (const unaryop_svalue *sval) final override
{
if (CONVERT_EXPR_CODE_P (sval->get_op ())
&& result_set.contains (sval->get_arg ()))
result_set.add (sval);
}
void visit_binop_svalue (const binop_svalue *sval) final override
{
const svalue *arg0 = sval->get_arg0 ();
const svalue *arg1 = sval->get_arg1 ();
switch (sval->get_op ())
{
case MULT_EXPR:
if (result_set.contains (arg0) && result_set.contains (arg1))
result_set.add (sval);
break;
case PLUS_EXPR:
case MINUS_EXPR:
if (result_set.contains (arg0) || result_set.contains (arg1))
result_set.add (sval);
break;
default:
break;
}
}
void visit_unmergeable_svalue (const unmergeable_svalue *sval) final override
{
if (result_set.contains (sval->get_arg ()))
result_set.add (sval);
}
void visit_widening_svalue (const widening_svalue *sval) final override
{
const svalue *base = sval->get_base_svalue ();
const svalue *iter = sval->get_iter_svalue ();
if (result_set.contains (base) || result_set.contains (iter))
result_set.add (sval);
}
void visit_initial_svalue (const initial_svalue *sval) final override
{
equiv_class_id id = equiv_class_id::null ();
if (m_cm->get_equiv_class_by_svalue (sval, &id))
{
if (tree cst = id.get_obj (*m_cm).get_any_constant ())
check_constant (cst, sval);
}
else if (!m_cm->sval_constrained_p (sval))
{
result_set.add (sval);
}
}
void visit_conjured_svalue (const conjured_svalue *sval) final override
{
equiv_class_id id = equiv_class_id::null ();
if (m_cm->get_equiv_class_by_svalue (sval, &id))
if (tree cst = id.get_obj (*m_cm).get_any_constant ())
check_constant (cst, sval);
}
private:
void check_constant (tree cst, const svalue *sval)
{
switch (TREE_CODE (cst))
{
default:
/* Assume all unhandled operands are compatible. */
break;
case INTEGER_CST:
if (!capacity_compatible_with_type (cst, m_size_cst))
result_set.add (sval);
break;
}
}