blob: dfc44ef07061068475fd6d3698165f9330c76723 [file] [log] [blame]
/* Proof-of-concept of a -fanalyzer plugin.
Detect (some) uses of CPython API outside of the Global Interpreter Lock.
https://docs.python.org/3/c-api/init.html#thread-state-and-the-global-interpreter-lock
*/
/* { dg-options "-g" } */
#define INCLUDE_LIST
#define INCLUDE_MEMORY
#define INCLUDE_STRING
#define INCLUDE_VECTOR
#include "gcc-plugin.h"
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "diagnostic.h"
#include "tree.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "diagnostics/event-id.h"
#include "context.h"
#include "channels.h"
#include "analyzer/common.h"
#include "analyzer/analyzer-logging.h"
#include "json.h"
#include "analyzer/sm.h"
#include "analyzer/pending-diagnostic.h"
int plugin_is_GPL_compatible;
#if ENABLE_ANALYZER
namespace ana {
/* Return true if TYPE is a RECORD_TYPE whose identifier is "PyObject",
   false otherwise (including for record types with no identifier).  */
static bool
type_based_on_pyobject_p (tree type)
{
  /* Ideally we'd also check for "subclasses" here by iterating up the
     first field of each struct.  */
  if (TREE_CODE (type) == RECORD_TYPE)
    if (tree type_id = TYPE_IDENTIFIER (type))
      return id_equal (type_id, "PyObject");
  return false;
}
/* An experimental state machine, for tracking whether the GIL is held,
as global state.
The start state is treated as "GIL held"; the diagnostics in this file
describe a transition back to the start state as "acquiring the GIL".  */
class gil_state_machine : public state_machine
{
public:
/* Create the state machine, registering its extra states.  */
gil_state_machine (logger *logger);
/* This state machine always reports false here.  */
bool inherited_state_p () const final override { return false; }
/* Update the global state for STMT and queue diagnostics for uses of
PyObject while the GIL is released.  */
bool on_stmt (sm_context &sm_ctxt,
const gimple *stmt) const final override;
/* Always returns true for this state machine.  */
bool can_purge_p (state_t s) const final override;
/* Queue a diagnostic if OP has a PyObject type.  Public because it is
invoked from the file-local statement-walk callback.  */
void check_for_pyobject_usage_without_gil (sm_context &sm_ctxt,
const gimple *stmt,
tree op) const;
private:
/* Queue a diagnostic for each argument of CALL that is a pointer to
PyObject.  CALLEE_FNDECL may be NULL when the callee is not known.  */
void check_for_pyobject_in_call (sm_context &sm_ctxt,
const gcall &call,
tree callee_fndecl) const;
public:
/* These states are "global", rather than per-expression. */
/* State for when we've released the GIL. */
state_t m_released_gil;
/* Stop state, entered after a diagnostic has been queued.  */
state_t m_stop;
};
/* Abstract base for the diagnostics issued by gil_state_machine.
   Provides the shared wording for GIL release/acquire events and
   location handling; only constructible by subclasses.  */
class gil_diagnostic : public pending_diagnostic
{
public:
  /* There isn't a warning ID for us to use, so report 0.  */
  int get_controlling_option () const final override { return 0; }

  /* If LOC comes from a macro expansion, resolve it to the macro
     expansion point; otherwise return LOC unchanged.  */
  location_t fixup_location (location_t loc,
                             bool) const final override
  {
    /* Ideally we'd check for specific macros here, and only
       resolve certain macros.  */
    if (!linemap_location_from_macro_expansion_p (line_table, loc))
      return loc;
    return linemap_resolve_location (line_table, loc,
                                     LRK_MACRO_EXPANSION_POINT, NULL);
  }

  /* Print a description of CHANGE to PP for global transitions into the
     released-GIL state or back to the start state.  Return true if a
     description was written, false otherwise.  */
  bool
  describe_state_change (pretty_printer &pp,
                         const evdesc::state_change &change) final override
  {
    if (!change.is_global_p ())
      return false;

    if (change.m_new_state == m_sm.m_released_gil)
      {
        pp_string (&pp, "releasing the GIL here");
        return true;
      }

    if (change.m_new_state == m_sm.get_start_state ())
      {
        pp_string (&pp, "acquiring the GIL here");
        return true;
      }

    return false;
  }

  /* Classify global transitions as releasing or acquiring a lock; any
     other change gets a default-constructed meaning.  */
  diagnostics::paths::event::meaning
  get_meaning_for_state_change (const evdesc::state_change &change)
    const final override
  {
    using event = diagnostics::paths::event;

    if (!change.is_global_p ())
      return event::meaning ();

    if (change.m_new_state == m_sm.m_released_gil)
      return event::meaning (event::verb::release,
                             event::noun::lock);

    if (change.m_new_state == m_sm.get_start_state ())
      return event::meaning (event::verb::acquire,
                             event::noun::lock);

    return event::meaning ();
  }

protected:
  gil_diagnostic (const gil_state_machine &sm) : m_sm (sm)
  {
  }

private:
  /* The state machine whose states are compared against above.  */
  const gil_state_machine &m_sm;
};
class double_save_thread : public gil_diagnostic
{
public:
double_save_thread (const gil_state_machine &sm, const gcall &call)
: gil_diagnostic (sm), m_call (call)
{}
const char *get_kind () const final override
{
return "double_save_thread";
}
bool subclass_equal_p (const pending_diagnostic &base_other) const override
{
const double_save_thread &sub_other
= (const double_save_thread &)base_other;
return &m_call == &sub_other.m_call;
}
bool emit (diagnostic_emission_context &ctxt) final override
{
return ctxt.warn ("nested usage of %qs", "Py_BEGIN_ALLOW_THREADS");
}
bool
describe_final_event (pretty_printer &pp,
const evdesc::final_event &ev) final override
{
pp_printf (&pp,
"nested usage of %qs here",
"Py_BEGIN_ALLOW_THREADS");
return true;
}
private:
const gcall &m_call;
};
class fncall_without_gil : public gil_diagnostic
{
public:
fncall_without_gil (const gil_state_machine &sm, const gcall &call,
tree callee_fndecl, unsigned arg_idx)
: gil_diagnostic (sm), m_call (call), m_callee_fndecl (callee_fndecl),
m_arg_idx (arg_idx)
{}
const char *get_kind () const final override
{
return "fncall_without_gil";
}
bool subclass_equal_p (const pending_diagnostic &base_other) const override
{
const fncall_without_gil &sub_other
= (const fncall_without_gil &)base_other;
return (&m_call == &sub_other.m_call
&& m_callee_fndecl == sub_other.m_callee_fndecl
&& m_arg_idx == sub_other.m_arg_idx);
}
bool emit (diagnostic_emission_context &ctxt) final override
{
if (m_callee_fndecl)
return ctxt.warn ("use of PyObject as argument %i of %qE"
" without the GIL",
m_arg_idx + 1, m_callee_fndecl);
else
return ctxt.warn ("use of PyObject as argument %i of call"
" without the GIL",
m_arg_idx + 1, m_callee_fndecl);
}
bool
describe_final_event (pretty_printer &pp,
const evdesc::final_event &ev) final override
{
if (m_callee_fndecl)
pp_printf (&pp,
"use of PyObject as argument %i of %qE here without the GIL",
m_arg_idx + 1, m_callee_fndecl);
else
pp_printf (&pp,
"use of PyObject as argument %i of call here without the GIL",
m_arg_idx + 1, m_callee_fndecl);
return true;
}
private:
const gcall &m_call;
tree m_callee_fndecl;
unsigned m_arg_idx;
};
class pyobject_usage_without_gil : public gil_diagnostic
{
public:
pyobject_usage_without_gil (const gil_state_machine &sm, tree expr)
: gil_diagnostic (sm), m_expr (expr)
{}
const char *get_kind () const final override
{
return "pyobject_usage_without_gil";
}
bool subclass_equal_p (const pending_diagnostic &base_other) const override
{
return same_tree_p (m_expr,
((const pyobject_usage_without_gil&)base_other).m_expr);
}
bool emit (diagnostic_emission_context &ctxt) final override
{
return ctxt.warn ("use of PyObject %qE without the GIL", m_expr);
}
bool
describe_final_event (pretty_printer &pp,
const evdesc::final_event &ev) final override
{
pp_printf (&pp,
"PyObject %qE used here without the GIL",
m_expr);
return true;
}
private:
tree m_expr;
};
/* gil_state_machine's ctor: name the machine "gil" and register its two
   extra states.  The mem-initializers run in member declaration order,
   so "released_gil" is added before "stop".  */
gil_state_machine::gil_state_machine (logger *logger)
  : state_machine ("gil", logger),
    m_released_gil (add_state ("released_gil")),
    m_stop (add_state ("stop"))
{
}
/* Bundle of context handed (as a void *) to the check_for_pyobject
   callback while walking the operands of a statement.  */
struct cb_data
{
  cb_data (const gil_state_machine &state_mach,
           sm_context &ctxt,
           const gimple *walked_stmt)
    : m_sm (state_mach), m_sm_ctxt (ctxt), m_stmt (walked_stmt)
  {
  }

  /* The state machine doing the checking.  */
  const gil_state_machine &m_sm;
  /* The context used to queue warnings and change state.  */
  sm_context &m_sm_ctxt;
  /* The statement being walked.  */
  const gimple *m_stmt;
};
/* Callback for walk_stmt_load_store_addr_ops: forward operand OP to the
   state machine's PyObject check.  DATA must point to a cb_data.
   Always returns true.  */
static bool
check_for_pyobject (gimple *, tree op, tree, void *data)
{
  auto *walk_info = static_cast<cb_data *> (data);
  const gil_state_machine &sm = walk_info->m_sm;
  sm.check_for_pyobject_usage_without_gil (walk_info->m_sm_ctxt,
                                           walk_info->m_stmt,
                                           op);
  return true;
}
/* Assuming that the GIL has been released, complain about any
   PyObject * arguments passed to CALL.  CALLEE_FNDECL is recorded in
   the diagnostic and may be NULL.  Each offending argument queues its
   own warning and moves the global state to the stop state.  */
void
gil_state_machine::check_for_pyobject_in_call (sm_context &sm_ctxt,
                                               const gcall &call,
                                               tree callee_fndecl) const
{
  const unsigned num_args = gimple_call_num_args (&call);
  for (unsigned arg_idx = 0; arg_idx < num_args; arg_idx++)
    {
      tree arg_type = TREE_TYPE (gimple_call_arg (&call, arg_idx));

      /* Only pointer arguments are of interest.  */
      if (TREE_CODE (arg_type) != POINTER_TYPE)
        continue;

      /* ...and only those whose pointee is a PyObject.  */
      if (!type_based_on_pyobject_p (TREE_TYPE (arg_type)))
        continue;

      sm_ctxt.warn (NULL_TREE,
                    std::make_unique<fncall_without_gil> (*this, call,
                                                          callee_fndecl,
                                                          arg_idx));
      sm_ctxt.set_global_state (m_stop);
    }
}
/* Implementation of state_machine::on_stmt vfunc for gil_state_machine.

   - A call to PyEval_SaveThread moves the global state to
     m_released_gil, or, if it is already there, queues a
     double_save_thread warning and moves to m_stop.
   - A call to PyEval_RestoreThread moves the global state from
     m_released_gil back to the start state.
   - While in m_released_gil, any other call is checked for PyObject *
     arguments (for known callees, only those without a gimple body),
     and any non-call stmt has its operands checked for PyObject usage.

   Return true if STMT was one of the two recognized calls, false
   otherwise.  */
bool
gil_state_machine::on_stmt (sm_context &sm_ctxt,
                            const gimple *stmt) const
{
  const state_t global_state = sm_ctxt.get_global_state ();
  if (const gcall *call_stmt = dyn_cast <const gcall *> (stmt))
    {
      const gcall &call = *call_stmt;
      if (tree callee_fndecl = sm_ctxt.get_fndecl_for_call (call))
        {
          if (is_named_call_p (callee_fndecl, "PyEval_SaveThread", call, 0))
            {
              /* Disabled debugging aid.  */
              if (0)
                inform (input_location, "found call to %qs",
                        "PyEval_SaveThread");
              if (global_state == m_released_gil)
                {
                  sm_ctxt.warn (NULL_TREE,
                                std::make_unique<double_save_thread> (*this,
                                                                      call));
                  sm_ctxt.set_global_state (m_stop);
                }
              else
                sm_ctxt.set_global_state (m_released_gil);
              return true;
            }
          else if (is_named_call_p (callee_fndecl, "PyEval_RestoreThread",
                                    call, 1))
            {
              /* Disabled debugging aid; name the function actually
                 matched by this branch.  */
              if (0)
                inform (input_location, "found call to %qs",
                        "PyEval_RestoreThread");
              if (global_state == m_released_gil)
                sm_ctxt.set_global_state (m_start);
              return true;
            }
          else if (global_state == m_released_gil)
            {
              /* Find PyObject * args of calls to fns with unknown bodies.  */
              if (!fndecl_has_gimple_body_p (callee_fndecl))
                check_for_pyobject_in_call (sm_ctxt, call, callee_fndecl);
            }
        }
      else if (global_state == m_released_gil)
        /* Callee unknown: check the arguments without naming it.  */
        check_for_pyobject_in_call (sm_ctxt, call, NULL);
    }
  else
    if (global_state == m_released_gil)
      {
        /* Walk the stmt, finding uses of PyObject (or "subclasses").
           The walker takes a non-const gimple *, hence the cast.  */
        cb_data d (*this, sm_ctxt, stmt);
        walk_stmt_load_store_addr_ops (const_cast <gimple *> (stmt), &d,
                                       check_for_pyobject,
                                       check_for_pyobject,
                                       check_for_pyobject);
      }
  return false;
}
/* Implementation of state_machine::can_purge_p vfunc for
   gil_state_machine: returns true regardless of the state passed.  */
bool
gil_state_machine::can_purge_p (state_t) const
{
  return true;
}
/* If operand OP has a PyObject type, queue a
   pyobject_usage_without_gil warning and move the global state to the
   stop state; otherwise do nothing.  STMT is currently unused.  */
void
gil_state_machine::check_for_pyobject_usage_without_gil (sm_context &sm_ctxt,
                                                         const gimple *stmt,
                                                         tree op) const
{
  if (!type_based_on_pyobject_p (TREE_TYPE (op)))
    return;

  sm_ctxt.warn (NULL_TREE,
                std::make_unique<pyobject_usage_without_gil> (*this, op));
  sm_ctxt.set_global_state (m_stop);
}
namespace analyzer_events = ::gcc::topics::analyzer_events;
class gil_analyzer_events_subscriber : public analyzer_events::subscriber
{
public:
void
on_message (const analyzer_events::on_ana_init &m) final override
{
LOG_SCOPE (m.get_logger ());
m.register_state_machine
(std::make_unique<gil_state_machine> (m.get_logger ()));
}
} gil_sub;
} // namespace ana
#endif /* #if ENABLE_ANALYZER */
/* Plugin entry point.  When the analyzer is enabled, subscribe
   ana::gil_sub to the analyzer events channel; otherwise call
   sorry_no_analyzer.  Always returns 0.  VERSION is unused.  */
int
plugin_init (struct plugin_name_args *plugin_info,
             struct plugin_gcc_version *version)
{
#if !ENABLE_ANALYZER
  sorry_no_analyzer ();
#else
  /* Disabled debugging aid.  */
  if (0)
    inform (input_location, "got here; %qs", plugin_info->base_name);

  g->get_channels ().analyzer_events_channel.add_subscriber (ana::gil_sub);
#endif
  return 0;
}