David Malcolm | ded2c2c | 2021-08-04 18:21:21 -0400 | [diff] [blame] | 1 | /* Handling inline asm in the analyzer. |
Jakub Jelinek | 7adcbaf | 2022-01-03 10:42:10 +0100 | [diff] [blame] | 2 | Copyright (C) 2021-2022 Free Software Foundation, Inc. |
David Malcolm | ded2c2c | 2021-08-04 18:21:21 -0400 | [diff] [blame] | 3 | Contributed by David Malcolm <dmalcolm@redhat.com>. |
| 4 | |
| 5 | This file is part of GCC. |
| 6 | |
| 7 | GCC is free software; you can redistribute it and/or modify it |
| 8 | under the terms of the GNU General Public License as published by |
| 9 | the Free Software Foundation; either version 3, or (at your option) |
| 10 | any later version. |
| 11 | |
| 12 | GCC is distributed in the hope that it will be useful, but |
| 13 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | General Public License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU General Public License |
| 18 | along with GCC; see the file COPYING3. If not see |
| 19 | <http://www.gnu.org/licenses/>. */ |
| 20 | |
| 21 | #include "config.h" |
David Malcolm | 6341f14 | 2022-11-03 13:47:01 -0400 | [diff] [blame] | 22 | #define INCLUDE_MEMORY |
David Malcolm | ded2c2c | 2021-08-04 18:21:21 -0400 | [diff] [blame] | 23 | #include "system.h" |
| 24 | #include "coretypes.h" |
| 25 | #include "tree.h" |
| 26 | #include "function.h" |
| 27 | #include "basic-block.h" |
| 28 | #include "gimple.h" |
| 29 | #include "gimple-iterator.h" |
| 30 | #include "diagnostic-core.h" |
| 31 | #include "pretty-print.h" |
David Malcolm | ded2c2c | 2021-08-04 18:21:21 -0400 | [diff] [blame] | 32 | #include "analyzer/analyzer.h" |
| 33 | #include "analyzer/analyzer-logging.h" |
| 34 | #include "options.h" |
| 35 | #include "analyzer/call-string.h" |
| 36 | #include "analyzer/program-point.h" |
| 37 | #include "analyzer/store.h" |
| 38 | #include "analyzer/region-model.h" |
| 39 | #include "analyzer/region-model-reachability.h" |
| 40 | #include "stmt.h" |
| 41 | |
| 42 | #if ENABLE_ANALYZER |
| 43 | |
| 44 | namespace ana { |
| 45 | |
| 46 | /* Minimal asm support for the analyzer. |
| 47 | |
| 48 | The objective of this code is to: |
| 49 | - minimize false positives from the analyzer on the Linux kernel |
| 50 | (which makes heavy use of inline asm), whilst |
| 51 | - avoiding having to "teach" the compiler anything about specific strings |
| 52 | in asm statements. |
| 53 | |
| 54 | Specifically, we want to: |
| 55 | |
| 56 | (a) mark asm outputs and certain other regions as having been written to, |
| 57 | to avoid false positives from -Wanalyzer-use-of-uninitialized-value. |
| 58 | |
| 59 | (b) identify some of these stmts as "deterministic" so that we can |
| 60 | write consistent outputs given consistent inputs, so that we can |
| 61 | avoid false positives for paths in which an asm is invoked twice |
| 62 | with the same inputs and is expected to emit the same output. |
| 63 | |
| 64 | This file implements heuristics for achieving the above. */ |
| 65 | |
| 66 | /* Determine if ASM_STMT is deterministic, in the sense of (b) above. |
| 67 | |
| 68 | Consider this x86 function taken from the Linux kernel |
| 69 | (arch/x86/include/asm/barrier.h): |
| 70 | |
| 71 | static inline unsigned long array_index_mask_nospec(unsigned long index, |
| 72 | unsigned long size) |
| 73 | { |
| 74 | unsigned long mask; |
| 75 | |
| 76 | asm volatile ("cmp %1,%2; sbb %0,%0;" |
| 77 | :"=r" (mask) |
| 78 | :"g"(size),"r" (index) |
| 79 | :"cc"); |
| 80 | return mask; |
| 81 | } |
| 82 | |
| 83 | The above is a mitigation for Spectre-variant-1 attacks, for clamping |
| 84 | an array access to within the range of [0, size) if the CPU speculates |
| 85 | past the array bounds. |
| 86 | |
| 87 | However, it is ultimately used to implement wdev_to_wvif: |
| 88 | |
| 89 | static inline struct wfx_vif * |
| 90 | wdev_to_wvif(struct wfx_dev *wdev, int vif_id) |
| 91 | { |
| 92 | vif_id = array_index_nospec(vif_id, ARRAY_SIZE(wdev->vif)); |
| 93 | if (!wdev->vif[vif_id]) { |
| 94 | return NULL; |
| 95 | } |
| 96 | return (struct wfx_vif *)wdev->vif[vif_id]->drv_priv; |
| 97 | } |
| 98 | |
| 99 | which is used by: |
| 100 | |
| 101 | if (wdev_to_wvif(wvif->wdev, 1)) |
| 102 | return wdev_to_wvif(wvif->wdev, 1)->vif; |
| 103 | |
| 104 | The code has been written to assume that wdev_to_wvif is deterministic, |
| 105 | and won't change from returning non-NULL at the "if" clause to |
| 106 | returning NULL at the "->vif" dereference. |
| 107 | |
| 108 | By treating the above specific "asm volatile" as deterministic we avoid |
| 109 | a false positive from -Wanalyzer-null-dereference. */ |
| 110 | |
| 111 | static bool |
| 112 | deterministic_p (const gasm *asm_stmt) |
| 113 | { |
| 114 | /* Assume something volatile with no inputs is querying |
| 115 | changeable state e.g. rdtsc. */ |
| 116 | if (gimple_asm_ninputs (asm_stmt) == 0 |
| 117 | && gimple_asm_volatile_p (asm_stmt)) |
| 118 | return false; |
| 119 | |
| 120 | /* Otherwise assume it's purely a function of its inputs. */ |
| 121 | return true; |
| 122 | } |
| 123 | |
| 124 | /* Update this model for the asm STMT, using CTXT to report any |
| 125 | diagnostics. |
| 126 | |
Martin Liska | e53b6e5 | 2022-01-14 16:57:02 +0100 | [diff] [blame] | 127 | Compare with cfgexpand.cc: expand_asm_stmt. */ |
David Malcolm | ded2c2c | 2021-08-04 18:21:21 -0400 | [diff] [blame] | 128 | |
| 129 | void |
| 130 | region_model::on_asm_stmt (const gasm *stmt, region_model_context *ctxt) |
| 131 | { |
| 132 | logger *logger = ctxt ? ctxt->get_logger () : NULL; |
| 133 | LOG_SCOPE (logger); |
| 134 | |
| 135 | const unsigned noutputs = gimple_asm_noutputs (stmt); |
| 136 | const unsigned ninputs = gimple_asm_ninputs (stmt); |
| 137 | |
| 138 | auto_vec<tree> output_tvec; |
| 139 | auto_vec<tree> input_tvec; |
| 140 | auto_vec<const char *> constraints; |
| 141 | |
| 142 | /* Copy the gimple vectors into new vectors that we can manipulate. */ |
| 143 | output_tvec.safe_grow (noutputs, true); |
| 144 | input_tvec.safe_grow (ninputs, true); |
| 145 | constraints.safe_grow (noutputs + ninputs, true); |
| 146 | |
| 147 | for (unsigned i = 0; i < noutputs; ++i) |
| 148 | { |
| 149 | tree t = gimple_asm_output_op (stmt, i); |
| 150 | output_tvec[i] = TREE_VALUE (t); |
| 151 | constraints[i] = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))); |
| 152 | } |
| 153 | for (unsigned i = 0; i < ninputs; i++) |
| 154 | { |
| 155 | tree t = gimple_asm_input_op (stmt, i); |
| 156 | input_tvec[i] = TREE_VALUE (t); |
| 157 | constraints[i + noutputs] |
| 158 | = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (t))); |
| 159 | } |
| 160 | |
| 161 | /* Determine which regions are reachable from the inputs |
| 162 | to this stmt. */ |
| 163 | reachable_regions reachable_regs (this); |
| 164 | |
| 165 | int num_errors = 0; |
| 166 | |
| 167 | auto_vec<const region *> output_regions (noutputs); |
| 168 | for (unsigned i = 0; i < noutputs; ++i) |
| 169 | { |
| 170 | tree val = output_tvec[i]; |
| 171 | const char *constraint; |
| 172 | bool is_inout; |
| 173 | bool allows_reg; |
| 174 | bool allows_mem; |
| 175 | |
| 176 | const region *dst_reg = get_lvalue (val, ctxt); |
| 177 | output_regions.quick_push (dst_reg); |
| 178 | reachable_regs.add (dst_reg, true); |
| 179 | |
| 180 | /* Try to parse the output constraint. If that fails, there's |
| 181 | no point in going further. */ |
| 182 | constraint = constraints[i]; |
| 183 | if (!parse_output_constraint (&constraint, i, ninputs, noutputs, |
| 184 | &allows_mem, &allows_reg, &is_inout)) |
| 185 | { |
| 186 | if (logger) |
| 187 | logger->log ("error parsing constraint for output %i: %qs", |
| 188 | i, constraint); |
| 189 | num_errors++; |
| 190 | continue; |
| 191 | } |
| 192 | |
| 193 | if (logger) |
| 194 | { |
| 195 | logger->log ("output %i: %qs %qE" |
| 196 | " is_inout: %i allows_reg: %i allows_mem: %i", |
| 197 | i, constraint, val, |
| 198 | (int)is_inout, (int)allows_reg, (int)allows_mem); |
| 199 | logger->start_log_line (); |
| 200 | logger->log_partial (" region: "); |
| 201 | dst_reg->dump_to_pp (logger->get_printer (), true); |
| 202 | logger->end_log_line (); |
| 203 | } |
| 204 | |
| 205 | } |
| 206 | |
| 207 | /* Ideally should combine with inout_svals to determine the |
| 208 | "effective inputs" and use this for the asm_output_svalue. */ |
| 209 | |
| 210 | auto_vec<const svalue *> input_svals (ninputs); |
| 211 | for (unsigned i = 0; i < ninputs; i++) |
| 212 | { |
| 213 | tree val = input_tvec[i]; |
| 214 | const char *constraint = constraints[i + noutputs]; |
| 215 | bool allows_reg, allows_mem; |
| 216 | if (! parse_input_constraint (&constraint, i, ninputs, noutputs, 0, |
| 217 | constraints.address (), |
| 218 | &allows_mem, &allows_reg)) |
| 219 | { |
| 220 | if (logger) |
| 221 | logger->log ("error parsing constraint for input %i: %qs", |
| 222 | i, constraint); |
| 223 | num_errors++; |
| 224 | continue; |
| 225 | } |
| 226 | |
| 227 | tree src_expr = input_tvec[i]; |
| 228 | const svalue *src_sval = get_rvalue (src_expr, ctxt); |
| 229 | check_for_poison (src_sval, src_expr, ctxt); |
| 230 | input_svals.quick_push (src_sval); |
| 231 | reachable_regs.handle_sval (src_sval); |
| 232 | |
| 233 | if (logger) |
| 234 | { |
| 235 | logger->log ("input %i: %qs %qE" |
| 236 | " allows_reg: %i allows_mem: %i", |
| 237 | i, constraint, val, |
| 238 | (int)allows_reg, (int)allows_mem); |
| 239 | logger->start_log_line (); |
| 240 | logger->log_partial (" sval: "); |
| 241 | src_sval->dump_to_pp (logger->get_printer (), true); |
| 242 | logger->end_log_line (); |
| 243 | } |
| 244 | } |
| 245 | |
| 246 | if (num_errors > 0) |
| 247 | gcc_unreachable (); |
| 248 | |
| 249 | if (logger) |
| 250 | { |
| 251 | logger->log ("reachability: "); |
| 252 | reachable_regs.dump_to_pp (logger->get_printer ()); |
| 253 | logger->end_log_line (); |
| 254 | } |
| 255 | |
| 256 | /* Given the regions that were reachable from the inputs we |
| 257 | want to clobber them. |
| 258 | This is similar to region_model::handle_unrecognized_call, |
| 259 | but the unknown call policies seems too aggressive (e.g. purging state |
| 260 | from anything that's ever escaped). Instead, clobber any clusters |
| 261 | that were reachable in *this* asm stmt, rather than those that |
| 262 | escaped, and we don't treat the values as having escaped. |
| 263 | We also assume that asm stmts don't affect sm-state. */ |
| 264 | for (auto iter = reachable_regs.begin_mutable_base_regs (); |
| 265 | iter != reachable_regs.end_mutable_base_regs (); ++iter) |
| 266 | { |
| 267 | const region *base_reg = *iter; |
David Malcolm | 5f6197d | 2022-03-24 20:58:10 -0400 | [diff] [blame] | 268 | if (base_reg->symbolic_for_unknown_ptr_p () |
| 269 | || !base_reg->tracked_p ()) |
David Malcolm | ded2c2c | 2021-08-04 18:21:21 -0400 | [diff] [blame] | 270 | continue; |
| 271 | |
| 272 | binding_cluster *cluster = m_store.get_or_create_cluster (base_reg); |
David Malcolm | 3734527 | 2022-03-28 20:41:23 -0400 | [diff] [blame] | 273 | cluster->on_asm (stmt, m_mgr->get_store_manager (), |
| 274 | conjured_purge (this, ctxt)); |
David Malcolm | ded2c2c | 2021-08-04 18:21:21 -0400 | [diff] [blame] | 275 | } |
| 276 | |
| 277 | /* Update the outputs. */ |
| 278 | for (unsigned output_idx = 0; output_idx < noutputs; output_idx++) |
| 279 | { |
| 280 | tree dst_expr = output_tvec[output_idx]; |
| 281 | const region *dst_reg = output_regions[output_idx]; |
| 282 | |
| 283 | const svalue *sval; |
| 284 | if (deterministic_p (stmt) |
| 285 | && input_svals.length () <= asm_output_svalue::MAX_INPUTS) |
| 286 | sval = m_mgr->get_or_create_asm_output_svalue (TREE_TYPE (dst_expr), |
| 287 | stmt, |
| 288 | output_idx, |
| 289 | input_svals); |
| 290 | else |
| 291 | { |
| 292 | sval = m_mgr->get_or_create_conjured_svalue (TREE_TYPE (dst_expr), |
| 293 | stmt, |
David Malcolm | 3734527 | 2022-03-28 20:41:23 -0400 | [diff] [blame] | 294 | dst_reg, |
| 295 | conjured_purge (this, |
| 296 | ctxt)); |
David Malcolm | ded2c2c | 2021-08-04 18:21:21 -0400 | [diff] [blame] | 297 | } |
| 298 | set_value (dst_reg, sval, ctxt); |
| 299 | } |
| 300 | } |
| 301 | |
| 302 | } // namespace ana |
| 303 | |
| 304 | #endif /* #if ENABLE_ANALYZER */ |