cprover
Loading...
Searching...
No Matches
shadow_memory.cpp
Go to the documentation of this file.
1/*******************************************************************\
2
3Module: Symex Shadow Memory Instrumentation
4
5Author: Peter Schrammel
6
7\*******************************************************************/
8
11
12#include "shadow_memory.h"
13
16#include <util/format_expr.h>
17#include <util/format_type.h>
18#include <util/fresh_symbol.h>
19#include <util/pointer_expr.h>
20#include <util/prefix.h>
22
25
26#include "goto_symex_state.h"
27#include "shadow_memory_util.h"
28
30 goto_symex_statet &state,
31 exprt expr,
33{
34 typet type = ns.follow(expr.type());
35 clean_pointer_expr(expr, type);
36 for(const auto &field_pair : fields)
37 {
38 const symbol_exprt &shadow = add_field(state, expr, field_pair.first, type);
39
40 if(
41 field_pair.second.id() == ID_typecast &&
42 to_typecast_expr(field_pair.second).op().is_zero())
43 {
44 const auto zero_value =
45 zero_initializer(type, expr.source_location(), ns);
46 CHECK_RETURN(zero_value.has_value());
47
48 symex_assign(state, shadow, *zero_value);
49 }
50 else
51 {
52 exprt init_expr = field_pair.second;
53 const auto init_value =
55 CHECK_RETURN(init_value.has_value());
56
57 symex_assign(state, shadow, *init_value);
58 }
59
60 log.debug() << "Shadow memory: initialize field "
61 << id2string(shadow.get_identifier()) << " for " << format(expr)
62 << " with initial value " << format(field_pair.second)
64 }
65}
66
68 goto_symex_statet &state,
69 const exprt &expr,
70 const irep_idt &field_name,
71 const typet &field_type)
72{
73 const auto &function_symbol = ns.lookup(state.source.function_id);
74 const symbolt &new_symbol = get_fresh_aux_symbol(
75 field_type,
78 state.source.pc->source_location(),
79 function_symbol.mode,
80 state.symbol_table);
81
83 addresses.push_back(
85
86 return addresses.back().shadow;
87}
88
90 goto_symex_statet &state,
91 const exprt::operandst &arguments)
92{
93 // parse set_field call
95 arguments.size() == 3, CPROVER_PREFIX "set_field requires 3 arguments");
97
98 exprt expr = arguments[0];
99 typet expr_type = expr.type();
101 expr_type.id() == ID_pointer,
102 "shadow memory requires a pointer expression",
103 irep_pretty_diagnosticst{expr_type});
104
105 exprt value = arguments[2];
107 INVARIANT(
108 state.shadow_memory.address_fields.count(field_name) == 1,
109 id2string(field_name) + " should exist");
110 const auto &addresses = state.shadow_memory.address_fields.at(field_name);
111
112 // get value set
114
116
117 std::vector<exprt> value_set = state.value_set.get_value_set(expr, ns);
119 if(check_value_set_contains_only_null_ptr(ns, log, value_set, expr))
120 {
121 log.warning() << "Shadow memory: cannot set shadow memory of NULL"
122 << messaget::eom;
123 return;
124 }
125
126 // build lhs
127 const exprt &rhs = value;
128 size_t mux_size = 0;
130 get_shadow_memory(expr, value_set, addresses, ns, log, mux_size);
131
132 // add to equation
133 if(maybe_lhs.has_value())
134 {
135 if(mux_size >= 10)
136 {
137 log.warning() << "Shadow memory: mux size set_field: " << mux_size
138 << messaget::eom;
139 }
140 else
141 {
142 log.debug() << "Shadow memory: mux size set_field: " << mux_size
143 << messaget::eom;
144 }
145 const exprt lhs = deref_expr(*maybe_lhs);
146
148
149 if(lhs.type().id() == ID_empty)
150 {
151 std::stringstream s;
152 s << "Shadow memory: cannot set shadow memory via type void* for "
153 << format(expr)
154 << ". Insert a cast to the type that you want to access.";
155 throw invalid_input_exceptiont(s.str());
156 }
157
158 // Get the type of the shadow memory for this field
160 // Add a conditional cast to the shadow memory field type if `rhs` is not of
161 // the expected type
162 const exprt casted_rhs =
164 // We replicate the rhs value on each byte of the value that we set.
165 // This allows the get_field or/max semantics to operate correctly
166 // on unions.
167 const auto per_byte_rhs =
169 CHECK_RETURN(per_byte_rhs.has_value());
170
173 state,
174 lhs,
176 }
177 else
178 {
179 log.warning() << "Shadow memory: cannot set_field for " << format(expr)
180 << messaget::eom;
181 }
182}
183
184// Function synopsis
185// value_set = get_value_set(expr)
186// foreach object in value_set:
187// if(object invalid) continue;
188// get_field(&exact_match)
189// if(exact_match)
190// return;
191// return;
193 goto_symex_statet &state,
194 const exprt &lhs,
195 const exprt::operandst &arguments)
196{
197 INVARIANT(
198 arguments.size() == 2, CPROVER_PREFIX "get_field requires 2 arguments");
199 irep_idt field_name = extract_field_name(arguments[1]);
200
201 exprt expr = arguments[0];
202 typet expr_type = expr.type();
204 expr_type.id() == ID_pointer,
205 "shadow memory requires a pointer expression");
207
208 INVARIANT(
209 state.shadow_memory.address_fields.count(field_name) == 1,
210 id2string(field_name) + " should exist");
211 const auto &addresses = state.shadow_memory.address_fields.at(field_name);
212
213 // Return null (invalid object) instead of auto-object or invalid-object.
214 // This will "polish" expr from invalid and auto-obj
216
217 std::vector<exprt> value_set = state.value_set.get_value_set(expr, ns);
219
220 std::vector<std::pair<exprt, exprt>> rhs_conds_values;
222 // Used to give a default value for invalid pointers and other usages
224
226 {
228 // If we have an invalid pointer, then return the default value of the
229 // shadow memory as dereferencing it would fail
230 rhs_conds_values.emplace_back(
231 true_exprt(),
233 }
234
235 for(const auto &matched_object : value_set)
236 {
237 // Ignore "unknown"
239 {
240 log.warning() << "Shadow memory: value set contains unknown for "
241 << format(expr) << messaget::eom;
242 continue;
243 }
244 // Ignore "integer_address"
245 if(
246 to_object_descriptor_expr(matched_object).root_object().id() ==
248 {
249 log.warning() << "Shadow memory: value set contains integer_address for "
250 << format(expr) << messaget::eom;
251 continue;
252 }
253 // Ignore "ID_C_is_failed_symbol" (another incarnation of invalid pointer)
254 // TODO: check if this is obsolete (or removed by
255 // replace_invalid_object_by_null) or add default value
256 if(matched_object.type().get_bool(ID_C_is_failed_symbol))
257 {
258 log.warning() << "Shadow memory: value set contains failed symbol for "
259 << format(expr) << messaget::eom;
260 continue;
261 }
262
263 bool exact_match = false;
264
265 // List of condition ==> value (read condition implies values)
267 ns,
268 log,
270 addresses,
271 field_init_expr.type(),
272 expr,
273 lhs.type(),
275
276 // If we have an exact match we discard all the previous conditions and
277 // create an assignment. Then we'll break
278 if(exact_match)
279 {
281 }
282 // Process this match (exact will contain only one set of conditions).
283 rhs_conds_values.insert(
284 rhs_conds_values.end(),
287 if(exact_match)
288 {
289 break;
290 }
291 }
292
293 // If we have at least a condition ==> value
294 if(!rhs_conds_values.empty())
295 {
296 // Build the rhs expression from the shadow memory (big switch for all
297 // condition ==> value)
300 const size_t mux_size = rhs_conds_values.size() - 1;
301 // Don't debug if the switch is too big
302 if(mux_size >= 10)
303 {
304 log.warning() << "Shadow memory: mux size get_field: " << mux_size
305 << messaget::eom;
306 }
307 else
308 {
309 log.debug() << "Shadow memory: mux size get_field: " << mux_size
310 << messaget::eom;
311 }
312
314
315 // create the assignment of __CPROVER_shadow_memory_get_field
316 symex_assign(state, lhs, typecast_exprt::conditional_cast(rhs, lhs.type()));
317 }
318 else
319 {
320 // We don't have any condition ==> value for the pointer, so we fall-back to
321 // the initialization value of the shadow-memory.
322 log.warning() << "Shadow memory: cannot get_field for " << format(expr)
323 << messaget::eom;
325 state,
326 lhs,
328 }
329}
330
331// TODO: the following 4 functions (`symex_field_static_init`,
332// `symex_field_static_init_string_constant`,
333// `symex_field_local_init`,
334// `symex_field_dynamic_init`) do filtering on
335// the input symbol name and then call `initialize_shadow_memory` accordingly.
336// We want to refactor and improve the way the filtering is done, but given
337// that we don't have an easy mechanism to validate that we haven't changed the
338// behaviour, we want to postpone changing this until the full shadow memory
339// functionalities are integrated and we have good regression/unit testing.
340
342 goto_symex_statet &state,
343 const ssa_exprt &lhs)
344{
345 if(lhs.get_original_expr().id() != ID_symbol)
346 return;
347
348 const irep_idt &identifier =
349 to_symbol_expr(lhs.get_original_expr()).get_identifier();
350
352 return;
353
354 if(
355 has_prefix(id2string(identifier), CPROVER_PREFIX) &&
356 !has_prefix(id2string(identifier), CPROVER_PREFIX "errno"))
357 {
358 return;
359 }
360
361 const symbolt &symbol = ns.lookup(identifier);
362
363 if(
364 (id2string(symbol.name).find("::return_value") == std::string::npos &&
365 symbol.is_auxiliary) ||
366 !symbol.is_static_lifetime)
367 return;
368 if(id2string(symbol.name).find("__cs_") != std::string::npos)
369 return;
370
371 const typet &type = symbol.type;
372 log.debug() << "Shadow memory: global memory " << id2string(identifier)
373 << " of type " << from_type(ns, "", type) << messaget::eom;
374
376 state, lhs, state.shadow_memory.fields.global_fields);
377}
378
380 goto_symex_statet &state,
381 const ssa_exprt &expr,
382 const exprt &rhs)
383{
384 if(
385 expr.get_original_expr().id() == ID_symbol &&
387 id2string(to_symbol_expr(expr.get_original_expr()).get_identifier()),
389 {
390 return;
391 }
392 const index_exprt &index_expr =
393 to_index_expr(to_address_of_expr(rhs).object());
394
395 const typet &type = index_expr.array().type();
396 log.debug() << "Shadow memory: global memory "
397 << id2string(to_string_constant(index_expr.array()).get_value())
398 << " of type " << from_type(ns, "", type) << messaget::eom;
399
401 state, index_expr.array(), state.shadow_memory.fields.global_fields);
402}
403
405 goto_symex_statet &state,
406 const ssa_exprt &expr)
407{
408 const symbolt &symbol =
409 ns.lookup(to_symbol_expr(expr.get_original_expr()).get_identifier());
410
411 const std::string symbol_name = id2string(symbol.name);
412 if(
413 symbol.is_auxiliary &&
414 symbol_name.find("::return_value") == std::string::npos)
415 return;
416 if(
417 symbol_name.find("malloc::") != std::string::npos &&
418 (symbol_name.find("malloc_size") != std::string::npos ||
419 symbol_name.find("malloc_res") != std::string::npos ||
420 symbol_name.find("record_malloc") != std::string::npos ||
421 symbol_name.find("record_may_leak") != std::string::npos))
422 {
423 return;
424 }
425 if(
426 symbol_name.find("__builtin_alloca::") != std::string::npos &&
427 (symbol_name.find("alloca_size") != std::string::npos ||
428 symbol_name.find("record_malloc") != std::string::npos ||
429 symbol_name.find("record_alloca") != std::string::npos ||
430 symbol_name.find("res") != std::string::npos))
431 {
432 return;
433 }
434 if(symbol_name.find("__cs_") != std::string::npos)
435 return;
436
437 const typet &type = expr.type();
439 log.debug() << "Shadow memory: local memory "
440 << id2string(expr_l1.get_identifier()) << " of type "
441 << from_type(ns, "", type) << messaget::eom;
442
445}
446
448 goto_symex_statet &state,
449 const exprt &expr,
450 const side_effect_exprt &code)
451{
452 log.debug() << "Shadow memory: dynamic memory of type "
453 << from_type(ns, "", expr.type()) << messaget::eom;
454
456 state, expr, state.shadow_memory.fields.global_fields);
457}
458
460 const abstract_goto_modelt &goto_model,
461 message_handlert &message_handler)
462{
464
465 // Gather shadow memory declarations from goto model
466 for(const auto &goto_function : goto_model.get_goto_functions().function_map)
467 {
468 const auto &goto_program = goto_function.second.body;
469 forall_goto_program_instructions(target, goto_program)
470 {
471 if(!target->is_function_call())
472 continue;
473
474 const auto &code_function_call = to_code_function_call(target->code());
475 const exprt &function = code_function_call.function();
476
477 if(function.id() != ID_symbol)
478 continue;
479
480 const irep_idt &identifier = to_symbol_expr(function).get_identifier();
481
482 if(
483 identifier ==
485 {
488 field_definitions.global_fields,
489 true,
490 message_handler);
491 }
492 else if(
493 identifier ==
495 {
498 field_definitions.local_fields,
499 false,
500 message_handler);
501 }
502 }
503 }
504 return field_definitions;
505}
506
510 bool is_global,
511 message_handlert &message_handler)
512{
513 INVARIANT(
514 code_function_call.arguments().size() == 2,
517 " requires 2 arguments");
519
520 exprt expr = code_function_call.arguments()[1];
521
522 messaget log(message_handler);
523 log.debug() << "Shadow memory: declare " << (is_global ? "global " : "local ")
524 << "field " << id2string(field_name) << " of type "
525 << format(expr.type()) << messaget::eom;
527 {
529 "A shadow memory field must be of a bitvector type.");
530 }
531 if(to_bitvector_type(expr.type()).get_width() > 8)
532 {
534 "A shadow memory field must not be larger than 8 bits.");
535 }
536
537 // record the field's initial value (and type)
538 fields[field_name] = expr;
539}
Pre-defined bitvector types.
const bitvector_typet & to_bitvector_type(const typet &type)
Cast a typet to a bitvector_typet.
Abstract interface to eager or lazy GOTO models.
virtual const goto_functionst & get_goto_functions() const =0
Accessor to get a raw goto_functionst.
virtual void clear()
Reset the abstract state.
Definition ai.h:266
ait supplies three of the four components needed: an abstract interpreter (in this case handling func...
Definition ai.h:563
goto_instruction_codet representation of a function call statement.
dstringt has one field, an unsigned integer no which is an index into a static table of strings.
Definition dstring.h:39
Base class for all expressions.
Definition expr.h:56
std::vector< exprt > operandst
Definition expr.h:58
typet & type()
Return the type of the expression.
Definition expr.h:84
const source_locationt & source_location() const
Definition expr.h:223
value_sett value_set
Uses level 1 names, and is used to do dereferencing.
Definition goto_state.h:51
Central data structure: state.
shadow_memory_statet shadow_memory
symbol_tablet symbol_table
contains symbols that are minted during symbolic execution, such as dynamically created objects etc.
symex_targett::sourcet source
Array index operator.
Definition std_expr.h:1410
Thrown when user-provided input cannot be processed.
const irep_idt & id() const
Definition irep.h:396
Class that provides messages with a built-in verbosity 'level'.
Definition message.h:155
mstreamt & debug() const
Definition message.h:429
mstreamt & warning() const
Definition message.h:404
static eomt eom
Definition message.h:297
The null pointer constant.
The shadow memory field definitions.
field_definitiont global_fields
Field definitions for global-scope fields.
field_definitiont local_fields
Field definitions for local-scope fields.
std::map< irep_idt, exprt > field_definitiont
A field definition mapping a field name to its initial value.
shadow_memory_field_definitionst fields
The available shadow memory field definitions.
std::map< irep_idt, std::vector< shadowed_addresst > > address_fields
const symbol_exprt & add_field(goto_symex_statet &state, const exprt &expr, const irep_idt &field_name, const typet &field_type)
Registers a shadow memory field for the given original memory.
void symex_get_field(goto_symex_statet &state, const exprt &lhs, const exprt::operandst &arguments)
Symbolically executes a __CPROVER_get_field call.
static void convert_field_declaration(const code_function_callt &code_function_call, shadow_memory_field_definitionst::field_definitiont &fields, bool is_global, message_handlert &message_handler)
Converts a field declaration.
const std::function< void(goto_symex_statet &, const exprt &, const exprt) symex_assign)
void symex_field_dynamic_init(goto_symex_statet &state, const exprt &expr, const side_effect_exprt &code)
Initialize global-scope shadow memory for dynamically allocated memory.
void symex_field_static_init(goto_symex_statet &state, const ssa_exprt &lhs)
Initialize global-scope shadow memory for global/static variables.
void symex_field_static_init_string_constant(goto_symex_statet &state, const ssa_exprt &expr, const exprt &rhs)
Initialize global-scope shadow memory for string constants.
const namespacet & ns
static shadow_memory_field_definitionst gather_field_declarations(const abstract_goto_modelt &goto_model, message_handlert &message_handler)
Gathers the available shadow memory field definitions (__CPROVER_field_decl calls) from the goto mode...
void symex_field_local_init(goto_symex_statet &state, const ssa_exprt &expr)
Initialize local-scope shadow memory for local variables and parameters.
void initialize_shadow_memory(goto_symex_statet &state, exprt expr, const shadow_memory_field_definitionst::field_definitiont &fields)
Allocates and initializes a shadow memory field for the given original memory.
void symex_set_field(goto_symex_statet &state, const exprt::operandst &arguments)
Symbolically executes a __CPROVER_set_field call.
An expression containing a side effect.
Definition std_code.h:1450
Expression providing an SSA-renamed symbol of expressions.
Definition ssa_expr.h:17
const exprt & get_original_expr() const
Definition ssa_expr.h:33
Expression to hold a symbol (variable)
Definition std_expr.h:113
const irep_idt & get_identifier() const
Definition std_expr.h:142
Symbol table entry.
Definition symbol.h:28
bool is_auxiliary
Definition symbol.h:77
bool is_static_lifetime
Definition symbol.h:70
class symbol_exprt symbol_expr() const
Produces a symbol_exprt for a symbol.
Definition symbol.cpp:121
typet type
Type of symbol.
Definition symbol.h:31
irep_idt name
The unique identifier.
Definition symbol.h:40
The Boolean constant true.
Definition std_expr.h:3008
static exprt conditional_cast(const exprt &expr, const typet &type)
Definition std_expr.h:2025
The type of an expression, extends irept.
Definition type.h:29
Thrown when we encounter an instruction, parameters to an instruction etc.
std::vector< exprt > get_value_set(exprt expr, const namespacet &ns) const
Gets values pointed to by expr, including following dereference operators (i.e.
bool has_prefix(const std::string &s, const std::string &prefix)
Definition converter.cpp:13
#define CPROVER_PREFIX
optionalt< exprt > zero_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns)
Create the equivalent of zero for type type.
optionalt< exprt > expr_initializer(const typet &type, const source_locationt &source_location, const namespacet &ns, const exprt &init_byte_expr)
Create a value for type type, with all subtype bytes initialized to the given value.
Expression Initialization.
static format_containert< T > format(const T &o)
Definition format.h:37
symbolt & get_fresh_aux_symbol(const typet &type, const std::string &name_prefix, const std::string &basename_prefix, const source_locationt &source_location, const irep_idt &symbol_mode, const namespacet &ns, symbol_table_baset &symbol_table)
Installs a fresh-named symbol with respect to the given namespace ns with the requested name pattern ...
Fresh auxiliary symbol creation.
const code_function_callt & to_code_function_call(const goto_instruction_codet &code)
#define forall_goto_program_instructions(it, program)
Symbolic Execution.
const std::string & id2string(const irep_idt &d)
Definition irep.h:47
std::string from_expr(const namespacet &ns, const irep_idt &identifier, const exprt &expr)
std::string from_type(const namespacet &ns, const irep_idt &identifier, const typet &type)
API to expression classes for Pointers.
const object_descriptor_exprt & to_object_descriptor_expr(const exprt &expr)
Cast an exprt to an object_descriptor_exprt.
const address_of_exprt & to_address_of_expr(const exprt &expr)
Cast an exprt to an address_of_exprt.
const pointer_typet & to_pointer_type(const typet &type)
Cast a typet to a pointer_typet.
Symex Shadow Memory Instrumentation.
#define SHADOW_MEMORY_GLOBAL_SCOPE
#define SHADOW_MEMORY_LOCAL_SCOPE
#define SHADOW_MEMORY_PREFIX
#define SHADOW_MEMORY_FIELD_DECL
void replace_invalid_object_by_null(exprt &expr)
Replace an invalid object by a null pointer.
void shadow_memory_log_value_set_match(const namespacet &ns, const messaget &log, const exprt &address, const exprt &expr)
Logs a successful match between an address and a value within the value set.
exprt deref_expr(const exprt &expr)
Wraps a given expression into a dereference_exprt unless it is an address_of_exprt in which case it j...
bool contains_null_or_invalid(const std::vector< exprt > &value_set, const exprt &address)
Given a pointer expression check to see if it can be a null pointer or an invalid object within value...
const exprt & get_field_init_expr(const irep_idt &field_name, const goto_symex_statet &state)
Retrieve the expression that a field was initialised with within a given symex state.
bool check_value_set_contains_only_null_ptr(const namespacet &ns, const messaget &log, const std::vector< exprt > &value_set, const exprt &expr)
Checks if value_set contains only a NULL pointer expression of the same type of expr.
const typet & get_field_init_type(const irep_idt &field_name, const goto_symex_statet &state)
Retrieves the type of the shadow memory by returning the type of the shadow memory initializer value.
void shadow_memory_log_set_field(const namespacet &ns, const messaget &log, const irep_idt &field_name, const exprt &expr, const exprt &value)
Logs setting a value to a given shadow field.
std::vector< std::pair< exprt, exprt > > get_shadow_dereference_candidates(const namespacet &ns, const messaget &log, const exprt &matched_object, const std::vector< shadow_memory_statet::shadowed_addresst > &addresses, const typet &field_type, const exprt &expr, const typet &lhs_type, bool &exact_match)
Get a list of (condition, value) pairs for a certain pointer from the shadow memory,...
exprt build_if_else_expr(const std::vector< std::pair< exprt, exprt > > &conds_values)
Build an if-then-else chain from a vector containing pairs of expressions.
void shadow_memory_log_get_field(const namespacet &ns, const messaget &log, const irep_idt &field_name, const exprt &expr)
Logs getting a value corresponding to a shadow memory field.
irep_idt extract_field_name(const exprt &string_expr)
Extracts the field name identifier from a string expression, e.g.
void shadow_memory_log_value_set(const namespacet &ns, const messaget &log, const std::vector< exprt > &value_set)
Logs the retrieval of the value associated with a given shadow memory field.
void clean_pointer_expr(exprt &expr, const typet &type)
Clean the given pointer expression so that it has the right shape for being used for identifying shad...
void shadow_memory_log_text_and_expr(const namespacet &ns, const messaget &log, const char *text, const exprt &expr)
Generic logging function that will log depending on the configured verbosity.
optionalt< exprt > get_shadow_memory(const exprt &expr, const std::vector< exprt > &value_set, const std::vector< shadow_memory_statet::shadowed_addresst > &addresses, const namespacet &ns, const messaget &log, size_t &mux_size)
Get shadow memory values for a given expression within a specified value set.
Symex Shadow Memory Instrumentation Utilities.
#define CHECK_RETURN(CONDITION)
Definition invariant.h:495
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions,...
Definition invariant.h:534
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition invariant.h:423
#define DATA_INVARIANT_WITH_DIAGNOSTICS(CONDITION, REASON,...)
Definition invariant.h:535
ssa_exprt remove_level_2(ssa_exprt ssa)
Definition ssa_expr.cpp:218
#define INITIALIZE_FUNCTION
const index_exprt & to_index_expr(const exprt &expr)
Cast an exprt to an index_exprt.
Definition std_expr.h:1478
const typecast_exprt & to_typecast_expr(const exprt &expr)
Cast an exprt to a typecast_exprt.
Definition std_expr.h:2051
const symbol_exprt & to_symbol_expr(const exprt &expr)
Cast an exprt to a symbol_exprt.
Definition std_expr.h:222
const string_constantt & to_string_constant(const exprt &expr)
goto_programt::const_targett pc