Skip to content

Commit

Permalink
Value-set based dereferencing must consider possible index expressions
Browse files Browse the repository at this point in the history
Value-set based dereferencing must not take an access path through an
object that precludes a subsequent index expression from accessing a
different part of the object. Such a situation can arise when the value
set has a known (constant) offset for the pointer that would identify
one particular element in an array (within that object). The code using
value-set based dereferencing, however, may be trying to resolve a
subexpression of an index expression, where said index expression would
lead to a different element that may itself be part of a different array
within the same overall object.

Fixes: diffblue#8570
  • Loading branch information
tautschnig committed Jan 30, 2025
1 parent 1cc4bc2 commit 1447e14
Show file tree
Hide file tree
Showing 13 changed files with 78 additions and 34 deletions.
4 changes: 2 additions & 2 deletions regression/cbmc/Pointer_array7/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ int main()
uint16_t x[2];
x[0] = 1;
x[1] = 2;
uint8_t *y = (uint8_t *)x;
uint16_t z = *((uint16_t *)(y + 1));
uint8_t *y = (uint8_t *)x + 1;
uint16_t z = *((uint16_t *)(y));

#ifdef USE_BIG_ENDIAN
assert(z == 256u);
Expand Down
18 changes: 18 additions & 0 deletions regression/cbmc/Pointer_array8/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
typedef struct __attribute__((packed))
{
int data[2];
} arr;

typedef struct __attribute__((packed))
{
arr vec[2];
} arrvec;

int main()
{
arrvec A;
arrvec *x = &A;
__CPROVER_assume(x->vec[1].data[0] < 42);
unsigned k;
__CPROVER_assert(k != 2 || ((int *)x)[k] < 42, "");
}
8 changes: 8 additions & 0 deletions regression/cbmc/Pointer_array8/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
CORE
main.c

^EXIT=0$
^SIGNAL=0$
^VERIFICATION SUCCESSFUL$
--
^warning: ignoring
1 change: 1 addition & 0 deletions src/goto-programs/rewrite_union.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ static bool restore_union_rec(exprt &expr, const namespacet &ns)
member_exprt{be.op(), comp.get_name(), comp.type()},
be.offset(),
be.type(),
true,
ns);
if(result.has_value())
{
Expand Down
2 changes: 1 addition & 1 deletion src/goto-symex/field_sensitivity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ exprt field_sensitivityt::apply(
tmp.remove_level_2();
const member_exprt member{tmp.get_original_expr(), comp};
auto recursive_member =
get_subexpression_at_offset(member, be.offset(), be.type(), ns);
get_subexpression_at_offset(member, be.offset(), be.type(), true, ns);
if(
recursive_member.has_value() &&
(recursive_member->id() == ID_member ||
Expand Down
7 changes: 6 additions & 1 deletion src/memory-analyzer/analyze_symbol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ exprt gdb_value_extractort::get_pointer_to_member_value(
struct_symbol_expr,
member_offset,
to_pointer_type(expr.type()).base_type(),
true,
ns);
DATA_INVARIANT(
maybe_member_expr.has_value(), "structure doesn't have member");
Expand Down Expand Up @@ -554,7 +555,11 @@ exprt gdb_value_extractort::get_pointer_value(
if(target_expr.type().id() == ID_array)
{
const auto result_indexed_expr = get_subexpression_at_offset(
target_expr, 0, to_pointer_type(zero_expr.type()).base_type(), ns);
target_expr,
0,
to_pointer_type(zero_expr.type()).base_type(),
true,
ns);
CHECK_RETURN(result_indexed_expr.has_value());
if(result_indexed_expr->type() == zero_expr.type())
return *result_indexed_expr;
Expand Down
9 changes: 5 additions & 4 deletions src/pointer-analysis/value_set_dereference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -643,11 +643,12 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
}
}

// try to build a member/index expression - do not use byte_extract
// Try to build a member/index expression instead of using byte_extract;
// don't recurse into aggregate types as the caller's expression may be
// within an index expression, i.e., a further offset beyond o.offset() may
// get added on to it.
auto subexpr = get_subexpression_at_offset(
root_object, o.offset(), dereference_type, ns);
if(subexpr.has_value())
simplify(subexpr.value(), ns);
root_object, o.offset(), dereference_type, false, ns);
if(
subexpr.has_value() &&
subexpr.value().id() != ID_byte_extract_little_endian &&
Expand Down
2 changes: 1 addition & 1 deletion src/solvers/flattening/pointer_logic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ exprt pointer_logict::pointer_expr(
}
}
auto deep_object_opt =
get_subexpression_at_offset(object_expr, pointer.offset, subtype, ns);
get_subexpression_at_offset(object_expr, pointer.offset, subtype, true, ns);
CHECK_RETURN(deep_object_opt.has_value());
exprt deep_object = deep_object_opt.value();
simplify(deep_object, ns);
Expand Down
33 changes: 21 additions & 12 deletions src/util/pointer_offset_size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,17 +561,11 @@ std::optional<exprt> get_subexpression_at_offset(
const exprt &expr,
const mp_integer &offset_bytes,
const typet &target_type_raw,
bool recurse_into_arrays,
const namespacet &ns)
{
if(offset_bytes == 0 && expr.type() == target_type_raw)
{
exprt result = expr;

if(expr.type() != target_type_raw)
result.type() = target_type_raw;

return result;
}
return expr;

if(
offset_bytes == 0 && expr.type().id() == ID_pointer &&
Expand Down Expand Up @@ -611,6 +605,7 @@ std::optional<exprt> get_subexpression_at_offset(
member,
offset_bytes - m_offset_bits / config.ansi_c.char_width,
target_type_raw,
recurse_into_arrays,
ns);
}

Expand Down Expand Up @@ -638,9 +633,18 @@ std::optional<exprt> get_subexpression_at_offset(
const auto offset_inside_elem = offset_bytes % elem_size_bytes;
const auto target_size_bytes =
*target_size_bits / config.ansi_c.char_width;
// only recurse if the cell completely contains the target
if(
index < array_size &&
index < array_size && array_type.element_type() == target_type_raw &&
offset_inside_elem == 0)
{
return index_exprt{
expr,
from_integer(
offset_bytes / elem_size_bytes, array_type.index_type())};
}
// only recurse if the cell completely contains the target
else if(
recurse_into_arrays && index < array_size &&
offset_inside_elem + target_size_bytes <= elem_size_bytes)
{
return get_subexpression_at_offset(
Expand All @@ -650,6 +654,7 @@ std::optional<exprt> get_subexpression_at_offset(
offset_bytes / elem_size_bytes, array_type.index_type())),
offset_inside_elem,
target_type_raw,
recurse_into_arrays,
ns);
}
}
Expand All @@ -676,7 +681,7 @@ std::optional<exprt> get_subexpression_at_offset(
{
const member_exprt member(expr, component.get_name(), component.type());
return get_subexpression_at_offset(
member, offset_bytes, target_type_raw, ns);
member, offset_bytes, target_type_raw, recurse_into_arrays, ns);
}
}
}
Expand All @@ -689,12 +694,16 @@ std::optional<exprt> get_subexpression_at_offset(
const exprt &expr,
const exprt &offset,
const typet &target_type,
bool recurse_into_arrays,
const namespacet &ns)
{
const auto offset_bytes = numeric_cast<mp_integer>(offset);

if(!offset_bytes.has_value())
return {};
else
return get_subexpression_at_offset(expr, *offset_bytes, target_type, ns);
{
return get_subexpression_at_offset(
expr, *offset_bytes, target_type, recurse_into_arrays, ns);
}
}
2 changes: 2 additions & 0 deletions src/util/pointer_offset_size.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,14 @@ std::optional<exprt> get_subexpression_at_offset(
const exprt &expr,
const mp_integer &offset,
const typet &target_type,
bool recurse_into_arrays,
const namespacet &ns);

std::optional<exprt> get_subexpression_at_offset(
const exprt &expr,
const exprt &offset,
const typet &target_type,
bool recurse_into_arrays,
const namespacet &ns);

#endif // CPROVER_UTIL_POINTER_OFFSET_SIZE_H
2 changes: 1 addition & 1 deletion src/util/simplify_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2023,7 +2023,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)

// try to refine it down to extracting from a member or an index in an array
auto subexpr =
get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns);
get_subexpression_at_offset(expr.op(), *offset, expr.type(), true, ns);
if(subexpr.has_value() && subexpr.value() != expr)
return changed(simplify_rec(subexpr.value())); // recursive call

Expand Down
2 changes: 1 addition & 1 deletion src/util/simplify_expr_struct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ simplify_exprt::simplify_member(const member_exprt &expr)
if(requested_offset.has_value())
{
auto equivalent_member = get_subexpression_at_offset(
typecast_expr.op(), *requested_offset, expr.type(), ns);
typecast_expr.op(), *requested_offset, expr.type(), true, ns);

// Guess: turning this into a byte-extract operation is not really an
// optimisation.
Expand Down
22 changes: 11 additions & 11 deletions unit/util/pointer_offset_size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,30 +34,30 @@ TEST_CASE("Build subexpression to access element at offset into array")
symbol_exprt a("array", array_type);

{
const auto result = get_subexpression_at_offset(a, 0, t, ns);
const auto result = get_subexpression_at_offset(a, 0, t, true, ns);
REQUIRE(result.value() == index_exprt(a, from_integer(0, c_index_type())));
}

{
const auto result = get_subexpression_at_offset(a, 32 / 8, t, ns);
const auto result = get_subexpression_at_offset(a, 32 / 8, t, true, ns);
REQUIRE(result.value() == index_exprt(a, from_integer(1, c_index_type())));
}

{
const auto result =
get_subexpression_at_offset(a, from_integer(0, size_type()), t, ns);
get_subexpression_at_offset(a, from_integer(0, size_type()), t, true, ns);
REQUIRE(result.value() == index_exprt(a, from_integer(0, c_index_type())));
}

{
const auto result =
get_subexpression_at_offset(a, size_of_expr(t, ns).value(), t, ns);
get_subexpression_at_offset(a, size_of_expr(t, ns).value(), t, true, ns);
REQUIRE(result.value() == index_exprt(a, from_integer(1, c_index_type())));
}

{
const signedbv_typet small_t(8);
const auto result = get_subexpression_at_offset(a, 1, small_t, ns);
const auto result = get_subexpression_at_offset(a, 1, small_t, true, ns);
REQUIRE(
result.value() == make_byte_extract(
index_exprt(a, from_integer(0, c_index_type())),
Expand All @@ -67,7 +67,7 @@ TEST_CASE("Build subexpression to access element at offset into array")

{
const signedbv_typet int16_t(16);
const auto result = get_subexpression_at_offset(a, 3, int16_t, ns);
const auto result = get_subexpression_at_offset(a, 3, int16_t, true, ns);
// At offset 3 there are only 8 bits remaining in an element of type t so
// not enough to fill a 16 bit int, so this cannot be transformed in an
// index_exprt.
Expand All @@ -94,30 +94,30 @@ TEST_CASE("Build subexpression to access element at offset into struct")
symbol_exprt s("struct", st);

{
const auto result = get_subexpression_at_offset(s, 0, t, ns);
const auto result = get_subexpression_at_offset(s, 0, t, true, ns);
REQUIRE(result.value() == member_exprt(s, "foo", t));
}

{
const auto result = get_subexpression_at_offset(s, 32 / 8, t, ns);
const auto result = get_subexpression_at_offset(s, 32 / 8, t, true, ns);
REQUIRE(result.value() == member_exprt(s, "bar", t));
}

{
const auto result =
get_subexpression_at_offset(s, from_integer(0, size_type()), t, ns);
get_subexpression_at_offset(s, from_integer(0, size_type()), t, true, ns);
REQUIRE(result.value() == member_exprt(s, "foo", t));
}

{
const auto result =
get_subexpression_at_offset(s, size_of_expr(t, ns).value(), t, ns);
get_subexpression_at_offset(s, size_of_expr(t, ns).value(), t, true, ns);
REQUIRE(result.value() == member_exprt(s, "bar", t));
}

{
const signedbv_typet small_t(8);
const auto result = get_subexpression_at_offset(s, 1, small_t, ns);
const auto result = get_subexpression_at_offset(s, 1, small_t, true, ns);
REQUIRE(
result.value() ==
make_byte_extract(
Expand Down

0 comments on commit 1447e14

Please sign in to comment.