Skip to content

Commit

Permalink
Preliminary support for llvm.objectsize & __memset_chk (#556)
Browse files Browse the repository at this point in the history
* Added preliminary support for llvm.objectsize and __memset_chk
  • Loading branch information
ThomasHaas authored Nov 8, 2023
1 parent 7f1e609 commit da499dc
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 13 deletions.
14 changes: 14 additions & 0 deletions dartagnan/src/main/java/com/dat3m/dartagnan/program/IRHelper.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package com.dat3m.dartagnan.program;

import com.dat3m.dartagnan.expression.BConst;
import com.dat3m.dartagnan.program.event.core.CondJump;
import com.dat3m.dartagnan.program.event.core.Event;
import com.dat3m.dartagnan.program.event.functions.AbortIf;
import com.dat3m.dartagnan.program.event.functions.Return;

import java.util.HashSet;
import java.util.Set;
Expand All @@ -20,4 +24,14 @@ public static Set<Event> bulkDelete(Set<Event> toBeDeleted) {
}
return nonDeleted;
}

/*
Returns true if the syntactic successor of <e> (e.getSuccessor()) is not (generally) a semantic successor,
because <e> always jumps/branches/terminates etc.
*/
public static boolean isAlwaysBranching(Event e) {
return e instanceof Return
|| e instanceof AbortIf abort && abort.getCondition() instanceof BConst b && b.getValue()
|| e instanceof CondJump jump && jump.isGoto();
}
}
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
package com.dat3m.dartagnan.program.processing;

import com.dat3m.dartagnan.expression.BConst;
import com.dat3m.dartagnan.program.Function;
import com.dat3m.dartagnan.program.IRHelper;
import com.dat3m.dartagnan.program.Thread;
import com.dat3m.dartagnan.program.event.core.CondJump;
import com.dat3m.dartagnan.program.event.core.Event;
import com.dat3m.dartagnan.program.event.functions.AbortIf;
import com.dat3m.dartagnan.program.event.functions.Return;
import com.dat3m.dartagnan.utils.dependable.DependencyGraph;
import com.google.common.collect.Lists;
import org.sosy_lab.common.configuration.Configuration;
Expand Down Expand Up @@ -111,7 +109,7 @@ private void computeBranchDecomposition() {
if (e.equals(exit)) {
break;
}
if (isAlwaysBranching(e)) {
if (IRHelper.isAlwaysBranching(e)) {
curBranch = new MovableBranch();
curBranch.id = id++;
branches.add(curBranch);
Expand All @@ -120,12 +118,6 @@ private void computeBranchDecomposition() {
}
}

private boolean isAlwaysBranching(Event e) {
return e instanceof Return
|| e instanceof AbortIf abort && abort.getCondition() instanceof BConst b && b.getValue()
|| e instanceof CondJump jump && jump.isGoto();
}

private List<MovableBranch> computeReordering(final List<MovableBranch> movables) {
if (movables.size() < 3) {
return new ArrayList<>(movables);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ public enum Info {
false, false, true, true, Intrinsics::handleLLVMIntrinsic),
LLVM_ASSUME("llvm.assume", false, false, true, true, Intrinsics::inlineLLVMAssume),
LLVM_META(List.of("llvm.stacksave", "llvm.stackrestore", "llvm.lifetime"), false, false, true, true, Intrinsics::inlineAsZero),
LLVM_OBJECTSIZE("llvm.objectsize", false, false, true, false, null),
LLVM_EXPECT("llvm.expect", false, false, true, true, Intrinsics::inlineLLVMExpect),
LLVM_MEMCPY("llvm.memcpy", true, true, true, false, Intrinsics::inlineMemCpy),
LLVM_MEMSET("llvm.memset", true, false, true, false, Intrinsics::inlineMemSet),
Expand All @@ -135,7 +136,7 @@ public enum Info {
LKMM_FENCE("__LKMM_FENCE", false, false, false, true, Intrinsics::handleLKMMIntrinsic),
// --------------------------- Misc ---------------------------
STD_MEMCPY("memcpy", true, true, true, false, Intrinsics::inlineMemCpy),
STD_MEMSET("memset", true, false, true, false, Intrinsics::inlineMemSet),
STD_MEMSET(List.of("memset", "__memset_chk"), true, false, true, false, Intrinsics::inlineMemSet),
STD_MEMCMP("memcmp", false, true, true, false, Intrinsics::inlineMemCmp),
STD_MALLOC("malloc", false, false, true, true, Intrinsics::inlineMalloc),
STD_FREE("free", true, false, true, true, Intrinsics::inlineAsZero),//TODO support free
Expand Down Expand Up @@ -189,7 +190,7 @@ public boolean isEarly() {

private boolean matches(String funcName) {
boolean isPrefix = switch(this) {
case LLVM, LLVM_ASSUME, LLVM_META, LLVM_MEMCPY, LLVM_MEMSET, LLVM_EXPECT -> true;
case LLVM, LLVM_ASSUME, LLVM_META, LLVM_MEMCPY, LLVM_MEMSET, LLVM_EXPECT, LLVM_OBJECTSIZE -> true;
default -> false;
};
BiPredicate<String, String> matchingFunction = isPrefix ? String::startsWith : String::equals;
Expand Down Expand Up @@ -764,12 +765,20 @@ private List<Event> inlineMemCmp(FunctionCall call) {
return replacement;
}

// Handles both std.memset and llvm.memset
// Handles, std.memset, llvm.memset and __memset_chk (checked memset)
private List<Event> inlineMemSet(FunctionCall call) {
final Expression dest = call.getArguments().get(0);
final Expression fillExpr = call.getArguments().get(1);
final Expression countExpr = call.getArguments().get(2);
// final Expression isVolatile = call.getArguments.get(3) // LLVM's memset has an extra argument
// final Expression boundExpr = call.getArguments.get(3) // __memset_chk has an extra argument

//FIXME: Handle memset_chk correctly. For now, we ignore the bound check parameter because that one is
// usually provided by llvm.objectsize which we cannot resolve for now. Since we usually assume UB-freedom,
// the check can be ignored for the most part.
if (call.getCalledFunction().getName().equals("__memset_chk")) {
logger.warn("Treating call to \"__memset_chk\" as call to \"memset\": skipping bound checks.");
}

if (!(countExpr instanceof IValue countValue)) {
final String error = "Cannot handle memset with dynamic count argument: " + call;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ private ProcessingManager(Configuration config) throws InvalidConfigurationExcep
dynamicPureLoopCutting ? DynamicPureLoopCutting.fromConfig(config) : null,
ProgramProcessor.fromFunctionProcessor(
FunctionProcessor.chain(
ResolveLLVMObjectSizeCalls.fromConfig(config),
sccp,
dce ? DeadAssignmentElimination.fromConfig(config) : null,
RemoveDeadCondJumps.fromConfig(config)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package com.dat3m.dartagnan.program.processing;

import com.dat3m.dartagnan.expression.ExpressionFactory;
import com.dat3m.dartagnan.expression.IValue;
import com.dat3m.dartagnan.expression.type.IntegerType;
import com.dat3m.dartagnan.program.Function;
import com.dat3m.dartagnan.program.event.EventFactory;
import com.dat3m.dartagnan.program.event.core.Event;
import com.dat3m.dartagnan.program.event.functions.ValueFunctionCall;
import org.sosy_lab.common.configuration.Configuration;

import java.math.BigInteger;

/*
https://llvm.org/docs/LangRef.html#llvm-objectsize-intrinsic
A call to llvm.objectsize(ptr, ...) returns the size of the (accessible) object pointed to by <ptr>.
For example, if ptr_base is returned by alloc(100), then llvm.objectsize(ptr_base) == 100,
but llvm.objectsize(ptr_base + 80) == 20 (~ there are only 20 accessible bytes at that offset).
If the size cannot be determined statically, the call is replaced by either 0 or -1.
The intrinsic is typically used together with bound-checked memset/memcpy calls (e.g. __memset_chk) where the bound
check may get statically eliminated.
NOTE: For simplicity, we will always assume the object size to be statically unknown and return -1/0.
FIXME: A correct implementation needs to traverse the use-def chain from a llvm.objectsize call back to
the allocation site while tracking offset computations.
Unfortunately, this is quite hard to do in the presence of array indexing which can result in dynamic offsets.
A way around this is to rely on unrolling + CP to resolve dynamic offsets, then run this pass, and finally
run a second CP to propagate the resolved object sizes.
*/
public class ResolveLLVMObjectSizeCalls implements FunctionProcessor {

private ResolveLLVMObjectSizeCalls() {}

public static ResolveLLVMObjectSizeCalls fromConfig(Configuration config) {
return new ResolveLLVMObjectSizeCalls();
}

@Override
public void run(Function function) {
function.getEvents(ValueFunctionCall.class)
.stream().filter(call -> call.isDirectCall() && call.getCalledFunction().getIntrinsicInfo() == Intrinsics.Info.LLVM_OBJECTSIZE)
.forEach(this::resolveObjectSizeCall);
}

private void resolveObjectSizeCall(ValueFunctionCall call) {
//final Expression ptr = call.getArguments().get(0);
final IValue zeroIfUnknown = (IValue)call.getArguments().get(1); // else -1 if unknown
//final IValue nullIsUnknown = (IValue)call.getArguments().get(2);
//final IValue isDynamic = (IValue) call.getArguments().get(3); // Meaning of this is unclear

// TODO: We treat all pointers as unknown for now.
final ExpressionFactory exprs = ExpressionFactory.getInstance();
final BigInteger value = zeroIfUnknown.isOne() ? BigInteger.ZERO : BigInteger.ONE.negate();
final Event constAssignment = EventFactory.newLocal(
call.getResultRegister(), exprs.makeValue(value, (IntegerType) call.getResultRegister().getType())
);
constAssignment.copyAllMetadataFrom(call);
call.replaceBy(constAssignment);
}
}

0 comments on commit da499dc

Please sign in to comment.