Skip to content

Commit

Permalink
feat(compiler): more accurate compiler detection via heuristics (#419)
Browse files Browse the repository at this point in the history
* feat(compiler): more accurate compiler detection via heuristics

* tests(compiler): update minimal proxy test

* tests(compiler): update minimal proxy test

* tests(inspect): llama broke tests

* chore: make clippy happy

* tests(inspect): llama broke tests
  • Loading branch information
Jon-Becker authored May 30, 2024
1 parent 9f760ad commit b3a6a2f
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 31 deletions.
49 changes: 48 additions & 1 deletion crates/common/src/ether/bytecode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@ use crate::{
utils::strings::decode_hex,
Error,
};
use ethers::types::Bytes;
use eyre::Result;
use std::fs;
use tracing::error;

/// Given a target, determines whether it is a contract address, bytecode, or file path, and returns
/// the bytecode for the target.
pub async fn get_bytecode_from_target(target: &str, rpc_url: &str) -> Result<Vec<u8>, Error> {
if ADDRESS_REGEX.is_match(target).unwrap_or(false) {
// Target is a contract address, so we need to fetch the bytecode from the RPC provider.
Expand Down Expand Up @@ -35,10 +39,53 @@ pub async fn get_bytecode_from_target(target: &str, rpc_url: &str) -> Result<Vec
}
}

/// Removes pushed bytes from the bytecode, leaving only the instructions
/// themselves.
///
/// Every opcode in the range `0x5f..=0x7f` (`PUSH0` through `PUSH32`) is kept, while the
/// `opcode - 0x5f` immediate bytes that follow it are skipped. Every other byte is copied
/// through unchanged. If the final push is missing some of its immediate bytes, the opcode
/// is still kept and the scan simply ends.
///
/// For example:
///   0x6060 (PUSH1 0x60) would become 0x60 (PUSH1).
///   0x60806040 (PUSH1 0x80 PUSH1 0x40) would become 0x6060 (PUSH1 PUSH1).
///
/// This implementation never produces an `Err`; the `Result` return type is kept so that
/// existing callers continue to compile.
pub fn remove_pushbytes_from_bytecode(bytecode: Bytes) -> Result<Bytes> {
    const PUSH0: u8 = 0x5f;
    const PUSH32: u8 = 0x7f;

    // The pruned output can never be longer than the input, so allocate once up front.
    let mut pruned = Vec::with_capacity(bytecode.len());

    let mut i = 0;
    while i < bytecode.len() {
        let opcode = bytecode[i];
        pruned.push(opcode);

        // PUSHn carries `n = opcode - PUSH0` immediate bytes; step over them together with
        // the opcode itself. Anything else is a single-byte instruction.
        i += if (PUSH0..=PUSH32).contains(&opcode) { usize::from(opcode - PUSH0) + 1 } else { 1 };
    }

    Ok(Bytes::from(pruned))
}

#[cfg(test)]
mod tests {
use super::*;
use std::fs;
use ethers::types::Bytes;
use std::{fs, str::FromStr};

#[test]
fn test_remove_pushbytes_from_bytecode() {
    // Each case is (input bytecode, expected bytecode once the pushed bytes are stripped).
    let cases = [
        ("0x6040", "0x60"),
        ("0x60406080", "0x6060"),
        (
            "0x604060807f2222222222222222222222222222222222222222222222222222222222222222",
            "0x60607f",
        ),
    ];

    for (input, expected) in cases {
        let bytecode = Bytes::from_str(input).unwrap();
        let pruned = remove_pushbytes_from_bytecode(bytecode).unwrap();
        assert_eq!(pruned, Bytes::from_str(expected).unwrap());
    }
}

#[tokio::test]
async fn test_get_bytecode_when_target_is_address() {
Expand Down
87 changes: 63 additions & 24 deletions crates/common/src/ether/compiler.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::fmt::Display;

use crate::utils::iter::ByteSliceExt;
use crate::{ether::bytecode::remove_pushbytes_from_bytecode, utils::iter::ByteSliceExt};
use ethers::types::Bytes;
use tracing::{debug, trace, warn};

#[derive(Debug, PartialEq, Clone)]
Expand All @@ -22,19 +23,17 @@ impl Display for Compiler {
}
}

// returns the compiler version used to compile the contract.
// for example: (solc, 0.8.10) or (vyper, 0.2.16)
/// Returns the compiler and the compiler version used to compile the contract,
/// for example: (solc, 0.8.10) or (vyper, 0.2.16).
///
/// Research:
/// <https://jbecker.dev/research/evm-compiler-fingerprinting>
pub fn detect_compiler(bytecode: &[u8]) -> (Compiler, String) {
let mut compiler = Compiler::Unknown;
let mut version = "unknown".to_string();

// perform prefix check for rough version matching
if bytecode.starts_with(&[0x36, 0x3d, 0x3d, 0x37, 0x3d, 0x3d, 0x3d, 0x36, 0x3d, 0x73]) ||
bytecode.starts_with(&[0x5f, 0x5f, 0x36, 0x5f, 0x5f, 0x37])
{
compiler = Compiler::Proxy;
version = "minimal".to_string();
} else if bytecode.starts_with(&[
// Previously known heuristic: perform prefix check for rough version matching
if bytecode.starts_with(&[
0x36, 0x60, 0x00, 0x60, 0x00, 0x37, 0x61, 0x10, 0x00, 0x60, 0x00, 0x36, 0x60, 0x00, 0x73,
]) {
compiler = Compiler::Proxy;
Expand All @@ -60,11 +59,54 @@ pub fn detect_compiler(bytecode: &[u8]) -> (Compiler, String) {
compiler = Compiler::Solc;
}

// TODO: add more heuristics for compiler version detection
// Remove `PUSHN [u8; n]` bytes so we are left with only operations
let pruned_bytecode = remove_pushbytes_from_bytecode(Bytes::from_iter(bytecode.iter()))
.expect("invalid bytecode");

// detect minimal proxies
if pruned_bytecode.eq(&vec![
0x36, 0x3d, 0x3d, 0x37, 0x3d, 0x3d, 0x3d, 0x36, 0x3d, 0x73, 0x5a, 0xf4, 0x3d, 0x82, 0x80,
0x3e, 0x90, 0x3d, 0x91, 0x60, 0x57, 0xfd, 0x5b, 0xf3,
]) {
compiler = Compiler::Proxy;
version = "minimal".to_string();
}

// heuristics are in the form of (sequence, solc confidence, vyper confidence)
let heuristics = [
([0x80, 0x63, 0x14, 0x61, 0x57], 0.9447, 0.0),
([0x14, 0x61, 0x57, 0x80, 0x63], 0.9371, 0.0),
([0x61, 0x57, 0x80, 0x63, 0x14], 0.9371, 0.0),
([0x57, 0x80, 0x63, 0x14, 0x61], 0.9371, 0.0),
([0x54, 0x60, 0x52, 0x60, 0x60], 0.00, 0.3103),
([0x60, 0x54, 0x60, 0x52, 0x60], 0.00, 0.3054),
([0x61, 0x52, 0x61, 0x51, 0x61], 0.00, 0.2894),
([0x61, 0x51, 0x61, 0x52, 0x60], 0.00, 0.2816),
([0x61, 0x52, 0x60, 0x61, 0x52], 0.00, 0.2734),
([0x90, 0x50, 0x90, 0x50, 0x81], 0.00, 0.2727),
([0x61, 0x52, 0x7f, 0x61, 0x52], 0.00, 0.2656),
];

// for each heuristic, check if the bytecode contains the sequence and increment the confidence
// for that compiler. the compiler with the highest confidence is chosen
let (mut solc_confidence, mut vyper_confidence) = (0.0, 0.0);
for (sequence, solc, vyper) in heuristics.iter() {
if pruned_bytecode.contains_slice(sequence) {
solc_confidence += solc;
vyper_confidence += vyper;
}
}

if solc_confidence != 0.0 && solc_confidence > vyper_confidence {
compiler = Compiler::Solc;
} else if vyper_confidence != 0.0 && vyper_confidence > solc_confidence {
compiler = Compiler::Vyper;
}

// Previously known heuristic: check for cbor encoded compiler metadata
// check for cbor encoded compiler metadata
// https://cbor.io
if compiler == Compiler::Solc {
if bytecode.contains_slice(&[0x73, 0x6f, 0x6c, 0x63, 0x43]) {
let compiler_version = bytecode.split_by_slice(&[0x73, 0x6f, 0x6c, 0x63, 0x43]);

if compiler_version.len() > 1 {
Expand All @@ -74,11 +116,12 @@ pub fn detect_compiler(bytecode: &[u8]) -> (Compiler, String) {
.map(|v| v.to_string())
.collect::<Vec<String>>()
.join(".");
compiler = Compiler::Solc;
}

trace!("exact compiler version match found due to cbor encoded metadata: {}", version);
}
} else if compiler == Compiler::Vyper {
} else if bytecode.contains_slice(&[0x76, 0x79, 0x70, 0x65, 0x72, 0x83]) {
let compiler_version = bytecode.split_by_slice(&[0x76, 0x79, 0x70, 0x65, 0x72, 0x83]);

if compiler_version.len() > 1 {
Expand All @@ -88,31 +131,27 @@ pub fn detect_compiler(bytecode: &[u8]) -> (Compiler, String) {
.map(|v| v.to_string())
.collect::<Vec<String>>()
.join(".");
compiler = Compiler::Vyper;
}

trace!("exact compiler version match found due to cbor encoded metadata");
}
}

if compiler == Compiler::Solc {
debug!("detected compiler {compiler} {version}.");
} else {
warn!("detected compiler {} {} is not supported by heimdall.", compiler, version);
debug!("detected compiler {compiler} {version}.");

// if not Solidity, warn
if compiler != Compiler::Solc {
warn!("{} is not fully supported by heimdall", compiler);
}

(compiler, version.trim_end_matches('.').to_string())
}

#[cfg(test)]
mod test_compiler {
use super::*;

#[test]
fn test_detect_compiler_proxy_minimal() {
let bytecode = &[0x36, 0x3d, 0x3d, 0x37, 0x3d, 0x3d, 0x3d, 0x36, 0x3d, 0x73];
let expected_result = (Compiler::Proxy, "minimal".to_string());
assert_eq!(detect_compiler(bytecode), expected_result);
}
use super::*;

#[test]
fn test_detect_compiler_proxy_vyper() {
Expand Down
8 changes: 6 additions & 2 deletions crates/common/src/ether/rpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -523,9 +523,13 @@ pub mod tests {

#[tokio::test]
async fn test_get_trace() {
let rpc_url = std::env::var("RPC_URL").unwrap_or_else(|_| {
println!("RPC_URL not set, skipping test");
std::process::exit(0);
});

let transaction_hash = "0x9a5f4ef7678a94dd87048eeec931d30af21b1f4cecbf7e850a531d2bb64a54ac";
let rpc_url = "https://eth.llamarpc.com";
let trace = get_trace(transaction_hash, rpc_url).await;
let trace = get_trace(transaction_hash, &rpc_url).await;

assert!(trace.is_ok())
}
Expand Down
23 changes: 19 additions & 4 deletions crates/core/tests/test_inspect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,16 @@ mod integration_tests {

#[tokio::test]
async fn test_inspect_simple() {
let rpc_url = std::env::var("RPC_URL").unwrap_or_else(|_| {
println!("RPC_URL not set, skipping test");
std::process::exit(0);
});

let args = InspectArgs {
target: String::from(
"0xa5f676d0ee4c23cc1ccb0b802be5aaead5827a3337c06e9da8b0a85dfa3e7dd5",
),
rpc_url: String::from("https://eth.llamarpc.com"),
rpc_url: rpc_url,
default: true,
transpose_api_key: String::from(""),
name: String::from(""),
Expand All @@ -23,11 +28,16 @@ mod integration_tests {

#[tokio::test]
async fn test_inspect_create() {
let rpc_url = std::env::var("RPC_URL").unwrap_or_else(|_| {
println!("RPC_URL not set, skipping test");
std::process::exit(0);
});

let args = InspectArgs {
target: String::from(
"0x37321f192623002fc4b398b90ea825c37f81e29526fd355cff93ef6962fc0fba",
),
rpc_url: String::from("https://eth.llamarpc.com"),
rpc_url: rpc_url,
default: true,
transpose_api_key: String::from(""),
name: String::from(""),
Expand All @@ -42,6 +52,11 @@ mod integration_tests {
#[test]
#[ignore]
fn heavy_test_inspect_thorough() {
let rpc_url = std::env::var("RPC_URL").unwrap_or_else(|_| {
println!("RPC_URL not set, skipping test");
std::process::exit(0);
});

// load ./tests/testdata/txids.json into a vector using serde
let txids = serde_json::from_str::<Value>(
&std::fs::read_to_string("./tests/testdata/txids.json").expect("failed to read file"),
Expand All @@ -57,10 +72,10 @@ mod integration_tests {
let total = txids.len();

// task_pool(items, num_threads, f)
let results = task_pool(txids, 10, |txid: String| {
let results = task_pool(txids, 10, move |txid: String| {
let args = InspectArgsBuilder::new()
.target(txid.to_string())
.rpc_url("https://eth.llamarpc.com".to_string())
.rpc_url(rpc_url.to_string())
.build()
.expect("failed to build args");

Expand Down

0 comments on commit b3a6a2f

Please sign in to comment.