-
Notifications
You must be signed in to change notification settings - Fork 51
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This is a tool I made while studying the casm bytecode format.

Since making the thoth disassembler work requires some extra steps, I figured it would be handy to have a version that relies on our assembler package and supports the exact versions of input files that are relevant to this project.

I tried to produce the correct Cairo0 program when disassembling. If done carefully (and with metadata provided from the compiled json file), we can use it to test the assembler in an encode-decode style (basically, we can use the disassembler output as an assembler parser input).

Given this cairo0 source file:

```cairo
%builtins output

from starkware.cairo.common.serialize import serialize_word

func div2(x: felt) -> felt {
    return x / 2;
}

func main{output_ptr: felt*}() {
    alloc_locals;
    local x = 42;
    local y = x + 1;
    local z = div2(x);
    if (y == 0) {
        serialize_word(z);
    } else {
        serialize_word(y);
    }
    ret;
}
```

And a compiled casm bytecode produced from it (output.json), we can disassemble it into the following:

```casm
// func entry pc=0
// [fp-3] => word: felt
// [fp-4] => output_ptr: felt* (implicit arg)
func starkware.cairo.common.serialize.serialize_word{output_ptr: felt*}(word: felt) {
    assert [fp-3] = [[fp-4]];
    assert [ap] = [fp-4] + 1, ap++;
    ret;
}

// func entry pc=4
// [fp-3] => x: felt
func div2(x: felt) -> felt {
    assert [ap] = [fp-3] * 1809251394333065606848661391547535052811553607665798349986546028067936010241, ap++; // div 2
    ret;
}

// func entry pc=7
// [fp-3] => output_ptr: felt* (implicit arg)
func main{output_ptr: felt*}() {
    nop; // alloc_locals; ap += 3
    assert [fp] = 42;
    assert [fp+1] = [fp] + 1;
    assert [ap] = [fp], ap++;
    call rel -10; // func div2; ap += 2
    assert [fp+2] = [ap-1];
    jmp rel 8 if [fp+1] != 0; // targets L1
    assert [ap] = [fp-3], ap++;
    assert [ap] = [fp+2], ap++;
    call rel -21; // func starkware.cairo.common.serialize.serialize_word; ap += 2
    jmp rel 6; // targets L3
L1:
    assert [ap] = [fp-3], ap++;
    assert [ap] = [fp+1], ap++;
    call rel -27; // func starkware.cairo.common.serialize.serialize_word; ap += 2
L3:
    ret;
}
```

This disassembler annotates some lines with recognized patterns like division operations. It does not include any hints-related information (yet?).
- Loading branch information
Showing
14 changed files
with
811 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
package main | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
"os" | ||
"strings" | ||
|
||
"github.com/NethermindEth/cairo-vm-go/pkg/disasm" | ||
f "github.com/consensys/gnark-crypto/ecc/stark-curve/fp" | ||
|
||
"github.com/urfave/cli/v2" | ||
) | ||
|
||
// disasmProgram implements a "disasm" subcommand. | ||
type disasmProgram struct { | ||
pathToFile string | ||
bytecodeKey string | ||
|
||
rawCasm map[string]any | ||
|
||
bytecode []*f.Element | ||
|
||
disassembled *disasm.Program | ||
} | ||
|
||
func (p *disasmProgram) Action(ctx *cli.Context) error { | ||
p.pathToFile = ctx.Args().Get(0) | ||
if p.pathToFile == "" { | ||
return fmt.Errorf("path to casm file not set") | ||
} | ||
|
||
type step struct { | ||
name string | ||
fn func() error | ||
} | ||
steps := []step{ | ||
{"unmarshal casm file", p.unmarshalCasmFileStep}, | ||
{"load bytecode", p.loadBytecodeStep}, | ||
{"disassemble", p.disassembleStep}, | ||
{"print", p.printStep}, | ||
} | ||
for _, s := range steps { | ||
if err := s.fn(); err != nil { | ||
return fmt.Errorf("%s: %w", s.name, err) | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (p *disasmProgram) unmarshalCasmFileStep() error { | ||
data, err := os.ReadFile(p.pathToFile) | ||
if err != nil { | ||
return err | ||
} | ||
if err := json.Unmarshal(data, &p.rawCasm); err != nil { | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
func (p *disasmProgram) loadBytecodeStep() error { | ||
// Since different versions of CASM files may store bytecode at different places | ||
// (e.g. "data" in Cairo0 and "bytecode" in Cairo1), | ||
// we allow the user to specify the bytecode array location. | ||
// By default, this value will be equal to the default supported version location | ||
// (Cairo0 for now and Cairo1 in the future). | ||
keys := strings.Split(p.bytecodeKey, ".") | ||
|
||
v := lookupKeys(p.rawCasm, keys...) | ||
if v == nil { | ||
return fmt.Errorf("key %q doesn't lead to a bytecode", p.bytecodeKey) | ||
} | ||
|
||
slice, ok := v.([]any) | ||
if !ok { | ||
return fmt.Errorf("%q: expected a slice of strings", p.bytecodeKey) | ||
} | ||
|
||
p.bytecode = make([]*f.Element, 0, len(slice)) | ||
for i, s := range slice { | ||
s, ok := s.(string) | ||
if !ok { | ||
return fmt.Errorf("%q: expected a slice of strings, found %T", p.bytecodeKey, slice[i]) | ||
} | ||
felt, err := new(f.Element).SetString(s) | ||
if err != nil { | ||
return fmt.Errorf("%q[%d]: parse %q: %w", p.bytecodeKey, i, s, err) | ||
} | ||
p.bytecode = append(p.bytecode, felt) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func (p *disasmProgram) disassembleStep() error { | ||
prog, err := disasm.FromBytecode(disasm.Config{ | ||
Bytecode: p.bytecode, | ||
Indent: 4, | ||
}) | ||
if err != nil { | ||
return err | ||
} | ||
p.disassembled = prog | ||
return nil | ||
} | ||
|
||
func (p *disasmProgram) printStep() error { | ||
for _, l := range p.disassembled.Lines { | ||
if len(l.Text) == 0 { | ||
fmt.Printf("// %s\n", strings.Join(l.Comments, "; ")) | ||
continue | ||
} | ||
if len(l.Comments) == 0 { | ||
fmt.Printf("%s;\n", l.Text) | ||
} else { | ||
fmt.Printf("%s; // %s\n", l.Text, strings.Join(l.Comments, "; ")) | ||
} | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package main | ||
|
||
import ( | ||
"errors" | ||
"fmt" | ||
"strings" | ||
|
||
f "github.com/consensys/gnark-crypto/ecc/stark-curve/fp" | ||
"github.com/urfave/cli/v2" | ||
) | ||
|
||
// instFieldsProgram implements an "inst-fields" subcommand. | ||
type instFieldsProgram struct{} | ||
|
||
func (p *instFieldsProgram) Action(ctx *cli.Context) error { | ||
s := ctx.Args().Get(0) | ||
if s == "" { | ||
return errors.New("expected 1 non-empty positional argument") | ||
} | ||
|
||
felt, err := new(f.Element).SetString(s) | ||
if err != nil { | ||
return fmt.Errorf("parsing %q argument: %w", s, err) | ||
} | ||
if !felt.IsUint64() { | ||
return errors.New("instruction bytes overflow uint64") | ||
} | ||
|
||
u64 := felt.Uint64() | ||
|
||
fmt.Printf("uint64 value: %v\n", u64) | ||
|
||
// We don't use the assembler's package code here to make it possible | ||
// to use this dumper tool even if assembler package can't validate | ||
// the input. Unlike the assembler package, this tool doesn't care | ||
// if the provided bits are valid or not. | ||
// It will split them into "fields" expected by the CASM instruction encoding. | ||
|
||
type instField struct { | ||
name string | ||
width int // in bits | ||
signed bool | ||
} | ||
encodingList := []instField{ | ||
{"off_dst", 16, true}, | ||
{"off_op0", 16, true}, | ||
{"off_op1", 16, true}, | ||
{"dst_reg", 1, false}, | ||
{"op0_reg", 1, false}, | ||
{"op1_src", 3, false}, | ||
{"res_logic", 2, false}, | ||
{"pc_update", 3, false}, | ||
{"ap_update", 2, false}, | ||
{"opcode", 3, false}, | ||
} | ||
|
||
const onesMask = ^uint64(0) | ||
|
||
var chunks []string | ||
|
||
offset := int(0) | ||
for _, field := range encodingList { | ||
mask := onesMask >> (64 - field.width) | ||
fieldBits := (u64 >> offset) & mask | ||
if field.signed { | ||
fmt.Printf("%s: %v (%b)\n", field.name, int16(fieldBits), fieldBits) | ||
} else { | ||
fmt.Printf("%s: %v (%b)\n", field.name, fieldBits, fieldBits) | ||
} | ||
chunks = append(chunks, fmt.Sprintf("%b", fieldBits)) | ||
offset += field.width | ||
} | ||
|
||
fmt.Printf("bits: %s\n", strings.Join(chunks, " ")) | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
|
||
"github.com/urfave/cli/v2" | ||
) | ||
|
||
func main() { | ||
disasm := &disasmProgram{} | ||
instFields := &instFieldsProgram{} | ||
|
||
app := &cli.App{ | ||
Name: "casm-inspect", | ||
Usage: "casm-inspect <subcmd> [args...]", | ||
Description: "A cairo zero file inspector", | ||
EnableBashCompletion: true, | ||
Suggest: true, | ||
DefaultCommand: "help", | ||
Commands: []*cli.Command{ | ||
{ | ||
Name: "inst-fields", | ||
Usage: "inst-fields 0xa0680017fff8000", | ||
Description: "print CASM instruction fields", | ||
Action: instFields.Action, | ||
}, | ||
{ | ||
Name: "disasm", | ||
Usage: "disasm compiled_cairo0.json", | ||
Description: "disassemble the casm from the compiled cairo program", | ||
Action: disasm.Action, | ||
Flags: []cli.Flag{ | ||
&cli.StringFlag{ | ||
Name: "bytecode", | ||
Usage: "a JSON key containing CASM bytecode (period-separated for multi-keys)", | ||
Required: false, | ||
Value: "data", | ||
Destination: &disasm.bytecodeKey, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
if err := app.Run(os.Args); err != nil { | ||
fmt.Println(err) | ||
os.Exit(1) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
package main | ||
|
||
// lookupKeys descends through nested maps, following keys one level at a time.
//
// For a map like {"a": {"b": {"c": 10}}} and keys ["a", "b", "c"] the result
// is 10, the value found by the last lookup. If a level that still has to be
// descended into is not a map[string]any, the result is nil; a missing key
// also yields nil. With no keys at all, m itself is returned.
func lookupKeys(m map[string]any, keys ...string) any {
	var node any = m
	for i := 0; i < len(keys); i++ {
		level, isMap := node.(map[string]any)
		if !isMap {
			return nil
		}
		node = level[keys[i]]
	}
	return node
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package disasm | ||
|
||
import ( | ||
"github.com/NethermindEth/cairo-vm-go/pkg/assembler" | ||
f "github.com/consensys/gnark-crypto/ecc/stark-curve/fp" | ||
) | ||
|
||
type casmInstruction struct { | ||
*assembler.Instruction | ||
|
||
arg *f.Element | ||
bytecodeOffset int64 | ||
} | ||
|
||
func (inst *casmInstruction) JumpTarget() (int64, bool) { | ||
if inst.Opcode == assembler.OpCodeRet { | ||
return 0, false | ||
} | ||
if inst.PcUpdate == assembler.PcUpdateNextInstr { | ||
return 0, false | ||
} | ||
|
||
offset := feltToInt64(inst.arg) | ||
if inst.PcUpdate == assembler.PcUpdateJump { | ||
return offset, true | ||
} | ||
return inst.bytecodeOffset + offset, true | ||
} | ||
|
||
func (inst *casmInstruction) Size() int64 { | ||
// Note: OpCodeCall also has an immediate (call target). | ||
if inst.Op1Source == assembler.Imm { | ||
return 2 | ||
} | ||
return 1 | ||
} |
Oops, something went wrong.