Skip to content

Commit

Permalink
add cmd/casm-inspect disasm utility
Browse files Browse the repository at this point in the history
This is a tool I made while studying the casm bytecode format.
Since getting the thoth disassembler to work requires some extra steps,
I figured it would be handy to have a version that relies on our
assembler package and supports the exact versions of input
files that are relevant to this project.

I tried to produce the correct Cairo0 program when disassembling.
If done carefully (and with metadata provided from the compiled json file),
we can use it to test the assembler in an encode-decode style
(basically we can use the disassembler output as an assembler parser input).

Given this cairo0 source file:

```cairo
%builtins output

from starkware.cairo.common.serialize import serialize_word

func div2(x: felt) -> felt {
    return x / 2;
}

func main{output_ptr: felt*}() {
    alloc_locals;
    local x = 42;
    local y = x + 1;
    local z = div2(x);
    if (y == 0) {
      serialize_word(z);
    } else {
      serialize_word(y);
    }
    ret;
}
```

And given the compiled casm bytecode produced from it (output.json), we can disassemble it into the following:

```casm
// func entry pc=0
// [fp-3] => word: felt
// [fp-4] => output_ptr: felt* (implicit arg)
func starkware.cairo.common.serialize.serialize_word{output_ptr: felt*}(word: felt) {
    assert [fp-3] = [[fp-4]];
    assert [ap] = [fp-4] + 1, ap++;
    ret;
}
// func entry pc=4
// [fp-3] => x: felt
func div2(x: felt) -> felt {
    assert [ap] = [fp-3] * 1809251394333065606848661391547535052811553607665798349986546028067936010241, ap++; // div 2
    ret;
}
// func entry pc=7
// [fp-3] => output_ptr: felt* (implicit arg)
func main{output_ptr: felt*}() {
    nop; // alloc_locals; ap += 3
    assert [fp] = 42;
    assert [fp+1] = [fp] + 1;
    assert [ap] = [fp], ap++;
    call rel -10; // func div2; ap += 2
    assert [fp+2] = [ap-1];
    jmp rel 8 if [fp+1] != 0; // targets L1
    assert [ap] = [fp-3], ap++;
    assert [ap] = [fp+2], ap++;
    call rel -21; // func starkware.cairo.common.serialize.serialize_word; ap += 2
    jmp rel 6; // targets L3
  L1:
    assert [ap] = [fp-3], ap++;
    assert [ap] = [fp+1], ap++;
    call rel -27; // func starkware.cairo.common.serialize.serialize_word; ap += 2
  L3:
    ret;
}
```

This disassembler annotates some lines with recognized patterns like division operations.
It does not include any hints-related information (yet?)
  • Loading branch information
quasilyte committed Jan 29, 2024
1 parent 6539a87 commit 1e9cff3
Show file tree
Hide file tree
Showing 14 changed files with 849 additions and 0 deletions.
133 changes: 133 additions & 0 deletions cmd/casm-inspect/disasm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
package main

import (
"encoding/json"
"fmt"
"os"
"strings"

"github.com/NethermindEth/cairo-vm-go/pkg/disasm"
f "github.com/consensys/gnark-crypto/ecc/stark-curve/fp"

"github.com/urfave/cli/v2"
)

// disasmProgram implements a "disasm" subcommand.
//
// Its fields are filled in progressively by the steps that Action runs
// in order: unmarshal the file, load the bytecode, disassemble, print.
type disasmProgram struct {
// pathToFile is the first positional CLI argument: the JSON file to read.
pathToFile string
// bytecodeKey is a period-separated key path that locates the bytecode
// array inside the decoded JSON document.
bytecodeKey string

// rawCasm is the input file decoded as generic JSON.
rawCasm map[string]any

// bytecode holds the entries found under bytecodeKey, each parsed from
// its string form into a field element.
bytecode []*f.Element

// disassembled is the program returned by disasm.FromBytecode.
disassembled *disasm.Program
}

// Action is the entry point of the "disasm" subcommand.
//
// It records the first positional argument as the input path and then runs
// the processing stages one after another. The first stage that fails aborts
// the run; its error is returned wrapped with the stage's label.
func (p *disasmProgram) Action(ctx *cli.Context) error {
	path := ctx.Args().Get(0)
	p.pathToFile = path
	if path == "" {
		return fmt.Errorf("path to casm file not set")
	}

	// Each stage consumes the state left on p by the previous one,
	// so the order of this table matters.
	pipeline := []struct {
		label string
		run   func() error
	}{
		{label: "unmarshal casm file", run: p.unmarshalCasmFileStep},
		{label: "load bytecode", run: p.loadBytecodeStep},
		{label: "disassemble", run: p.disassembleStep},
		{label: "print", run: p.printStep},
	}
	for i := range pipeline {
		stage := &pipeline[i]
		err := stage.run()
		if err != nil {
			return fmt.Errorf("%s: %w", stage.label, err)
		}
	}

	return nil
}

// unmarshalCasmFileStep reads the file at p.pathToFile and decodes it as
// JSON into p.rawCasm. Read and decode errors are returned unchanged.
func (p *disasmProgram) unmarshalCasmFileStep() error {
	contents, readErr := os.ReadFile(p.pathToFile)
	if readErr != nil {
		return readErr
	}
	return json.Unmarshal(contents, &p.rawCasm)
}

// loadBytecodeStep locates the bytecode array inside p.rawCasm and parses
// every entry into a field element, storing the result in p.bytecode.
//
// Different versions of CASM files may keep the bytecode in different places
// (e.g. "data" in Cairo0 and "bytecode" in Cairo1), so the location is taken
// from the user-supplied, period-separated p.bytecodeKey. Its default is the
// location used by the default supported version (Cairo0 for now and Cairo1
// in the future).
//
// An error is returned when the key path leads nowhere, when the value found
// is not an array of strings, or when an entry can't be parsed as a felt.
func (p *disasmProgram) loadBytecodeStep() error {
	path := strings.Split(p.bytecodeKey, ".")

	raw := lookupKeys(p.rawCasm, path...)
	if raw == nil {
		return fmt.Errorf("key %q doesn't lead to a bytecode", p.bytecodeKey)
	}

	words, isSlice := raw.([]any)
	if !isSlice {
		return fmt.Errorf("%q: expected a slice of strings", p.bytecodeKey)
	}

	p.bytecode = make([]*f.Element, 0, len(words))
	for idx := range words {
		text, isString := words[idx].(string)
		if !isString {
			return fmt.Errorf("%q: expected a slice of strings, found %T", p.bytecodeKey, words[idx])
		}
		elem, parseErr := new(f.Element).SetString(text)
		if parseErr != nil {
			return fmt.Errorf("%q[%d]: parse %q: %w", p.bytecodeKey, idx, text, parseErr)
		}
		p.bytecode = append(p.bytecode, elem)
	}

	return nil
}

// disassembleStep runs the disassembler over p.bytecode and stores the
// resulting program in p.disassembled.
//
// If the decoded JSON has a top-level string "prime" entry, it is parsed and
// handed to the disassembler; otherwise a nil prime is passed. A "prime"
// entry that is present but can't be parsed is an error, and the underlying
// parse error is wrapped so callers can still inspect it.
func (p *disasmProgram) disassembleStep() error {
	var prime *f.Element
	if primeString, ok := lookupKeys(p.rawCasm, "prime").(string); ok {
		// Use a distinct name here: calling this "p" would shadow the receiver.
		parsed, err := new(f.Element).SetString(primeString)
		if err != nil {
			return fmt.Errorf("can't parse the specified prime value %q: %w", primeString, err)
		}
		prime = parsed
	}

	prog, err := disasm.FromBytecode(disasm.Config{
		Bytecode: p.bytecode,
		Indent:   4,
		Prime:    prime,
	})
	if err != nil {
		return err
	}
	p.disassembled = prog
	return nil
}

// printStep writes every disassembled line to standard output.
//
// A line without text is printed as a standalone "//" comment. A line with
// text is terminated with ";" and, when it carries comments, followed by
// them joined with "; ". It always returns nil.
func (p *disasmProgram) printStep() error {
	for _, line := range p.disassembled.Lines {
		hasText := len(line.Text) != 0
		hasComments := len(line.Comments) != 0

		switch {
		case !hasText:
			fmt.Printf("// %s\n", strings.Join(line.Comments, "; "))
		case !hasComments:
			fmt.Printf("%s;\n", line.Text)
		default:
			fmt.Printf("%s; // %s\n", line.Text, strings.Join(line.Comments, "; "))
		}
	}
	return nil
}
77 changes: 77 additions & 0 deletions cmd/casm-inspect/inst_fields.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package main

import (
"errors"
"fmt"
"strings"

f "github.com/consensys/gnark-crypto/ecc/stark-curve/fp"
"github.com/urfave/cli/v2"
)

// instFieldsProgram implements an "inst-fields" subcommand.
// It carries no state; Action works only on its command-line argument.
type instFieldsProgram struct{}

// Action is the entry point of the "inst-fields" subcommand.
//
// It parses the first positional argument as a felt, requires it to fit in
// 64 bits, and prints the value of every CASM instruction field packed into
// that word, lowest bits first, followed by the raw bits of each field.
//
// An error is returned when the argument is missing, can't be parsed, or
// does not fit in a uint64.
func (p *instFieldsProgram) Action(ctx *cli.Context) error {
	s := ctx.Args().Get(0)
	if s == "" {
		return errors.New("expected 1 non-empty positional argument")
	}

	felt, err := new(f.Element).SetString(s)
	if err != nil {
		return fmt.Errorf("parsing %q argument: %w", s, err)
	}
	if !felt.IsUint64() {
		return errors.New("instruction bytes overflow uint64")
	}

	u64 := felt.Uint64()

	fmt.Printf("uint64 value: %v\n", u64)

	// We don't use the assembler's package code here to make it possible
	// to use this dumper tool even if assembler package can't validate
	// the input. Unlike the assembler package, this tool doesn't care
	// if the provided bits are valid or not.
	// It will split them into "fields" expected by the CASM instruction encoding.

	type instField struct {
		name   string
		width  int  // in bits
		signed bool // stored in biased form, see the decoding below
	}
	// Fields are listed from the least significant bit upwards.
	// Their widths add up to 63, so the top bit of the word is not reported.
	encodingList := []instField{
		{"off_dst", 16, true},
		{"off_op0", 16, true},
		{"off_op1", 16, true},
		{"dst_reg", 1, false},
		{"op0_reg", 1, false},
		{"op1_src", 3, false},
		{"res_logic", 2, false},
		{"pc_update", 3, false},
		{"ap_update", 2, false},
		{"opcode", 3, false},
	}

	const onesMask = ^uint64(0)

	var chunks []string

	offset := int(0)
	for _, field := range encodingList {
		mask := onesMask >> (64 - field.width)
		fieldBits := (u64 >> offset) & mask
		if field.signed {
			// Cairo stores offsets in biased (excess-2^15) form, not in
			// two's complement: the encoded value is off + 2^(width-1).
			// Reinterpreting the bits as int16 would print 0x8000 as -32768
			// although it encodes an offset of 0.
			bias := int64(1) << (field.width - 1)
			fmt.Printf("%s: %v (%b)\n", field.name, int64(fieldBits)-bias, fieldBits)
		} else {
			fmt.Printf("%s: %v (%b)\n", field.name, fieldBits, fieldBits)
		}
		chunks = append(chunks, fmt.Sprintf("%b", fieldBits))
		offset += field.width
	}

	fmt.Printf("bits: %s\n", strings.Join(chunks, " "))

	return nil
}
50 changes: 50 additions & 0 deletions cmd/casm-inspect/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package main

import (
"fmt"
"os"

"github.com/urfave/cli/v2"
)

// main wires up the casm-inspect command-line application and runs it.
//
// Two subcommands are registered: "inst-fields" and "disasm". With no
// subcommand the help text is shown. If the application returns an error,
// it is reported on standard error and the process exits with status 1.
func main() {
	disasm := &disasmProgram{}
	instFields := &instFieldsProgram{}

	app := &cli.App{
		Name:                 "casm-inspect",
		Usage:                "casm-inspect <subcmd> [args...]",
		Description:          "A cairo zero file inspector",
		EnableBashCompletion: true,
		Suggest:              true,
		DefaultCommand:       "help",
		Commands: []*cli.Command{
			{
				Name:        "inst-fields",
				Usage:       "inst-fields 0xa0680017fff8000",
				Description: "print CASM instruction fields",
				Action:      instFields.Action,
			},
			{
				Name:        "disasm",
				Usage:       "disasm compiled_cairo0.json",
				Description: "disassemble the casm from the compiled cairo program",
				Action:      disasm.Action,
				Flags: []cli.Flag{
					&cli.StringFlag{
						Name:     "bytecode",
						Usage:    "a JSON key containing CASM bytecode (period-separated for multi-keys)",
						Required: false,
						Value:    "data",
						// The flag value is written straight into the
						// subcommand's state before its Action runs.
						Destination: &disasm.bytecodeKey,
					},
				},
			},
		},
	}

	if err := app.Run(os.Args); err != nil {
		// Diagnostics belong on stderr so that they never mix with the
		// tool's regular output when stdout is piped or redirected.
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}
16 changes: 16 additions & 0 deletions cmd/casm-inspect/utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package main

// lookupKeys walks nested maps, following one key per level.
// For a map like {"a": {"b": {"c": 10}}} and keys ["a", "b", "c"] the result
// is 10, the value found at the deepest level.
//
// It returns nil when a key is absent or when a value that still has to be
// descended into is not a map[string]any. With no keys at all, m itself is
// returned.
func lookupKeys(m map[string]any, keys ...string) any {
	if len(keys) == 0 {
		return m
	}

	head, rest := keys[0], keys[1:]
	value := m[head]
	if len(rest) == 0 {
		return value
	}

	// More keys remain, so the value must itself be a map to continue.
	nested, isMap := value.(map[string]any)
	if !isMap {
		return nil
	}
	return lookupKeys(nested, rest...)
}
39 changes: 39 additions & 0 deletions pkg/disasm/casm.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package disasm

import (
"github.com/NethermindEth/cairo-vm-go/pkg/assembler"
f "github.com/consensys/gnark-crypto/ecc/stark-curve/fp"
)

// casmInstruction pairs a decoded assembler instruction with the extra
// per-instruction data used by JumpTarget.
type casmInstruction struct {
*assembler.Instruction

// arg is the felt that JumpTarget converts into a jump offset or address.
// NOTE(review): presumably the immediate word that follows the instruction,
// and possibly nil when there is none; confirm against the decoder.
arg *f.Element
// bytecodeOffset is the base that JumpTarget adds to relative jump offsets.
// NOTE(review): presumably this instruction's index in the bytecode; confirm.
bytecodeOffset int64
}

// JumpTarget reports where the instruction transfers control to.
//
// The second result is false for ret instructions and for instructions whose
// pc update is "next instruction". For everything else the target is derived
// from inst.arg: taken as is for a PcUpdateJump instruction, and added to the
// instruction's own bytecode offset for every other pc update kind.
func (inst *casmInstruction) JumpTarget() (int64, bool) {
	transfersControl := inst.Opcode != assembler.OpCodeRet &&
		inst.PcUpdate != assembler.PcUpdateNextInstr
	if !transfersControl {
		return 0, false
	}

	target := feltToInt64(inst.arg)
	if inst.PcUpdate != assembler.PcUpdateJump {
		// Every remaining pc update kind is measured from this instruction.
		target += inst.bytecodeOffset
	}
	return target, true
}

// Size reports the instruction's length: 2 when its op1 source is an
// immediate or when it is a call, and 1 otherwise.
// NOTE(review): the unit is presumably bytecode words (felts); confirm.
func (inst *casmInstruction) Size() int64 {
	if inst.Op1Source == assembler.Imm || inst.Opcode == assembler.OpCodeCall {
		return 2
	}
	return 1
}
Loading

0 comments on commit 1e9cff3

Please sign in to comment.