forked from jaaron/toy-bytecode
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathschemer.sch
1548 lines (1399 loc) · 49.5 KB
/
schemer.sch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
;; Copyright 2012 J. Aaron Pendergrass
;; This file is part of toy-bytecode.
;; toy-bytecode is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; toy-bytecode is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with toy-bytecode. If not, see <http://www.gnu.org/licenses/>.
; utility functions
(define last (lambda (l) (if (and (not (null? l))
(pair? l)
(pair? (cdr l))
(not (null? (cdr l))))
(last (cdr l))
l)))
(define string-is-numeric? (lambda (str)
(let ((first-char (string-ref str 0))
(strlen (string-length str))
(test (lambda (c v) (if v (char-is-digit? c) v))))
(and (< 0 strlen)
(if (< 1 strlen)
(and
(or (char=? first-char #\-)
(char=? first-char #\+)
(char-is-digit? first-char))
(string-fold test #t str 1 (string-length str)))
(char-is-digit? first-char))))))
; The underlying VM uses a tagged memory model that differentiates between
; numbers, pointers, vm constants, and language constants.
; Numeric values in the assembly must be tagged with an appropriate identifier
; to convince the assembler to tag the cell with the appropriate type.
;
; string constants for each instruction
(define ins-push "PUSH")
(define ins-pop "POP")
(define ins-swap "SWAP")
(define ins-dup "DUP")
(define ins-rot "ROT")
(define ins-call "CALL")
(define ins-ret "RET")
(define ins-jmp "JMP")
(define ins-jtrue "JTRUE")
(define ins-end "END")
(define ins-add "ADD")
(define ins-sub "SUB")
(define ins-eq "EQ")
(define ins-lt "LT")
(define ins-stor "STOR")
(define ins-load "LOAD")
(define ins-aloc "ALOC")
(define ins-rdrr "RDRR")
(define ins-wtrr "WTRR")
(define ins-isnum "ISNUM")
(define ins-isptr "ISPTR")
;; The instructions are all used at most once in the compiler so its
;; cheaper to include them as string literals then to clutter up the
;; environment with them.
;; (define ins-mul "MUL")
;; (define ins-div "DIV")
;; (define ins-mod "MOD")
;; (define ins-shl "SHL")
;; (define ins-shr "SHR")
;; (define ins-bor "BOR")
;; (define ins-band "BAND")
;; (define ins-getc "GETC")
;; (define ins-dump "DUMP")
;; (define ins-pint "PINT")
;; (define ins-pchr "PCHR")
;; (define ins-islconst "ISLCONST")
;; (define ins-ischr "ISCHR")
;; (define ins-isins "ISINS")
; Language constants
(define false-value "FALSE")
(define true-value "TRUE")
; return a string containing the asm representation of a number
(define asm-number (lambda (x) (string-append "n" (if (number? x) (number->string x) x))))
; return a string containing the asm representation of a pointer
(define asm-pointer (lambda (x y) (string-append "p" (if (number? x) (number->string x) x)
"," (if (number? x) (number->string y) y)
)))
; return a string containing the asm representation of a language constant
(define asm-lang-const (lambda (x) (string-append "l" (if (number? x) (number->string x) x))))
(define asm-label-reference (lambda (name) (string-append "@" name)))
(define asm-label-definition (lambda (name) (string-append ":" name)))
(define asm-label-definition-sz (lambda (name sz)
(string-append (asm-label-definition name)
(string-append "," (number->string sz)))))
(define ptr-type-offset (asm-number 0))
(define consbox-size 2)
(define asm-consbox-size (asm-number consbox-size))
(define consbox-car-offset (asm-number 0))
(define consbox-cdr-offset (asm-number 1))
(define vector-type-flag (asm-lang-const 2))
(define vector-length-offset (asm-number 1))
(define raw-vector-elems-offset 2)
(define vector-elems-offset (asm-number raw-vector-elems-offset))
(define string-type-flag (asm-lang-const 3))
(define string-length-offset vector-length-offset)
(define string-chars-offset vector-elems-offset)
(define symbol-type-flag (asm-lang-const 3))
; A few builtin forms are handled specially
; by the compiler. Some of these will later
; be subsumed by macro capabilities but for right
; now they are just special voodoo.
;
; This list associates the symbols with special
; compiler functions.
(define special-forms
(lambda ()
(list
(cons "set!" compile-set!)
(cons "lambda" compile-lambda)
(cons "let" compile-let)
(cons "letrec" compile-letrec)
(cons "if" compile-if)
(cons "define" compile-define)
(cons "begin" compile-begin)
(cons "quote" compile-quote)
(cons "and" compile-and)
(cons "or" compile-or)
)
))
; search the list of builtin forms for a
; particular form
(define find-special
(lambda (f)
(letrec ((helper (lambda (ss)
(if (null? ss) #f
(if (string=? f (car (car ss)))
(cdr (car ss))
(helper (cdr ss)))))))
(helper (special-forms)))))
; The top-level-env is a list containing the list of symbols
; defined by the compiler at the top level.
;
; The runtime environment is represented as a list of vectors
; references to symbols are replaced with a traversal of this
; structure based on the level of enclosing scope where the symbol was
; defined, and the index of the variable in that scope. This is taken
; directly from the SECD machine's representation of the environment.
;
; For example suppose we have something like:
; (((lambda (x)
; (lambda (z) (+ x z)))
; 5)
; 7)
;
; The inner lambda (that gets returned and applied to the arg 7) refers to three
; different symbols: '+' 'x' and 'z' that are each declared at a different depth
; in the enclosing environment.
; When this lambda is evaluated (with the argument 7) the environment will look like:
; ([7]
; [5]
; ["=" "null?" "cons" "car" "cdr" "+" ...])
;
; So the reference to symbol 'z' will be compiled to (vector-ref (car env) 0)
; the reference to symbol 'x' will be compiled to (vector-ref (car (cdr env)) 0) and
; the reference to symbol '+' will be compiled to (vector-ref (car (cdr (cdr env))) 5)
;
; Note: this isn't strictly accurate since the symbols 'vector-ref',
; 'car' and 'cdr' are themselves defined in the environment and
; would thus require lookups making this expansion impossible.
; What really happens is that a non-closure form of the car and
; cdr procedures are invoked directly. See the functions
; u-call-* below.
(define top-level-env
(quote ((("equal?" "equal")
("=" "equal")
("<" "less_than")
("null?" "null_q")
("cons" "cons")
("car" "car")
("cdr" "cdr")
("+" "add")
("-" "subtract")
("*" "multiply")
("%" "modulo")
("/" "divide")
("print-char" "print_char")
("print-num" "print_num")
("string?" "string_q")
("number?" "number_q")
("char?" "char_q")
("pair?" "pair_q")
("read-char" "read_char")
("quit" "quit")
("set-car!" "set_car")
("set-cdr!" "set_cdr")
("make-vector" "make_vector")
("vector-ref" "vector_ref")
("vector-set!" "vector_set")
("vector-length" "vector_length")
("make-string" "make_string")
("string-set!" "vector_set")
("string-ref" "vector_ref")
("string-length" "vector_length")
("vapply" "vapply")
("char=?" "equal") ;; for characters
("char<?" "less_than") ;; for characters
("eof-object?" "eof_object_q")))))
(define top-level-env-endptr (last (car top-level-env)))
(define initial-env-label "__initial_environment")
(define append-named-consbox
(lambda (name car-value cdr-value)
(append-instructions
(asm-label-definition-sz name consbox-size)
car-value
cdr-value)
name))
(define append-consbox
(lambda (car-value cdr-value)
(append-named-consbox (fresh-label) car-value cdr-value)))
(define append-list-as-vector
(lambda (vec-list)
(let ((lbl (fresh-label)))
(append-instructions
(asm-label-definition-sz lbl (+ (length vec-list) raw-vector-elems-offset))
vector-type-flag
(asm-number (length vec-list)))
(apply append-instructions vec-list)
lbl)))
(define append-initial-env
(lambda ()
(append-named-consbox "__nil"
(asm-label-reference "__nil")
(asm-label-reference "__nil"))
(append-named-consbox
initial-env-label
(asm-label-reference
(append-list-as-vector (map (lambda (l) (asm-label-reference (cadr l))) (car top-level-env))))
(asm-label-reference "__nil"))))
; (fresh-label) is used to generate labels for each lambda
; expression and for string constants.
(define label-counter 0)
(define fresh-label (lambda ()
(set! label-counter (+ label-counter 1))
(string-append "__anonymous" (number->string label-counter))
))
; append an instruction to the ouptut stream
(define append-instruction
(lambda (ins)
(begin
(display ins)
(display "\n"))))
; append a list of instructions to the output stream
(define append-instructions
(lambda inss
(letrec ((helper (lambda (inss)
(if (null? inss) (quote ())
(begin
(append-instruction (car inss))
(helper (cdr inss)))))))
(helper inss))))
;
; We're now actually into the guts of the compiler.
; The conventions are as follows:
; Functions of the form
; - 'assembly-foo' take no arguments, append asm instructions
; to the output stream for completing the task
; foo, and return no useful result.
;
; - 'u-call-foo' serve as a wrapper around the assembly-foo
; functions. For larger blocks of assembly code
; the u-call-foo insert a CALL to the definitions,
; shorter ones are inlined. Again, no useful result
; is returned.
;
; - 'compile-foo' these are the main compiler functions. All of
; these take atleast two arguments, the s-expression
; to compile and the symbolic environment list used
; to resolve references. Some of these take a boolean
; 'rest' argument which is a bad hack to support tail-call
; optimization. If 'rest' is false it means their is
; definitely no continuation to this expression and so
; a closure invocation can be optimized by not storing the
; return environment and using a JMP rather than a CALL
; (see assembly-funcall vs. assembly-tailcall below).
; All compile-* functions must return either 0-arity function
; or false. The 0-arity function represents work that is
; being delayed until after the compilation of the main
; program body, e.g., the body of lambda expressions.
; It is vital that these return values be propagated out
; to the main compiler loop 'do-compiler-task'
; assembly for the primitive list functions car, cdr, and cons
; (ptr) -> ((car ptr))
(define assembly-car (lambda () (append-instructions ins-push consbox-car-offset ins-load)))
; (ptr) -> ((cdr ptr))
(define assembly-cdr (lambda () (append-instructions ins-push consbox-cdr-offset ins-load)))
; (cdr car) -> ((cons car cdr))
(define assembly-cons (lambda ()
(append-instructions
ins-push asm-consbox-size ; (car cdr 2)
ins-aloc ; (car cdr hp)
ins-push consbox-cdr-offset ; (car cdr hp 1)
ins-stor ; (car hp) cdr stored
ins-push consbox-car-offset ; (car hp 0)
ins-stor) ; (hp) cdr stored
))
; top is the cons box to set, then the new value
(define assembly-set-car
(lambda () (append-instructions ins-push consbox-car-offset ins-stor)))
(define assembly-set-cdr
(lambda () (append-instructions ins-push consbox-cdr-offset ins-stor)))
; these define how to call the three primitives car, cdr, and cons as
; part of larger compiler generated sequences (e.g., function application)
; for car, we just inline the assembly. For cdr and cons we do a machine level
; call into a function.
(define u-call-car (lambda () (assembly-car))) ; car is 3 instructions, a function call is the same length
; so there is no reason not to inline it.
(define u-call-cdr (lambda () (assembly-cdr))) ; same with cdr.
(define u-call-cons ; cons is really big (13 instructions)! we'll never inline it
(lambda () (append-instructions ins-push (asm-label-reference "__u_cons") ins-call)))
(define u-call-set-car (lambda () (assembly-set-car)))
(define u-call-set-cdr (lambda () (assembly-set-cdr)))
(define u-call-make-vector (lambda () (append-instructions
ins-push (asm-label-reference "__u_make_vector_nofill")
ins-call)))
; function application convention
; top of stack is the closure to apply, then the arguments
; this is tricky. We need to cons the argument list onto
; the closure's environment, store the existing
; environment pointer to the stack, set the environment
; pointer to the new list, invoke the closure's code,
; then restore the environment pointer on return.
;
(define assembly-make-args-helper (lambda (nr-args)
(if (= nr-args 0) #f
(begin
(append-instructions
ins-push (asm-number (+ (+ raw-vector-elems-offset nr-args) -1))
ins-stor)
(assembly-make-args-helper (- nr-args 1))))))
(define assembly-make-args (lambda (nr-args)
(append-instructions
ins-push (asm-number nr-args))
(u-call-make-vector)
(assembly-make-args-helper nr-args)))
;; special case for referencing arguments to this function (i.e., depth = 0).
(define assembly-get-arg
(lambda (idx)
(append-instruction ins-rdrr)
(u-call-car)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-load)))
(define assembly-set-arg
(lambda (idx)
(append-instruction ins-rdrr)
(u-call-car)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-stor
ins-pop)))
(define assembly-nrargs
(lambda ()
(append-instruction ins-rdrr)
(u-call-car)
(append-instructions
ins-push vector-length-offset
ins-load)))
; (args clos) -> ((clos args))
(define assembly-funcall (lambda ()
(append-instructions ; (args clos rp)
(asm-label-definition "__funcall_tramp")
ins-rot ; (rp args clos)
ins-dup) ; (rp args clos clos)
(u-call-car) ; (rp args clos env)
(append-instructions
ins-swap ; (rp args env clos)
ins-rot) ; (rp clos args env)
(u-call-cons) ; (rp clos (args . env)*)
(append-instructions
ins-rdrr ; (rp clos (args . env) renv)
ins-swap ; (rp clos renv (args . env) )
ins-wtrr ; (rp clos renv) rr = (args . env)
ins-rot) ; (renv rp clos)
(u-call-cdr) ; (renv rp clos-code)
(append-instruction ins-jmp)))
(define u-call-funcall (lambda ()
(append-instructions ins-push (asm-label-reference "__funcall_tramp")
ins-call
ins-swap
ins-wtrr)))
; tail calls are sneakier we avoid saving the current
; env pointer.
; (args clos) -> ((clos args))
(define assembly-tailcall (lambda ()
(append-instructions
(asm-label-definition "__tailcall_tramp")
ins-dup) ; (renv rp args clos clos)
(u-call-car) ; (renv rp args clos env)
(append-instructions
ins-swap ; (renv rp args env clos)
ins-rot) ; (renv rp clos args env)
(u-call-cons) ; (renv rp clos (args . env)* )
(append-instruction ins-wtrr) ; (renv rp clos) rr = (args . env)
; note that we didn't store the current env
; this is a tail call so we'll return straight
; to the current renv/rp!
(u-call-cdr) ; (renv rp code)
(append-instruction ins-jmp) ; we jump into the call with
; (renv rp)
; on return we'll have pc = rp, and
; (renv rval) on the stack
; just as on return from non-tail call above.
))
(define u-call-tailcall (lambda ()
(append-instructions "PUSH" (asm-label-reference "__tailcall_tramp")
"JMP")))
; returning is simple since cleanup is handled by the caller
(define assembly-funret (lambda () (append-instruction ins-ret)))
; Assembly for loading a cell from the environment.
; assembly-env-cell places the cons box whose car is
; at the desired offsets on the stack.
; assembly-env-val actually loads the value.
(define assembly-env-vec
(lambda (depth)
(append-instructions
ins-rdrr ; (env)
ins-push (asm-number depth) ; (env d)
ins-push (asm-label-reference "__u_nth_cell") ; (env d u_nth)
ins-call)
(u-call-car)))
(define assembly-env-val
(lambda (env-length depth idx)
(if (= env-length (+ depth 1)) ;; getting something from top-level-env
(begin
(append-instructions
ins-push (asm-label-reference initial-env-label))
(u-call-car)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-load))
(if (= depth 0)
(assembly-get-arg idx)
(begin
(assembly-env-vec depth)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-load))))))
(define assembly-set-env-val
(lambda (env-length depth idx)
(if (= env-length (+ depth 1))
(begin
(append-instructions
ins-push (asm-label-reference initial-env-label))
(u-call-car)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx))
ins-stor))
(begin
(assembly-env-vec depth)
(append-instructions
ins-push (asm-number (+ raw-vector-elems-offset idx)) ins-stor)))))
(define assembly-nil
(lambda ()
(append-instructions ins-push (asm-label-reference "__nil") )))
; Lookup functions, find a particular symbol in the symbolic environment
; list. These are complimentary to the assembly-env-* functions above.
(define lookup-reference-offset
(lambda (r e cont)
(if (null? e) (cont #f)
(if (string=? r (if (list? (car e)) (car (car e)) (car e)))
(cont 0)
(lookup-reference-offset r (cdr e)
(lambda (z)
(cont (if z (+ z 1) z))))))))
(define lookup-reference-depth
(lambda (r e cont)
(if (null? e) (cont #f)
(lookup-reference-offset r (car e)
(lambda (z)
(cont
(if z
(cons 0 z)
(lookup-reference-depth r (cdr e)
(lambda (w)
(if w
(cons (+ (car w) 1) (cdr w))
#f))))))))))
(define lookup-reference
(lambda (r e)
(lookup-reference-depth r e (lambda (x) x))))
; do-compile-task is the main compiler loop.
; it takes a 0-arity function to invoke (or false),
; and recurs on the result of invoking the function.
(define do-compile-task
(lambda (t) (if t (do-compile-task (t)) #f)))
; Compilation functions
(define compile-number
(lambda (c env) (append-instructions ins-push (asm-number c)) #f))
(define calculate-string-list-length
(lambda (strl n)
(if (null? strl) n
(calculate-string-list-length
(if (char=? (car strl) #\\) (cdr (cdr strl)) (cdr strl))
(+ n 1)))))
(define calculate-string-length
(lambda (str)
(calculate-string-list-length (string->list str) -2)))
(define compile-string
(lambda (s env)
(let ((strlabel (fresh-label))
(strlen (calculate-string-length s)))
(append-instructions ins-push (asm-label-reference strlabel))
(lambda ()
(append-instructions
(asm-label-definition-sz strlabel (+ strlen 2))
string-type-flag
(asm-number strlen)
s)
#f))))
(define calculate-symbol-length
(lambda (s) 0))
(define compile-symbol
(lambda (s env)
(let ((symlabel (fresh-label))
(symlen (calculate-symbol-length s)))
(append-instructions ins-push (asm-label-reference symlabel))
(lambda ()
(append-instructions
(asm-label-definition-sz symlabel (+ symlen 2))
symbol-type-flag
(asm-number symlen)
(string-append "\"" (string-append s "\"")))
#f))))
; this doesn't handle escaped chars except newline, tab, quote, double quote and backslash
(define compile-char
(lambda (s env)
(append-instructions
ins-push
(string-append "'"
(string-append
(if (string=? s "#\\tab")
"\\t"
(if (string=? s "#\\newline")
"\\n"
(if (string=? s "#\\\\")
"\\\\"
(if (string=? s "#\\'")
"\\'"
(if (string=? s "#\\\"") "\\\""
(if (string=? s "#\\space")
" "
(substring s 2 3)))))))
"'"))) #f))
(define compile-reference
(lambda (r env)
(let ((i (lookup-reference r env)))
(if i
(begin
(append-instruction (string-append ";; Resolving symbol " r))
(assembly-env-val (length env) (car i) (cdr i))
(append-instruction (string-append ";; Resolved symbol " r)))
;; this is an error
(begin
;; this should really write to stderr.
(display (string-append "Undefined symbol: " r))
(newline)
(quit))
)
#f
)))
(define compile-atom
(lambda (x env quoted)
(if (string=? x "#t")
(begin (append-instructions ins-push true-value) #f)
(if (string=? x "#f")
(begin (append-instructions ins-push false-value) #f)
(if (string-is-numeric? x)
(compile-number x env)
(if (char=? (car (string->list x)) #\")
(compile-string x env)
(if (char=? (car (string->list x)) #\#)
(compile-char x env)
(if (string=? "nil" x)
(begin (assembly-nil) #f)
(if quoted
(compile-symbol x env)
(compile-reference x env))))))))))
(define list-part
(lambda (l)
(letrec ((helper (lambda (l acc)
(if (pair? l)
(helper (cdr l) (cons (car l) acc))
(reverse acc)))))
(helper l '()))))
(define process-params
(lambda (plist)
(if (list? plist)
(begin ;; (display "plist is list")
;; (newline)
plist)
(let ((fixed-params (list-part plist))
(variadic-param (if (pair? plist) (cdr (last plist)) plist)))
(let ((nr-fixed-params (length fixed-params)))
(append-instructions
ins-push (asm-number nr-fixed-params)
ins-push (asm-label-reference "__u_make_varargs_list")
ins-call
ins-pop)
(append fixed-params (list variadic-param)))))))
; Hm, we should probably be flagging code pointers with something
; so that we can avoid gc'ing them. Right now the VM just assumes the
; code is statically defined below initial heap pointer but in order
; to support eval we'll have to do something more clever later.
(define compile-lambda
(lambda (l env rest)
(let ((label (fresh-label)))
(append-instructions
ins-rdrr ins-push (asm-label-reference label) )
(u-call-cons)
(lambda ()
(append-instruction (asm-label-definition label))
(let ((r (compile-sequence (cddr l)
(cons
(process-params (cadr l))
env) #f)))
(assembly-funret)
r)))))
(define compile-let-bindings
(lambda (bs env)
(if (null? bs) #f
(let ((r2 (compile-sexp (car (cdr (car bs))) env #t))
(r1 (compile-let-bindings (cdr bs) env)))
(lambda ()
(do-compile-task r1)
(do-compile-task r2))))))
(define compile-let
(lambda (l env rest)
(let ((r1 (compile-let-bindings (car (cdr l)) env))
(e (map (lambda (x) (car x)) (car (cdr l)))))
(assembly-make-args (length (cadr l)))
(append-instruction ins-rdrr)
(u-call-cons)
(append-instruction ins-wtrr)
(let ((r2 (compile-sequence (cdr (cdr l)) (cons e env) rest)))
(if rest
(begin
(append-instruction ins-rdrr)
(u-call-cdr)
(append-instruction ins-wtrr))
#f)
(lambda ()
(do-compile-task r1)
(do-compile-task r2))
))))
(define compile-set!
(lambda (l env rest)
(let ((cell-id (lookup-reference (cadr l) env)))
(let ((r (compile-sexp (caddr l) env #t)))
(assembly-set-env-val (length env) (car cell-id) (cdr cell-id))
r))))
(define compile-letrec
(lambda (l env rest)
(letrec ((empty-binders (map (lambda (b) (list (car b) (list "quote" '())))
(cadr l)))
(helper (lambda (binders body)
(if (null? binders) body
(helper (cdr binders)
(cons (cons "set!" (car binders))
body))))))
(compile-sexp
(cons "let"
(cons empty-binders
(helper (reverse (cadr l))
(cddr l))))
env rest))))
(define compile-begin
(lambda (l env rest) (compile-sequence (cdr l) env rest)))
(define compile-sequence
(lambda (l env rest)
(if (null? l) #f
(let ((r1 (compile-sexp (car l) env (if (null? (cdr l)) rest #t) )))
(if (not (null? (cdr l)))
(append-instruction ins-pop)
#f
)
(let ((r2 (compile-sequence (cdr l) env rest)))
(lambda ()
(do-compile-task r1)
(do-compile-task r2)
))))))
; define is really sneaky in that it has to modify
; the environment (both symbolic and the non) so that
; whatever symbol is being defined can be referenced in
; lambda bodies that were declared previously (which is
; part of why lambda body compilation is delayed until
; after the main compilation). This involves using set-car!
; to modify both environment pointers such that
; (car post-env) == (cons v (car pre-env))
; where v is the value of the defined symbol and pre-env and
; post-env are the environments before and after the call.
(define compile-define
(lambda (l env rest)
(append-instruction (string-append ";; Definition of " (car (cdr l))))
(let ((v (lookup-reference (car (cdr l)) env))
(r (compile-sexp (car (cdr (cdr l))) env #t)))
(if v
(begin
(append-instruction (string-append ";; Updating binding " (car (cdr l))))
(assembly-set-env-val (length env) (car v) (cdr v)))
(begin
(assembly-set-env-val (length env) (- (length env) 1) (length (car top-level-env)))
(set-cdr! top-level-env-endptr
(cons (list (car (cdr l)) "__nil")
(cdr top-level-env-endptr)))
(set! top-level-env-endptr (cdr top-level-env-endptr))))
(append-instruction ins-pop)
r)))
(define compile-and
(lambda (l env rest)
(let ((out-label (fresh-label)))
(letrec ((helper (lambda (es rs)
(let ((r (compile-sexp (car es) env #t))
(es (cdr es)))
(if (null? es)
(begin
(append-instruction (asm-label-definition out-label))
(lambda () (do-compile-task r) (rs)))
(begin
(append-instructions ins-dup
ins-push false-value
ins-eq
ins-push (asm-label-reference out-label)
ins-jtrue
ins-pop)
(helper es (lambda () (do-compile-task r) (rs)))))))))
(if (null? (cdr l))
(append-instructions ins-push true-value)
(helper (cdr l) (lambda () #f)))))))
(define compile-or
(lambda (l env rest)
(let ((out-label (fresh-label)))
(letrec ((helper (lambda (es rs)
(let ((r (compile-sexp (car es) env #t))
(es (cdr es)))
(if (null? es)
(begin
(append-instruction (asm-label-definition out-label))
(lambda () (do-compile-task r) (rs)))
(let ((next-term (fresh-label)))
(append-instructions ins-dup
ins-push false-value
ins-eq
ins-push (asm-label-reference next-term)
ins-jtrue
ins-push (asm-label-reference out-label)
ins-jmp
(asm-label-definition next-term)
ins-pop)
(helper es (lambda () (do-compile-task r) rs))))))))
(if (null? (cdr l))
(append-instructions ins-push false-value)
(helper (cdr l) (lambda () #f)))))))
; when we can detect application of a builtin
; we can avoid function call overhead and just inline the assembly
(define compile-if
(lambda (l env rest)
(if (not (= (length l) 4))
(begin
(display "Error in compile-if wrong number of arguments\n\t")
(display l)
(newline)
(quit))
(let ((false-label (fresh-label))
(join-label (fresh-label))
(conditional (car (cdr l)))
(true-case (car (cdr (cdr l))))
(false-case (car (cdr (cdr (cdr l))))))
(let ((r1 (compile-sexp conditional env #t))
(x (append-instructions
ins-push false-value
ins-eq
ins-push (asm-label-reference false-label)
ins-jtrue))
(r2 (compile-sexp true-case env rest))
(y (append-instructions ins-push (asm-label-reference join-label) ins-jmp
(asm-label-definition false-label)))
(r3 (compile-sexp false-case env rest)))
(append-instruction (asm-label-definition join-label))
(lambda ()
(do-compile-task r1)
(do-compile-task r2)
(do-compile-task r3)
)
)
))))
(define compile-quoted-sexp
(lambda (s env rest)
(if (pair? s)
(let ((r2 (compile-quoted-sexp (car s) env #t))
(r1 (compile-quoted-sexp (cdr s) env #t))
)
(u-call-cons)
(lambda ()
(do-compile-task r1)
(do-compile-task r2)
#f))
(if (null? s)
(begin
(assembly-nil)
#f)
(compile-atom s env #t)))))
(define compile-quote
(lambda (s env rest)
(compile-quoted-sexp (car (cdr s)) env rest)))
(define compile-arguments
(lambda (n l env)
(if (null? l)
(assembly-make-args n)
(let ((r2 (compile-sexp (car l) env #t))
(r1 (compile-arguments n (cdr l) env)))
(lambda ()
(do-compile-task r1)
(do-compile-task r2)
)))))
(define compile-list
(lambda (l env rest)
(let ((s (find-special (car l))))
(if s
(s l env rest)
(let ((r1 (compile-arguments (length (cdr l)) (cdr l) env))
(r2 (compile-sexp (car l) env #t)))
(if rest
(u-call-funcall)
(u-call-tailcall)
)
(lambda ()
(do-compile-task r1)
(do-compile-task r2)))))))
(define compile-sexp
(lambda (s env rest)
(if (list? s)
(compile-list s env rest)
(compile-atom s env #f))))
(define assembly-builtin-header
(lambda (name)
(let ((uu-name (string-append "__" name)))
(append-named-consbox name
(asm-label-reference initial-env-label)
(asm-label-reference uu-name))
(append-instruction (asm-label-definition uu-name)))))
(define define-builtin-functions
(lambda (initial-env)
(begin
(let ((loop (fresh-label))
(out (fresh-label)))
;; (display "variadic plist")
(append-instructions
(asm-label-definition "__u_make_varargs_list")
ins-swap) ;; (rp nr-fixed-params)
(assembly-nil) ;; (rp nr-fixed-params nil)
(assembly-nrargs) ;; (rp nr-fixed-params nil nr-args)
(append-instructions
(asm-label-definition loop) ;; (rp nr-fixed-params l i)
ins-rot ;; (rp i nr-fixed-params l)
ins-rot ;; (rp l i nr-fixed-params)
ins-dup ;; (rp l i nr-fixed-params nr-fixed-params)
ins-rot ;; (rp l nr-fixed-params i nr-fixed-params)
ins-swap ;; (rp l nr-fixed-params nr-fixed-params i)
ins-dup ;; (rp l nr-fixed-params nr-fixed-params i i)
ins-rot ;; (rp l nr-fixed-params i i nr-fixed-params)
ins-sub ;; (rp l nr-fixed-params i (- i nr-fixed-params))
ins-push (asm-number 0) ;; (rp l nr-fixed-params i (- i nr-fixed-params) 0)
ins-eq ;; (rp l nr-fixed-params i (= (- i nr-fixed-params) 0))
ins-push (asm-label-reference out) ;; (rp l nr-fixed-params i (= (- i nr-fixed-params) 0) out)
ins-jtrue ;; (rp l nr-fixed-params i)
ins-rot ;; (rp i l nr-fixed-params)
ins-rot ;; (rp nr-fixed-params i l)
ins-swap ;; (rp nr-fixed-params l i)
ins-push (asm-number 1) ;; (rp nr-fixed-params l i 1)
ins-sub ;; (rp nr-fixed-params l (- i 1))
ins-dup ;; (rp nr-fixed-params l (- i 1) (- i 1))
ins-rot ;; (rp nr-fixed-params (- i 1) i (- i 1))
ins-rdrr) ;; (rp nr-fixed-params (- i 1) i (- i 1) env)
(u-call-car) ;; (rp nr-fixed-params (- i 1) i (- i 1) (car env))
(append-instructions
ins-swap ;; (rp nr-fixed-params (- i 1) l (car env) (- i 1))
ins-push vector-elems-offset ;; (rp nr-fixed-params (- i 1) l (car env) (- i 1) vector-elems-offset)
ins-add ;; (rp nr-fixed-params (- i 1) l (car env) (+ (- i 1) vector-elems-offset))
ins-load ;; (rp nr-fixed-params (- i 1) l (aref (car env) (- i 1)))
ins-swap) ;; (rp nr-fixed-params (- i 1) (aref (car env) (- i 1)) l)
(u-call-cons) ;; (rp nr-fixed-params (- i 1) (cons (aref (car env) (- i 1)) l))
(append-instructions
ins-swap ;; (rp nr-fixed-params (cons (aref (car env) (- i 1)) l) (- i 1))
ins-push (asm-label-reference loop) ;; (rp nr-fixed-params (cons (aref (car env) (- i 1)) l) (- i 1) loop)
ins-jmp
(asm-label-definition out) ;; (rp l nr-fixed-params i)
ins-pop ;; (rp l nr-fixed-params)
ins-rdrr) ;; (rp l nr-fixed-params env)
(u-call-car) ;; (rp l nr-fixed-params (car env)))
(append-instructions
ins-swap ;; (rp l (car env) nr-fixed-params)
ins-push vector-elems-offset ;; (rp l (car env) nr-fixed-params vector-elems-offset)
ins-add ;; (rp l (car env) (+ nr-fixed-params vector-elems-offset))
ins-stor ;; (rp (car env))
ins-ret))) ;; ((car env))
(begin
(append-instructions