forked from albertvanderhorst/yourforth
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathyourforth.fas
2225 lines (2140 loc) · 90.7 KB
/
yourforth.fas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
; yourforth: the simplest Forth compiler possible, i86 Linux version.
; Originated as a FASM version of ciforth created by ``m4'' from generic listing.
;
; This is a community effort in the spirit of the Forth Interest Group.
; It is public domain and only knows contributors, no copyright holders.
; Contributors: <when contributing, put your name here>
; yourforth $Revision: 6.28 $
; To build:
; wget http://flatassembler.net/fasm-1.70.03.tgz; tar xfz fasm-1.70.03.tgz; fasm/fasm yourforth.fas
; That is right. Not only did you brew coffee, you've been to Colombia, picked the beans,
; and roasted them as well! Next time you only need "fasm/fasm yourforth.fas".
; If you're new at Forth you may want to read the Chapter "gentle introduction"
; of yourforth.pdf.
; ---------------- preliminary remark -------------------------
; Normally a Forth in assembly style would use the official assembler
; on the operating system at hand, in our case gas or ``GNU as'' as it is called
; nowadays.
; The one-liner illustrates why we deviate here. A gas equivalent would download 1 Gbyte!
; I use fasm because it has overwhelming advantages.
; By implication the following advantages point to disadvantages of gas.
; 1. fasm accommodates one simple segment
; 2. fasm accommodates Intel syntax fairly
; 3. fasm has a traditional macro facility
; 4. fasm needs no linking step
; 5. fasm is small, basically an executable.
; Also it accommodates symbol reassignment and labels that are local to macro's,
; which is kind of essential to the way we want to use macro's.
; ------------------ 0. introduction ------------------------------------
; A famous chess player once stated that, if we were to find aliens living on
; another planet, we should expect that they will play Go. Likewise
; discovering Forth is something every civilisation in the universe will
; do eventually. Chuck Moore claims to have discovered, rather than invented, Forth.
; You'll find the structure of yourforth simple, and compelling.
; The model for Forth is the stored program computer, actually the best invention since
; sliced bread. The first computers had a work area where numbers were stored, and
; punched card that represented the instructions. Then someone came with the idea, to
; have one memory space, and store numbers there, but also put the program in the
; same space and then have the computer read the instructions from memory.
; Now a small program can write a larger program, and then execute it!
; It is similar to how the human brain continuously reconditions its synapses
; in a perpetual learning process. At last a machine with the potential of artificial
; intelligence was born. So a memory item is identified by a number,
; the "address" of the memory item.
; The first program-building program was probably an assembler, but at the time the larger
; program could not be deposited into memory and had to be punched onto tape.
; Guess what? It is still done this way, only the tape has been replaced
; by disk files or EPROM. Only the artificial-intelligence-languages,
; LISP and Forth, allow to have a program that extends itself
; indefinitely. Interestingly, you will find that most programs cripple
; the computer, by separating into a text and a data segment, the
; equivalent of the punched card and the work area of above. Not so in
; Forth, where data and code belonging together remain close.
; It is hard to instruct a traditional compiler like GNU's to have such a
; unified memory space. It is illustrative that the option for the linker is
; called -magic and it is probably only there, because Richard
; Stallman is a fan of that other ai-capable language, LISP. This applies
; to GNU as. Of course what we want is simple, and with fasm doing
; simple things is easy.
; The first c-compiler in existence was written in assembler (what else?)
; but then immediately they rewrote the compiler in c.
; Likewise a PL-1 compiler is written in PL-1, an algol68 compiler
; is written in algol68. Can't a Forth be written in Forth?
; Actually it is! It is even easier. If you want to add features to a c-compiler
; you must edit the source and rebuild it. Being an interpreter yourforth
; just can include a Forth source file with the facilities
; you want. Only the kernel here has to be written in assembler,
; like the first c-compiler ever.
; Even in this assembler source you will see that most of the code is actually
; Forthcode, that is compiled by the assembler (with a renaming convention).
; In the last part there is even Forth source that is compiled by yourforth
; at startup. That is equivalent to including a source file, but less messy.
; Forth is the only language simple enough to make this feasible.
; For the following you must be familiar with the concept of a data stack
; (ex. 0.1) and with behaviour of yourforth under a test (ex. 0.2).
; ---------------------- 1. memory and addressing ---------------------------------
; Memory consists of bytes
; and, yes, nothing funny, that are the bytes you buy in the store.
; Those bytes are numbered, and thus identified, and they
; can be used for data or code, as I choose.
; yourforth uses memory between BM and EM. (exercise)
; So you can use the byte addressed as BM, as BM+1, up till and including EM-1,
; but not EM. (ex. 1.1)
; Bytes can represent characters, small pictures that are associated
; with sounds (in most languages) or concepts in e.g. Chinese.
; Chunks of bytes are needed to represent numbers as large as BM.
; We take 4 bytes together to form a cell, resulting in a 32-bit Forth,
; and restricting memory size to 4 Gbyte. Now we can manipulate
; fairly large numbers, of plus or minus 2000 millions, and addresses
; (which are just numbers)
; up to ca. 2 Gbyte. We will shun negative addresses in yourforth.
; The word "word" has a special meaning in Forth, so using official Forth jargon
; "cell" for those chunks diminishes confusion.
cw = 4 ; Cell width in bytes: 4 bytes taken together form one 32-bit cell.
; 32 bits also happens to be the word size of the processor we are using, or more
; precisely -- because it is probably a 64-bit processor -- of the wordsize we
; are instructing the processor to use.
use32 ; Instruct the assembler to generate code for a 32-bit word size.
; This is not enough, Linux must know that this is a 32 bit (ELF)
; executable (not ELF64), to start the program in the right mode.
; This is the way to instruct fasm to put such a header at the front
; of our program.
FORMAT ELF EXECUTABLE ; 32 bit ELF header (not ELF64); fasm makes an executable directly, no linker.
; We are not particularly interested at what address Forth starts, as long as
; we have enough bytes to work with, and
; that's what we define here. It is made to work, by declaring the right amount
; of space.
; (exercise)
;
; Memory budget, all sizes in bytes.
forthsize = 1 shl 20            ; 0x100000: one megabyte, for starters.
;
tasksize  = 1 shl 16            ; 0x10000: total size of the uninitialised memory near EM
stacksize = tasksize shr 2      ; Its subdivisions: a quarter of it ...
cibsize   = tasksize shr 1      ; ... and half of it.
; ---------------------- 2. Communicating with the world --------------------
; Forth is an interpreter, an environment that you can type commands into
; to get them executed.
; Characters are glyph's, the bitmaps on the screen, that are associated
; with the content of one byte.
; Forth doesn't care much what the console
; shows you, nor what content results from pressing a particular key.
; Internally only rows of bytes are compared, and build up,
; and they are passed to the console for display.
; The typing and the showing of results goes through
; a "console" which is typically a terminal window on a Linux X-screen, but
; could equally well be an old fashioned VT100 terminal connected with a
; 100 m RS232 line. Linux has the convenient notions of a "standard input" and
; a "standard output" with facilities to use them. We let Linux take care of
; how this i/o is setup, and within Forth we just use it.
; This file is in UTF-8, but the character encoding doesn't count for much in Forth.
; (ex. 2.1). The inventor of Forth, Chuck Moore uses a Huffman
; encoding of his whim for colorforth.
; This brings us to the subject of the interface with the Operating
; System. The interfacing with Linux is brutally direct. We use the system calls,
; that have been given numbers. Each performs a basic functionality, e.g.
; the open system call opens a file.
; Their documentation shows on commands like "man 2 exit".
; Linux system call numbers (32 bit interface), in ascending order.
; The names are exactly those of the Linux include files.
__NR_exit       = 1
__NR_fork       = 2
__NR_read       = 3
__NR_write      = 4
__NR_open       = 5
__NR_close      = 6
__NR_waitpid    = 7
__NR_creat      = 8
__NR_unlink     = 10
__NR_execve     = 11
__NR_chdir      = 12
__NR_lseek      = 19
__NR_pipe       = 42
__NR_ioctl      = 54
__NR_select     = 82
__NR_ioperm     = 101
__NR_iopl       = 110
__NR_newselect  = 142
; See XOS of how the dispatching takes place. (ex. 2.2)
; The numbers have been copied from a secret place in Linux's include hierarchy (exercise)
; and the exact names are retained, which means an exception to the naming convention
; that words containing capitals or underscores are Forth names.
;
; Experienced assembler programmers expect two segments, one that we will fill
; with our forth code, and one uninitialized that may be very large.
; (We could get by with one segment if we initialize all space. Then we'll
; have 1 Gbyte executables.)
; The first segment would be marked BITS (or some such), and the second one NOBITS,
; and while linking we
; would have made sure that they fit snugly together. Now this is a bit silly,
; because the assembler perfectly knows that the NOBITS part is not initialised.
; Also it is not easy. Fitting two segments snugly goes against the spirit of the
; linker. There is no end of incantations needed, and worse yet, the high priests
; change those at will.
; Surprise! fasm doesn't have this nonsensical keyword. Not only doesn't it store
; the content of segments without content, it doesn't store the uninitialised part
; of segments. We can put everything in
; one segment, and the uninitialised part occupies no room on our hard disk.
segment executable readable writable ; The one and only segment: it holds code as well as data, hence all three attributes.
; So one segment, no separation between the punch cards containing executable
; code and a workspace containing our data. We can have the computer invent
; a program, put it into its memory then execute it. The last part of this source,
; is Forth code. The first part is sufficient for compiling that during startup.
; It is fast enough that you will not notice it. (exercise).
bm: ; Bottom of memory: this is the first address of Forth. Its value is left to the assembler.
; As said, we are not particularly interested in what address bm represents and don't
; force the assemblers hand.
; You probably know a little bit how an interpreter works. Contrary to a compiler
; that may inspect a whole file, it takes the input one word at a time.
; In Forth word's are separated by blank space. The word is looked up in a list,
; and it is executed, which means that it gets in control. There is no "interpreter
; loop" that sniffs the word out, and refuses it if it is no good. No. You type
; "DROP" and if it's there, you get it. The Intel program counter is set
; to the machine code of "DROP" and the ship sails.
; Not all that we want the interpreter to understand can be put in the list, e.g.
; we can't put all 32 bit numbers there. There are other notations for
; expressions that are known at compile time and constant : "aap" 1234 0x1234.
; We'll call those denotations.
; The solution for denotations, is that it is sufficient to have a partial
; or ambiguous match. yourforth requires that the first characters are
; recognized, so it can handle a 0x1234 notation for hexadecimal but not
; Intel's 1234H.
; Being a prefix is merely a property of the Forth definition and it doesn't
; make looking up a word all that harder.
; So, if the first part of a word matches a prefix definition in the
; list, e.g. " for strings, then the definition with name `` " ''
; is executed. For you new to Forth, a string consisting of one double
; quote is a perfectly good Forth name. Get used to it!
; It itself takes care of parsing the remainder of the denotation.
; At the very end of the list we have a catch-all, which always matches,
; because it is the empty string. This interprets the string as a
; number. I admit, this is a bit of a trick.
; Now the intelligent part. You can use the Forth system to extend itself by
; instructing the interpreter to add a new command to the list.
; We will discuss that later in great detail. For now let's sketch how
; the dictionary looks. It is basically a list of Forth words, each with
; a header. The list is expandable, and the most recent word is highest
; in memory. In Forth you can redefine a word, then it hides a word of
; that same name that might exist. You get a warning, but it is not an
; error. In this way you can replace a word with a version with
; debugging information, you can use the old version to build the word.
; In interactive sessions this feature is often used to correct
; mistakes. Oops, forgot a carriage return. Let me just redefine it
; as itself plus a carriage return.
;
; Forth definitions despite their division in categories like colon
; definitions, variables, constants, cd-objects are uniform. Zooming in
; we see the header, a structure with 5 fields each of one cell,
; followed by memory that is owned by this definition, then the
; following header. What is in the memory is up to the definition. You
; may consider this Forth's version of an object. If there is one thing
; to remember about objects that is that objects have a character and an
; identity. An objects character, its type or class, is determined by
; its behaviour or code. The identity of an object is determined by the
; data it carries. Likewise Forth's object have a code field and a data
; field. If you are into objects, you may associate the code field with
; methods and the data fields with fields in traditional objects. You
; will however discover that Forth is uncannily concrete and precise.
;
; The list mechanism requires a link field. This starts at the most
; recent word. Its link contains the address of the header of the
; previous definition, and so on, and the last definition contains a
; zero. Then of course there is one field for the name. The flag field
; with bits up for miscellaneous properties, brings the field count to 5.
;
; So let's summarize.
; A definition consist of a header with 5 fields CDFLN and its data
; area. The definitions together make up the memory up to a point that
; moves up when adding new definitions. Then we have free
; memory up till the end of a reserved area with a few scratch areas, notably
; the stack.
;
; The name is not very essential. You can change the name of any
; definition in a working program and it keeps working all
; the same. A definition can only be found if you know the name, of
; course. (EXERCISE!)
;
; The link field also is not. It is only used by the interpreter to look
; up a definition. A typical application program will never inspect link
; fields. Set them all to zero and the program keeps working fine. A
; definition can only be found if it is linked in properly, so the
; compilation of that program cannot succeed. In fact the : definition
; that starts compilation cannot be found. Exercise.
;
; The flag field contains a bit up for properties a word might have. For
; example it may be hidden, which means that it must be ignored during a
; lookup. Exercise.
;
; More essential is the data field. All but the basic instruction of the
; Forth engine have data, associated: a constant value, a buffer
; address, a structure with fields. Also note that most Forth code is
; interpreted. The interpreted code is data, not machine code. Exercise.
;
; Absolutely essential is the code field. This contains an address and
; the genuine Intel Program counter is directly and unconditionally
; loaded with that address if the definition is called upon. If that
; address is wrong you end up in genuine limbo. Remember Forth is
; concrete and precise, no handwaving. I repeat it for clarity. If you
; type in a Forth word, it is looked up, and its machine code address is
; fetched. Then your Intel machine starts, for better or worse,
; executing the code at that address. There is no safety net. If that
; code doesn't cooperate with Forth-as-a-whole your machine crashes.
; (exercise)
; You'll see no symbolic definition for field offsets in the assembler code.
; The only offsets that occurs is 1*cw, which means the data field offset.
; In high level Forth the offsets are handled by words >CFA >DFA etc.
;
; To an interpreter strings play of course an important role. Forth
; strings are represented as an address and a length. Putting two things
; on the stack is of course no problem. This may be unfamiliar for the
; c-programmer, but familiar for the Pascal programmer. Note that modern
; character encodings may contain embedded zero's, and several bytes may
; combine to represent glyph. yourforth doesn't care. If stored in
; memory the length occupies a whole cell, and the characters follow,
; one per byte, as far as this file is concerned. Counting those
; characters is a chore, as we will need a string for each name of a
; built in command.
; That chore is easily handled by the assembler using a macro.
; string content
; Lay down a string in memory: first one cell with the number of bytes,
; then the bytes of `content' themselves, then padding up to the next
; cell boundary.
macro string content
{
        local   chars, chars_end
        dd      chars_end - chars       ; The length cell
  chars:
        db      content                 ; The characters, one per byte
  chars_end:
        align   cw                      ; Keep whatever follows cell aligned
}
; "content" is of course the string itself, and fasm treats the start
; and the end of the string as locals, i.e. they are generated new for each
; string and are not visible outside the macro's.
; DD lays down a 32 bit value. DD is short for "define double word", i.e. two 16-bit words.
; In yourforth a lot of DD's are used, to represent headers as well as interpreted
; code. You see that we keep those cells aligned to 32 bit boundaries. This is
; not strictly necessary, but it makes inspecting memory so much easier.
; <I'm tempted to use a 'fix macro' to redefine DD as DC >
; The header macro lays down the 5 fields in memory. You don't need to understand
; the following fully yet. The details are introduced gradually as each type of
; definition will be explained where it first occurs.
; All fields are passed to the macro
; as is, except for the name that is passed as a string. The string is
; to be laid out in memory just like in the string macro and a pointer
; to that is put in the name field.
; header dea, cfield, dfield, ffield, lfield, forthname
; Lay down the name string of a definition, followed by its five header
; cells: code, data, flag, link and name.
;   dea        becomes the assembler label of the first header cell
;   forthname  is stored with the `string' macro; its address ends up
;              in the name field
; Afterwards `lastlink' stands for this dea, so the next header can pass
; `lastlink' as its lfield.
macro header dea, cfield, dfield, ffield, lfield, forthname
{
        local   name_str
  name_str:
        string  forthname               ; Length cell, characters, alignment
  dea:  dd      cfield, dfield, ffield, lfield, name_str
        lastlink EQU dea
}
; Learn the five letters cdfln by heart!
; They stand for code, data, flag, link and name.
;
; Let's look at an example first.
; header _BM_, docon, bm, 0, lastlink, "BM"
; This is a constant containing the bottom of the memory.
; _BM_ is the name such as used in the assembler, "BM" is the name
; within Forth. This string must be visible from Forth, unlike _BM_
; and bm, that stay within the assembler.
; docon is the code executed for a constant.
; bm is the data, i.e. a constant known by the assembler, which is
; in this case the label representing the bottom of the memory,
; that we defined above.
; 0 means that there are no flags. lastlink just means normal linking.
;
; Apart from a better overview and taking care of name strings,
; "header" makes one other thing easier. It is cumbersome to fill
; in lfield with the name of the previous definition. By setting
; lastlink -- changing it with every invocation of header -- the name of
; the previous definition is always available as "lastlink".
; So you will see that almost always all header specifies lastlink
; for lfield.
; As a result a definition can be moved around without changes to its header.
; We can always override this by replacing
; "lastlink" by the actual value we want to have.
; "dea" is the name used within the assembler. Remember forthname is a
; string like "+" that probably is not an acceptable name for the word
; in the assembler. So we need "dea" to use the word in the assembler.
; The "dea" passed to the header macro, results in that name being usable
; in the assembler as a number and an address. It contains only lower
; case characters and underscores.
; Each name in yourforth itself contains any assortion of weird characters and capital
; letters, no lower case. Let's take R@ as an example.
; It is identified by its dea: 'R@ a denotation, a fixed number.
; Within the assembler this number is represented by the name r_fetch.
; A simpler example is DROP. The assembler name is "DROP", which is the same
; number as 'DROP in yourforth.
; Here is the renaming convention:
; The capitals are kept, and if the resulting name clashes in the assembler,
; underscores are added before and after.
; Any special characters are replaced according to the following table.
; We'll have to write them out, like
;   @      !      %        ,      *     (      <     >        /      +     -
;   fetch  store  percent  comma  star  paren  less  greater  slash  plus  minus
;
; [ ~
; bracket not
;
; Sometimes < and > are used to indicate data transfer and are then
; named to or from. I'm lazy and translate (CREATE) to paren_CREATE,
; not leftparen_CREATE_rightparen.
;
; fetch and store are used in forth names that fetch something from memory
; or store something in memory.
;
; A denotation is a constant value that is generated on the fly by
; the compiler or interpreter.
; Because this is difficult, we postpone it.
; The most important types of definitions are lowlevel code, high level code,
; low level objects(constants, variables/buffers) and high level objects.
; They are gradually introduced by example, but here is an overview of the
; different types. The names starting in "do" are assembler labels.
; - cfield contains docon. The definition is a constant, i.e. when executed it
; puts a constant on the stack. That constant is present at the DFA.
; - cfield contains dobuf. The definition is a variable or buffer, i.e. when executed it
; puts an address on the stack of an area where data can be stored.
; That address is present at the DFA, typically dea+5*cw
; - cfield contains docol. docol is the interpreter of Forth. It takes care that
; the high level code is executed, that is found via the DFA. This typically
; contains dea+5*cw. In other words the high level code contained at the post
; header address is executed. These are the ordinary functions.
; - cfield contains dea+5*cw. In other words, its machine code is
; unique and contained at the post header address to be executed.
; These are the regular low level words that make up the Forth
; language like "+" and "DROP". Of course this code is different for
; all of those words.
; - cfield contains dodo. The Forth definition is generated using a CREATE DOES>
; construct, Forth's poor man's objects of the 1970's. We will elaborate on this later.
;
;
figrel          = 6             ; NOTE(review): its use is not visible in this part of the file.
; One bit per property; presumably these are the bits of a header's flag
; field -- confirm against the words that test them.
dummymask       = 1 shl 0
invisiblemask   = 1 shl 1
immediatemask   = 1 shl 2
denotationmask  = 1 shl 3
prefixmask      = immediatemask or denotationmask       ; Both bits together
epipe           = 32            ; 0x20. NOTE(review): its use is not visible in this part of the file.
;
;
; The virtual machine reserves some of the Intel registers for its own.
; In the first place we need an interpretation pointer, to keep track
; of where we are in the interpreted code. It points to the -- high
; level -- instruction that is to be executed after the current
; definitions finishes. This is similar to the program counter in an
; assembly program and we will use SI for it.
; Then of course we need a data stack, for which we use
; the Intel stack, and the stack pointer is just SP.
; This leaves us with the return stack, that keeps track of
; where we are. Whenever SI is needed to interpret another high
; level definition, the previous value is stored there, and restored
; when we want to continue with the calling definition.
; Intel has run out of stacks
; already, but we can emulate a stack using the BP register.
; It just means that we have to use a couple of instructions to push
; SI, or pop it.
;
; Within yourforth you have the constants DSP0 and RSP0 that contain
; the initial stack pointers for the data and the return stack
; respectively. [ ex. 2 ]
; COLD is the Forth definition that starts
; up Forth. So we load the registers SP and BP with the data field of
; DSP0 and RSP0 and SI with the post header address of COLD, where its
; Forth code resides. It ends with machine code that results in executing
; the next Forth instruction. This is done all the time, so we have
; a shorthand for it, called next.
; next: go on with the following word of the high level code.
; Every code definition ends with it.
macro next {
lodsd ; eax := the dea that SI points to; SI moves on one cell (the direction must be forward)
jmp dword[eax] ; Jump to the address contained in the code field, the first cell at the dea
}
; Now the choice for SI becomes clear. The lodsd instruction loads the dea
; from the instruction stream into EAX, and increments SI on the way,
; making it point to the next Forth instruction.
; [Actually this is a bit tricky. This requires that the so called
; direction bit is set to forward, using the instruction cld. After
; executing the instruction std, lodsd is switched to decrementing.
; We keep the forward direction always.]
; Then we jump to what is contained in the code field of the dea, so actually
; [eax+0*cw] . We rely on that code to end again with a ``next'', and if
; it needs to use SI (e.g. for nesting), it must restore it beforehand.
; exercise with (CREATE)
; Executing the code is started with an eax that contains the dea of the
; word. So even if the code field is filled with the same pointer,
; the objects data is different and results in different data or even different
; action. OO fan's will recognize this, it is just a self pointer.
; Start up: prepare the registers of the virtual machine, then let `next'
; begin with the high level code of COLD.
        cld                             ; Forward, for ever: `next' stops working otherwise
        mov     esi, COLD+5*cw          ; SI: the post header address of COLD
        mov     ebp, [RSP_zero+1*cw]    ; BP: return stack pointer, from the data field of RSP_zero
        mov     esp, [DSP_zero+1*cw]    ; SP: data stack pointer, from the data field of DSP_zero
        next
;
; The following word is our first high level Forth word or "colon definition"
; in Forth speak.
; BYE: leave Forth. High level code, linked explicitly to FORTH instead
; of to lastlink.
        header  BYE, docol, BYE+5*cw, 0, FORTH, "BYE"
        dd      _zero_, _, _            ; One parameter 0 and two dummy parameters
        dd      _one_, XOS              ; System call number 1, then the call itself
        dd      EXIT
; Executing this words means that the program jumps to address docol that takes
; care that SI is going to point to _zero_ , then does next.
; This is possible because eax contains the address BYE of this header
; as discussed with the description of the next macro.
; docol: the code field routine of all high level words.
; On entry eax holds the dea of the word that is to be executed.
docol:  mov     [ebp - cw], esi         ; Save SI in the cell below the top of the return stack
        lea     ebp, [ebp - cw]         ; and let that cell be the new top
        mov     esi, [eax + cw]         ; SI := data field: the high level code to interpret
        next
; In the case of BYE the data field contains BYE+5*cw, which is the address
; that contains _zero_.
; So now we have it. Consecutively _zero_ _ _ _one_ XOS and EXIT are executed.
; The word XOS finds 0 X X 1 on the stack, which instructs it to do linux
; system call 1 (an exit) with one parameter 0 and two dummy parameters.
; This ends the Forth session with a status code of 0, meaning okay. (exercise).
; [One last remark. Because BYE is our first definition, you may expect it to
; have a link of 0 and to be it last in the linked list. But we want the
; word FORTH and the denotations to come last, yet not discuss them until later.]
;
; EXIT is the reverse of docol. ( In the above case EXIT is not reached.)
; This is our first example of a low level word, or "code definition".
; You see that we use our lastlink trick as described with the header macro.
; EXIT: resume the high level code that docol interrupted.
        header  EXIT, EXIT+5*cw, 0, 0, lastlink, "EXIT"
        mov     esi, [ebp]              ; SI := top of the return stack
        lea     ebp, [ebp + cw]         ; and drop that cell from the return stack
        next
; The next executed at the end of the FORTH instruction before EXIT,
; jumps to EXIT+5*cw which is the mov instruction.
; So EXIT just abandons the current SI and loads it with what docol has left.
; Note that the data field is not used and the content of AX is not used.
; Because you understand Intel assembly language a bit, I present you
; with a table of the 5 instructions of the virtual machine:
; Forth machine cells Intel equivalent
; DEA, invocation 1 call XXX
; EXIT 1 rts
; BRANCH rel 2 jr rel
; 0BRANCH rel 2 jz rel
; LIT value 2 pushi value
; expandable your choice whatever
;
; That is all there is to it. If it seems few,
; let me remind you that all operations like + fall under the
; invocation of a definition. The conditional jump can be used to
; construct if-endif's (called IF THEN 's in Forth) and begin-until's.
; Together with the unconditional jump you can have all control
; constructs. EXIT BRANCH 0BRANCH LIT are the names of actual
; definitions in the dictionary. Their invocation is the same as
; for just any DEA.
; The last line I put in to remind you that this is not the end of it.
; You can add basic constructs e.g. special looping constructs later.
; You can use assembler language in defining it, and you can do this
; in a separate file, that you will choose to include, or not.
; FIXME: actually, those looping constructs are still built in.
; LIT does something very similar to next, it fetches
; the cell from SI with auto increment, using the now familiar
; lodsd instruction. Now we have to put that on the data stack.
; LIT ( -- x ) : push the cell that follows LIT in the threaded code.
header LIT, LIT+5*cw, 0, 0, lastlink, "LIT"
lodsd ; AX := cell at SI, SI advances past the inline value
push eax ; the inline value becomes the new top of the data stack
next
; BRANCH also fetches a cell in line that serves as an offset to SI.
; The three instructions
; following the lodsd round it up to a multiple of cw, an action that is called
; alignment. Why that is done, we'll come back to later.
; Normally the offset is a multiple of cw and nothing happens.
; SI is incremented by the offset and
; code is henceforth fetched from there.
; BRANCH ( -- ) : unconditional jump; the offset is the cell following BRANCH.
header BRANCH, BRANCH+5*cw, 0, 0, lastlink, "BRANCH"
BRAN1: lodsd ; AX := inline offset, SI now points past the offset cell
dec eax ; alignment, step 1 of 3
or al, cw - 1 ; alignment, step 2 of 3: set the low bits
inc eax ; alignment, step 3 of 3: AX is now rounded up to a multiple of cw
add esi,eax ; interpretation continues at SI + aligned offset
next
; 0BRANCH is similar to BRANCH. The difference is just that before
; the offset is added, it can be zeroed. This depends on a flag
; popped from the data stack.
; 0BRANCH ( flag -- ) : jump by the inline offset when flag is zero,
; otherwise continue with the code after the offset cell.
header zero_BRANCH, zero_BRANCH+5*cw, 0, 0, lastlink, "0BRANCH"
        lodsd                           ; AX := inline offset, SI now points past it
        pop     ebx                     ; the flag
        test    ebx, ebx
        jz      ZBRAN1                  ; flag zero: keep the offset, so we jump
        xor     eax, eax                ; flag non-zero: offset := 0, so we fall through
ZBRAN1:
        add     esi, eax
        next
; This completes the virtual Forth machine that is used in yourforth.
; You can however at all times add anything that you can think of.
; Probably you will find 0BRANCH confusing, and find it hard to work out what
; code is executed with what flag.
; Also here again we want the offsets calculated for us.
; This leads us to define some macros.
; goto label : assemble BRANCH plus the offset cell that makes it land on label.
macro goto label
{
        dd      BRANCH, label-$-cw
}
; 0BRANCH always emulates a control structure, and different macros
; are used to show which one.
; ifto label : assemble 0BRANCH plus its offset cell; used where the code
; reads as an "if".
macro ifto label
{
        dd      zero_BRANCH, label-$-cw
}
; whileto label : assembles the same two cells as ifto; used where the code
; reads as a "while" loop test.
macro whileto label
{
        dd      zero_BRANCH, label-$-cw
}
; untilto label : assembles the same two cells as ifto; used where the code
; reads as the "until" at the end of a loop.
macro untilto label
{
        dd      zero_BRANCH, label-$-cw
}
; They all have in common that on the preceding condition
; being true/non-zero/on,
; the code following is executed. Otherwise we skip to "label".
; For ifto that code is executed once up till "label".
; For whileto the code following
; is part of the loop, so we stay in the loop. The label is where we want
; to jump out of the loop.
; For untilto the code following is outside of a loop, so we fall through
; on a true condition. Now the label is the start of the loop.
; Don't worry, Forthers hate complicated control structures. In the
; code of <your>forth you will not find a single nested loop.
;
;
; -------------------- low level definitions: operators ----------------------------
; Operators are definitions that take all inputs from the stack,
; do some transformation and put all results back on the stack. In other
; words they have no side effects. Because they are so simple and fundamental
; they are mostly written in assembler.
; Examples are operations like + or <.
; This word does nothing.
; NOOP ( -- ) : the code consists of next only, so control passes straight on.
header NOOP, NOOP+5*cw, 0, 0, lastlink, "NOOP"
next
; Another word that does almost nothing, it pushes a "whatever" value to the
; stack
; _ ( -- x ) : push one cell whose value the caller should not care about.
header _, _+5*cw, 0, 0, lastlink, "_"
push eax ; whatever AX holds at this moment is the value pushed
next
; -------------------- low level definitions: stack operations -----------------
; Stack operations reorder, duplicate or remove items from the data stack
; without doing a transformation on them. They are essential to Forth, of course.
; DUP ( x -- x x )
header _DUP_, _DUP_+5*cw, 0, 0, lastlink, "DUP"
        mov     eax, [esp]              ; read the top cell without removing it
        push    eax                     ; and push a copy
        next
; OVER ( x1 x2 -- x1 x2 x1 )
header OVER, OVER+5*cw, 0, 0, lastlink, "OVER"
        mov     eax, [esp + 1*cw]       ; x1 sits one cell below the top
        push    eax
        next
; SWAP ( x1 x2 -- x2 x1 ) : exchange the two top cells where they lie.
header SWAP, SWAP+5*cw, 0, 0, lastlink, "SWAP"
        mov     eax, [esp]              ; x2
        mov     edx, [esp + 1*cw]       ; x1
        mov     [esp], edx              ; x1 becomes the top
        mov     [esp + 1*cw], eax       ; x2 goes below it
        next
; DROP ( x -- )
header DROP, DROP+5*cw, 0, 0, lastlink, "DROP"
        lea     esp, [esp + 1*cw]       ; step the data stack pointer over one cell
        next
; NIP ( x1 x2 -- x2 )
header NIP, NIP+5*cw, 0, 0, lastlink, "NIP"
        pop     eax                     ; x2
        mov     [esp], eax              ; overwrite x1, which is now the top cell
        next
; 2DROP ( x1 x2 -- )
header two_DROP, two_DROP+5*cw, 0, 0, lastlink, "2DROP"
        lea     esp, [esp + 2*cw]       ; step the data stack pointer over two cells
        next
; 2DUP ( x1 x2 -- x1 x2 x1 x2 )
header two_DUP, two_DUP+5*cw, 0, 0, lastlink, "2DUP"
        mov     eax, [esp]              ; x2
        mov     edx, [esp + 1*cw]       ; x1
        push    edx                     ; copies go on in the original order
        push    eax
        next
; 2SWAP ( x1 x2 x3 x4 -- x3 x4 x1 x2 ) : exchange the two top pairs in place.
header two_SWAP, two_SWAP+5*cw, 0, 0, lastlink, "2SWAP"
        mov     eax, [esp]              ; x4
        mov     edx, [esp + 2*cw]       ; x2
        mov     [esp], edx              ; top    := x2
        mov     [esp + 2*cw], eax       ; third  := x4
        mov     eax, [esp + 1*cw]       ; x3
        mov     edx, [esp + 3*cw]       ; x1
        mov     [esp + 1*cw], edx       ; second := x1
        mov     [esp + 3*cw], eax       ; fourth := x3
        next
; 2OVER ( x1 x2 x3 x4 -- x1 x2 x3 x4 x1 x2 )
header two_OVER, two_OVER+5*cw, 0, 0, lastlink, "2OVER"
        mov     eax, [esp + 3*cw]       ; x1 is the fourth cell from the top
        push    eax
        mov     eax, [esp + 3*cw]       ; after that push, x2 is at the same distance
        push    eax
        next
; The return stack can be used for scratch. For an example see SDSWAP
; >R ( x -- ) ( R: -- x ) : move the top data cell to the return stack.
header to_R, to_R+5*cw, 0, 0, lastlink, ">R"
        lea     ebp, [ebp - 1*cw]       ; open up one return-stack cell
        pop     dword [ebp]             ; and pop the data cell straight into it
        next
; R> ( -- x ) ( R: x -- ) : move the top return-stack cell to the data stack.
header R_from, R_from+5*cw, 0, 0, lastlink, "R>"
        push    dword [ebp]             ; copy the return-stack top to the data stack
        lea     ebp, [ebp + 1*cw]       ; then release it from the return stack
        next
; R@ ( -- x ) ( R: x -- x ) : copy the top return-stack cell to the data stack.
header R_fetch , R_fetch +5*cw, 0, 0, lastlink, "R@"
        push    dword [ebp]             ; the return stack itself is left untouched
        next
; SDSWAP reorders a,b1,b2 on the stack into b1,b2,a.
; We have no way to directly get b1 into place, but if we first
; move b2 out of the way to the return stack, we can, by swapping.
; Now get b2 back, leaving b1,a,b2. That we can handle.
; SDSWAP ( a b1 b2 -- b1 b2 a )
header SDSWAP, docol, SDSWAP+5*cw, 0, lastlink, "SDSWAP"
        dd      to_R, SWAP              ; a b1 b2 -- b1 a       with b2 parked on the return stack
        dd      R_from, SWAP            ; b1 a -- b1 a b2 -- b1 b2 a
        dd      EXIT
; ------------------------------------------------------------------------------
; In yourforth a stack is just a block of memory set aside for the purpose.
; We need a few words to control the stacks.
; DSP@ ( -- addr ) : push the data stack pointer as it was before this push.
header DSP_fetch, DSP_fetch+5*cw, 0, 0, lastlink, "DSP@"
        push    esp                     ; stores the value ESP had before being decremented
        next
; DSP! ( addr -- ) : make addr the data stack pointer.
header DSP_store, DSP_store+5*cw, 0, 0, lastlink, "DSP!"
        mov     esp, [esp]              ; the top cell is the new pointer; nothing else of the old stack is kept
        next
; DEPTH ( -- n ) : number of cells on the data stack before DEPTH was called.
; DSP_zero is defined elsewhere; presumably it pushes the pointer value of
; the empty data stack.
header DEPTH, docol, DEPTH+5*cw, 0, lastlink, "DEPTH"
        dd      DSP_zero, DSP_fetch, _minus_    ; distance in bytes between the two pointers
        dd      LIT, cw, _slash_                ; bytes -> cells
        dd      _one_, _minus_                  ; the cell left by DSP_zero was counted too: take it off
        dd      EXIT
; RSP@ ( -- addr ) : push the return stack pointer.
header RSP_fetch, RSP_fetch+5*cw, 0, 0, lastlink, "RSP@"
        mov     eax, ebp                ; BP is the return stack pointer
        push    eax
        next
; RSP! ( addr -- ) : make addr the return stack pointer.
header RSP_store, RSP_store+5*cw, 0, 0, lastlink, "RSP!"
        mov     ebp, [esp]              ; BP := top data cell
        lea     esp, [esp + 1*cw]       ; and remove that cell from the data stack
        next
; ---------------------------- operators with results ------------
; Apart from the reordering or duplicating operators, there is a whole slew of
; operators that have no side effects, i.e. the only result is on the
; stack.
; All the usual logical and arithmetical operators are present in Forth,
; but in this Forth there is nothing floating point related.
; 0<> is the most fundamental of logical operators.
; It takes a dirty flag, anything not zero, and turns it into a proper
; flag, i.e. all bits set. A false flag, zero, remains zero.
; Its code is interesting. neg subtracts the number from zero, which generates
; a borrow if it was not zero. So it transfers the flag to the carry bit.
; sbb transfers the carry bit to a flag in a register. Subtracting a
; register from itself with borrow generates a zero, unless the borrow
; was set to begin with.
; 0<> ( x -- flag ) : flag has all bits set when x is non-zero, else it is zero.
header zero_unequal, zero_unequal+5*cw, 0, 0, lastlink, "0<>"
pop eax
neg eax ; carry is set exactly when x was non-zero
sbb eax,eax ; AX := AX - AX - carry, i.e. 0 or -1
push eax
next
; Now 0= is easy. To 0<> add an instruction to complement the carry flag
; 0= ( x -- flag ) : flag has all bits set when x is zero, else it is zero.
header zero_equal, zero_equal+5*cw, 0, 0, lastlink, "0="
pop eax
neg eax ; carry is set exactly when x was non-zero
cmc ; invert it: carry is now set exactly when x was zero
sbb eax,eax ; AX := AX - AX - carry, i.e. 0 or -1
push eax
next
; NOT is just another name for 0=. We copy the code field of "0="
; Header only: the code field argument points at the code that follows the header of zero_equal.
header _NOT_, (zero_equal+5*cw), 0, 0, lastlink, "NOT"
; 0< ( n -- flag ) : flag has all bits set when n is negative, else it is zero.
header zero_less, zero_less+5*cw, 0, 0, lastlink, "0<"
        pop     eax
        sar     eax, 8*cw - 1           ; smear the sign bit over the whole cell
        push    eax
        next
; NEGATE ( n -- -n )
header NEGATE, NEGATE+5*cw, 0, 0, lastlink, "NEGATE"
        neg     dword [esp]             ; two's complement of the top cell, in place
        next
; INVERT ( x -- y ) : y is x with every bit flipped.
header INVERT, INVERT+5*cw, 0, 0, lastlink, "INVERT"
        not     dword [esp]             ; one's complement of the top cell, in place
        next
; ALIGNED ( addr -- a-addr ) : round addr up to the next multiple of cw;
; a value that already is a multiple of cw is left unchanged.
; Like the original bit trick this relies on cw being a power of two.
header ALIGNED , ALIGNED +5*cw, 0, 0, lastlink, "ALIGNED"
        pop     eax
        add     eax, cw - 1             ; step past the next boundary unless already on one
        and     eax, -cw                ; then clear the low bits
        push    eax
        next
; 1+ ( n -- n+1 )
header one_plus, docol, one_plus+5*cw, 0, lastlink, "1+"
        dd      _one_, _plus_
        dd      EXIT
; CELL+ ( addr -- addr+cw ) : advance an address by the size of one cell.
header CELL_plus, docol, CELL_plus+5*cw, 0, lastlink, "CELL+"
        dd      LIT, cw, _plus_
        dd      EXIT
; CELLS ( n -- n*cw ) : convert a number of cells into a number of bytes.
; The factor is taken from cw, as CELL+ does, instead of a literal shift
; count of 2 that is only right while cw = 4. For cw = 4 the result is
; the same as before for every n (the product is truncated to one cell).
header CELLS, docol, CELLS+5*cw, 0, lastlink, "CELLS"
        dd      LIT, cw                 ; n cw
        dd      _star_                  ; n*cw
        dd      EXIT
; The archetype of an arithmetic operator, pop 2 registers, add them
; and push the result.
; + ( n1 n2 -- n1+n2 )
header _plus_, _plus_+5*cw, 0, 0, lastlink, "+"
pop eax ; n2
pop ebx ; n1
add eax,ebx
push eax ; the sum replaces both operands
next
; AND ( x1 x2 -- x3 ) : bitwise and of the two top cells.
header _AND_, _AND_+5*cw, 0, 0, lastlink, "AND"
        pop     ebx                     ; x2
        pop     eax                     ; x1
        and     eax, ebx
        push    eax
        next
; OR ( x1 x2 -- x3 ) : bitwise inclusive or of the two top cells.
header _OR_, _OR_+5*cw, 0, 0, lastlink, "OR"
        pop     ebx                     ; x2
        pop     eax                     ; x1
        or      eax, ebx
        push    eax
        next
; XOR ( x1 x2 -- x3 ) : bitwise exclusive or of the two top cells.
header _XOR_, _XOR_+5*cw, 0, 0, lastlink, "XOR"
        pop     ebx                     ; x2
        pop     eax                     ; x1
        xor     eax, ebx
        push    eax
        next
; - ( n1 n2 -- n1-n2 ) : the top cell is subtracted from the one below it.
header _minus_, _minus_+5*cw, 0, 0, lastlink, "-"
        pop     ebx                     ; n2, the subtrahend
        pop     eax                     ; n1
        sub     eax, ebx
        push    eax
        next
; Intel has a set instruction that moves the condition we need (such as equality)
; into the least significant bit of AX.
; = ( x1 x2 -- flag ) : flag has all bits set when x1 equals x2, else it is zero.
header _equal_, _equal_+5*cw, 0, 0, lastlink, "="
        pop     edx                     ; x2
        xor     eax, eax                ; clear all of AX before its low bit is set
        cmp     [esp], edx              ; x1, still on the stack, against x2
        sete    al                      ; AX := 1 when equal, otherwise it stays 0
        neg     eax                     ; 1 -> -1 : a proper flag
        mov     [esp], eax              ; the flag takes the place of x1
        next
; <> ( x1 x2 -- flag ) : flag has all bits set when x1 differs from x2, else it is zero.
header _unequal_, _unequal_+5*cw, 0, 0, lastlink, "<>"
        pop     edx                     ; x2
        xor     eax, eax                ; clear all of AX before its low bit is set
        cmp     [esp], edx              ; x1, still on the stack, against x2
        setne   al                      ; AX := 1 when not equal
        neg     eax                     ; 1 -> -1 : a proper flag
        mov     [esp], eax              ; the flag takes the place of x1
        next
; < ( n1 n2 -- flag ) : signed comparison; flag has all bits set when n1 < n2.
header _less_, _less_+5*cw, 0, 0, lastlink, "<"
        pop     edx                     ; n2
        xor     eax, eax                ; clear all of AX before its low bit is set
        cmp     [esp], edx              ; n1, still on the stack, against n2
        setl    al                      ; AX := 1 when n1 is less (signed)
        neg     eax                     ; 1 -> -1 : a proper flag
        mov     [esp], eax              ; the flag takes the place of n1
        next
; > ( n1 n2 -- flag ) : signed comparison; flag has all bits set when n1 > n2.
header _greater_, _greater_+5*cw, 0, 0, lastlink, ">"
        pop     edx                     ; n2
        xor     eax, eax                ; clear all of AX before its low bit is set
        cmp     [esp], edx              ; n1, still on the stack, against n2
        setg    al                      ; AX := 1 when n1 is greater (signed)
        neg     eax                     ; 1 -> -1 : a proper flag
        mov     [esp], eax              ; the flag takes the place of n1
        next
; LSHIFT ( x u -- y ) : shift x left by the count in the low byte of u.
header LSHIFT, LSHIFT+5*cw, 0, 0, lastlink, "LSHIFT"
        pop     ecx                     ; u; only CL is used as the count
        shl     dword [esp], cl         ; shift x where it lies on the stack
        next
; RSHIFT ( x u -- y ) : logical shift of x to the right by the count in the low byte of u.
header RSHIFT, RSHIFT+5*cw, 0, 0, lastlink, "RSHIFT"
        pop     ecx                     ; u; only CL is used as the count
        shr     dword [esp], cl         ; zeroes are shifted in from the left
        next
; ------------------------------------------------------------------------------
; Not all basic instructions must be in assembler. The following operators
; are defined as Forth code. They are simple, so you can get used
; to how Forth code looks if expressed in assembler language.
; To get the absolute value, if it is less than zero, negate it.
; ABS ( n -- u ) : negate n when it is negative, otherwise leave it alone.
header _ABS_, docol, _ABS_+5*cw, 0, lastlink, "ABS"
        dd      _DUP_
        dd      zero_less               ; n flag : flag is true for negative n
        ifto    abs1                    ; flag false: skip ahead to abs1
        dd      NEGATE                  ; only reached for negative n
abs1:   dd      EXIT
; MIN ( n1 n2 -- n3 ) : n3 is the smaller of n1 and n2 (signed).
header MIN, docol, MIN+5*cw, 0, lastlink, "MIN"
        dd      two_DUP
        dd      _greater_               ; n1 n2 flag : flag is true when n1 > n2
        ifto    min1                    ; flag false: n1 is the one to keep
        dd      SWAP                    ; bring n1 to the top, so it is the one dropped
min1:   dd      DROP, EXIT
; MAX ( n1 n2 -- n3 ) : n3 is the larger of n1 and n2 (signed).
header MAX, docol, MAX+5*cw, 0, lastlink, "MAX"
        dd      two_DUP
        dd      _less_                  ; n1 n2 flag : flag is true when n1 < n2
        ifto    max1                    ; flag false: n1 is the one to keep
        dd      SWAP                    ; bring n1 to the top, so it is the one dropped
max1:   dd      DROP, EXIT
; ------------------------------------------------------------------------------
; 32 by 32 bit signed multiplication, with 64 bit product.
; The basic low level word for all multiplications.
; M* ( n1 n2 -- d ) : signed product as a double: l.s. cell below, m.s. cell on top.
header M_star, M_star+5*cw, 0, 0, lastlink, "M*"
        pop     eax                     ; n2
        pop     ebx                     ; n1
        imul    ebx                     ; DX:AX := AX * BX, signed
        push    eax                     ; l.s. part goes on first
        push    edx                     ; m.s. part ends up on top
        next
; 64 by 32 bit signed division, with 32 bit quotient and remainder.
; The basic low level word for all divisions and modulo.
; SM/REM ( d n -- rem quot ) : signed division of the double d by n.
; There is no guard here: a zero divisor or a quotient that does not fit
; in one cell is left to the processor's divide exception.
header SM_slash_REM, SM_slash_REM+5*cw, 0, 0, lastlink, "SM/REM"
        pop     ecx                     ; n, the divisor
        pop     edx                     ; m.s. cell of d
        pop     eax                     ; l.s. cell of d
        idiv    ecx                     ; AX := quotient, DX := remainder
        push    edx                     ; remainder below
        push    eax                     ; quotient on top
        next
; ------------------------------------------------------------------------------
; Regular cell*cell to cell multiplication.
; It is assumed that the result fits in a cell!
; * ( n1 n2 -- n3 ) : single cell product; the m.s. cell of the full product is thrown away.
header _star_, docol, _star_+5*cw, 0, lastlink, "*"
        dd      M_star, DROP            ; n1 n2 -- lo hi -- lo
        dd      EXIT
; Regular cell by cell /MOD. Use the return stack to move the
; divider out of the way.
; /MOD ( n1 n2 -- rem quot )
header slash_MOD, docol, slash_MOD+5*cw, 0, lastlink, "/MOD"
        dd      to_R                    ; n1            divisor parked on the return stack
        dd      _DUP_, zero_less        ; n1 sign       sign is -1 for negative n1, else 0: a 64 bit dividend
        dd      R_from, SM_slash_REM    ; rem quot
        dd      EXIT
; / ( n1 n2 -- quot )
header _slash_, docol, _slash_+5*cw, 0, lastlink, "/"
        dd      slash_MOD, NIP          ; rem quot -- quot
        dd      EXIT
; MOD ( n1 n2 -- rem )
header _MOD_, docol, _MOD_+5*cw, 0, lastlink, "MOD"
        dd      slash_MOD, DROP         ; rem quot -- rem
        dd      EXIT
; */MOD ( n1 n2 n3 -- rem quot ) : n1*n2 is kept as a double before dividing by n3.
header star_slash_MOD, docol, star_slash_MOD+5*cw, 0, lastlink, "*/MOD"
        dd      to_R, M_star            ; n1 n2 n3 -- d         n3 parked on the return stack
        dd      R_from, SM_slash_REM    ; d n3 -- rem quot
        dd      EXIT
; */ ( n1 n2 n3 -- quot )
header star_slash, docol, star_slash+5*cw, 0, lastlink, "*/"
        dd      star_slash_MOD, NIP     ; rem quot -- quot
        dd      EXIT
; --------------------- memory store and fetch ---------------------------------
; @ ( addr -- x ) : read the cell stored at addr.
header _fetch_, _fetch_+5*cw, 0, 0, lastlink, "@"
        pop     ebx                     ; addr
        push    dword [ebx]             ; its content goes straight onto the stack
        next
; C@ ( addr -- c ) : read the byte stored at addr; the upper bits of the result are zero.
header C_fetch, C_fetch+5*cw, 0, 0, lastlink, "C@"
        pop     ebx                     ; addr
        movzx   eax, byte [ebx]         ; zero-extend the byte to a full cell
        push    eax
        next