diff --git a/.github/workflows/blank.yml b/.github/workflows/blank.yml index f46f30ebf..cb78ad6c9 100644 --- a/.github/workflows/blank.yml +++ b/.github/workflows/blank.yml @@ -1,12 +1,10 @@ -name: Build +name: Main workflow on: pull_request: push: - paths-ignore: - - 'README.md' - branches: - - '1.10' + +permissions: read-all jobs: build: @@ -14,27 +12,26 @@ jobs: fail-fast: false matrix: os: - #- macos-latest - - ubuntu-20.04 - #- windows-latest - ocaml-version: - #- 4.11.0 - - 4.10.1 - #- 4.09.1 - #- 4.08.1 + - ubuntu-latest + ocaml-compiler: + - 4.13.1 runs-on: ${{ matrix.os }} steps: - - name: Checkout code - uses: actions/checkout@v2 - - name: Use OCaml ${{ matrix.ocaml-version }} - uses: avsm/setup-ocaml@v1 + - name: Checkout tree + uses: actions/checkout@v3 + + - name: Set-up OCaml ${{ matrix.ocaml-compiler }} + uses: ocaml/setup-ocaml@v2 with: - ocaml-version: ${{ matrix.ocaml-version }} + ocaml-compiler: ${{ matrix.ocaml-compiler }} - run: opam pin add Lama.dev . --no-action - run: opam depext Lama.dev --yes --with-test - run: opam install . --deps-only --with-test - - run: opam exec -- make #dune build - - run: opam exec -- make regression # dune runtest + - run: eval $(opam env) + - run: opam exec -- make + - run: opam exec -- make regression-all + - run: opam exec -- make unit_tests + - run: opam exec -- make negative_scenarios_tests diff --git a/.gitignore b/.gitignore index 5955142ad..8a7ab828c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ *.o .merlin +.vscode \ No newline at end of file diff --git a/Lama.opam b/Lama.opam index 4ecc52d0e..9c4ef486f 100644 --- a/Lama.opam +++ b/Lama.opam @@ -1,23 +1,27 @@ opam-version: "2.0" -version: "1.10" +version: "1.2" -synopsis: "Lama programming system" +synopsis: "Lama programming language" maintainer: "dboulytchev@gmail.com" -authors: "dboulytchev@gmail.com" -homepage: "https://github.com/JetBrains-Research/Lama" -bug-reports: "https://github.com/JetBrains-Research/Lama/issues" +authors: [ + "Dmitry Boulytchev " + "Daniil Berezun " + "Egor Sheremetov " +] +homepage: "https://github.com/PLTools/Lama" +bug-reports: "https://github.com/PLTools/Lama/issues" depends: [ - "ocaml" { >= "4.07.1" } + "ocaml" { >= "4.13.1" } "ocamlfind" { build } "camlp5" { >= "8.00.05" } "ostap" { >= "0.5"} - "GT" { >= "0.5.0" } + "GT" { >= "0.5.1" } ] build: [ [make] - [make "regression"] {with-test} + [make "regression-all"] {with-test} ] install: [make "install"] @@ -25,7 +29,7 @@ depexts: [ ["gcc-multilib"] {os-family = "debian"} ] -dev-repo: "git+https://github.com/JetBrains-Research/Lama.git" +dev-repo: "git+https://github.com/PLTools/Lama.git" url { - src: "git+https://github.com/JetBrains-Research/Lama.git#1.10+ocaml4.10" + src: "git+https://github.com/PLTools/Lama.git#1.20" } diff --git a/Makefile b/Makefile index 8a3b6313c..8808cb6cf 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,9 @@ all: $(MAKE) -C runtime $(MAKE) -C byterun $(MAKE) -C stdlib + $(MAKE) -C runtime unit_tests.o + $(MAKE) -C runtime invariants_check.o + $(MAKE) -C runtime invariants_check_debug_print.o STD_FILES=$(shell ls stdlib/*.[oi] stdlib/*.lama runtime/runtime.a runtime/Std.i) @@ -21,13 +24,28 @@ uninstall: $(RM) -r `opam var share`/Lama $(RM) `opam var bin`/$(EXECUTABLE) +regression-all: regression regression-expressions + regression: - $(MAKE) clean check -C regression - $(MAKE) clean check -C stdlib/regression + $(MAKE) clean check -j8 -C regression + $(MAKE) clean check -j8 -C stdlib/regression + +regression-expressions: + $(MAKE) clean check -j8 -C regression/expressions + $(MAKE) clean check -j8 -C regression/deep-expressions + +unit_tests: + ./runtime/unit_tests.o + ./runtime/invariants_check.o + ./runtime/invariants_check_debug_print.o + +negative_scenarios_tests: + $(MAKE) -C runtime negative_tests clean: $(MAKE) clean -C src $(MAKE) clean -C runtime $(MAKE) clean -C stdlib $(MAKE) clean -C regression + $(MAKE) clean -C byterun $(MAKE) clean -C bench diff --git a/README.md b/README.md index 4e135130d..2278c478d 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,9 @@ -| Lama 1.10 | Lama-devel 1.10 | -| -------------------- | -------------------------- | -| [![Lama 1.10][1]][2] | [![Lama-devel 1.10][3]][4] | +| Lama 1.2 | +| ------------------- | +| [![Lama 1.2][1]][2] | -[1]: https://github.com/JetBrains-Research/Lama/workflows/Build/badge.svg?branch=1.10 -[2]: https://github.com/JetBrains-Research/Lama/actions -[3]: https://github.com/JetBrains-Research/Lama-devel/workflows/Build/badge.svg?branch=1.10 -[4]: https://github.com/JetBrains-Research/Lama-devel/actions +[1]: https://github.com/PLTools/Lama/workflows/Build/badge.svg?branch=1.10 +[2]: https://github.com/PLTools/Lama/actions # Lama @@ -26,13 +24,13 @@ The name ![lama](lama.svg) is an acronym for *Lambda-Algol* since the language h The main purpose of ![lama](lama.svg) is to present a repertoire of constructs with certain runtime behavior and relevant implementation techniques. The lack of a type system (a vital feature for a real-world language -for software engineering) is an intensional decision that allows showing the unchained diversity of runtime behaviors, including those that a typical type system is called to prevent. +for software engineering) is an intensional decision that allows showing the unchained diversity of runtime behaviors, including those that a typical type system is called to prevent. On the other hand the language can be used in the future as a raw substrate to apply various ways of software verification (including type systems). The current implementation contains a native code compiler for **x86-32**, written in **OCaml**, a runtime library with garbage-collection support, written in **C**, and a small standard library, written in ![lama](lama.svg) itself. The native code compiler uses **gcc** as a toolchain. -In addition, a source-level reference interpreter is implemented as well as a compiler to a small stack machine. +In addition, a source-level reference interpreter is implemented as well as a compiler to a small stack machine. The stack machine code can in turn be either interpreted on a stack machine interpreter, or used as an intermediate representation by the native code compiler. ## Language Specification @@ -51,9 +49,9 @@ Ubuntu-based variant of WSL is recommended. * System-wide prerequisites: - `gcc-multilib` - + For example, (for Debian-based GNU/Linux): - ```bash + ```bash $ sudo apt install gcc-multilib ``` @@ -73,10 +71,10 @@ Ubuntu-based variant of WSL is recommended. 1. Install the right [switch](https://opam.ocaml.org/doc/Manual.html#Switches) for the OCaml compiler ```bash - # for fresh opam - $ opam switch create lama --packages=ocaml-variants.4.14.0+options,ocaml-option-flambda - # for old opam - $ opam switch create lama ocaml-variants.4.10.1+flambda + # for fresh opam + $ opam switch create lama --packages=ocaml-variants.4.14.0+options,ocaml-option-flambda + # for old opam + $ opam switch create lama ocaml-variants.4.13.1+flambda ``` * In the above command: @@ -100,7 +98,7 @@ Ubuntu-based variant of WSL is recommended. 3. Pin Lama package using `opam` and right URL (remember of "#" being a comment character in various shells) ```bash - $ opam pin add Lama https://github.com/JetBrains-Research/Lama.git\#1.10 --no-action + $ opam pin add Lama https://github.com/PLTools/Lama.git\#v1.2 --no-action ``` The extra '#' sign is added because in various Shells it is the start of a comment @@ -120,5 +118,13 @@ Ubuntu-based variant of WSL is recommended. ### Smoke-testing (optional) -Clone the repository and run `make -C tutorial`. +Clone the repository and run `make -C tutorial`. It should build a local compiler `src/lamac` and a few tutorial executables in `tutorial/`. + +### Useful links + +* [Plugin for VS Code](https://marketplace.visualstudio.com/items?itemName=arsavelev.lsp-lama) + +### Changes in Lama 1.2 + +* New garbage collector: single-threaded stop-the-world `LISP2` (see GC Handbook for details: [1st edition](https://www.cs.kent.ac.uk/people/staff/rej/gcbook/), [2nd edition](http://gchandbook.org/)) [mark-compact](https://www.memorymanagement.org/glossary/m.html#term-mark-compact). diff --git a/byterun/Makefile b/byterun/Makefile index 45fc9c562..369a57273 100644 --- a/byterun/Makefile +++ b/byterun/Makefile @@ -1,8 +1,10 @@ +FLAGS=-m32 -g2 -fstack-protector-all + all: byterun.o - $(CC) -m32 -g -o byterun byterun.o ../runtime/runtime.a + $(CC) $(FLAGS) -o byterun byterun.o ../runtime/runtime.a byterun.o: byterun.c - $(CC) -g -fstack-protector-all -m32 -c byterun.c + $(CC) $(FLAGS) -g -c byterun.c clean: - $(RM) *.a *.o *~ + $(RM) *.a *.o *~ byterun diff --git a/dune-project b/dune-project new file mode 100644 index 000000000..31718a853 --- /dev/null +++ b/dune-project @@ -0,0 +1,3 @@ +(lang dune 3.3) + +(cram enable) diff --git a/regression/Makefile b/regression/Makefile index 43c3c6376..50bcebaec 100644 --- a/regression/Makefile +++ b/regression/Makefile @@ -1,18 +1,24 @@ -TESTS=$(sort $(basename $(wildcard test*.lama))) +DEBUG_FILES=stack-dump-before data-dump-before extra-roots-dump-before heap-dump-before stack-dump-after data-dump-after extra-roots-dump-after heap-dump-after +TESTS=$(sort $(filter-out test111, $(basename $(wildcard test*.lama)))) LAMAC=../src/lamac .PHONY: check $(TESTS) -check: $(TESTS) + +check: ctest111 $(TESTS) $(TESTS): %: %.lama - @echo $@ - cat $@.input | LAMA=../runtime $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log - cat $@.input | LAMA=../runtime $(LAMAC) -ds -s $< > $@.log && diff $@.log orig/$@.log - LAMA=../runtime $(LAMAC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log + @echo "regression/$@" + @cat $@.input | LAMA=../runtime $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log + @cat $@.input | LAMA=../runtime $(LAMAC) -ds -s $< > $@.log && diff $@.log orig/$@.log + @LAMA=../runtime $(LAMAC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log + +ctest111: + @echo "regression/test111" + @LAMA=../runtime $(LAMAC) test111.lama && cat test111.input | ./test111 > test111.log && diff test111.log orig/test111.log clean: - $(RM) test*.log *.s *~ $(TESTS) *.i + $(RM) test*.log *.s *.sm *~ $(TESTS) *.i $(DEBUG_FILES) test111 $(MAKE) clean -C expressions $(MAKE) clean -C deep-expressions diff --git a/regression/deep-expressions/Makefile b/regression/deep-expressions/Makefile index b37c09433..981d04b6b 100644 --- a/regression/deep-expressions/Makefile +++ b/regression/deep-expressions/Makefile @@ -7,10 +7,10 @@ LAMAC = ../../src/lamac check: $(TESTS) $(TESTS): %: %.lama - @echo $@ + @echo "regression/deep-expressions/$@" @LAMA=../../runtime $(LAMAC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log - @cat $@.input | $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log - @cat $@.input | $(LAMAC) -s $< > $@.log && diff $@.log orig/$@.log + @cat $@.input | LAMA=../../runtime $(LAMAC) -i $< > $@.log && diff $@.log orig/$@.log + @cat $@.input | LAMA=../../runtime $(LAMAC) -s $< > $@.log && diff $@.log orig/$@.log clean: rm -f *.log *.s *~ diff --git a/regression/expressions/Makefile b/regression/expressions/Makefile index 8ab41d698..4e4737c6e 100644 --- a/regression/expressions/Makefile +++ b/regression/expressions/Makefile @@ -7,10 +7,10 @@ RC = ../../src/lamac check: $(TESTS) $(TESTS): %: %.lama - @echo $@ + @echo "regression/expressions/$@" @LAMA=../../runtime $(RC) $< && cat $@.input | ./$@ > $@.log && diff $@.log orig/$@.log - @cat $@.input | $(RC) -i $< > $@.log && diff $@.log orig/$@.log - @cat $@.input | $(RC) -s $< > $@.log && diff $@.log orig/$@.log + @cat $@.input | LAMA=../../runtime $(RC) -i $< > $@.log && diff $@.log orig/$@.log + @cat $@.input | LAMA=../../runtime $(RC) -s $< > $@.log && diff $@.log orig/$@.log clean: rm -f *.log *.s *~ diff --git a/regression/orig/test111.log b/regression/orig/test111.log new file mode 100644 index 000000000..65c710776 --- /dev/null +++ b/regression/orig/test111.log @@ -0,0 +1,6 @@ +97 +98 +99 +100 +97 +98 diff --git a/regression/test111.input b/regression/test111.input new file mode 100644 index 000000000..e69de29bb diff --git a/regression/test111.lama b/regression/test111.lama new file mode 100644 index 000000000..464e1aaf6 --- /dev/null +++ b/regression/test111.lama @@ -0,0 +1,21 @@ +import Std; + +fun printString (s) { + var i; + for i := 0, i < s.length, i := i + 1 do + write (s[i]) + od +} + +fun printListConcat ( str_list ) { + printString (stringcat ( str_list )) +} + +var l = { "a" , "b" , "c" , "d" }; +printListConcat (l); + +l := { "ab" }; +printListConcat (l); + +l := {}; +printListConcat (l) \ No newline at end of file diff --git a/runtime/.clang-format b/runtime/.clang-format new file mode 100644 index 000000000..fd32153a9 --- /dev/null +++ b/runtime/.clang-format @@ -0,0 +1,134 @@ +# Common settings +BasedOnStyle: LLVM +TabWidth: 2 +IndentWidth: 2 +UseTab: Never +ColumnLimit: 100 +IndentCaseLabels: true + +# https://clang.llvm.org/docs/ClangFormatStyleOptions.html +--- +Language: Cpp + +DisableFormat: false +Standard: Auto + +AccessModifierOffset: -4 +AlignAfterOpenBracket: true +AlignConsecutiveAssignments: Consecutive +AlignConsecutiveDeclarations: Consecutive +AlignEscapedNewlines: Right +AlignOperands: true +AlignTrailingComments: false +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Always +AllowShortCaseLabelsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: AllIfsAndElse +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: false +BinPackParameters: true +BitFieldColonSpacing: Both + + +# Configure each individual brace in BraceWrapping +BreakBeforeBraces: Attach +# Control of individual brace wrapping cases +BraceWrapping: + AfterClass: true + AfterControlStatement: Always + AfterEnum : true + AfterFunction : true + AfterNamespace : true + AfterStruct : true + AfterUnion : true + BeforeCatch : true + BeforeElse : true + IndentBraces : false + AfterExternBlock : true + SplitEmptyFunction : false + SplitEmptyRecord : false + SplitEmptyNamespace : true + + +BreakAfterJavaFieldAnnotations: true +BreakBeforeInheritanceComma: false +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: true +BreakStringLiterals: true + +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +SpaceBeforeCpp11BracedList: false +DerivePointerAlignment: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IndentCaseLabels: true +FixNamespaceComments: true +IndentWrappedFunctionNames: true +KeepEmptyLinesAtTheStartOfBlocks: true +MacroBlockBegin: '' +MacroBlockEnd: '' +JavaScriptQuotes: Double +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 4 +ObjCSpaceAfterProperty: true +ObjCSpaceBeforeProtocolList: true +PenaltyBreakBeforeFirstCallParameter: 19 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 + +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: Custom +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDeclarationName: true + AfterFunctionDefinitionName: true + AfterIfMacros: true + AfterOverloadedOperator: true + AfterRequiresInClause: true + AfterRequiresInExpression: true + BeforeNonEmptyParentheses: false + +SpaceBeforeRangeBasedForLoopColon: false +SpaceInEmptyBlock: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 3 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInConditionalStatement: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceAfterTemplateKeyword: true +SpaceBeforeInheritanceColon: true + +SortUsingDeclarations: true +SortIncludes: CaseInsensitive + +IndentGotoLabels: false +InsertBraces: false + +# Comments are for developers, they should arrange them +ReflowComments: false + +IncludeBlocks: Regroup +IndentPPDirectives: AfterHash +SeparateDefinitionBlocks: Always diff --git a/runtime/Makefile b/runtime/Makefile index a82daa3f0..f3757ec29 100644 --- a/runtime/Makefile +++ b/runtime/Makefile @@ -1,12 +1,44 @@ +CC=gcc +COMMON_FLAGS=-m32 -g2 -fstack-protector-all +PROD_FLAGS=$(COMMON_FLAGS) -DLAMA_ENV +TEST_FLAGS=$(COMMON_FLAGS) -DDEBUG_VERSION +UNIT_TESTS_FLAGS=$(TEST_FLAGS) +INVARIANTS_CHECK_FLAGS=$(TEST_FLAGS) -DFULL_INVARIANT_CHECKS -all: gc_runtime.o runtime.o - ar rc runtime.a gc_runtime.o runtime.o +# this target is the most important one, its' artefacts should be used as a runtime of Lama +all: gc.o runtime.o + ar rc runtime.a runtime.o gc.o -gc_runtime.o: gc_runtime.s - $(CC) -g -fstack-protector-all -m32 -c gc_runtime.s +NEGATIVE_TESTS=$(sort $(basename $(notdir $(wildcard negative_scenarios/*_neg.c)))) + +$(NEGATIVE_TESTS): %: negative_scenarios/%.c + @echo "Running test $@" + @$(CC) -o $@.o $(COMMON_FLAGS) negative_scenarios/$@.c gc.c + @./$@.o 2> negative_scenarios/$@.err || diff negative_scenarios/$@.err negative_scenarios/expected/$@.err + +negative_tests: $(NEGATIVE_TESTS) + +# this is a target that runs unit tests, scenarios are written in a single file `test_main.c` +unit_tests.o: gc.c gc.h runtime.c runtime.h runtime_common.h virt_stack.c virt_stack.h test_main.c test_util.s + $(CC) -o unit_tests.o $(UNIT_TESTS_FLAGS) gc.c virt_stack.c runtime.c test_main.c test_util.s + +# this target also runs unit tests but with additional expensive checks of GC invariants which aren't used in production version +invariants_check.o: gc.c gc.h runtime.c runtime.h runtime_common.h virt_stack.c virt_stack.h test_main.c test_util.s + $(CC) -o invariants_check.o $(INVARIANTS_CHECK_FLAGS) gc.c virt_stack.c runtime.c test_main.c test_util.s + +# this target also runs unit tests but with additional expensive checks of GC invariants which aren't used in production version +# additionally, it prints debug information +invariants_check_debug_print.o: gc.c gc.h runtime.c runtime.h runtime_common.h virt_stack.c virt_stack.h test_main.c test_util.s + $(CC) -o invariants_check_debug_print.o $(INVARIANTS_CHECK_FLAGS) -DDEBUG_PRINT gc.c virt_stack.c runtime.c test_main.c test_util.s + +virt_stack.o: virt_stack.h virt_stack.c + $(CC) $(PROD_FLAGS) -c virt_stack.c + +gc.o: gc.c gc.h + $(CC) -rdynamic $(PROD_FLAGS) -c gc.c runtime.o: runtime.c runtime.h - $(CC) -g -fstack-protector-all -m32 -c runtime.c + $(CC) $(PROD_FLAGS) -c runtime.c clean: - $(RM) *.a *.o *~ + $(RM) *.a *.o *~ negative_scenarios/*.err diff --git a/runtime/TODO.md b/runtime/TODO.md new file mode 100644 index 000000000..d47c7327c --- /dev/null +++ b/runtime/TODO.md @@ -0,0 +1,19 @@ +### TODO list + +- [x] Fix heap&stack&extra_roots dump +- [x] Remove extra and dead code +- [x] Debug print -> DEBUG_PRINT mode +- [x] Check `mmap`/`remap`/... +- [x] Check: `__gc_stack_bot`: same issue as `__gc_stack_top`? +- [x] Check: Can we get rid of `__gc_init` (as an assembly (implement in C instead))? (answer: if we make main in which every Lama file is compiled set `__gc_stack_bottom` to current `ebp` then yes, otherwise we need access to registers) +- [x] Check: runtime tags: should always the last bit be 1? (Answer: not really, however, we still need to distinguish between 5 different options (because unboxed values should have its own value to be returned from `LkindOf`)) +- [x] Fix warnings in ML code +- [x] TODO: debug flag doesn't compile +- [x] Sexp: move the tag to be `contents[0]` instead of the word in sexp header; i.e. get rid of sexp as separate data structure +- [x] Run Lama compiler on Lama +- [ ] Add more stress tests (for graph-like structures) to `stdlib/regression` and unit tests +- [ ] Magic constants +- [ ] Normal documentation: a-la doxygen +- [ ] Think: normal debug mode +- [ ] Fix warnings in C code +- [ ] Modes (like FULL_INVARIANTS) -> separate files \ No newline at end of file diff --git a/runtime/gc.c b/runtime/gc.c new file mode 100644 index 000000000..829a82077 --- /dev/null +++ b/runtime/gc.c @@ -0,0 +1,922 @@ +#define _GNU_SOURCE 1 + +#include "gc.h" + +#include "runtime_common.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const size_t INIT_HEAP_SIZE = MINIMUM_HEAP_CAPACITY; + +#ifdef DEBUG_VERSION +size_t cur_id = 0; +#endif + +static extra_roots_pool extra_roots; + +size_t __gc_stack_top = 0, __gc_stack_bottom = 0; +#ifdef LAMA_ENV +extern const size_t __start_custom_data, __stop_custom_data; +#endif + +#ifdef DEBUG_VERSION +memory_chunk heap; +#else +static memory_chunk heap; +#endif + +#ifdef DEBUG_VERSION +void dump_heap (); +#endif + +void handler (int sig) { + void *array[10]; + int size; + + // get void*'s for all entries on the stack + size = backtrace(array, 10); + fprintf(stderr, "heap size is %zu\n", heap.size); + backtrace_symbols_fd(array, size, STDERR_FILENO); + exit(1); +} + +void *alloc (size_t size) { +#ifdef DEBUG_VERSION + ++cur_id; +#endif + size_t bytes_sz = size; + size = BYTES_TO_WORDS(size); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "allocation of size %zu words (%zu bytes): ", size, bytes_sz); +#endif + void *p = gc_alloc_on_existing_heap(size); + if (!p) { + // not enough place in the heap, need to perform GC cycle + p = gc_alloc(size); + } + return p; +} + +#ifdef FULL_INVARIANT_CHECKS + +// precondition: obj_content is a valid address pointing to the content of an object +static void print_object_info (FILE *f, void *obj_content) { + data *d = TO_DATA(obj_content); + size_t obj_tag = TAG(d->data_header); + size_t obj_id = d->id; + fprintf(f, "id %zu tag %zu | ", obj_id, obj_tag); +} + +static void print_unboxed (FILE *f, int unboxed) { fprintf(f, "unboxed %zu | ", unboxed); } + +static FILE *print_stack_content (char *filename) { + FILE *f = fopen(filename, "w+"); + ftruncate(fileno(f), 0); + fprintf(f, "Stack content:\n"); + for (size_t *stack_ptr = (size_t *)((void *)__gc_stack_top + 4); + stack_ptr < (size_t *)__gc_stack_bottom; + ++stack_ptr) { + size_t value = *stack_ptr; + if (is_valid_heap_pointer((size_t *)value)) { + fprintf(f, "%p, ", (void *)value); + print_object_info(f, (void *)value); + } else { + print_unboxed(f, (int)value); + } + fprintf(f, "\n"); + } + fprintf(f, "Stack content end.\n"); + return f; +} + +// precondition: obj_content is a valid address pointing to the content of an object +static void objects_dfs (FILE *f, void *obj_content) { + void *obj_header = get_obj_header_ptr(obj_content); + data *obj_data = TO_DATA(obj_content); + // internal mark-bit for this dfs, should be recovered by the caller + if ((obj_data->forward_address & 2) != 0) { return; } + // set this bit as 1 + obj_data->forward_address |= 2; + fprintf(f, "object at addr %p: ", obj_content); + print_object_info(f, obj_content); + /*fprintf(f, "object id: %zu | ", obj_data->id);*/ + // first cycle: print object's fields + for (obj_field_iterator field_it = ptr_field_begin_iterator(obj_header); + !field_is_done_iterator(&field_it); + obj_next_field_iterator(&field_it)) { + size_t field_value = *(size_t *)field_it.cur_field; + if (is_valid_heap_pointer((size_t *)field_value)) { + print_object_info(f, (void *)field_value); + /*fprintf(f, "%zu ", TO_DATA(field_value)->id);*/ + } else { + print_unboxed(f, (int)field_value); + } + } + fprintf(f, "\n"); + for (obj_field_iterator field_it = ptr_field_begin_iterator(obj_header); + !field_is_done_iterator(&field_it); + obj_next_field_iterator(&field_it)) { + size_t field_value = *(size_t *)field_it.cur_field; + if (is_valid_heap_pointer((size_t *)field_value)) { objects_dfs(f, (void *)field_value); } + } +} + +FILE *print_objects_traversal (char *filename, bool marked) { + FILE *f = fopen(filename, "w+"); + ftruncate(fileno(f), 0); + for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it); + heap_next_obj_iterator(&it)) { + void *obj_header = it.current; + data *obj_data = TO_DATA(get_object_content_ptr(obj_header)); + if ((obj_data->forward_address & 1) == marked) { + objects_dfs(f, get_object_content_ptr(obj_header)); + } + } + + // resetting bit that represent mark-bit for this internal dfs-traversal + for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it); + heap_next_obj_iterator(&it)) { + void *obj_header = it.current; + data *obj_data = TO_DATA(get_object_content_ptr(obj_header)); + obj_data->forward_address &= (~2); + } + fflush(f); + + // print extra roots + for (int i = 0; i < extra_roots.current_free; i++) { + fprintf(f, "extra root %p %p: ", extra_roots.roots[i], *(size_t **)extra_roots.roots[i]); + } + fflush(f); + return f; +} + +int files_cmp (FILE *f1, FILE *f2) { + int symbol1, symbol2; + int position = 0; + while (true) { + symbol1 = fgetc(f1); + symbol2 = fgetc(f2); + if (symbol1 == EOF && symbol2 == EOF) { return -1; } + if (symbol1 != symbol2) { return position; } + ++position; + } +} + +#endif + +void *gc_alloc_on_existing_heap (size_t size) { + if (heap.current + size <= heap.end) { + void *p = (void *)heap.current; + heap.current += size; + memset(p, 0, size * sizeof(size_t)); + return p; + } + return NULL; +} + +void *gc_alloc (size_t size) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "===============================GC cycle has started\n"); +#endif +#ifdef FULL_INVARIANT_CHECKS + FILE *stack_before = print_stack_content("stack-dump-before-compaction"); + FILE *heap_before = print_objects_traversal("before-mark", 0); + fclose(heap_before); +#endif + mark_phase(); +#ifdef FULL_INVARIANT_CHECKS + FILE *heap_before_compaction = print_objects_traversal("after-mark", 1); +#endif + + compact_phase(size); +#ifdef FULL_INVARIANT_CHECKS + FILE *stack_after = print_stack_content("stack-dump-after-compaction"); + FILE *heap_after_compaction = print_objects_traversal("after-compaction", 0); + + int pos = files_cmp(stack_before, stack_after); + if (pos >= 0) { // position of difference is found + fprintf(stderr, "Stack is modified incorrectly, see position %d\n", pos); + exit(1); + } + fclose(stack_before); + fclose(stack_after); + pos = files_cmp(heap_before_compaction, heap_after_compaction); + if (pos >= 0) { // position of difference is found + fprintf(stderr, "GC invariant is broken, pos is %d\n", pos); + exit(1); + } + fclose(heap_before_compaction); + fclose(heap_after_compaction); +#endif +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "===============================GC cycle has finished\n"); +#endif + return gc_alloc_on_existing_heap(size); +} + +static void gc_root_scan_stack () { + for (size_t *p = (size_t *)(__gc_stack_top + 4); p < (size_t *)__gc_stack_bottom; ++p) { + gc_test_and_mark_root((size_t **)p); + } +} + +void mark_phase (void) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "marking has started\n"); + fprintf(stderr, + "gc_root_scan_stack has started: gc_top=%p bot=%p\n", + (void *)__gc_stack_top, + (void *)__gc_stack_bottom); +#endif + gc_root_scan_stack(); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "gc_root_scan_stack has finished\n"); + fprintf(stderr, "scan_extra_roots has started\n"); +#endif + scan_extra_roots(); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "scan_extra_roots has finished\n"); + fprintf(stderr, "scan_global_area has started\n"); +#endif +#ifdef LAMA_ENV + scan_global_area(); +#endif +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "scan_global_area has finished\n"); + fprintf(stderr, "marking has finished\n"); +#endif +} + +void compact_phase (size_t additional_size) { + size_t live_size = compute_locations(); + + // all in words + size_t next_heap_size = + MAX(live_size * EXTRA_ROOM_HEAP_COEFFICIENT + additional_size, MINIMUM_HEAP_CAPACITY); + size_t next_heap_pseudo_size = MAX(next_heap_size, heap.size); + + memory_chunk old_heap = heap; + heap.begin = mremap( + heap.begin, WORDS_TO_BYTES(heap.size), WORDS_TO_BYTES(next_heap_pseudo_size), MREMAP_MAYMOVE); + if (heap.begin == MAP_FAILED) { + perror("ERROR: compact_phase: mremap failed\n"); + exit(1); + } + heap.end = heap.begin + next_heap_pseudo_size; + heap.size = next_heap_pseudo_size; + heap.current = heap.begin + (old_heap.current - old_heap.begin); + + update_references(&old_heap); + physically_relocate(&old_heap); + + heap.current = heap.begin + live_size; +} + +size_t compute_locations () { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC compute_locations started\n"); +#endif + size_t *free_ptr = heap.begin; + heap_iterator scan_iter = heap_begin_iterator(); + + for (; !heap_is_done_iterator(&scan_iter); heap_next_obj_iterator(&scan_iter)) { + void *header_ptr = scan_iter.current; + void *obj_content = get_object_content_ptr(header_ptr); + if (is_marked(obj_content)) { + size_t sz = BYTES_TO_WORDS(obj_size_header_ptr(header_ptr)); + // forward address is responsible for object header pointer + set_forward_address(obj_content, (size_t)free_ptr); + free_ptr += sz; + } + } + +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC compute_locations finished\n"); +#endif + // it will return number of words + return free_ptr - heap.begin; +} + +void scan_and_fix_region (memory_chunk *old_heap, void *start, void *end) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC scan_and_fix_region started\n"); +#endif + for (size_t *ptr = (size_t *)start; ptr < (size_t *)end; ++ptr) { + size_t ptr_value = *ptr; + // this can't be expressed via is_valid_heap_pointer, because this pointer may point area corresponding to the old + // heap + if (is_valid_pointer((size_t *)ptr_value) && (size_t)old_heap->begin <= ptr_value + && ptr_value <= (size_t)old_heap->current) { + void *obj_ptr = (void *)heap.begin + ((void *)ptr_value - (void *)old_heap->begin); + void *new_addr = + (void *)heap.begin + ((void *)get_forward_address(obj_ptr) - (void *)old_heap->begin); + size_t content_offset = get_header_size(get_type_row_ptr(obj_ptr)); + *(void **)ptr = new_addr + content_offset; + } + } +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC scan_and_fix_region finished\n"); +#endif +} + +void scan_and_fix_region_roots (memory_chunk *old_heap) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "extra roots started: number of extra roots %i\n", extra_roots.current_free); +#endif + for (int i = 0; i < extra_roots.current_free; i++) { + size_t *ptr = (size_t *)extra_roots.roots[i]; + size_t ptr_value = *ptr; + if (!is_valid_pointer((size_t *)ptr_value)) { continue; } + // skip this one since it was already fixed from scanning the stack + if ((extra_roots.roots[i] >= (void **)__gc_stack_top + && extra_roots.roots[i] < (void **)__gc_stack_bottom) +#ifdef LAMA_ENV + || (extra_roots.roots[i] <= (void **)&__stop_custom_data + && extra_roots.roots[i] >= (void **)&__start_custom_data) +#endif + ) { +#ifdef DEBUG_VERSION + if (is_valid_heap_pointer((size_t *)ptr_value)) { +# ifdef DEBUG_PRINT + fprintf(stderr, + "|\tskip extra root: %p (%p), since it points to Lama's stack top=%p bot=%p\n", + extra_roots.roots[i], + (void *)ptr_value, + (void *)__gc_stack_top, + (void *)__gc_stack_bottom); +# endif + } +# ifdef LAMA_ENV + else if ((extra_roots.roots[i] <= (void *)&__stop_custom_data + && extra_roots.roots[i] >= (void *)&__start_custom_data)) { + fprintf( + stderr, + "|\tskip extra root: %p (%p), since it points to Lama's static area stop=%p start=%p\n", + extra_roots.roots[i], + (void *)ptr_value, + (void *)&__stop_custom_data, + (void *)&__start_custom_data); + exit(1); + } +# endif + else { +# ifdef DEBUG_PRINT + fprintf(stderr, + "|\tskip extra root: %p (%p): not a valid Lama pointer \n", + extra_roots.roots[i], + (void *)ptr_value); +# endif + } +#endif + continue; + } + if ((size_t)old_heap->begin <= ptr_value && ptr_value <= (size_t)old_heap->current) { + void *obj_ptr = (void *)heap.begin + ((void *)ptr_value - (void *)old_heap->begin); + void *new_addr = + (void *)heap.begin + ((void *)get_forward_address(obj_ptr) - (void *)old_heap->begin); + size_t content_offset = get_header_size(get_type_row_ptr(obj_ptr)); + *(void **)ptr = new_addr + content_offset; +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, + "|\textra root (%p) %p -> %p\n", + extra_roots.roots[i], + (void *)ptr_value, + (void *)*ptr); +#endif + } + } +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "|\textra roots finished\n"); +#endif +} + +void update_references (memory_chunk *old_heap) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC update_references started\n"); +#endif + heap_iterator it = heap_begin_iterator(); + while (!heap_is_done_iterator(&it)) { + if (is_marked(get_object_content_ptr(it.current))) { + for (obj_field_iterator field_iter = ptr_field_begin_iterator(it.current); + !field_is_done_iterator(&field_iter); + obj_next_ptr_field_iterator(&field_iter)) { + + size_t *field_value = *(size_t **)field_iter.cur_field; + if (field_value < old_heap->begin || field_value > old_heap->current) { continue; } + // this pointer should also be modified according to old_heap->begin + void *field_obj_content_addr = + (void *)heap.begin + (*(void **)field_iter.cur_field - (void *)old_heap->begin); + // important, we calculate new_addr very carefully here, because objects may relocate to another memory chunk + void *new_addr = + heap.begin + + ((size_t *)get_forward_address(field_obj_content_addr) - (size_t *)old_heap->begin); + // update field reference to point to new_addr + // since, we want fields to point to an actual content, we need to add this extra content_offset + // because forward_address itself is a pointer to the object's header + size_t content_offset = get_header_size(get_type_row_ptr(field_obj_content_addr)); +#ifdef DEBUG_VERSION + if (!is_valid_heap_pointer((void *)(new_addr + content_offset))) { +# ifdef DEBUG_PRINT + fprintf(stderr, + "ur: incorrect pointer assignment: on object with id %d", + TO_DATA(get_object_content_ptr(it.current))->id); +# endif + exit(1); + } +#endif + *(void **)field_iter.cur_field = new_addr + content_offset; + } + } + heap_next_obj_iterator(&it); + } + // fix pointers from stack + scan_and_fix_region(old_heap, (void *)__gc_stack_top + 4, (void *)__gc_stack_bottom + 4); + + // fix pointers from extra_roots + scan_and_fix_region_roots(old_heap); + +#ifdef LAMA_ENV + assert((void *)&__stop_custom_data >= (void *)&__start_custom_data); + scan_and_fix_region(old_heap, (void *)&__start_custom_data, (void *)&__stop_custom_data); +#endif +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC update_references finished\n"); +#endif +} + +void physically_relocate (memory_chunk *old_heap) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC physically_relocate started\n"); +#endif + heap_iterator from_iter = heap_begin_iterator(); + + while (!heap_is_done_iterator(&from_iter)) { + void *obj = get_object_content_ptr(from_iter.current); + heap_iterator next_iter = from_iter; + heap_next_obj_iterator(&next_iter); + if (is_marked(obj)) { + // Move the object from its old location to its new location relative to + // the heap's (possibly new) location, 'to' points to future object header + size_t *to = heap.begin + ((size_t *)get_forward_address(obj) - (size_t *)old_heap->begin); + memmove(to, from_iter.current, obj_size_header_ptr(from_iter.current)); + unmark_object(get_object_content_ptr(to)); + } + from_iter = next_iter; + } +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "GC physically_relocate finished\n"); +#endif +} + +inline bool is_valid_heap_pointer (const size_t *p) { + return !UNBOXED(p) && (size_t)heap.begin <= (size_t)p && (size_t)p <= (size_t)heap.current; +} + +static inline bool is_valid_pointer (const size_t *p) { return !UNBOXED(p); } + +static inline void queue_enqueue (heap_iterator *tail_iter, void *obj) { + void *tail = tail_iter->current; + void *tail_content = get_object_content_ptr(tail); + set_forward_address(tail_content, (size_t)obj); + make_enqueued(obj); + heap_next_obj_iterator(tail_iter); +} + +static inline void *queue_dequeue (heap_iterator *head_iter) { + void *head = head_iter->current; + void *head_content = get_object_content_ptr(head); + void *value = (void *)get_forward_address(head_content); + make_dequeued(value); + heap_next_obj_iterator(head_iter); + return value; +} + +void mark (void *obj) { + if (!is_valid_heap_pointer(obj) || is_marked(obj)) { return; } + + // TL;DR: [q_head_iter, q_tail_iter) q_head_iter -- current dequeue's victim, q_tail_iter -- place for next enqueue + // in forward_address of corresponding element we store address of element to be removed after dequeue operation + heap_iterator q_head_iter = heap_begin_iterator(); + // iterator where we will write address of the element that is going to be enqueued + heap_iterator q_tail_iter = q_head_iter; + queue_enqueue(&q_tail_iter, obj); + + // invariant: queue contains only objects that are valid heap pointers (each corresponding to content of unmarked + // object) also each object is in queue only once + while (q_head_iter.current != q_tail_iter.current) { + // while the queue is non-empty + void *cur_obj = queue_dequeue(&q_head_iter); + mark_object(cur_obj); + void *header_ptr = get_obj_header_ptr(cur_obj); + for (obj_field_iterator ptr_field_it = ptr_field_begin_iterator(header_ptr); + !field_is_done_iterator(&ptr_field_it); + obj_next_ptr_field_iterator(&ptr_field_it)) { + void *field_value = *(void **)ptr_field_it.cur_field; + if (!is_valid_heap_pointer(field_value) || is_marked(field_value) + || is_enqueued(field_value)) { + continue; + } + // if we came to this point it must be true that field_value is unmarked and not currently in queue + // thus, we maintain the invariant + queue_enqueue(&q_tail_iter, field_value); + } + } +} + +void scan_extra_roots (void) { + for (int i = 0; i < extra_roots.current_free; ++i) { + // this dereferencing is safe since runtime is pushing correct pointers into extra_roots + mark(*extra_roots.roots[i]); + } +} + +#ifdef LAMA_ENV +void scan_global_area (void) { + // __start_custom_data is pointing to beginning of global area, thus all dereferencings are safe + for (size_t *ptr = (size_t *)&__start_custom_data; ptr < (size_t *)&__stop_custom_data; ++ptr) { + mark(*(void **)ptr); + } +} +#endif + +extern void gc_test_and_mark_root (size_t **root) { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, + "\troot = %p (%p), stack addresses: [%p, %p)\n", + root, + *root, + (void *)__gc_stack_top + 4, + (void *)__gc_stack_bottom); +#endif + mark((void *)*root); +} + +void __gc_init (void) { + __gc_stack_bottom = (size_t)__builtin_frame_address(1) + 4; + __init(); +} + +void __init (void) { + signal(SIGSEGV, handler); + size_t space_size = INIT_HEAP_SIZE * sizeof(size_t); + + srandom(time(NULL)); + + heap.begin = mmap( + NULL, space_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); + if (heap.begin == MAP_FAILED) { + perror("ERROR: __init: mmap failed\n"); + exit(1); + } + heap.end = heap.begin + INIT_HEAP_SIZE; + heap.size = INIT_HEAP_SIZE; + heap.current = heap.begin; + clear_extra_roots(); +} + +extern void __shutdown (void) { + munmap(heap.begin, heap.size); +#ifdef DEBUG_VERSION + cur_id = 0; +#endif + heap.begin = NULL; + heap.end = NULL; + heap.size = 0; + heap.current = NULL; + __gc_stack_top = 0; + __gc_stack_bottom = 0; +} + +void clear_extra_roots (void) { extra_roots.current_free = 0; } + +void push_extra_root (void **p) { + if (extra_roots.current_free >= MAX_EXTRA_ROOTS_NUMBER) { + perror("ERROR: push_extra_roots: extra_roots_pool overflow\n"); + exit(1); + } + assert(p >= (void **)__gc_stack_top || p < (void **)__gc_stack_bottom); + extra_roots.roots[extra_roots.current_free] = p; + extra_roots.current_free++; +} + +void pop_extra_root (void **p) { + if (extra_roots.current_free == 0) { + perror("ERROR: pop_extra_root: extra_roots are empty\n"); + exit(1); + } + extra_roots.current_free--; + if (extra_roots.roots[extra_roots.current_free] != p) { + perror("ERROR: pop_extra_root: stack invariant violation\n"); + exit(1); + } +} + +/* Functions for tests */ + +#if defined(DEBUG_VERSION) +size_t objects_snapshot (int *object_ids_buf, size_t object_ids_buf_size) { + size_t *ids_ptr = (size_t *)object_ids_buf; + size_t i = 0; + for (heap_iterator it = heap_begin_iterator(); + !heap_is_done_iterator(&it) && i < object_ids_buf_size; + heap_next_obj_iterator(&it), ++i) { + void *header_ptr = it.current; + data *d = TO_DATA(get_object_content_ptr(header_ptr)); + ids_ptr[i] = d->id; + } + return i; +} +#endif + +#ifdef DEBUG_VERSION +extern char *de_hash (int); + +void dump_heap () { + size_t i = 0; + for (heap_iterator it = heap_begin_iterator(); !heap_is_done_iterator(&it); + heap_next_obj_iterator(&it), ++i) { + void *header_ptr = it.current; + void *content_ptr = get_object_content_ptr(header_ptr); + data *d = TO_DATA(content_ptr); + lama_type t = get_type_header_ptr(header_ptr); + switch (t) { + case ARRAY: fprintf(stderr, "of kind ARRAY\n"); break; + case CLOSURE: fprintf(stderr, "of kind CLOSURE\n"); break; + case STRING: fprintf(stderr, "of kind STRING\n"); break; + case SEXP: + fprintf(stderr, "of kind SEXP with tag %s\n", de_hash(TO_SEXP(content_ptr)->tag)); + break; + } + } +} + +void set_stack (size_t stack_top, size_t stack_bottom) { + __gc_stack_top = stack_top; + __gc_stack_bottom = stack_bottom; +} + +void set_extra_roots (size_t extra_roots_size, void **extra_roots_ptr) { + memcpy(extra_roots.roots, extra_roots_ptr, MIN(sizeof(extra_roots.roots), extra_roots_size)); + clear_extra_roots(); +} + +#endif + +/* Utility functions */ + +size_t get_forward_address (void *obj) { + data *d = TO_DATA(obj); + return GET_FORWARD_ADDRESS(d->forward_address); +} + +void set_forward_address (void *obj, size_t addr) { + data *d = TO_DATA(obj); + SET_FORWARD_ADDRESS(d->forward_address, addr); +} + +bool is_marked (void *obj) { + data *d = TO_DATA(obj); + int mark_bit = GET_MARK_BIT(d->forward_address); + return mark_bit; +} + +void mark_object (void *obj) { + data *d = TO_DATA(obj); + SET_MARK_BIT(d->forward_address); +} + +void unmark_object (void *obj) { + data *d = TO_DATA(obj); + RESET_MARK_BIT(d->forward_address); +} + +bool is_enqueued (void *obj) { + data *d = TO_DATA(obj); + return IS_ENQUEUED(d->forward_address) != 0; +} + +void make_enqueued (void *obj) { + data *d = TO_DATA(obj); + MAKE_ENQUEUED(d->forward_address); +} + +void make_dequeued (void *obj) { + data *d = TO_DATA(obj); + MAKE_DEQUEUED(d->forward_address); +} + +heap_iterator heap_begin_iterator () { + heap_iterator it = {.current = heap.begin}; + return it; +} + +void heap_next_obj_iterator (heap_iterator *it) { + void *ptr = it->current; + size_t obj_size = obj_size_header_ptr(ptr); + // make sure we take alignment into consideration + obj_size = BYTES_TO_WORDS(obj_size); + it->current += obj_size; +} + +bool heap_is_done_iterator (heap_iterator *it) { return it->current >= heap.current; } + +lama_type get_type_row_ptr (void *ptr) { + data *data_ptr = TO_DATA(ptr); + return get_type_header_ptr(data_ptr); +} + +lama_type get_type_header_ptr (void *ptr) { + int *header = (int *)ptr; + switch (TAG(*header)) { + case ARRAY_TAG: return ARRAY; + case STRING_TAG: return STRING; + case CLOSURE_TAG: return CLOSURE; + case SEXP_TAG: return SEXP; + default: { +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "ERROR: get_type_header_ptr: unknown object header, cur_id=%d", cur_id); + raise(SIGINT); // only for debug purposes +#else +# ifdef FULL_INVARIANT_CHECKS +# ifdef DEBUG_PRINT + fprintf(stderr, + "ERROR: get_type_header_ptr: unknown object header, ptr is %p, tag %i, heap size is " + "%d cur_id=%d stack_top=%p stack_bot=%p ", + ptr, + TAG(*header), + heap.size, + cur_id, + (void *)__gc_stack_top, + (void *)__gc_stack_bottom); +# endif + FILE *heap_before_compaction = print_objects_traversal("dump_kill", 1); + fclose(heap_before_compaction); +# endif + kill(getpid(), SIGSEGV); +#endif + exit(1); + } + } +} + +size_t obj_size_row_ptr (void *ptr) { + data *data_ptr = TO_DATA(ptr); + return obj_size_header_ptr(data_ptr); +} + +size_t obj_size_header_ptr (void *ptr) { + int len = LEN(*(int *)ptr); + switch (get_type_header_ptr(ptr)) { + case ARRAY: return array_size(len); + case STRING: return string_size(len); + case CLOSURE: return closure_size(len); + case SEXP: return sexp_size(len); + default: { +#ifdef DEBUG_VERSION + fprintf(stderr, "ERROR: obj_size_header_ptr: unknown object header, cur_id=%d", cur_id); + raise(SIGINT); // only for debug purposes +#else + perror("ERROR: obj_size_header_ptr: unknown object header\n"); +#endif + exit(1); + } + } +} + +size_t array_size (size_t sz) { return get_header_size(ARRAY) + MEMBER_SIZE * sz; } + +size_t string_size (size_t len) { + // string should be null terminated + return get_header_size(STRING) + len + 1; +} + +size_t closure_size (size_t sz) { return get_header_size(CLOSURE) + MEMBER_SIZE * sz; } + +size_t sexp_size (size_t members) { return get_header_size(SEXP) + MEMBER_SIZE * (members + 1); } + +obj_field_iterator field_begin_iterator (void *obj) { + lama_type type = get_type_header_ptr(obj); + obj_field_iterator it = {.type = type, .obj_ptr = obj, .cur_field = get_object_content_ptr(obj)}; + switch (type) { + case STRING: { + it.cur_field = get_end_of_obj(it.obj_ptr); + break; + } + case CLOSURE: + case SEXP: { + it.cur_field += MEMBER_SIZE; + break; + } + default: break; + } + return it; +} + +obj_field_iterator ptr_field_begin_iterator (void *obj) { + obj_field_iterator it = field_begin_iterator(obj); + // corner case when obj has no fields + if (field_is_done_iterator(&it)) { return it; } + if (is_valid_pointer(*(size_t **)it.cur_field)) { return it; } + obj_next_ptr_field_iterator(&it); + return it; +} + +void obj_next_field_iterator (obj_field_iterator *it) { it->cur_field += MEMBER_SIZE; } + +void obj_next_ptr_field_iterator (obj_field_iterator *it) { + do { + obj_next_field_iterator(it); + } while (!field_is_done_iterator(it) && !is_valid_pointer(*(size_t **)it->cur_field)); +} + +bool field_is_done_iterator (obj_field_iterator *it) { + return it->cur_field >= get_end_of_obj(it->obj_ptr); +} + +void *get_obj_header_ptr (void *ptr) { + lama_type type = get_type_row_ptr(ptr); + return ptr - get_header_size(type); +} + +void *get_object_content_ptr (void *header_ptr) { + lama_type type = get_type_header_ptr(header_ptr); + return header_ptr + get_header_size(type); +} + +void *get_end_of_obj (void *header_ptr) { return header_ptr + obj_size_header_ptr(header_ptr); } + +size_t get_header_size (lama_type type) { + switch (type) { + case STRING: + case CLOSURE: + case ARRAY: + case SEXP: return DATA_HEADER_SZ; + default: perror("ERROR: get_header_size: unknown object type\n"); +#ifdef DEBUG_VERSION + raise(SIGINT); // only for debug purposes +#endif + exit(1); + } +} + +void *alloc_string (int len) { + data *obj = alloc(string_size(len)); + obj->data_header = STRING_TAG | (len << 3); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "%p, [STRING] tag=%zu\n", obj, TAG(obj->data_header)); +#endif +#ifdef DEBUG_VERSION + obj->id = cur_id; +#endif + obj->forward_address = 0; + return obj; +} + +void *alloc_array (int len) { + data *obj = alloc(array_size(len)); + obj->data_header = ARRAY_TAG | (len << 3); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "%p, [ARRAY] tag=%zu\n", obj, TAG(obj->data_header)); +#endif +#ifdef DEBUG_VERSION + obj->id = cur_id; +#endif + obj->forward_address = 0; + return obj; +} + +void *alloc_sexp (int members) { + sexp *obj = alloc(sexp_size(members)); + obj->data_header = SEXP_TAG | (members << 3); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "%p, SEXP tag=%zu\n", obj, TAG(obj->data_header)); +#endif +#ifdef DEBUG_VERSION + obj->id = cur_id; +#endif + obj->forward_address = 0; + obj->tag = 0; + return obj; +} + +void *alloc_closure (int captured) { + + data *obj = alloc(closure_size(captured)); + obj->data_header = CLOSURE_TAG | (captured << 3); +#if defined(DEBUG_VERSION) && defined(DEBUG_PRINT) + fprintf(stderr, "%p, [CLOSURE] tag=%zu\n", obj, TAG(obj->data_header)); +#endif +#ifdef DEBUG_VERSION + obj->id = cur_id; +#endif + obj->forward_address = 0; + return obj; +} diff --git a/runtime/gc.h b/runtime/gc.h new file mode 100644 index 000000000..627577c15 --- /dev/null +++ b/runtime/gc.h @@ -0,0 +1,251 @@ +// ============================================================================ +// GC +// ============================================================================ +// This is an implementation of a compactifying garbage collection algorithm. +// GC algorithm itself consists of two major stages: +// 1. Marking roots +// 2. Compacting stage +// Compacting is implemented in a very similar fashion to LISP2 algorithm, +// which is well-known. +// Most important pieces of code to discover to understand how everything works: +// - void *gc_alloc (size_t): this function is basically called whenever we are +// not able to allocate memory on the existing heap via simple bump allocator. +// - mark_phase(): this function will tell you everything you need to know +// about marking. I would also recommend to pay attention to the fact that +// marking is implemented without usage of any additional memory. Already +// allocated space is sufficient (for details see 'void mark (void *obj)'). +// - void compact_phase (size_t additional_size): the whole compaction phase +// can be understood by looking at this piece of code plus couple of other +// functions used in there. It is basically an implementation of LISP2. + +#ifndef __LAMA_GC__ +#define __LAMA_GC__ + +#include "runtime_common.h" + +#define GET_MARK_BIT(x) (((int)(x)) & 1) +#define SET_MARK_BIT(x) (x = (((int)(x)) | 1)) +#define IS_ENQUEUED(x) (((int)(x)) & 2) +#define MAKE_ENQUEUED(x) (x = (((int)(x)) | 2)) +#define MAKE_DEQUEUED(x) (x = (((int)(x)) & (~2))) +#define RESET_MARK_BIT(x) (x = (((int)(x)) & (~1))) +// since last 2 bits are used for mark-bit and enqueued-bit and due to correct +// alignment we can expect that last 2 bits don't influence address (they +// should always be zero) +#define GET_FORWARD_ADDRESS(x) (((size_t)(x)) & (~3)) +// take the last two bits as they are and make all others zero +#define SET_FORWARD_ADDRESS(x, addr) (x = ((x & 3) | ((int)(addr)))) +// if heap is full after gc shows in how many times it has to be extended +#define EXTRA_ROOM_HEAP_COEFFICIENT 2 +#ifdef DEBUG_VERSION +# define MINIMUM_HEAP_CAPACITY (8) +#else +# define MINIMUM_HEAP_CAPACITY (1 << 2) +#endif + +#include +#include + +typedef enum { ARRAY, CLOSURE, STRING, SEXP } lama_type; + +typedef struct { + size_t *current; +} heap_iterator; + +typedef struct { + lama_type type; // holds type of object, which fields we are iterating over + void *obj_ptr; // place to store a pointer to the object header + void *cur_field; +} obj_field_iterator; + +// Memory pool for linear memory allocation +typedef struct { + size_t *begin; + size_t *end; + size_t *current; + size_t size; +} memory_chunk; + + +// the only GC-related function that should be exposed, others are useful for tests and internal implementation +// allocates object of the given size on the heap +void *alloc(size_t); +// takes number of words as a parameter +void *gc_alloc(size_t); +// takes number of words as a parameter +void *gc_alloc_on_existing_heap(size_t); + +// specific for mark-and-compact_phase gc +void mark (void *obj); +void mark_phase (void); +// marks each pointer from extra roots +void scan_extra_roots (void); +#ifdef LAMA_ENV +// marks each valid pointer from global area +void scan_global_area (void); +#endif +// takes number of words that are required to be allocated somewhere on the heap +void compact_phase (size_t additional_size); +// specific for Lisp-2 algorithm +size_t compute_locations (); +void update_references (memory_chunk *); +void physically_relocate (memory_chunk *); + + +// ============================================================================ +// GC extra roots +// ============================================================================ +// Lama's program stack is continuous, i.e. it never interleaves with runtime +// function's activation records. But some valid Lama's pointers can escape +// into runtime. Those values (theirs stack addresses) has to be registered in +// an auxiliary data structure called `extra_roots_pool`. +// extra_roots_pool is a simple LIFO stack. During `pop` it compares that pop's +// argument is equal to the current stack top. +#define MAX_EXTRA_ROOTS_NUMBER 32 + +typedef struct { + int current_free; + void **roots[MAX_EXTRA_ROOTS_NUMBER]; +} extra_roots_pool; + +void clear_extra_roots (void); +void push_extra_root (void **p); +void pop_extra_root (void **p); + + +// ============================================================================ +// Implemented in GASM: see gc_runtime.s +// ============================================================================ +// MANDATORY TO CALL BEFORE ANY INTERACTION WITH GC (apart from cases where we +// are working with virtual stack as happens in tests) +void __gc_init (void); + +// should be called before interaction with GC in case of using in tests with +// virtual stack, otherwise it is automatically invoked by `__gc_init` +void __init (void); + +// mostly useful for tests but basically you want to call this in case you want +// to deallocate all object allocated via GC +extern void __shutdown (void); + + +// ============================================================================ +// invoked from GASM: see gc_runtime.s +// ============================================================================ +extern void gc_test_and_mark_root (size_t **root); +bool is_valid_heap_pointer (const size_t *); +static inline bool is_valid_pointer (const size_t *); + + +// ============================================================================ +// Auxiliary functions for tests +// ============================================================================ +#if defined(DEBUG_VERSION) +// makes a snapshot of current objects in heap (both alive and dead), writes these ids to object_ids_buf, +// returns number of ids dumped +// object_ids_buf is pointer to area preallocated by user for dumping ids of objects in heap +// object_ids_buf_size is in WORDS, NOT BYTES +size_t objects_snapshot (int *object_ids_buf, size_t object_ids_buf_size); +#endif + + +#ifdef DEBUG_VERSION +// essential function to mock program stack +void set_stack (size_t stack_top, size_t stack_bottom); + +// function to mock extra roots (Lama specific) +void set_extra_roots (size_t extra_roots_size, void **extra_roots_ptr); +#endif + + +// ============================================================================ +// Utility functions +// ============================================================================ +// accepts pointer to the start of the region and to the end of the region +// scans it and if it meets a pointer, it should be modified in according to forward address +void scan_and_fix_region (memory_chunk *old_heap, void *start, void *end); + +// takes a pointer to an object content as an argument, returns forwarding address +size_t get_forward_address (void *obj); + +// takes a pointer to an object content as an argument, sets forwarding address to value 'addr' +void set_forward_address (void *obj, size_t addr); + +// takes a pointer to an object content as an argument, returns whether this object was marked as live +bool is_marked (void *obj); + +// takes a pointer to an object content as an argument, marks the object as live +void mark_object (void *obj); + +// takes a pointer to an object content as an argument, marks the object as dead +void unmark_object (void *obj); + +// takes a pointer to an object content as an argument, returns whether this object was enqueued to the queue (which is used in mark phase) +bool is_enqueued (void *obj); + +// takes a pointer to an object content as an argument, marks object as enqueued +void make_enqueued (void *obj); + +// takes a pointer to an object content as an argument, unmarks object as enqueued +void make_dequeued (void *obj); + +// returns iterator to an object with the lowest address +heap_iterator heap_begin_iterator (); +void heap_next_obj_iterator (heap_iterator *it); +bool heap_is_done_iterator (heap_iterator *it); + +// returns correct type when pointer to actual data is passed (header is excluded) +lama_type get_type_row_ptr (void *ptr); +// returns correct type when pointer to an object header is passed +lama_type get_type_header_ptr (void *ptr); + +// returns correct object size (together with header) of an object, ptr is pointer to an actual data is passed (header is excluded) +size_t obj_size_row_ptr (void *ptr); +// returns correct object size (together with header) of an object, ptr is pointer to an object header +size_t obj_size_header_ptr (void *ptr); + +// returns total padding size that we need to store given object type +size_t get_header_size (lama_type type); + +// returns number of bytes that are required to allocate array with 'sz' elements (header included) +size_t array_size (size_t sz); + +// returns number of bytes that are required to allocate string of length 'l' (header included) +size_t string_size (size_t len); + +// returns number of bytes that are required to allocate closure with 'sz-1' captured values (header included) +size_t closure_size (size_t sz); + +// returns number of bytes that are required to allocate s-expression with 'members' fields (header included) +size_t sexp_size (size_t members); + +// returns an iterator over object fields, obj is ptr to object header +// (in case of s-exp, it is mandatory that obj ptr is very beginning of the object, +// considering that now we store two versions of header in there) +obj_field_iterator field_begin_iterator (void *obj); + +// returns an iterator over object fields which are actual pointers, obj is ptr to object header +// (in case of s-exp, it is mandatory that obj ptr is very beginning of the object, +// considering that now we store two versions of header in there) +obj_field_iterator ptr_field_begin_iterator (void *obj); + +// moves the iterator to next object field +void obj_next_field_iterator (obj_field_iterator *it); + +// moves the iterator to the next object field which is an actual pointer +void obj_next_ptr_field_iterator (obj_field_iterator *it); + +// returns if we are done iterating over fields of the object +bool field_is_done_iterator (obj_field_iterator *it); + +// ptr is pointer to the actual object content, returns pointer to the very beginning of the object (header) +void *get_obj_header_ptr (void *ptr); +void *get_object_content_ptr (void *header_ptr); +void *get_end_of_obj (void *header_ptr); + +void *alloc_string (int len); +void *alloc_array (int len); +void *alloc_sexp (int members); +void *alloc_closure (int captured); + +#endif diff --git a/runtime/gc_runtime.s b/runtime/gc_runtime.s deleted file mode 100644 index 5abc9d72e..000000000 --- a/runtime/gc_runtime.s +++ /dev/null @@ -1,116 +0,0 @@ - .data -printf_format: .string "Stack root: %lx\n" -printf_format2: .string "BOT: %lx\n" -printf_format3: .string "TOP: %lx\n" -printf_format4: .string "EAX: %lx\n" -printf_format5: .string "LOL\n" -__gc_stack_bottom: .long 0 -__gc_stack_top: .long 0 - - .globl __pre_gc - .globl __post_gc - .globl __gc_init - .globl __gc_root_scan_stack - .globl __gc_stack_top - .globl __gc_stack_bottom - .extern init_pool - .extern gc_test_and_copy_root - .text - -__gc_init: movl %ebp, __gc_stack_bottom - addl $4, __gc_stack_bottom - call __init - ret - - // if __gc_stack_top is equal to 0 - // then set __gc_stack_top to %ebp - // else return -__pre_gc: - pushl %eax - movl __gc_stack_top, %eax - cmpl $0, %eax - jne __pre_gc_2 - movl %ebp, %eax - // addl $8, %eax - movl %eax, __gc_stack_top -__pre_gc_2: - popl %eax - ret - - // if __gc_stack_top has been set by the caller - // (i.e. it is equal to its %ebp) - // then set __gc_stack_top to 0 - // else return -__post_gc: - pushl %eax - movl __gc_stack_top, %eax - cmpl %eax, %ebp - jnz __post_gc2 - movl $0, __gc_stack_top -__post_gc2: - popl %eax - ret - - // Scan stack for roots - // strting from __gc_stack_top - // till __gc_stack_bottom -__gc_root_scan_stack: - pushl %ebp - movl %esp, %ebp - pushl %ebx - pushl %edx - movl __gc_stack_top, %eax - jmp next - -loop: - movl (%eax), %ebx - - // check that it is not a pointer to code section - // i.e. the following is not true: - // __executable_start <= (%eax) <= __etext -check11: - leal __executable_start, %edx - cmpl %ebx, %edx - jna check12 - jmp check21 - -check12: - leal __etext, %edx - cmpl %ebx, %edx - jnb next - - // check that it is not a pointer into the program stack - // i.e. the following is not true: - // __gc_stack_bottom <= (%eax) <= __gc_stack_top -check21: - cmpl %ebx, __gc_stack_top - jna check22 - jmp loop2 - -check22: - cmpl %ebx, __gc_stack_bottom - jnb next - - // check if it a valid pointer - // i.e. the lastest bit is set to zero -loop2: - andl $0x00000001, %ebx - jnz next -gc_run_t: - pushl %eax - pushl %eax - call gc_test_and_copy_root - addl $4, %esp - popl %eax - -next: - addl $4, %eax - cmpl %eax, __gc_stack_bottom - jne loop -returnn: - movl $0, %eax - popl %edx - popl %ebx - movl %ebp, %esp - popl %ebp - ret diff --git a/runtime/negative_scenarios/expected/extra_roots_empty_pop_neg.err b/runtime/negative_scenarios/expected/extra_roots_empty_pop_neg.err new file mode 100644 index 000000000..9b9f69fec --- /dev/null +++ b/runtime/negative_scenarios/expected/extra_roots_empty_pop_neg.err @@ -0,0 +1,2 @@ +ERROR: pop_extra_root: extra_roots are empty +: Success diff --git a/runtime/negative_scenarios/expected/extra_roots_overflow_neg.err b/runtime/negative_scenarios/expected/extra_roots_overflow_neg.err new file mode 100644 index 000000000..24df3dbbe --- /dev/null +++ b/runtime/negative_scenarios/expected/extra_roots_overflow_neg.err @@ -0,0 +1,2 @@ +ERROR: push_extra_roots: extra_roots_pool overflow +: Success diff --git a/runtime/negative_scenarios/expected/extra_roots_pop_mismatch_neg.err b/runtime/negative_scenarios/expected/extra_roots_pop_mismatch_neg.err new file mode 100644 index 000000000..950d19495 --- /dev/null +++ b/runtime/negative_scenarios/expected/extra_roots_pop_mismatch_neg.err @@ -0,0 +1,2 @@ +ERROR: pop_extra_root: stack invariant violation +: Success diff --git a/runtime/negative_scenarios/extra_roots_empty_pop_neg.c b/runtime/negative_scenarios/extra_roots_empty_pop_neg.c new file mode 100644 index 000000000..ada20b6a9 --- /dev/null +++ b/runtime/negative_scenarios/extra_roots_empty_pop_neg.c @@ -0,0 +1,5 @@ +#include "../gc.h" + +#include + +int main () { pop_extra_root((void **)NULL); } \ No newline at end of file diff --git a/runtime/negative_scenarios/extra_roots_overflow_neg.c b/runtime/negative_scenarios/extra_roots_overflow_neg.c new file mode 100644 index 000000000..f03e2e98d --- /dev/null +++ b/runtime/negative_scenarios/extra_roots_overflow_neg.c @@ -0,0 +1,7 @@ +#include "../gc.h" + +#include + +int main () { + for (size_t i = 0; i < MAX_EXTRA_ROOTS_NUMBER + 1; ++i) { push_extra_root(NULL); } +} \ No newline at end of file diff --git a/runtime/negative_scenarios/extra_roots_pop_mismatch_neg.c b/runtime/negative_scenarios/extra_roots_pop_mismatch_neg.c new file mode 100644 index 000000000..8e55a8bf1 --- /dev/null +++ b/runtime/negative_scenarios/extra_roots_pop_mismatch_neg.c @@ -0,0 +1,6 @@ +#include "../gc.h" + +int main () { + push_extra_root(NULL); + pop_extra_root((void **)239); +} \ No newline at end of file diff --git a/runtime/runtime.c b/runtime/runtime.c index 5f799c0eb..c74962d53 100644 --- a/runtime/runtime.c +++ b/runtime/runtime.c @@ -1,221 +1,105 @@ /* Runtime library */ -# define _GNU_SOURCE 1 +#define _GNU_SOURCE 1 -# include "runtime.h" +#include "runtime.h" -# define __ENABLE_GC__ -# ifndef __ENABLE_GC__ -# define alloc malloc -# endif - -//# define DEBUG_PRINT 1 - -#ifdef DEBUG_PRINT -int indent = 0; -void print_indent (void) { - for (int i = 0; i < indent; i++) printf (" "); - printf("| "); -} -#endif +#include "gc.h" +#include "runtime_common.h" extern size_t __gc_stack_top, __gc_stack_bottom; -/* GC pool structure and data; declared here in order to allow debug print */ -typedef struct { - size_t * begin; - size_t * end; - size_t * current; - size_t size; -} pool; - -static pool from_space; -static pool to_space; -size_t *current; -/* end */ - -# ifdef __ENABLE_GC__ - -/* GC extern invariant for built-in functions */ -extern void __pre_gc (); -extern void __post_gc (); - -# else - -# define __pre_gc __pre_gc_subst -# define __post_gc __post_gc_subst - -void __pre_gc_subst () {} -void __post_gc_subst () {} - -# endif -/* end */ - -# define STRING_TAG 0x00000001 -# define ARRAY_TAG 0x00000003 -# define SEXP_TAG 0x00000005 -# define CLOSURE_TAG 0x00000007 -# define UNBOXED_TAG 0x00000009 // Not actually a tag; used to return from LkindOf - -# define LEN(x) ((x & 0xFFFFFFF8) >> 3) -# define TAG(x) (x & 0x00000007) - -# define TO_DATA(x) ((data*)((char*)(x)-sizeof(int))) -# define TO_SEXP(x) ((sexp*)((char*)(x)-2*sizeof(int))) -# ifdef DEBUG_PRINT // GET_SEXP_TAG is necessary for printing from space -# define GET_SEXP_TAG(x) (LEN(x)) -#endif - -# define UNBOXED(x) (((int) (x)) & 0x0001) -# define UNBOX(x) (((int) (x)) >> 1) -# define BOX(x) ((((int) (x)) << 1) | 0x0001) - -/* GC extra roots */ -# define MAX_EXTRA_ROOTS_NUMBER 32 -typedef struct { - int current_free; - void ** roots[MAX_EXTRA_ROOTS_NUMBER]; -} extra_roots_pool; - -static extra_roots_pool extra_roots; - -void clear_extra_roots (void) { - extra_roots.current_free = 0; -} - -void push_extra_root (void ** p) { -# ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("push_extra_root %p %p\n", p, &p); fflush (stdout); -# endif - if (extra_roots.current_free >= MAX_EXTRA_ROOTS_NUMBER) { - perror ("ERROR: push_extra_roots: extra_roots_pool overflow"); - exit (1); - } - extra_roots.roots[extra_roots.current_free] = p; - extra_roots.current_free++; -# ifdef DEBUG_PRINT - indent--; -# endif -} - -void pop_extra_root (void ** p) { -# ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("pop_extra_root %p %p\n", p, &p); fflush (stdout); -# endif - if (extra_roots.current_free == 0) { - perror ("ERROR: pop_extra_root: extra_roots are empty"); - exit (1); - } - extra_roots.current_free--; - if (extra_roots.roots[extra_roots.current_free] != p) { -# ifdef DEBUG_PRINT - print_indent (); - printf ("%i %p %p", extra_roots.current_free, - extra_roots.roots[extra_roots.current_free], p); - fflush (stdout); -# endif - perror ("ERROR: pop_extra_root: stack invariant violation"); - exit (1); - } -# ifdef DEBUG_PRINT - indent--; -# endif -} +#define PRE_GC() \ + bool flag = false; \ + flag = __gc_stack_top == 0; \ + if (flag) { __gc_stack_top = (size_t)__builtin_frame_address(0); } \ + assert(__gc_stack_top != 0); \ + assert(__builtin_frame_address(0) <= (void *)__gc_stack_top); -/* end */ +#define POST_GC() \ + assert(__builtin_frame_address(0) <= (void *)__gc_stack_top); \ + if (flag) { __gc_stack_top = 0; } static void vfailure (char *s, va_list args) { - fflush (stdout); - fprintf (stderr, "*** FAILURE: "); - vfprintf (stderr, s, args); // vprintf (char *, va_list) <-> printf (char *, ...) - exit (255); + fprintf(stderr, "*** FAILURE: "); + vfprintf(stderr, s, args); // vprintf (char *, va_list) <-> printf (char *, ...) + exit(255); } void failure (char *s, ...) { va_list args; - va_start (args, s); - vfailure (s, args); + va_start(args, s); + vfailure(s, args); } void Lassert (void *f, char *s, ...) { if (!UNBOX(f)) { va_list args; - va_start (args, s); - vfailure (s, args); + va_start(args, s); + vfailure(s, args); } } -# define ASSERT_BOXED(memo, x) \ - do if (UNBOXED(x)) failure ("boxed value expected in %s\n", memo); while (0) -# define ASSERT_UNBOXED(memo, x) \ - do if (!UNBOXED(x)) failure ("unboxed value expected in %s\n", memo); while (0) -# define ASSERT_STRING(memo, x) \ - do if (!UNBOXED(x) && TAG(TO_DATA(x)->tag) \ - != STRING_TAG) failure ("string value expected in %s\n", memo); while (0) +#define ASSERT_BOXED(memo, x) \ + do \ + if (UNBOXED(x)) failure("boxed value expected in %s\n", memo); \ + while (0) +#define ASSERT_UNBOXED(memo, x) \ + do \ + if (!UNBOXED(x)) failure("unboxed value expected in %s\n", memo); \ + while (0) +#define ASSERT_STRING(memo, x) \ + do \ + if (!UNBOXED(x) && TAG(TO_DATA(x)->data_header) != STRING_TAG) \ + failure("string value expected in %s\n", memo); \ + while (0) -typedef struct { - int tag; - char contents[0]; -} data; - -typedef struct { - int tag; - data contents; -} sexp; - -extern void* alloc (size_t); -extern void* Bsexp (int n, ...); -extern int LtagHash (char*); +extern void *Bsexp (int n, ...); +extern int LtagHash (char *); void *global_sysargs; -// Gets a raw tag +// Gets a raw data_header extern int LkindOf (void *p) { if (UNBOXED(p)) return UNBOXED_TAG; - - return TAG(TO_DATA(p)->tag); + + return TAG(TO_DATA(p)->data_header); } -// Compare sexprs tags +// Compare s-exprs tags extern int LcompareTags (void *p, void *q) { data *pd, *qd; - - ASSERT_BOXED ("compareTags, 0", p); - ASSERT_BOXED ("compareTags, 1", q); + + ASSERT_BOXED("compareTags, 0", p); + ASSERT_BOXED("compareTags, 1", q); pd = TO_DATA(p); qd = TO_DATA(q); - if (TAG(pd->tag) == SEXP_TAG && TAG(qd->tag) == SEXP_TAG) { - return - #ifndef DEBUG_PRINT - BOX((TO_SEXP(p)->tag) - (TO_SEXP(q)->tag)); - #else - BOX((GET_SEXP_TAG(TO_SEXP(p)->tag)) - (GET_SEXP_TAG(TO_SEXP(p)->tag))); - #endif + if (TAG(pd->data_header) == SEXP_TAG && TAG(qd->data_header) == SEXP_TAG) { + return BOX((TO_SEXP(p)->tag) - (TO_SEXP(q)->tag)); + } else { + failure("not a sexpr in compareTags: %d, %d\n", TAG(pd->data_header), TAG(qd->data_header)); } - else failure ("not a sexpr in compareTags: %d, %d\n", TAG(pd->tag), TAG(qd->tag)); - - return 0; // never happens + // dead code + return 0; } // Functional synonym for built-in operator ":"; -void* Ls__Infix_58 (void *p, void *q) { +void *Ls__Infix_58 (void *p, void *q) { void *res; - - __pre_gc (); + + PRE_GC(); push_extra_root(&p); push_extra_root(&q); - res = Bsexp (BOX(3), p, q, LtagHash ("cons")); //BOX(848787)); + res = Bsexp(BOX(3), p, q, LtagHash("cons")); //BOX(848787)); pop_extra_root(&q); pop_extra_root(&p); - __post_gc (); + POST_GC(); return res; } @@ -237,9 +121,7 @@ int Ls__Infix_3838 (void *p, void *q) { } // Functional synonym for built-in operator "=="; -int Ls__Infix_6161 (void *p, void *q) { - return BOX(p == q); -} +int Ls__Infix_6161 (void *p, void *q) { return BOX(p == q); } // Functional synonym for built-in operator "!="; int Ls__Infix_3361 (void *p, void *q) { @@ -325,118 +207,96 @@ int Ls__Infix_37 (void *p, void *q) { } extern int Llength (void *p) { - data *a = (data*) BOX (NULL); - ASSERT_BOXED(".length", p); - - a = TO_DATA(p); - return BOX(LEN(a->tag)); + return BOX(LEN(TO_DATA(p)->data_header)); } -static char* chars = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'"; +static char *chars = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'"; -extern char* de_hash (int); +extern char *de_hash (int); extern int LtagHash (char *s) { char *p; - int h = 0, limit = 0; - + int h = 0, limit = 0; + p = s; while (*p && limit++ <= 4) { - char *q = chars; - int pos = 0; - - for (; *q && *q != *p; q++, pos++); + char *q = chars; + int pos = 0; - if (*q) h = (h << 6) | pos; - else failure ("tagHash: character not found: %c\n", *p); + for (; *q && *q != *p; q++, pos++) + ; + + if (*q) h = (h << 6) | pos; + else failure("tagHash: character not found: %c\n", *p); p++; } - if (strncmp (s, de_hash (h), 5) != 0) { - failure ("%s <-> %s\n", s, de_hash(h)); - } - + if (strncmp(s, de_hash(h), 5) != 0) { failure("%s <-> %s\n", s, de_hash(h)); } + return BOX(h); } -char* de_hash (int n) { - // static char *chars = (char*) BOX (NULL); - static char buf[6] = {0,0,0,0,0,0}; - char *p = (char *) BOX (NULL); - p = &buf[5]; +char *de_hash (int n) { + static char buf[6] = {0, 0, 0, 0, 0, 0}; + char *p = (char *)BOX(NULL); + p = &buf[5]; -#ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("de_hash: tag: %d\n", n); fflush (stdout); -#endif - *p-- = 0; while (n != 0) { -#ifdef DEBUG_PRINT - print_indent (); - printf ("char: %c\n", chars [n & 0x003F]); fflush (stdout); -#endif - *p-- = chars [n & 0x003F]; - n = n >> 6; + *p-- = chars[n & 0x003F]; + n = n >> 6; } -#ifdef DEBUG_PRINT - indent--; -#endif - return ++p; } typedef struct { char *contents; - int ptr; - int len; + int ptr; + int len; } StringBuf; static StringBuf stringBuf; -# define STRINGBUF_INIT 128 +#define STRINGBUF_INIT 128 static void createStringBuf () { - stringBuf.contents = (char*) malloc (STRINGBUF_INIT); + stringBuf.contents = (char *)malloc(STRINGBUF_INIT); memset(stringBuf.contents, 0, STRINGBUF_INIT); - stringBuf.ptr = 0; - stringBuf.len = STRINGBUF_INIT; + stringBuf.ptr = 0; + stringBuf.len = STRINGBUF_INIT; } -static void deleteStringBuf () { - free (stringBuf.contents); -} +static void deleteStringBuf () { free(stringBuf.contents); } static void extendStringBuf () { int len = stringBuf.len << 1; - stringBuf.contents = (char*) realloc (stringBuf.contents, len); + stringBuf.contents = (char *)realloc(stringBuf.contents, len); stringBuf.len = len; } static void vprintStringBuf (char *fmt, va_list args) { - int written = 0, - rest = 0; - char *buf = (char*) BOX(NULL); + int written = 0, rest = 0; + char *buf = (char *)BOX(NULL); va_list vsnargs; - - again: - va_copy (vsnargs, args); - - buf = &stringBuf.contents[stringBuf.ptr]; - rest = stringBuf.len - stringBuf.ptr; - written = vsnprintf (buf, rest, fmt, vsnargs); +again: + va_copy(vsnargs, args); + + buf = &stringBuf.contents[stringBuf.ptr]; + rest = stringBuf.len - stringBuf.ptr; + + written = vsnprintf(buf, rest, fmt, vsnargs); va_end(vsnargs); - + if (written >= rest) { - extendStringBuf (); + extendStringBuf(); goto again; } @@ -446,142 +306,122 @@ static void vprintStringBuf (char *fmt, va_list args) { static void printStringBuf (char *fmt, ...) { va_list args; - va_start (args, fmt); - vprintStringBuf (fmt, args); + va_start(args, fmt); + vprintStringBuf(fmt, args); } -int is_valid_heap_pointer (void *p); - static void printValue (void *p) { - data *a = (data*) BOX(NULL); - int i = BOX(0); - if (UNBOXED(p)) printStringBuf ("%d", UNBOX(p)); - else { - if (! is_valid_heap_pointer(p)) { - printStringBuf ("0x%x", p); + data *a = (data *)BOX(NULL); + int i = BOX(0); + if (UNBOXED(p)) { + printStringBuf("%d", UNBOX(p)); + } else { + if (!is_valid_heap_pointer(p)) { + printStringBuf("0x%x", p); return; } - + a = TO_DATA(p); - switch (TAG(a->tag)) { - case STRING_TAG: - printStringBuf ("\"%s\"", a->contents); - break; + switch (TAG(a->data_header)) { + case STRING_TAG: printStringBuf("\"%s\"", a->contents); break; - case CLOSURE_TAG: - printStringBuf ("tag); i++) { - if (i) printValue ((void*)((int*) a->contents)[i]); - else printStringBuf ("0x%x", (void*)((int*) a->contents)[i]); - - if (i != LEN(a->tag) - 1) printStringBuf (", "); - } - printStringBuf (">"); - break; - - case ARRAY_TAG: - printStringBuf ("["); - for (i = 0; i < LEN(a->tag); i++) { - printValue ((void*)((int*) a->contents)[i]); - if (i != LEN(a->tag) - 1) printStringBuf (", "); - } - printStringBuf ("]"); - break; - - case SEXP_TAG: { -#ifndef DEBUG_PRINT - char * tag = de_hash (TO_SEXP(p)->tag); -#else - char * tag = de_hash (GET_SEXP_TAG(TO_SEXP(p)->tag)); -#endif - - if (strcmp (tag, "cons") == 0) { - data *b = a; - - printStringBuf ("{"); - - while (LEN(a->tag)) { - printValue ((void*)((int*) b->contents)[0]); - b = (data*)((int*) b->contents)[1]; - if (! UNBOXED(b)) { - printStringBuf (", "); - b = TO_DATA(b); - } - else break; - } - - printStringBuf ("}"); + case CLOSURE_TAG: { + + printStringBuf("data_header); i++) { + if (i) printValue((void *)((int *)a->contents)[i]); + else printStringBuf("0x%x", (void *)((int *)a->contents)[i]); + if (i != LEN(a->data_header) - 1) printStringBuf(", "); + } + printStringBuf(">"); + break; } - else { - printStringBuf ("%s", tag); - if (LEN(a->tag)) { - printStringBuf (" ("); - for (i = 0; i < LEN(a->tag); i++) { - printValue ((void*)((int*) a->contents)[i]); - if (i != LEN(a->tag) - 1) printStringBuf (", "); - } - printStringBuf (")"); - } + case ARRAY_TAG: { + printStringBuf("["); + for (i = 0; i < LEN(a->data_header); i++) { + printValue((void *)((int *)a->contents)[i]); + if (i != LEN(a->data_header) - 1) printStringBuf(", "); + } + printStringBuf("]"); + break; } - } - break; - default: - printStringBuf ("*** invalid tag: 0x%x ***", TAG(a->tag)); + case SEXP_TAG: { + sexp *sa = (sexp *)a; + char *tag = de_hash(sa->tag); + if (strcmp(tag, "cons") == 0) { + sexp *sb = sa; + printStringBuf("{"); + while (LEN(sb->data_header)) { + printValue((void *)((int *)sb->contents)[0]); + int list_next = ((int *)sb->contents)[1]; + if (!UNBOXED(list_next)) { + printStringBuf(", "); + sb = TO_SEXP(list_next); + } else break; + } + printStringBuf("}"); + } else { + printStringBuf("%s", tag); + sexp *sexp_a = (sexp *)a; + if (LEN(a->data_header)) { + printStringBuf(" ("); + for (i = 0; i < LEN(sexp_a->data_header); i++) { + printValue((void *)((int *)sexp_a->contents)[i]); + if (i != LEN(sexp_a->data_header) - 1) printStringBuf(", "); + } + printStringBuf(")"); + } + } + } break; + + default: printStringBuf("*** invalid data_header: 0x%x ***", TAG(a->data_header)); } } } static void stringcat (void *p) { data *a; - int i; - - if (UNBOXED(p)) ; + int i; + + if (UNBOXED(p)) + ; else { a = TO_DATA(p); - switch (TAG(a->tag)) { - case STRING_TAG: - printStringBuf ("%s", a->contents); - break; - - case SEXP_TAG: { -#ifndef DEBUG_PRINT - char * tag = de_hash (TO_SEXP(p)->tag); -#else - char * tag = de_hash (GET_SEXP_TAG(TO_SEXP(p)->tag)); -#endif - if (strcmp (tag, "cons") == 0) { - data *b = a; - - while (LEN(a->tag)) { - stringcat ((void*)((int*) b->contents)[0]); - b = (data*)((int*) b->contents)[1]; - if (! UNBOXED(b)) { - b = TO_DATA(b); - } - else break; - } - } - else printStringBuf ("*** non-list tag: %s ***", tag); - } - break; + switch (TAG(a->data_header)) { + case STRING_TAG: printStringBuf("%s", a->contents); break; + + case SEXP_TAG: { + char *tag = de_hash(TO_SEXP(p)->tag); + + if (strcmp(tag, "cons") == 0) { + sexp *b = (sexp *)a; - default: - printStringBuf ("*** invalid tag: 0x%x ***", TAG(a->tag)); + while (LEN(b->data_header)) { + stringcat((void *)((int *)b->contents)[0]); + int next_b = ((int *)b->contents)[1]; + if (!UNBOXED(next_b)) { + b = TO_SEXP(next_b); + } else break; + } + } else printStringBuf("*** non-list data_header: %s ***", tag); + } break; + + default: printStringBuf("*** invalid data_header: 0x%x ***", TAG(a->data_header)); } } } extern int Luppercase (void *v) { ASSERT_UNBOXED("Luppercase:1", v); - return BOX(toupper ((int) UNBOX(v))); + return BOX(toupper((int)UNBOX(v))); } extern int Llowercase (void *v) { ASSERT_UNBOXED("Llowercase:1", v); - return BOX(tolower ((int) UNBOX(v))); + return BOX(tolower((int)UNBOX(v))); } extern int LmatchSubString (char *subj, char *patt, int pos) { @@ -591,834 +431,725 @@ extern int LmatchSubString (char *subj, char *patt, int pos) { ASSERT_STRING("matchSubString:1", subj); ASSERT_STRING("matchSubString:2", patt); ASSERT_UNBOXED("matchSubString:3", pos); - - n = LEN (p->tag); - if (n + UNBOX(pos) > LEN(s->tag)) - return BOX(0); - - return BOX(strncmp (subj + UNBOX(pos), patt, n) == 0); + n = LEN(p->data_header); + + if (n + UNBOX(pos) > LEN(s->data_header)) return BOX(0); + + return BOX(strncmp(subj + UNBOX(pos), patt, n) == 0); } -extern void* Lsubstring (void *subj, int p, int l) { - data *d = TO_DATA(subj); - int pp = UNBOX (p), ll = UNBOX (l); +extern void *Lsubstring (void *subj, int p, int l) { + data *d = TO_DATA(subj); + int pp = UNBOX(p), ll = UNBOX(l); ASSERT_STRING("substring:1", subj); ASSERT_UNBOXED("substring:2", p); ASSERT_UNBOXED("substring:3", l); - - if (pp + ll <= LEN(d->tag)) { + + if (pp + ll <= LEN(d->data_header)) { data *r; - - __pre_gc (); - push_extra_root (&subj); - r = (data*) alloc (ll + 1 + sizeof (int)); - pop_extra_root (&subj); + PRE_GC(); + + push_extra_root(&subj); + r = (data *)alloc_string(ll); + pop_extra_root(&subj); - r->tag = STRING_TAG | (ll << 3); + strncpy(r->contents, (char *)subj + pp, ll); - strncpy (r->contents, (char*) subj + pp, ll); - - __post_gc (); + POST_GC(); - return r->contents; + return r->contents; } - - failure ("substring: index out of bounds (position=%d, length=%d, \ - subject length=%d)", pp, ll, LEN(d->tag)); + + failure("substring: index out of bounds (position=%d, length=%d, \ + subject length=%d)", + pp, + ll, + LEN(d->data_header)); } extern struct re_pattern_buffer *Lregexp (char *regexp) { - regex_t *b = (regex_t*) malloc (sizeof (regex_t)); + regex_t *b = (regex_t *)malloc(sizeof(regex_t)); /* printf ("regexp: %s,\t%x\n", regexp, b); */ - - memset (b, 0, sizeof (regex_t)); - - int n = (int) re_compile_pattern (regexp, strlen (regexp), b); - - if (n != 0) { - failure ("%", strerror (n)); - }; + + memset(b, 0, sizeof(regex_t)); + + int n = (int)re_compile_pattern(regexp, strlen(regexp), b); + + if (n != 0) { failure("%", strerror(n)); }; return b; } extern int LregexpMatch (struct re_pattern_buffer *b, char *s, int pos) { int res; - + ASSERT_BOXED("regexpMatch:1", b); ASSERT_STRING("regexpMatch:2", s); ASSERT_UNBOXED("regexpMatch:3", pos); - res = re_match (b, s, LEN(TO_DATA(s)->tag), UNBOX(pos), 0); + res = re_match(b, s, LEN(TO_DATA(s)->data_header), UNBOX(pos), 0); /* printf ("regexpMatch %x: %s, res=%d\n", b, s+UNBOX(pos), res); */ - - if (res) { - return BOX (res); - } - return BOX (res); + if (res) { return BOX(res); } + + return BOX(res); } -extern void* Bstring (void*); +extern void *Bstring (void *); void *Lclone (void *p) { data *obj; sexp *sobj; - void* res; - int n; -#ifdef DEBUG_PRINT - register int * ebp asm ("ebp"); - indent++; print_indent (); - printf ("Lclone arg: %p %p\n", &p, p); fflush (stdout); -#endif - __pre_gc (); - + void *res; + int n; if (UNBOXED(p)) return p; - else { - data *a = TO_DATA(p); - int t = TAG(a->tag), l = LEN(a->tag); - push_extra_root (&p); - switch (t) { - case STRING_TAG: -#ifdef DEBUG_PRINT - print_indent (); - printf ("Lclone: string1 &p=%p p=%p\n", &p, p); fflush (stdout); -#endif - res = Bstring (TO_DATA(p)->contents); -#ifdef DEBUG_PRINT - print_indent (); - printf ("Lclone: string2 %p %p\n", &p, p); fflush (stdout); -#endif - break; + PRE_GC(); + + data *a = TO_DATA(p); + int t = TAG(a->data_header), l = LEN(a->data_header); + + push_extra_root(&p); + switch (t) { + case STRING_TAG: res = Bstring(TO_DATA(p)->contents); break; - case ARRAY_TAG: + case ARRAY_TAG: + obj = (data *)alloc_array(l); + memcpy(obj, TO_DATA(p), array_size(l)); + res = (void *)obj->contents; + break; case CLOSURE_TAG: -#ifdef DEBUG_PRINT - print_indent (); - printf ("Lclone: closure or array &p=%p p=%p ebp=%p\n", &p, p, ebp); fflush (stdout); -#endif - obj = (data*) alloc (sizeof(int) * (l+1)); - memcpy (obj, TO_DATA(p), sizeof(int) * (l+1)); - res = (void*) (obj->contents); + obj = (data *)alloc_closure(l); + memcpy(obj, TO_DATA(p), closure_size(l)); + res = (void *)(obj->contents); break; - + case SEXP_TAG: -#ifdef DEBUG_PRINT - print_indent (); printf ("Lclone: sexp\n"); fflush (stdout); -#endif - sobj = (sexp*) alloc (sizeof(int) * (l+2)); - memcpy (sobj, TO_SEXP(p), sizeof(int) * (l+2)); - res = (void*) sobj->contents.contents; + obj = (data *)alloc_sexp(l); + memcpy(obj, TO_DATA(p), sexp_size(l)); + res = (void *)obj->contents; break; - - default: - failure ("invalid tag %d in clone *****\n", t); - } - pop_extra_root (&p); + + default: failure("invalid data_header %d in clone *****\n", t); } -#ifdef DEBUG_PRINT - print_indent (); printf ("Lclone ends1\n"); fflush (stdout); -#endif + pop_extra_root(&p); - __post_gc (); -#ifdef DEBUG_PRINT - print_indent (); - printf ("Lclone ends2\n"); fflush (stdout); - indent--; -#endif + POST_GC(); return res; } -# define HASH_DEPTH 3 -# define HASH_APPEND(acc, x) (((acc + (unsigned) x) << (WORD_SIZE / 2)) | ((acc + (unsigned) x) >> (WORD_SIZE / 2))) +#define HASH_DEPTH 3 +#define HASH_APPEND(acc, x) \ + (((acc + (unsigned)x) << (WORD_SIZE / 2)) | ((acc + (unsigned)x) >> (WORD_SIZE / 2))) int inner_hash (int depth, unsigned acc, void *p) { if (depth > HASH_DEPTH) return acc; if (UNBOXED(p)) return HASH_APPEND(acc, UNBOX(p)); - else if (is_valid_heap_pointer (p)) { + else if (is_valid_heap_pointer(p)) { data *a = TO_DATA(p); - int t = TAG(a->tag), l = LEN(a->tag), i; + int t = TAG(a->data_header), l = LEN(a->data_header), i; acc = HASH_APPEND(acc, t); - acc = HASH_APPEND(acc, l); + acc = HASH_APPEND(acc, l); switch (t) { - case STRING_TAG: { - char *p = a->contents; + case STRING_TAG: { + char *p = a->contents; + + while (*p) { + int n = (int)*p++; + acc = HASH_APPEND(acc, n); + } - while (*p) { - int n = (int) *p++; - acc = HASH_APPEND(acc, n); + return acc; } - return acc; - } - - case CLOSURE_TAG: - acc = HASH_APPEND(acc, ((void**) a->contents)[0]); - i = 1; - break; - - case ARRAY_TAG: - i = 0; - break; + case CLOSURE_TAG: + acc = HASH_APPEND(acc, ((void **)a->contents)[0]); + i = 1; + break; - case SEXP_TAG: { -#ifndef DEBUG_PRINT - int ta = TO_SEXP(p)->tag; -#else - int ta = GET_SEXP_TAG(TO_SEXP(p)->tag); -#endif - acc = HASH_APPEND(acc, ta); - i = 0; - break; - } + case ARRAY_TAG: i = 0; break; + + case SEXP_TAG: { + int ta = TO_SEXP(p)->tag; + acc = HASH_APPEND(acc, ta); + i = 1; + ++l; + break; + } - default: - failure ("invalid tag %d in hash *****\n", t); + default: failure("invalid data_header %d in hash *****\n", t); } - for (; icontents)[i]); + for (; i < l; i++) acc = inner_hash(depth + 1, acc, ((void **)a->contents)[i]); return acc; - } - else return HASH_APPEND(acc, p); + } else return HASH_APPEND(acc, p); } -extern void* LstringInt (char *b) { +extern void *LstringInt (char *b) { int n; - sscanf (b, "%d", &n); - return (void*) BOX(n); + sscanf(b, "%d", &n); + return (void *)BOX(n); } -extern int Lhash (void *p) { - return BOX(0x3fffff & inner_hash (0, 0, p)); -} +extern int Lhash (void *p) { return BOX(0x3fffff & inner_hash(0, 0, p)); } extern int LflatCompare (void *p, void *q) { if (UNBOXED(p)) { - if (UNBOXED(q)) { - return BOX (UNBOX(p) - UNBOX(q)); - } - + if (UNBOXED(q)) { return BOX(UNBOX(p) - UNBOX(q)); } + return -1; - } - else if (~UNBOXED(q)) { + } else if (~UNBOXED(q)) { return BOX(p - q); - } - else BOX(1); + } else BOX(1); } extern int Lcompare (void *p, void *q) { -# define COMPARE_AND_RETURN(x,y) do if (x != y) return BOX(x - y); while (0) - +#define COMPARE_AND_RETURN(x, y) \ + do \ + if (x != y) return BOX(x - y); \ + while (0) + if (p == q) return BOX(0); - + if (UNBOXED(p)) { - if (UNBOXED(q)) return BOX(UNBOX(p) - UNBOX(q)); + if (UNBOXED(q)) return BOX(UNBOX(p) - UNBOX(q)); else return BOX(-1); - } - else if (UNBOXED(q)) return BOX(1); + } else if (UNBOXED(q)) return BOX(1); else { - if (is_valid_heap_pointer (p)) { - if (is_valid_heap_pointer (q)) { + if (is_valid_heap_pointer(p)) { + if (is_valid_heap_pointer(q)) { data *a = TO_DATA(p), *b = TO_DATA(q); - int ta = TAG(a->tag), tb = TAG(b->tag); - int la = LEN(a->tag), lb = LEN(b->tag); - int i; - - COMPARE_AND_RETURN (ta, tb); - - switch (ta) { - case STRING_TAG: - return BOX(strcmp (a->contents, b->contents)); - - case CLOSURE_TAG: - COMPARE_AND_RETURN (((void**) a->contents)[0], ((void**) b->contents)[0]); - COMPARE_AND_RETURN (la, lb); - i = 1; - break; - - case ARRAY_TAG: - COMPARE_AND_RETURN (la, lb); - i = 0; - break; - - case SEXP_TAG: { -#ifndef DEBUG_PRINT - int ta = TO_SEXP(p)->tag, tb = TO_SEXP(q)->tag; -#else - int ta = GET_SEXP_TAG(TO_SEXP(p)->tag), tb = GET_SEXP_TAG(TO_SEXP(q)->tag); -#endif - COMPARE_AND_RETURN (ta, tb); - COMPARE_AND_RETURN (la, lb); - i = 0; - break; - } + int ta = TAG(a->data_header), tb = TAG(b->data_header); + int la = LEN(a->data_header), lb = LEN(b->data_header); + int i; + int shift = 0; - default: - failure ("invalid tag %d in compare *****\n", ta); + COMPARE_AND_RETURN(ta, tb); + + switch (ta) { + case STRING_TAG: return BOX(strcmp(a->contents, b->contents)); + + case CLOSURE_TAG: + COMPARE_AND_RETURN(((void **)a->contents)[0], ((void **)b->contents)[0]); + COMPARE_AND_RETURN(la, lb); + i = 1; + break; + + case ARRAY_TAG: + COMPARE_AND_RETURN(la, lb); + i = 0; + break; + + case SEXP_TAG: { + int tag_a = TO_SEXP(p)->tag, tag_b = TO_SEXP(q)->tag; + COMPARE_AND_RETURN(tag_a, tag_b); + COMPARE_AND_RETURN(la, lb); + i = 0; + shift = 1; + break; + } + + default: failure("invalid data_header %d in compare *****\n", ta); } - for (; icontents)[i], ((void**) b->contents)[i]); - if (c != BOX(0)) return BOX(c); + for (; i < la; i++) { + int c = Lcompare(((void **)a->contents)[i + shift], ((void **)b->contents)[i + shift]); + if (c != BOX(0)) return c; } - return BOX(0); - } - else return BOX(-1); - } - else if (is_valid_heap_pointer (q)) return BOX(1); - else return BOX (p - q); + } else return BOX(-1); + } else if (is_valid_heap_pointer(q)) return BOX(1); + else return BOX(p - q); } } -extern void* Belem (void *p, int i) { +extern void *Belem (void *p, int i) { data *a = (data *)BOX(NULL); - ASSERT_BOXED(".elem:1", p); + if (UNBOXED(p)) { ASSERT_BOXED(".elem:1", p); } ASSERT_UNBOXED(".elem:2", i); - + a = TO_DATA(p); i = UNBOX(i); - - if (TAG(a->tag) == STRING_TAG) { - return (void*) BOX(a->contents[i]); + + switch (TAG(a->data_header)) { + case STRING_TAG: return (void *)BOX(a->contents[i]); + case SEXP_TAG: return (void *)((int *)a->contents)[i + 1]; + default: return (void *)((int *)a->contents)[i]; } - - return (void*) ((int*) a->contents)[i]; } -extern void* LmakeArray (int length) { +extern void *LmakeArray (int length) { data *r; - int n, *p; + int n, *p; ASSERT_UNBOXED("makeArray:1", length); - - __pre_gc (); - n = UNBOX(length); - r = (data*) alloc (sizeof(int) * (n+1)); + PRE_GC(); - r->tag = ARRAY_TAG | (n << 3); + n = UNBOX(length); + r = (data *)alloc_array(n); - p = (int*) r->contents; + p = (int *)r->contents; while (n--) *p++ = BOX(0); - - __post_gc (); + + POST_GC(); return r->contents; } -extern void* LmakeString (int length) { +extern void *LmakeString (int length) { int n = UNBOX(length); data *r; ASSERT_UNBOXED("makeString", length); - - __pre_gc () ; - - r = (data*) alloc (n + 1 + sizeof (int)); - r->tag = STRING_TAG | (n << 3); + PRE_GC(); + + r = (data *)alloc_string(n); // '\0' in the end of the string is taken into account + + POST_GC(); - __post_gc(); - return r->contents; } -extern void* Bstring (void *p) { - int n = strlen (p); - data *s = NULL; - - __pre_gc (); -#ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("Bstring: call LmakeString %s %p %p %p %i\n", p, &p, p, s, n); - fflush(stdout); -#endif - push_extra_root (&p); - s = LmakeString (BOX(n)); +extern void *Bstring (void *p) { + int n = strlen(p); + void *s = NULL; + + PRE_GC(); + + push_extra_root(&p); + s = LmakeString(BOX(n)); pop_extra_root(&p); -#ifdef DEBUG_PRINT - print_indent (); - printf ("\tBstring: call strncpy: %p %p %p %i\n", &p, p, s, n); fflush(stdout); -#endif - strncpy ((char*)s, p, n + 1); -#ifdef DEBUG_PRINT - print_indent (); - printf ("\tBstring: ends\n"); fflush(stdout); - indent--; -#endif - __post_gc (); - + strncpy((char *)&TO_DATA(s)->contents, p, n + 1); // +1 because of '\0' in the end of C-strings + + POST_GC(); + return s; } -extern void* Lstringcat (void *p) { +extern void *Lstringcat (void *p) { void *s; /* ASSERT_BOXED("stringcat", p); */ - - __pre_gc (); - - createStringBuf (); - stringcat (p); + + PRE_GC(); + + createStringBuf(); + stringcat(p); push_extra_root(&p); - s = Bstring (stringBuf.contents); + s = Bstring(stringBuf.contents); pop_extra_root(&p); - - deleteStringBuf (); - __post_gc (); + deleteStringBuf(); - return s; + POST_GC(); + + return s; } -extern void* Lstring (void *p) { - void *s = (void *) BOX (NULL); +extern void *Lstring (void *p) { + void *s = (void *)BOX(NULL); + + PRE_GC(); - __pre_gc () ; - - createStringBuf (); - printValue (p); + createStringBuf(); + printValue(p); push_extra_root(&p); - s = Bstring (stringBuf.contents); + s = Bstring(stringBuf.contents); pop_extra_root(&p); - - deleteStringBuf (); - __post_gc (); + deleteStringBuf(); + + POST_GC(); return s; } -extern void* Bclosure (int bn, void *entry, ...) { - va_list args; - int i, ai; - register int * ebp asm ("ebp"); - size_t *argss; - data *r; - int n = UNBOX(bn); - - __pre_gc (); -#ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("Bclosure: create n = %d\n", n); fflush(stdout); -#endif +extern void *Bclosure (int bn, void *entry, ...) { + va_list args; + int i, ai; + register int *ebp asm("ebp"); + size_t *argss; + data *r; + int n = UNBOX(bn); + + PRE_GC(); + argss = (ebp + 12); - for (i = 0; icontents)[0] = entry; - r = (data*) alloc (sizeof(int) * (n+2)); - - r->tag = CLOSURE_TAG | ((n + 1) << 3); - ((void**) r->contents)[0] = entry; - va_start(args, entry); - - for (i = 0; icontents)[i+1] = ai; + + for (i = 0; i < n; i++) { + ai = va_arg(args, int); + ((int *)r->contents)[i + 1] = ai; } - + va_end(args); - __post_gc(); + POST_GC(); + pop_extra_root((void **)&r); argss--; - for (i = 0; icontents; } -extern void* Barray (int bn, ...) { - va_list args; - int i, ai; - data *r; +extern void *Barray (int bn, ...) { + va_list args; + int i, ai; + data *r; int n = UNBOX(bn); - - __pre_gc (); - -#ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("Barray: create n = %d\n", n); fflush(stdout); -#endif - r = (data*) alloc (sizeof(int) * (n+1)); - r->tag = ARRAY_TAG | (n << 3); - + PRE_GC(); + + r = (data *)alloc_array(n); + va_start(args, bn); - - for (i = 0; icontents)[i] = ai; + + for (i = 0; i < n; i++) { + ai = va_arg(args, int); + ((int *)r->contents)[i] = ai; } - + va_end(args); - __post_gc(); -#ifdef DEBUG_PRINT - indent--; -#endif + POST_GC(); return r->contents; } -extern void* Bsexp (int bn, ...) { - va_list args; - int i; - int ai; - size_t *p; - sexp *r; - data *d; - int n = UNBOX(bn); - - __pre_gc () ; - -#ifdef DEBUG_PRINT - indent++; print_indent (); - printf("Bsexp: allocate %zu!\n",sizeof(int) * (n+1)); fflush (stdout); +#ifdef DEBUG_VERSION +extern memory_chunk heap; #endif - r = (sexp*) alloc (sizeof(int) * (n+1)); - d = &(r->contents); - r->tag = 0; - - d->tag = SEXP_TAG | ((n-1) << 3); - + +extern void *Bsexp (int bn, ...) { + va_list args; + int i; + int ai; + size_t *p; + data *r; + int n = UNBOX(bn); + + PRE_GC(); + + int fields_cnt = n - 1; + r = (data *)alloc_sexp(fields_cnt); + ((sexp *)r)->tag = 0; + va_start(args, bn); - - for (i=0; icontents)[i] = ai; - } - r->tag = UNBOX(va_arg(args, int)); + for (i = 1; i < n; i++) { + ai = va_arg(args, int); + p = (size_t *)ai; + ((int *)r->contents)[i] = ai; + } -#ifdef DEBUG_PRINT - r->tag = SEXP_TAG | ((r->tag) << 3); - print_indent (); - printf("Bsexp: ends\n"); fflush (stdout); - indent--; -#endif + ((sexp *)r)->tag = UNBOX(va_arg(args, int)); va_end(args); - __post_gc(); - - return d->contents; + POST_GC(); + return (int *)r->contents; } extern int Btag (void *d, int t, int n) { - data *r; - + data *r; + if (UNBOXED(d)) return BOX(0); else { r = TO_DATA(d); -#ifndef DEBUG_PRINT - return BOX(TAG(r->tag) == SEXP_TAG && TO_SEXP(d)->tag == UNBOX(t) && LEN(r->tag) == UNBOX(n)); -#else - return BOX(TAG(r->tag) == SEXP_TAG && - GET_SEXP_TAG(TO_SEXP(d)->tag) == UNBOX(t) && LEN(r->tag) == UNBOX(n)); -#endif + return BOX(TAG(r->data_header) == SEXP_TAG && TO_SEXP(d)->tag == UNBOX(t) + && LEN(r->data_header) == UNBOX(n)); } } +int get_tag (data *d) { return TAG(d->data_header); } + +int get_len (data *d) { return LEN(d->data_header); } + extern int Barray_patt (void *d, int n) { - data *r; - + data *r; + if (UNBOXED(d)) return BOX(0); else { r = TO_DATA(d); - return BOX(TAG(r->tag) == ARRAY_TAG && LEN(r->tag) == UNBOX(n)); + return BOX(get_tag(r) == ARRAY_TAG && get_len(r) == UNBOX(n)); } } extern int Bstring_patt (void *x, void *y) { - data *rx = (data *) BOX (NULL), - *ry = (data *) BOX (NULL); - + data *rx = (data *)BOX(NULL), *ry = (data *)BOX(NULL); + ASSERT_STRING(".string_patt:2", y); - + if (UNBOXED(x)) return BOX(0); else { - rx = TO_DATA(x); ry = TO_DATA(y); + rx = TO_DATA(x); + ry = TO_DATA(y); - if (TAG(rx->tag) != STRING_TAG) return BOX(0); - - return BOX(strcmp (rx->contents, ry->contents) == 0 ? 1 : 0); + if (TAG(rx->data_header) != STRING_TAG) return BOX(0); + + return BOX(strcmp(rx->contents, ry->contents) == 0 ? 1 : 0); } } extern int Bclosure_tag_patt (void *x) { if (UNBOXED(x)) return BOX(0); - - return BOX(TAG(TO_DATA(x)->tag) == CLOSURE_TAG); -} -extern int Bboxed_patt (void *x) { - return BOX(UNBOXED(x) ? 0 : 1); + return BOX(TAG(TO_DATA(x)->data_header) == CLOSURE_TAG); } -extern int Bunboxed_patt (void *x) { - return BOX(UNBOXED(x) ? 1 : 0); -} +extern int Bboxed_patt (void *x) { return BOX(UNBOXED(x) ? 0 : 1); } + +extern int Bunboxed_patt (void *x) { return BOX(UNBOXED(x) ? 1 : 0); } extern int Barray_tag_patt (void *x) { if (UNBOXED(x)) return BOX(0); - - return BOX(TAG(TO_DATA(x)->tag) == ARRAY_TAG); + + return BOX(TAG(TO_DATA(x)->data_header) == ARRAY_TAG); } extern int Bstring_tag_patt (void *x) { if (UNBOXED(x)) return BOX(0); - - return BOX(TAG(TO_DATA(x)->tag) == STRING_TAG); + + return BOX(TAG(TO_DATA(x)->data_header) == STRING_TAG); } extern int Bsexp_tag_patt (void *x) { if (UNBOXED(x)) return BOX(0); - - return BOX(TAG(TO_DATA(x)->tag) == SEXP_TAG); + + return BOX(TAG(TO_DATA(x)->data_header) == SEXP_TAG); } -extern void* Bsta (void *v, int i, void *x) { +extern void *Bsta (void *v, int i, void *x) { if (UNBOXED(i)) { ASSERT_BOXED(".sta:3", x); - // ASSERT_UNBOXED(".sta:2", i); - - if (TAG(TO_DATA(x)->tag) == STRING_TAG)((char*) x)[UNBOX(i)] = (char) UNBOX(v); - else ((int*) x)[UNBOX(i)] = (int) v; + data *d = TO_DATA(x); - return v; + switch (TAG(d->data_header)) { + case STRING_TAG: { + ((char *)x)[UNBOX(i)] = (char)UNBOX(v); + break; + } + case SEXP_TAG: { + ((int *)x)[UNBOX(i) + 1] = (int)v; + break; + } + default: { + ((int *)x)[UNBOX(i)] = (int)v; + } + } + } else { + *(void **)x = v; } - * (void**) x = v; - return v; } static void fix_unboxed (char *s, va_list va) { - size_t *p = (size_t*)va; - int i = 0; - + size_t *p = (size_t *)va; + int i = 0; + while (*s) { if (*s == '%') { - size_t n = p [i]; - if (UNBOXED (n)) { - p[i] = UNBOX(n); - } + size_t n = p[i]; + if (UNBOXED(n)) { p[i] = UNBOX(n); } i++; } s++; - } + } } extern void Lfailure (char *s, ...) { va_list args; - - va_start (args, s); - fix_unboxed (s, args); - vfailure (s, args); + + va_start(args, s); + fix_unboxed(s, args); + vfailure(s, args); } extern void Bmatch_failure (void *v, char *fname, int line, int col) { - createStringBuf (); - printValue (v); - failure ("match failure at %s:%d:%d, value '%s'\n", - fname, UNBOX(line), UNBOX(col), stringBuf.contents); + createStringBuf(); + printValue(v); + failure("match failure at %s:%d:%d, value '%s'\n", + fname, + UNBOX(line), + UNBOX(col), + stringBuf.contents); } -extern void* /*Lstrcat*/ Li__Infix_4343 (void *a, void *b) { - data *da = (data*) BOX (NULL); - data *db = (data*) BOX (NULL); - data *d = (data*) BOX (NULL); +extern void * /*Lstrcat*/ Li__Infix_4343 (void *a, void *b) { + data *da = (data *)BOX(NULL); + data *db = (data *)BOX(NULL); + data *d = (data *)BOX(NULL); ASSERT_STRING("++:1", a); ASSERT_STRING("++:2", b); - + da = TO_DATA(a); db = TO_DATA(b); - __pre_gc () ; + PRE_GC(); - push_extra_root (&a); - push_extra_root (&b); - d = (data *) alloc (sizeof(int) + LEN(da->tag) + LEN(db->tag) + 1); - pop_extra_root (&b); - pop_extra_root (&a); + push_extra_root(&a); + push_extra_root(&b); + d = alloc_string(LEN(da->data_header) + LEN(db->data_header)); + pop_extra_root(&b); + pop_extra_root(&a); da = TO_DATA(a); db = TO_DATA(b); - - d->tag = STRING_TAG | ((LEN(da->tag) + LEN(db->tag)) << 3); - strncpy (d->contents , da->contents, LEN(da->tag)); - strncpy (d->contents + LEN(da->tag), db->contents, LEN(db->tag)); - - d->contents[LEN(da->tag) + LEN(db->tag)] = 0; + strncpy(d->contents, da->contents, LEN(da->data_header)); + strncpy(d->contents + LEN(da->data_header), db->contents, LEN(db->data_header)); + d->contents[LEN(da->data_header) + LEN(db->data_header)] = 0; + + POST_GC(); - __post_gc(); - return d->contents; } -extern void* Lsprintf (char * fmt, ...) { +extern void *Lsprintf (char *fmt, ...) { va_list args; - void *s; + void *s; ASSERT_STRING("sprintf:1", fmt); - - va_start (args, fmt); - fix_unboxed (fmt, args); - - createStringBuf (); - vprintStringBuf (fmt, args); + va_start(args, fmt); + fix_unboxed(fmt, args); - __pre_gc (); + createStringBuf(); - push_extra_root ((void**)&fmt); - s = Bstring (stringBuf.contents); - pop_extra_root ((void**)&fmt); + vprintStringBuf(fmt, args); - __post_gc (); - - deleteStringBuf (); + PRE_GC(); + + push_extra_root((void **)&fmt); + s = Bstring(stringBuf.contents); + pop_extra_root((void **)&fmt); + + POST_GC(); + + deleteStringBuf(); return s; } -extern void* LgetEnv (char *var) { - char *e = getenv (var); +extern void *LgetEnv (char *var) { + char *e = getenv(var); void *s; - - if (e == NULL) - return BOX(0); - __pre_gc (); + if (e == NULL) return (void *)BOX(0); - s = Bstring (e); + PRE_GC(); - __post_gc (); + s = Bstring(e); + + POST_GC(); return s; } -extern int Lsystem (char *cmd) { - return BOX (system (cmd)); -} +extern int Lsystem (char *cmd) { return BOX(system(cmd)); } extern void Lfprintf (FILE *f, char *s, ...) { - va_list args = (va_list) BOX (NULL); + va_list args = (va_list)BOX(NULL); ASSERT_BOXED("fprintf:1", f); - ASSERT_STRING("fprintf:2", s); - - va_start (args, s); - fix_unboxed (s, args); - - if (vfprintf (f, s, args) < 0) { - failure ("fprintf (...): %s\n", strerror (errno)); - } + ASSERT_STRING("fprintf:2", s); + + va_start(args, s); + fix_unboxed(s, args); + + if (vfprintf(f, s, args) < 0) { failure("fprintf (...): %s\n", strerror(errno)); } } extern void Lprintf (char *s, ...) { - va_list args = (va_list) BOX (NULL); + va_list args = (va_list)BOX(NULL); ASSERT_STRING("printf:1", s); - va_start (args, s); - fix_unboxed (s, args); - - if (vprintf (s, args) < 0) { - failure ("fprintf (...): %s\n", strerror (errno)); - } + va_start(args, s); + fix_unboxed(s, args); + + if (vprintf(s, args) < 0) { failure("fprintf (...): %s\n", strerror(errno)); } - fflush (stdout); + fflush(stdout); } -extern FILE* Lfopen (char *f, char *m) { - FILE* h; +extern FILE *Lfopen (char *f, char *m) { + FILE *h; ASSERT_STRING("fopen:1", f); ASSERT_STRING("fopen:2", m); - h = fopen (f, m); - - if (h) - return h; + h = fopen(f, m); + + if (h) return h; - failure ("fopen (\"%s\", \"%s\"): %s, %s, %s\n", f, m, strerror (errno)); + failure("fopen (\"%s\", \"%s\"): %s, %s, %s\n", f, m, strerror(errno)); } extern void Lfclose (FILE *f) { ASSERT_BOXED("fclose", f); - fclose (f); + fclose(f); } -extern void* LreadLine () { +extern void *LreadLine () { char *buf; - if (scanf ("%m[^\n]", &buf) == 1) { - void * s = Bstring (buf); + if (scanf("%m[^\n]", &buf) == 1) { + void *s = Bstring(buf); + + getchar(); - getchar (); - - free (buf); + free(buf); return s; } - - if (errno != 0) - failure ("readLine (): %s\n", strerror (errno)); - return (void*) BOX (0); + if (errno != 0) failure("readLine (): %s\n", strerror(errno)); + + return (void *)BOX(0); } -extern void* Lfread (char *fname) { +extern void *Lfread (char *fname) { FILE *f; ASSERT_STRING("fread", fname); - f = fopen (fname, "r"); - - if (f) { - if (fseek (f, 0l, SEEK_END) >= 0) { - long size = ftell (f); - void *s = LmakeString (BOX(size)); - - rewind (f); - - if (fread (s, 1, size, f) == size) { - fclose (f); - return s; - } + f = fopen(fname, "r"); + + if (f && fseek(f, 0l, SEEK_END) >= 0) { + long size = ftell(f); + void *s = LmakeString(BOX(size)); + + rewind(f); + + if (fread(s, 1, size, f) == size) { + fclose(f); + return s; } } - failure ("fread (\"%s\"): %s\n", fname, strerror (errno)); + failure("fread (\"%s\"): %s\n", fname, strerror(errno)); } extern void Lfwrite (char *fname, char *contents) { @@ -1426,63 +1157,61 @@ extern void Lfwrite (char *fname, char *contents) { ASSERT_STRING("fwrite:1", fname); ASSERT_STRING("fwrite:2", contents); - - f = fopen (fname, "w"); - if (f) { - if (fprintf (f, "%s", contents) < 0); - else { - fclose (f); - return; - } - } + f = fopen(fname, "w"); - failure ("fwrite (\"%s\"): %s\n", fname, strerror (errno)); + if (f && !(fprintf(f, "%s", contents) < 0)) { + fclose(f); + } else { + failure("fwrite (\"%s\"): %s\n", fname, strerror(errno)); + } } -extern void* Lfexists (char *fname) { +extern void *Lfexists (char *fname) { FILE *f; ASSERT_STRING("fexists", fname); - f = fopen (fname, "r"); - - if (f) return BOX(1); + f = fopen(fname, "r"); - return BOX(0); -} + if (f) return (void *)BOX(1); -extern void* Lfst (void *v) { - return Belem (v, BOX(0)); + return (void *)BOX(0); } -extern void* Lsnd (void *v) { - return Belem (v, BOX(1)); -} +extern void *Lfst (void *v) { return Belem(v, BOX(0)); } -extern void* Lhd (void *v) { - return Belem (v, BOX(0)); -} +extern void *Lsnd (void *v) { return Belem(v, BOX(1)); } -extern void* Ltl (void *v) { - return Belem (v, BOX(1)); -} +extern void *Lhd (void *v) { return Belem(v, BOX(0)); } + +extern void *Ltl (void *v) { return Belem(v, BOX(1)); } /* Lread is an implementation of the "read" construct */ extern int Lread () { int result = BOX(0); - printf ("> "); - fflush (stdout); - scanf ("%d", &result); + printf("> "); + fflush(stdout); + scanf("%d", &result); return BOX(result); } +extern int Lbinoperror (void) { + fprintf(stderr, "ERROR: POINTER ARITHMETICS is forbidden; EXIT\n"); + exit(1); +} + +extern int Lbinoperror2 (void) { + fprintf(stderr, "ERROR: Comparing BOXED and UNBOXED value ; EXIT\n"); + exit(1); +} + /* Lwrite is an implementation of the "write" construct */ extern int Lwrite (int n) { - printf ("%d\n", UNBOX(n)); - fflush (stdout); + printf("%d\n", UNBOX(n)); + fflush(stdout); return 0; } @@ -1490,613 +1219,36 @@ extern int Lwrite (int n) { extern int Lrandom (int n) { ASSERT_UNBOXED("Lrandom, 0", n); - if (UNBOX(n) <= 0) { - failure ("invalid range in random: %d\n", UNBOX(n)); - } - - return BOX (random () % UNBOX(n)); + if (UNBOX(n) <= 0) { failure("invalid range in random: %d\n", UNBOX(n)); } + + return BOX(random() % UNBOX(n)); } extern int Ltime () { struct timespec t; - - clock_gettime (CLOCK_MONOTONIC_RAW, &t); - + + clock_gettime(CLOCK_MONOTONIC_RAW, &t); + return BOX(t.tv_sec * 1000000 + t.tv_nsec / 1000); } extern void set_args (int argc, char *argv[]) { data *a; - int n = argc, *p = NULL; - int i; - - __pre_gc (); - -#ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("set_args: call: n=%i &p=%p p=%p: ", n, &p, p); fflush(stdout); - for (i = 0; i < n; i++) - printf("%s ", argv[i]); - printf("EE\n"); -#endif - - p = LmakeArray (BOX(n)); - push_extra_root ((void**)&p); - - for (i=0; i\n", i, &p, p); fflush(stdout); -#endif - ((int*)p) [i] = (int) Bstring (argv[i]); -#ifdef DEBUG_PRINT - print_indent (); - printf ("set_args: iteration %i <- %p %p\n", i, &p, p); fflush(stdout); -#endif - } - - pop_extra_root ((void**)&p); - __post_gc (); - - global_sysargs = p; - push_extra_root ((void**)&global_sysargs); -#ifdef DEBUG_PRINT - print_indent (); - printf ("set_args: end\n", n, &p, p); fflush(stdout); - indent--; -#endif -} - -/* GC starts here */ - -static int enable_GC = 1; - -extern void LenableGC () { - enable_GC = 1; -} - -extern void LdisableGC () { - enable_GC = 0; -} - -extern const size_t __start_custom_data, __stop_custom_data; - -# ifdef __ENABLE_GC__ - -extern void __gc_init (); + int n = argc; + int *p = NULL; + int i; -# else + PRE_GC(); -# define __gc_init __gc_init_subst -void __gc_init_subst () {} + p = LmakeArray(BOX(n)); + push_extra_root((void **)&p); -# endif + for (i = 0; i < n; i++) { ((int *)p)[i] = (int)Bstring(argv[i]); } -extern void __gc_root_scan_stack (); - -/* ======================================== */ -/* Mark-and-copy */ -/* ======================================== */ - -//static size_t SPACE_SIZE = 16; -static size_t SPACE_SIZE = 256 * 1024 * 1024; -// static size_t SPACE_SIZE = 128; -// static size_t SPACE_SIZE = 1024 * 1024; - -static int free_pool (pool * p) { - size_t *a = p->begin, b = p->size; - p->begin = NULL; - p->size = 0; - p->end = NULL; - p->current = NULL; - return munmap((void *)a, b); -} - -static void init_to_space (int flag) { - size_t space_size = 0; - if (flag) SPACE_SIZE = SPACE_SIZE << 1; - space_size = SPACE_SIZE * sizeof(size_t); - to_space.begin = mmap (NULL, space_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); - if (to_space.begin == MAP_FAILED) { - perror ("EROOR: init_to_space: mmap failed\n"); - exit (1); - } - to_space.current = to_space.begin; - to_space.end = to_space.begin + SPACE_SIZE; - to_space.size = SPACE_SIZE; -} - -static void gc_swap_spaces (void) { -#ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("gc_swap_spaces\n"); fflush (stdout); -#endif - free_pool (&from_space); - from_space.begin = to_space.begin; - from_space.current = current; - from_space.end = to_space.end; - from_space.size = to_space.size; - to_space.begin = NULL; - to_space.current = NULL; - to_space.end = NULL; - to_space.size = 0; -#ifdef DEBUG_PRINT - indent--; -#endif -} - -# define IS_VALID_HEAP_POINTER(p)\ - (!UNBOXED(p) && \ - (size_t)from_space.begin <= (size_t)p && \ - (size_t)from_space.end > (size_t)p) - -# define IN_PASSIVE_SPACE(p) \ - ((size_t)to_space.begin <= (size_t)p && \ - (size_t)to_space.end > (size_t)p) - -# define IS_FORWARD_PTR(p) \ - (!UNBOXED(p) && IN_PASSIVE_SPACE(p)) - -int is_valid_heap_pointer (void *p) { - return IS_VALID_HEAP_POINTER(p); -} - -extern size_t * gc_copy (size_t *obj); - -static void copy_elements (size_t *where, size_t *from, int len) { - int i = 0; - void * p = NULL; -#ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("copy_elements: start; len = %d\n", len); fflush (stdout); -#endif - for (i = 0; i < len; i++) { - size_t elem = from[i]; - if (!IS_VALID_HEAP_POINTER(elem)) { - *where = elem; - where++; -#ifdef DEBUG_PRINT - print_indent (); - printf ("copy_elements: copy NON ptr: %zu %p \n", elem, elem); fflush (stdout); -#endif - } - else { -#ifdef DEBUG_PRINT - print_indent (); - printf ("copy_elements: fix element: %p -> %p\n", elem, *where); - fflush (stdout); -#endif - p = gc_copy ((size_t*) elem); - *where = (size_t) p; - where ++; - } -#ifdef DEBUG_PRINT - print_indent (); - printf ("copy_elements: iteration end: where = %p, *where = %p, i = %d, \ - len = %d\n", where, *where, i, len); fflush (stdout); -#endif + pop_extra_root((void **)&p); + POST_GC(); - } -#ifdef DEBUG_PRINT - print_indent (); - printf ("\tcopy_elements: end\n"); fflush (stdout); - indent--; -#endif - -} - -static int extend_spaces (void) { - void *p = (void *) BOX (NULL); - size_t old_space_size = SPACE_SIZE * sizeof(size_t), - new_space_size = (SPACE_SIZE << 1) * sizeof(size_t); - p = mremap(to_space.begin, old_space_size, new_space_size, 0); -#ifdef DEBUG_PRINT - indent++; print_indent (); -#endif - if (p == MAP_FAILED) { -#ifdef DEBUG_PRINT - print_indent (); - printf ("extend: extend_spaces: mremap failed\n"); fflush (stdout); -#endif - return 1; - } -#ifdef DEBUG_PRINT - print_indent (); - printf ("extend: %p %p %p %p\n", p, to_space.begin, to_space.end, current); - fflush (stdout); - indent--; -#endif - to_space.end += SPACE_SIZE; - SPACE_SIZE = SPACE_SIZE << 1; - to_space.size = SPACE_SIZE; - return 0; -} - -extern size_t * gc_copy (size_t *obj) { - data *d = TO_DATA(obj); - sexp *s = NULL; - size_t *copy = NULL; - int i = 0; -#ifdef DEBUG_PRINT - int len1, len2, len3; - void * objj; - void * newobjj = (void*)current; - indent++; print_indent (); - printf ("gc_copy: %p cur = %p starts\n", obj, current); - fflush (stdout); -#endif - - if (!IS_VALID_HEAP_POINTER(obj)) { -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc_copy: invalid ptr: %p\n", obj); fflush (stdout); - indent--; -#endif - return obj; - } - - if (!IN_PASSIVE_SPACE(current) && current != to_space.end) { -#ifdef DEBUG_PRINT - print_indent (); - printf("ERROR: gc_copy: out-of-space %p %p %p\n", - current, to_space.begin, to_space.end); - fflush(stdout); -#endif - perror("ERROR: gc_copy: out-of-space\n"); - exit (1); - } - - if (IS_FORWARD_PTR(d->tag)) { -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc_copy: IS_FORWARD_PTR: return! %p -> %p\n", obj, (size_t *) d->tag); - fflush(stdout); - indent--; -#endif - return (size_t *) d->tag; - } - - copy = current; -#ifdef DEBUG_PRINT - objj = d; -#endif - switch (TAG(d->tag)) { - case CLOSURE_TAG: -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc_copy:closure_tag; len = %zu\n", LEN(d->tag)); fflush (stdout); -#endif - i = LEN(d->tag); - // current += LEN(d->tag) + 1; - // current += ((LEN(d->tag) + 1) * sizeof(int) -1) / sizeof(size_t) + 1; - current += i+1; - *copy = d->tag; - copy++; - d->tag = (int) copy; - copy_elements (copy, obj, i); - break; - - case ARRAY_TAG: -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc_copy:array_tag; len = %zu\n", LEN(d->tag)); fflush (stdout); -#endif - current += ((LEN(d->tag) + 1) * sizeof (int) - 1) / sizeof (size_t) + 1; - *copy = d->tag; - copy++; - i = LEN(d->tag); - d->tag = (int) copy; - copy_elements (copy, obj, i); - break; - - case STRING_TAG: -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc_copy:string_tag; len = %d\n", LEN(d->tag) + 1); fflush (stdout); -#endif - current += (LEN(d->tag) + sizeof(int)) / sizeof(size_t) + 1; - *copy = d->tag; - copy++; - d->tag = (int) copy; - strcpy ((char*)©[0], (char*) obj); - break; - - case SEXP_TAG : - s = TO_SEXP(obj); -#ifdef DEBUG_PRINT - objj = s; - len1 = LEN(s->contents.tag); - len2 = LEN(s->tag); - len3 = LEN(d->tag); - print_indent (); - printf ("gc_copy:sexp_tag; len1 = %li, len2=%li, len3 = %li\n", - len1, len2, len3); - fflush (stdout); -#endif - i = LEN(s->contents.tag); - current += i + 2; - *copy = s->tag; - copy++; - *copy = d->tag; - copy++; - d->tag = (int) copy; - copy_elements (copy, obj, i); - break; - - default: -#ifdef DEBUG_PRINT - print_indent (); - printf ("ERROR: gc_copy: weird tag: %p", TAG(d->tag)); fflush (stdout); - indent--; -#endif - perror ("ERROR: gc_copy: weird tag"); - exit (1); - return (obj); - } -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc_copy: %p(%p) -> %p (%p); new-current = %p\n", - obj, objj, copy, newobjj, current); - fflush (stdout); - indent--; -#endif - return copy; -} - -extern void gc_test_and_copy_root (size_t ** root) { -#ifdef DEBUG_PRINT - indent++; -#endif - if (IS_VALID_HEAP_POINTER(*root)) { -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc_test_and_copy_root: root %p top=%p bot=%p *root %p \n", root, __gc_stack_top, __gc_stack_bottom, *root); - fflush (stdout); -#endif - *root = gc_copy (*root); - } -#ifdef DEBUG_PRINT - else { - print_indent (); - printf ("gc_test_and_copy_root: INVALID HEAP POINTER root %p *root %p\n", root, *root); - fflush (stdout); - } - indent--; -#endif -} - -extern void gc_root_scan_data (void) { - size_t * p = (size_t*)&__start_custom_data; - while (p < (size_t*)&__stop_custom_data) { - gc_test_and_copy_root ((size_t**)p); - p++; - } -} - -static inline void init_extra_roots (void) { - extra_roots.current_free = 0; -} - -extern void __init (void) { - size_t space_size = SPACE_SIZE * sizeof(size_t); - - srandom (time (NULL)); - - from_space.begin = mmap (NULL, space_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0); - to_space.begin = NULL; - if (from_space.begin == MAP_FAILED) { - perror ("EROOR: init_pool: mmap failed\n"); - exit (1); - } - from_space.current = from_space.begin; - from_space.end = from_space.begin + SPACE_SIZE; - from_space.size = SPACE_SIZE; - to_space.current = NULL; - to_space.end = NULL; - to_space.size = 0; - init_extra_roots (); -} - -static void* gc (size_t size) { - if (! enable_GC) { - Lfailure ("GC disabled"); - } - - current = to_space.begin; -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc: current:%p; to_space.b =%p; to_space.e =%p; \ - f_space.b = %p; f_space.e = %p; __gc_stack_top=%p; __gc_stack_bottom=%p\n", - current, to_space.begin, to_space.end, from_space.begin, from_space.end, - __gc_stack_top, __gc_stack_bottom); - fflush (stdout); -#endif - gc_root_scan_data (); -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc: data is scanned\n"); fflush (stdout); -#endif - __gc_root_scan_stack (); - for (int i = 0; i < extra_roots.current_free; i++) { -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc: extra_root â„– %i: %p %p\n", i, extra_roots.roots[i], - (size_t*) extra_roots.roots[i]); - fflush (stdout); -#endif - gc_test_and_copy_root ((size_t**)extra_roots.roots[i]); - } -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc: no more extra roots\n"); fflush (stdout); -#endif - - if (!IN_PASSIVE_SPACE(current)) { - printf ("gc: ASSERT: !IN_PASSIVE_SPACE(current) to_begin = %p to_end = %p \ - current = %p\n", to_space.begin, to_space.end, current); - fflush (stdout); - perror ("ASSERT: !IN_PASSIVE_SPACE(current)\n"); - exit (1); - } - - while (current + size >= to_space.end) { -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc: pre-extend_spaces : %p %zu %p \n", current, size, to_space.end); - fflush (stdout); -#endif - if (extend_spaces ()) { - gc_swap_spaces (); - init_to_space (1); - return gc (size); - } -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc: post-extend_spaces: %p %zu %p \n", current, size, to_space.end); - fflush (stdout); -#endif - } - assert (IN_PASSIVE_SPACE(current)); - assert (current + size < to_space.end); - - gc_swap_spaces (); - from_space.current = current + size; -#ifdef DEBUG_PRINT - print_indent (); - printf ("gc: end: (allocate!) return %p; from_space.current %p; \ - from_space.end %p \n\n", - current, from_space.current, from_space.end); - fflush (stdout); - indent--; -#endif - return (void *) current; -} - -#ifdef DEBUG_PRINT -static void printFromSpace (void) { - size_t * cur = from_space.begin, *tmp = NULL; - data * d = NULL; - sexp * s = NULL; - size_t len = 0; - size_t elem_number = 0; - - printf ("\nHEAP SNAPSHOT\n===================\n"); - printf ("f_begin = %p, f_end = %p,\n", from_space.begin, from_space.end); - while (cur < from_space.current) { - printf ("data at %p", cur); - d = (data *) cur; - - switch (TAG(d->tag)) { - - case STRING_TAG: - printf ("(=>%p): STRING\n\t%s; len = %i %zu\n", - d->contents, d->contents, - LEN(d->tag), LEN(d->tag) + 1 + sizeof(int)); - fflush (stdout); - len = (LEN(d->tag) + sizeof(int)) / sizeof(size_t) + 1; - break; - - case CLOSURE_TAG: - printf ("(=>%p): CLOSURE\n\t", d->contents); - len = LEN(d->tag); - for (int i = 0; i < len; i++) { - int elem = ((int*)d->contents)[i]; - if (UNBOXED(elem)) printf ("%d ", elem); - else printf ("%p ", elem); - } - len += 1; - printf ("\n"); - fflush (stdout); - break; - - case ARRAY_TAG: - printf ("(=>%p): ARRAY\n\t", d->contents); - len = LEN(d->tag); - for (int i = 0; i < len; i++) { - int elem = ((int*)d->contents)[i]; - if (UNBOXED(elem)) printf ("%d ", elem); - else printf ("%p ", elem); - } - len += 1; - printf ("\n"); - fflush (stdout); - break; - - case SEXP_TAG: - s = (sexp *) d; - d = (data *) &(s->contents); - char * tag = de_hash (GET_SEXP_TAG(s->tag)); - printf ("(=>%p): SEXP\n\ttag(%s) ", s->contents.contents, tag); - len = LEN(d->tag); - tmp = (s->contents.contents); - for (int i = 0; i < len; i++) { - int elem = ((int*)tmp)[i]; - if (UNBOXED(elem)) printf ("%d ", UNBOX(elem)); - else printf ("%p ", elem); - } - len += 2; - printf ("\n"); - fflush (stdout); - break; - - case 0: - printf ("\nprintFromSpace: end: %zu elements\n===================\n\n", - elem_number); - return; - - default: - printf ("\nprintFromSpace: ERROR: bad tag %d", TAG(d->tag)); - perror ("\nprintFromSpace: ERROR: bad tag"); - fflush (stdout); - exit (1); - } - cur += len; - printf ("len = %zu, new cur = %p\n", len, cur); - elem_number++; - } - printf ("\nprintFromSpace: end: the whole space is printed:\ - %zu elements\n===================\n\n", elem_number); - fflush (stdout); -} -#endif - -#ifdef __ENABLE_GC__ -// alloc: allocates `size` bytes in heap -extern void * alloc (size_t size) { - void * p = (void*)BOX(NULL); - size = (size - 1) / sizeof(size_t) + 1; // convert bytes to words -#ifdef DEBUG_PRINT - indent++; print_indent (); - printf ("alloc: current: %p %zu words!", from_space.current, size); - fflush (stdout); -#endif - if (from_space.current + size < from_space.end) { - p = (void*) from_space.current; - from_space.current += size; -#ifdef DEBUG_PRINT - print_indent (); - printf (";new current: %p \n", from_space.current); fflush (stdout); - indent--; -#endif - return p; - } + global_sysargs = p; - init_to_space (0); -#ifdef DEBUG_PRINT - print_indent (); - printf ("alloc: call gc: %zu\n", size); fflush (stdout); - printFromSpace(); fflush (stdout); - p = gc (size); - print_indent (); - printf("alloc: gc END %p %p %p %p\n\n", from_space.begin, - from_space.end, from_space.current, p); fflush (stdout); - printFromSpace(); fflush (stdout); - indent--; - return p; -#else - return gc (size); -#endif + push_extra_root((void **)&global_sysargs); } -# endif diff --git a/runtime/runtime.h b/runtime/runtime.h index 677429fd7..346624e2b 100644 --- a/runtime/runtime.h +++ b/runtime/runtime.h @@ -1,21 +1,20 @@ -# ifndef __LAMA_RUNTIME__ -# define __LAMA_RUNTIME__ +#ifndef __LAMA_RUNTIME__ +#define __LAMA_RUNTIME__ -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -# define WORD_SIZE (CHAR_BIT * sizeof(int)) +#define WORD_SIZE (CHAR_BIT * sizeof(int)) void failure (char *s, ...); -# endif +#endif diff --git a/runtime/runtime_common.h b/runtime/runtime_common.h new file mode 100644 index 000000000..9dd25e89b --- /dev/null +++ b/runtime/runtime_common.h @@ -0,0 +1,73 @@ +#ifndef __LAMA_RUNTIME_COMMON__ +#define __LAMA_RUNTIME_COMMON__ +#include + +// this flag makes GC behavior a bit different for testing purposes. +//#define DEBUG_VERSION +//#define FULL_INVARIANT_CHECKS + +#define STRING_TAG 0x00000001 +#define ARRAY_TAG 0x00000003 +#define SEXP_TAG 0x00000005 +#define CLOSURE_TAG 0x00000007 +#define UNBOXED_TAG 0x00000009 // Not actually a data_header; used to return from LkindOf + +#define LEN(x) ((x & 0xFFFFFFF8) >> 3) +#define TAG(x) (x & 0x00000007) + +#define SEXP_ONLY_HEADER_SZ (sizeof(int)) + +#ifndef DEBUG_VERSION +# define DATA_HEADER_SZ (sizeof(size_t) + sizeof(int)) +#else +# define DATA_HEADER_SZ (sizeof(size_t) + sizeof(size_t) + sizeof(int)) +#endif + +#define MEMBER_SIZE sizeof(int) + +#define TO_DATA(x) ((data *)((char *)(x)-DATA_HEADER_SZ)) +#define TO_SEXP(x) ((sexp *)((char *)(x)-DATA_HEADER_SZ)) + +#define UNBOXED(x) (((int)(x)) & 0x0001) +#define UNBOX(x) (((int)(x)) >> 1) +#define BOX(x) ((((int)(x)) << 1) | 0x0001) + +#define BYTES_TO_WORDS(bytes) (((bytes)-1) / sizeof(size_t) + 1) +#define WORDS_TO_BYTES(words) ((words) * sizeof(size_t)) + +// CAREFUL WITH DOUBLE EVALUATION! +#define MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define MIN(x, y) (((x) < (y)) ? (x) : (y)) + +typedef struct { + // store tag in the last three bits to understand what structure this is, other bits are filled with + // other utility info (i.e., size for array, number of fields for s-expression) + int data_header; + +#ifdef DEBUG_VERSION + size_t id; +#endif + + // last bit is used as MARK-BIT, the rest are used to store address where object should move + // last bit can be used because due to alignment we can assume that last two bits are always 0's + size_t forward_address; + char contents[0]; +} data; + +typedef struct { + // store tag in the last three bits to understand what structure this is, other bits are filled with + // other utility info (i.e., size for array, number of fields for s-expression) + int data_header; + +#ifdef DEBUG_VERSION + size_t id; +#endif + + // last bit is used as MARK-BIT, the rest are used to store address where object should move + // last bit can be used because due to alignment we can assume that last two bits are always 0's + size_t forward_address; + int tag; + int contents[0]; +} sexp; + +#endif diff --git a/runtime/test_main.c b/runtime/test_main.c new file mode 100644 index 000000000..8e88aaac8 --- /dev/null +++ b/runtime/test_main.c @@ -0,0 +1,275 @@ +#include "gc.h" +#include "runtime_common.h" + +#include +#include +#include +#include + +#ifdef DEBUG_VERSION + +// function from runtime that maps string to int value +extern int LtagHash (char *s); + +extern void *Bsexp (int n, ...); +extern void *Barray (int bn, ...); +extern void *Bstring (void *); +extern void *Bclosure (int bn, void *entry, ...); + +extern size_t __gc_stack_top, __gc_stack_bottom; + +void test_correct_structure_sizes (void) { + // something like induction base + assert((array_size(0) == get_header_size(ARRAY))); + assert((string_size(0) == get_header_size(STRING) + 1)); // +1 is because of '\0' + assert((sexp_size(0) == get_header_size(SEXP) + MEMBER_SIZE)); + assert((closure_size(0) == get_header_size(CLOSURE))); + + // just check correctness for some small sizes + for (int k = 1; k < 20; ++k) { + assert((array_size(k) == get_header_size(ARRAY) + MEMBER_SIZE * k)); + assert((string_size(k) == get_header_size(STRING) + k + 1)); + assert((sexp_size(k) == get_header_size(SEXP) + MEMBER_SIZE * (k + 1))); + assert((closure_size(k) == get_header_size(CLOSURE) + MEMBER_SIZE * k)); + } +} + +void no_gc_tests (void) { test_correct_structure_sizes(); } + +// unfortunately there is no generic function pointer that can hold pointer to function with arbitrary signature +extern size_t call_runtime_function (void *virt_stack_pointer, void *function_pointer, + size_t num_args, ...); + +# include "virt_stack.h" + +virt_stack *init_test () { + __init(); + virt_stack *st = vstack_create(); + vstack_init(st); + __gc_stack_bottom = (size_t)vstack_top(st); + return st; +} + +void cleanup_test (virt_stack *st) { + vstack_destruct(st); + __shutdown(); +} + +void force_gc_cycle (virt_stack *st) { + __gc_stack_top = (size_t)vstack_top(st) - 4; + gc_alloc(0); + __gc_stack_top = 0; +} + +void test_simple_string_alloc (void) { + virt_stack *st = init_test(); + + for (int i = 0; i < 5; ++i) { vstack_push(st, BOX(i)); } + + vstack_push(st, call_runtime_function(vstack_top(st) - 4, Bstring, 1, "abc")); + + const int N = 10; + int ids[N]; + size_t alive = objects_snapshot(ids, N); + assert((alive == 1)); + + cleanup_test(st); +} + +void test_simple_array_alloc (void) { + virt_stack *st = init_test(); + + // allocate array [ BOX(1) ] and push it onto the stack + vstack_push(st, call_runtime_function(vstack_top(st) - 4, Barray, 2, BOX(1), BOX(1))); + + const int N = 10; + int ids[N]; + size_t alive = objects_snapshot(ids, N); + assert((alive == 1)); + + cleanup_test(st); +} + +void test_simple_sexp_alloc (void) { + virt_stack *st = init_test(); + + // allocate sexp with one boxed field and push it onto the stack + // calling runtime function Bsexp(BOX(2), BOX(1), LtagHash("test")) + vstack_push( + st, call_runtime_function(vstack_top(st) - 4, Bsexp, 3, BOX(2), BOX(1), LtagHash("test"))); + + const int N = 10; + int ids[N]; + size_t alive = objects_snapshot(ids, N); + assert((alive == 1)); + + cleanup_test(st); +} + +void test_simple_closure_alloc (void) { + virt_stack *st = init_test(); + + // allocate closure with boxed captured value and push it onto the stack + vstack_push(st, call_runtime_function(vstack_top(st) - 4, Bclosure, 3, BOX(1), NULL, BOX(1))); + + const int N = 10; + int ids[N]; + size_t alive = objects_snapshot(ids, N); + assert((alive == 1)); + + cleanup_test(st); +} + +void test_single_object_allocation_with_collection_virtual_stack (void) { + virt_stack *st = init_test(); + + vstack_push(st, + call_runtime_function( + vstack_top(st) - 4, Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); + + const int N = 10; + int ids[N]; + size_t alive = objects_snapshot(ids, N); + assert((alive == 1)); + + cleanup_test(st); +} + +void test_garbage_is_reclaimed (void) { + virt_stack *st = init_test(); + + call_runtime_function(vstack_top(st) - 4, Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + + force_gc_cycle(st); + + const int N = 10; + int ids[N]; + size_t alive = objects_snapshot(ids, N); + assert((alive == 0)); + + cleanup_test(st); +} + +void test_alive_are_not_reclaimed (void) { + virt_stack *st = init_test(); + + vstack_push(st, + call_runtime_function( + vstack_top(st) - 4, Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")); + + force_gc_cycle(st); + + const int N = 10; + int ids[N]; + size_t alive = objects_snapshot(ids, N); + assert((alive == 1)); + + cleanup_test(st); +} + +void test_small_tree_compaction (void) { + virt_stack *st = init_test(); + // this one will increase heap size + call_runtime_function(vstack_top(st) - 4, Bstring, 1, "aaaaaaaaaaaaaaaaaaaaaa"); + + vstack_push(st, call_runtime_function(vstack_top(st) - 4, Bstring, 1, "left-s")); + vstack_push(st, call_runtime_function(vstack_top(st) - 4, Bstring, 1, "right-s")); + vstack_push(st, + call_runtime_function(vstack_top(st) - 4, + Bsexp, + 4, + BOX(3), + vstack_kth_from_start(st, 0), + vstack_kth_from_start(st, 1), + LtagHash("tree"))); + force_gc_cycle(st); + const int SZ = 10; + int ids[SZ]; + size_t alive = objects_snapshot(ids, SZ); + assert((alive == 3)); + + // check that order is indeed preserved + for (int i = 0; i < alive - 1; ++i) { assert((ids[i] < ids[i + 1])); } + cleanup_test(st); +} + +extern size_t cur_id; + +size_t generate_random_obj_forest (virt_stack *st, int cnt, int seed) { + srand(seed); + int cur_sz = 0; + size_t alive = 0; + while (cnt) { + --cnt; + if (cur_sz == 0) { + vstack_push(st, BOX(1)); + ++cur_sz; + continue; + } + + size_t pos[2] = {rand() % vstack_size(st), rand() % vstack_size(st)}; + size_t field[2]; + for (int t = 0; t < 2; ++t) { field[t] = vstack_kth_from_start(st, pos[t]); } + size_t obj; + + if (rand() % 2) { + obj = call_runtime_function( + vstack_top(st) - 4, Bsexp, 4, BOX(3), field[0], field[1], LtagHash("test")); + } else { + obj = BOX(1); + } + // whether object is stored on stack + if (rand() % 2 != 0) { + vstack_push(st, obj); + if ((obj & 1) == 0) { ++alive; } + } + ++cur_sz; + } + force_gc_cycle(st); + return alive; +} + +void run_stress_test_random_obj_forest (int seed) { + virt_stack *st = init_test(); + + const int SZ = 100000; + + size_t expectedAlive = generate_random_obj_forest(st, SZ, seed); + + int ids[SZ]; + size_t alive = objects_snapshot(ids, SZ); + assert(alive == expectedAlive); + + // check that order is indeed preserved + for (int i = 0; i < alive - 1; ++i) { assert((ids[i] < ids[i + 1])); } + + cleanup_test(st); +} + +#endif + +#include + +int main (int argc, char **argv) { +#ifdef DEBUG_VERSION + no_gc_tests(); + + test_simple_string_alloc(); + test_simple_array_alloc(); + test_simple_sexp_alloc(); + test_simple_closure_alloc(); + test_single_object_allocation_with_collection_virtual_stack(); + test_garbage_is_reclaimed(); + test_alive_are_not_reclaimed(); + test_small_tree_compaction(); + + time_t start, end; + double diff; + time(&start); + // stress test + for (int s = 0; s < 100; ++s) { run_stress_test_random_obj_forest(s); } + time(&end); + diff = difftime(end, start); + printf("Stress tests took %.2lf seconds to complete\n", diff); +#endif +} diff --git a/runtime/test_util.s b/runtime/test_util.s new file mode 100644 index 000000000..49f9e9add --- /dev/null +++ b/runtime/test_util.s @@ -0,0 +1,40 @@ +# this is equivalent C-signature for this function +# size_t call_runtime_function(void *stack, void *func_ptr, int num_args, ...) + + .globl call_runtime_function + .type call_runtime_function, @function +call_runtime_function: + pushl %ebp + movl %esp, %ebp + + # store old stack pointer + movl %esp, %edi + + # move esp to point to the virtual stack + movl 8(%ebp), %esp + + # push arguments onto the stack + movl 16(%ebp), %ecx # num_args + test %ecx, %ecx + jz f_call # in case function doesn't have any parameters + + leal 16(%ebp), %eax # pointer to value BEFORE first argument + leal (%eax,%ecx,4), %edx # pointer to last argument (right-to-left) + +push_args_loop: + pushl (%edx) + subl $4, %edx + subl $1, %ecx + jnz push_args_loop + + # call the function +f_call: + movl 12(%ebp), %eax + call *%eax + + # restore the old stack pointer + movl %edi, %esp + + # pop the old frame pointer and return + popl %ebp # epilogue + ret diff --git a/runtime/virt_stack.c b/runtime/virt_stack.c new file mode 100644 index 000000000..2e2e7a9d0 --- /dev/null +++ b/runtime/virt_stack.c @@ -0,0 +1,34 @@ +#include "virt_stack.h" + +#include + +virt_stack *vstack_create () { return malloc(sizeof(virt_stack)); } + +void vstack_destruct (virt_stack *st) { free(st); } + +void vstack_init (virt_stack *st) { + st->cur = RUNTIME_VSTACK_SIZE; + st->buf[st->cur] = 0; +} + +void vstack_push (virt_stack *st, size_t value) { + if (st->cur == 0) { assert(0); } + --st->cur; + st->buf[st->cur] = value; +} + +size_t vstack_pop (virt_stack *st) { + if (st->cur == RUNTIME_VSTACK_SIZE) { assert(0); } + size_t value = st->buf[st->cur]; + ++st->cur; + return value; +} + +void *vstack_top (virt_stack *st) { return st->buf + st->cur; } + +size_t vstack_size (virt_stack *st) { return RUNTIME_VSTACK_SIZE - st->cur; } + +size_t vstack_kth_from_start (virt_stack *st, size_t k) { + assert(vstack_size(st) > k); + return st->buf[RUNTIME_VSTACK_SIZE - 1 - k]; +} diff --git a/runtime/virt_stack.h b/runtime/virt_stack.h new file mode 100644 index 000000000..7ea083e34 --- /dev/null +++ b/runtime/virt_stack.h @@ -0,0 +1,33 @@ +// +// Created by egor on 24.04.23. +// + +#ifndef LAMA_RUNTIME_VIRT_STACK_H +#define LAMA_RUNTIME_VIRT_STACK_H +#define RUNTIME_VSTACK_SIZE 100000 + +#include +#include + +struct { + size_t buf[RUNTIME_VSTACK_SIZE + 1]; + size_t cur; +} typedef virt_stack; + +virt_stack *vstack_create (); + +void vstack_destruct (virt_stack *st); + +void vstack_init (virt_stack *st); + +void vstack_push (virt_stack *st, size_t value); + +size_t vstack_pop (virt_stack *st); + +void *vstack_top (virt_stack *st); + +size_t vstack_size (virt_stack *st); + +size_t vstack_kth_from_start (virt_stack *st, size_t k); + +#endif //LAMA_RUNTIME_VIRT_STACK_H diff --git a/src/.ocamlformat b/src/.ocamlformat new file mode 100644 index 000000000..1756ba7ac --- /dev/null +++ b/src/.ocamlformat @@ -0,0 +1 @@ +profile=default \ No newline at end of file diff --git a/src/Driver.ml b/src/Driver.ml index 01ebf0838..ee147f3c2 100644 --- a/src/Driver.ml +++ b/src/Driver.ml @@ -2,184 +2,223 @@ exception Commandline_error of string class options args = let n = Array.length args in - let dump_ast = 0b1 in - let dump_sm = 0b010 in + let dump_ast = 0b1 in + let dump_sm = 0b010 in let dump_source = 0b100 in (* Kakadu: binary masks are cool for C code, but for OCaml I don't see any reason to save memory like this *) let help_string = - "Lama compiler. (C) JetBrains Reserach, 2017-2020.\n" ^ - "Usage: lamac \n\n" ^ - "When no options specified, builds the source file into executable.\n" ^ - "Options:\n" ^ - " -c --- compile into object file\n" ^ - " -o --- write executable into file \n" ^ - " -I --- add into unit search path list\n" ^ - " -i --- interpret on a source-level interpreter\n" ^ - " -s --- compile into stack machine code and interpret on the stack machine initerpreter\n" ^ - " -dp --- dump AST (the output will be written into .ast file)\n" ^ - " -dsrc --- dump pretty-printed source code\n" ^ - " -ds --- dump stack machine code (the output will be written into .sm file; has no\n" ^ - " effect if -i option is specfied)\n" ^ - " -b --- compile to a stack machine bytecode\n" ^ - " -v --- show version\n" ^ - " -h --- show this help\n" + "Lama compiler. (C) JetBrains Reserach, 2017-2020.\n" + ^ "Usage: lamac \n\n" + ^ "When no options specified, builds the source file into executable.\n" + ^ "Options:\n" ^ " -c --- compile into object file\n" + ^ " -o --- write executable into file \n" + ^ " -I --- add into unit search path list\n" + ^ " -i --- interpret on a source-level interpreter\n" + ^ " -s --- compile into stack machine code and interpret on the \ + stack machine initerpreter\n" + ^ " -dp --- dump AST (the output will be written into .ast file)\n" + ^ " -dsrc --- dump pretty-printed source code\n" + ^ " -ds --- dump stack machine code (the output will be written \ + into .sm file; has no\n" + ^ " effect if -i option is specfied)\n" + ^ " -b --- compile to a stack machine bytecode\n" + ^ " -v --- show version\n" ^ " -h --- show this help\n" in object (self) val version = ref false - val help = ref false - val i = ref 1 - val infile = ref (None : string option) + val help = ref false + val i = ref 1 + val infile = ref (None : string option) val outfile = ref (None : string option) - val paths = ref [X86.get_std_path ()] - val mode = ref (`Default : [`Default | `Eval | `SM | `Compile | `BC]) - val curdir = Unix.getcwd () - val debug = ref false + val paths = ref [ X86.get_std_path () ] + val mode = ref (`Default : [ `Default | `Eval | `SM | `Compile | `BC ]) + val curdir = Unix.getcwd () + val debug = ref false + (* Workaround until Ostap starts to memoize properly *) - val const = ref false + val const = ref false + (* end of the workaround *) - val dump = ref 0 + val dump = ref 0 + initializer - let rec loop () = - match self#peek with - | Some opt -> - (match opt with - (* Workaround until Ostap starts to memoize properly *) - | "-w" -> self#set_workaround - (* end of the workaround *) - | "-c" -> self#set_mode `Compile - | "-o" -> (match self#peek with None -> raise (Commandline_error "File name expected after '-o' specifier") | Some fname -> self#set_outfile fname) - | "-I" -> (match self#peek with None -> raise (Commandline_error "Path expected after '-I' specifier") | Some path -> self#add_include_path path) - | "-s" -> self#set_mode `SM - | "-b" -> self#set_mode `BC - | "-i" -> self#set_mode `Eval - | "-ds" -> self#set_dump dump_sm - | "-dsrc" -> self#set_dump dump_source - | "-dp" -> self#set_dump dump_ast - | "-h" -> self#set_help - | "-v" -> self#set_version - | "-g" -> self#set_debug - | _ -> - if opt.[0] = '-' - then raise (Commandline_error (Printf.sprintf "Invalid command line specifier ('%s')" opt)) - else self#set_infile opt - ); - loop () - | None -> () - in loop () + let rec loop () = + match self#peek with + | Some opt -> + (match opt with + (* Workaround until Ostap starts to memoize properly *) + | "-w" -> self#set_workaround + (* end of the workaround *) + | "-c" -> self#set_mode `Compile + | "-o" -> ( + match self#peek with + | None -> + raise + (Commandline_error "File name expected after '-o' specifier") + | Some fname -> self#set_outfile fname) + | "-I" -> ( + match self#peek with + | None -> + raise (Commandline_error "Path expected after '-I' specifier") + | Some path -> self#add_include_path path) + | "-s" -> self#set_mode `SM + | "-b" -> self#set_mode `BC + | "-i" -> self#set_mode `Eval + | "-ds" -> self#set_dump dump_sm + | "-dsrc" -> self#set_dump dump_source + | "-dp" -> self#set_dump dump_ast + | "-h" -> self#set_help + | "-v" -> self#set_version + | "-g" -> self#set_debug + | _ -> + if opt.[0] = '-' then + raise + (Commandline_error + (Printf.sprintf "Invalid command line specifier ('%s')" opt)) + else self#set_infile opt); + loop () + | None -> () + in + loop () + (* Workaround until Ostap starts to memoize properly *) method is_workaround = !const - method private set_workaround = - const := true + method private set_workaround = const := true + (* end of the workaround *) - method private set_help = help := true + method private set_help = help := true method private set_version = version := true - method private set_dump mask = - dump := !dump lor mask + method private set_dump mask = dump := !dump lor mask + method private set_infile name = match !infile with - | None -> infile := Some name - | Some name' -> raise (Commandline_error (Printf.sprintf "Input file ('%s') already specified" name')) + | None -> infile := Some name + | Some name' -> + raise + (Commandline_error + (Printf.sprintf "Input file ('%s') already specified" name')) + method private set_outfile name = match !outfile with - | None -> outfile := Some name - | Some name' -> raise (Commandline_error (Printf.sprintf "Output file ('%s') already specified" name')) - method private add_include_path path = - paths := path :: !paths + | None -> outfile := Some name + | Some name' -> + raise + (Commandline_error + (Printf.sprintf "Output file ('%s') already specified" name')) + + method private add_include_path path = paths := path :: !paths + method private set_mode s = match !mode with | `Default -> mode := s | _ -> raise (Commandline_error "Extra compilation mode specifier") + method private peek = let j = !i in - if j < n - then (incr i; Some (args.(j))) + if j < n then ( + incr i; + Some args.(j)) else None + method get_mode = !mode + method get_output_option = match !outfile with - | None -> Printf.sprintf "-o %s" self#basename + | None -> Printf.sprintf "-o %s" self#basename | Some name -> Printf.sprintf "-o %s" name + method get_absolute_infile = let f = self#get_infile in if Filename.is_relative f then Filename.concat curdir f else f + method get_infile = match !infile with - | None -> raise (Commandline_error "Input file not specified") + | None -> raise (Commandline_error "Input file not specified") | Some name -> name + method get_help = !help method get_include_paths = !paths - method basename = Filename.chop_suffix (Filename.basename self#get_infile) ".lama" + + method basename = + Filename.chop_suffix (Filename.basename self#get_infile) ".lama" + method topname = - match !mode with - | `Compile -> "init" ^ self#basename - | _ -> "main" + match !mode with `Compile -> "init" ^ self#basename | _ -> "main" + method dump_file ext contents = let name = self#basename in let outf = open_out (Printf.sprintf "%s.%s" name ext) in Printf.fprintf outf "%s" contents; close_out outf + method dump_AST ast = - if (!dump land dump_ast) > 0 - then ( + if !dump land dump_ast > 0 then ( let buf = Buffer.create 1024 in Buffer.add_string buf ""; - Buffer.add_string buf (Printf.sprintf " %s " self#get_infile); + Buffer.add_string buf + (Printf.sprintf " %s " self#get_infile); Buffer.add_string buf "
  • "; - GT.html(Language.Expr.t) ast buf; + GT.html Language.Expr.t ast buf; Buffer.add_string buf "
  • "; Buffer.add_string buf ""; - self#dump_file "html" (Buffer.contents buf) - ) - method dump_source (ast: Language.Expr.t) = - if (!dump land dump_source) > 0 - then Pprinter.pp Format.std_formatter ast; - method dump_SM sm = - if (!dump land dump_sm) > 0 - then self#dump_file "sm" (SM.show_prg sm) + self#dump_file "html" (Buffer.contents buf)) + + method dump_source (ast : Language.Expr.t) = + if !dump land dump_source > 0 then Pprinter.pp Format.std_formatter ast + + method dump_SM sm = + if !dump land dump_sm > 0 then self#dump_file "sm" (SM.show_prg sm) else () + method greet = (match !outfile with - | None -> () - | Some _ -> (match !mode with `Default -> () | _ -> Printf.printf "Output file option ignored in this mode.\n") - ); + | None -> () + | Some _ -> ( + match !mode with + | `Default -> () + | _ -> Printf.printf "Output file option ignored in this mode.\n")); if !version then Printf.printf "%s\n" Version.version; - if !help then Printf.printf "%s" help_string - method get_debug = - if !debug then "" else "-g" - method set_debug = - debug := true + if !help then Printf.printf "%s" help_string + + method get_debug = if !debug then "" else "-g" + method set_debug = debug := true end -let main = +let[@ocaml.warning "-32"] main = try let cmd = new options Sys.argv in cmd#greet; - match (try Language.run_parser cmd with Language.Semantic_error msg -> `Fail msg) with - | `Ok prog -> - cmd#dump_AST (snd prog); - cmd#dump_source (snd prog); - (match cmd#get_mode with - | `Default | `Compile -> - ignore @@ X86.build cmd prog - | `BC -> - SM.ByteCode.compile cmd (SM.compile cmd prog) + match + try Language.run_parser cmd + with Language.Semantic_error msg -> `Fail msg + with + | `Ok prog -> ( + cmd#dump_AST (snd prog); + cmd#dump_source (snd prog); + match cmd#get_mode with + | `Default | `Compile -> ignore @@ X86.build cmd prog + | `BC -> SM.ByteCode.compile cmd (SM.compile cmd prog) | _ -> - let rec read acc = - try - let r = read_int () in - Printf.printf "> "; - read (acc @ [r]) - with End_of_file -> acc - in - let input = read [] in - let output = - if cmd#get_mode = `Eval - then Language.eval prog input - else SM.run (SM.compile cmd prog) input - in - List.iter (fun i -> Printf.printf "%d\n" i) output - ) - | `Fail er -> Printf.eprintf "Error: %s\n" er; exit 255 + let rec read acc = + try + let r = read_int () in + Printf.printf "> "; + read (acc @ [ r ]) + with End_of_file -> acc + in + let input = read [] in + let output = + if cmd#get_mode = `Eval then Language.eval prog input + else SM.run (SM.compile cmd prog) input + in + List.iter (fun i -> Printf.printf "%d\n" i) output) + | `Fail er -> + Printf.eprintf "Error: %s\n" er; + exit 255 with - | Language.Semantic_error msg -> Printf.printf "Error: %s\n" msg; exit 255 - | Commandline_error msg -> Printf.printf "%s\n" msg; exit 255 + | Language.Semantic_error msg -> + Printf.printf "Error: %s\n" msg; + exit 255 + | Commandline_error msg -> + Printf.printf "%s\n" msg; + exit 255 diff --git a/src/Language.ml b/src/Language.ml index 693649d3d..d685fce64 100644 --- a/src/Language.ml +++ b/src/Language.ml @@ -3,6 +3,8 @@ *) module OrigList = List +[@@@ocaml.warning "-7-8-13-15-20-26-27-32"] + open GT (* Opening a library for combinator-based syntax analysis *) @@ -55,7 +57,7 @@ module Loc = let report_error ?(loc=None) str = raise (Semantic_error (str ^ match loc with None -> "" | Some (l, c) -> Printf.sprintf " at (%d, %d)" l c));; - + @type k = Unmut | Mut | FVal with show, html, foldl (* Values *) @@ -85,7 +87,7 @@ module Value = with show, html, foldl let is_int = function Int _ -> true | _ -> false - + let to_int = function | Int n -> n | x -> failwith (Printf.sprintf "int value expected (%s)\n" (show(t) (fun _ -> "") (fun _ -> "") x)) @@ -114,6 +116,7 @@ module Value = match x with | Sexp (_, a) | Array a -> ignore (update_array a i v) | String a -> ignore (update_string a i (Char.chr @@ to_int v)) + | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) let string_val v = let buf = Buffer.create 128 in @@ -121,8 +124,7 @@ module Value = let rec inner = function | Int n -> append (string_of_int n) | String s -> append "\""; append @@ Bytes.to_string s; append "\"" - | Array a -> let n = Array.length a in - append "["; Array.iteri (fun i a -> (if i > 0 then append ", "); inner a) a; append "]" + | Array a -> append "["; Array.iteri (fun i a -> (if i > 0 then append ", "); inner a) a; append "]" | Sexp (t, a) -> let n = Array.length a in if t = "cons" then ( @@ -131,6 +133,7 @@ module Value = | [||] -> () | [|x; Int 0|] -> inner x | [|x; Sexp ("cons", a)|] -> inner x; append ", "; inner_list a + | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) in inner_list a; append "}" ) @@ -139,6 +142,7 @@ module Value = (if n > 0 then (append " ("; Array.iteri (fun i a -> (if i > 0 then append ", "); inner a) a; append ")")) ) + | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) in inner v; Bytes.of_string @@ Buffer.contents buf @@ -156,24 +160,27 @@ module Builtin = let eval (st, i, o, vs) args = function | "read" -> (match i with z::i' -> (st, i', o, (Value.of_int z)::vs) | _ -> failwith "Unexpected end of input") | "write" -> (st, i, o @ [Value.to_int @@ List.hd args], Value.Empty :: vs) - | ".elem" -> let [b; j] = args in - (st, i, o, let i = Value.to_int j in - (match b with - | Value.String s -> Value.of_int @@ Char.code (Bytes.get s i) - | Value.Array a -> a.(i) - | Value.Sexp (_, a) -> a.(i) - ) :: vs + | ".elem" -> (match args with + | [b; j] -> (st, i, o, let i = Value.to_int j in + (match b with + | Value.String s -> Value.of_int @@ Char.code (Bytes.get s i) + | Value.Array a -> a.(i) + | Value.Sexp (_, a) -> a.(i) + | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) + ) :: vs + ) + | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) ) - | "length" -> (st, i, o, (Value.of_int (match List.hd args with Value.Sexp (_, a) | Value.Array a -> Array.length a | Value.String s -> Bytes.length s))::vs) + | "length" -> (st, i, o, (Value.of_int (match List.hd args with Value.Sexp (_, a) | Value.Array a -> Array.length a | Value.String s -> Bytes.length s | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)))::vs) | ".array" -> (st, i, o, (Value.of_array @@ Array.of_list args)::vs) - | "string" -> let [a] = args in (st, i, o, (Value.of_string @@ Value.string_val a)::vs) - + | "string" -> (match args with | [a] -> (st, i, o, (Value.of_string @@ Value.string_val a)::vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)) + | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) end (* States *) module State = struct - + (* State: global state, local state, scope variables *) @type 'a t = | I @@ -273,7 +280,7 @@ module State = | _ -> L (xs, s, st) (* Drop a local scope *) - let drop = function L (_, _, e) -> e | G _ -> I + let drop = function L (_, _, e) -> e | G _ -> I | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) (* Observe a variable in a state and print it to stderr *) let observe st x = @@ -440,19 +447,18 @@ module Expr = let seq x = function Skip -> x | y -> Seq (x, y) - let schedule_list h::tl = - List.fold_left seq h tl + let schedule_list = function h::tl -> List.fold_left seq h tl | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) let rec take = function | 0 -> fun rest -> [], rest - | n -> fun h::tl -> let tl', rest = take (n-1) tl in h :: tl', rest + | n -> function h::tl -> let tl', rest = take (n-1) tl in h :: tl', rest | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) let rec eval ((st, i, o, vs) as conf) k expr = - let print_values vs = + (* let print_values vs = Printf.eprintf "Values:\n%!"; List.iter (fun v -> Printf.eprintf "%s\n%!" @@ show(Value.t) (fun _ -> "") (fun _ -> "") v) vs; Printf.eprintf "End Values\n%!" - in + in *) match expr with | Lambda (args, body) -> eval (st, i, o, Value.Closure (args, body, [|st|]) :: vs) Skip k @@ -500,73 +506,78 @@ module Expr = | Sexp (t, xs) -> eval conf k (schedule_list (xs @ [Intrinsic (fun (st, i, o, vs) -> let es, vs' = take (List.length xs) vs in (st, i, o, Value.Sexp (t, Array.of_list (List.rev es)) :: vs'))])) | Binop (op, x, y) -> - eval conf k (schedule_list [x; y; Intrinsic (fun (st, i, o, y::x::vs) -> (st, i, o, (Value.of_int @@ to_func op (Value.to_int x) (Value.to_int y)) :: vs))]) + eval conf k (schedule_list [x; y; Intrinsic (function (st, i, o, y::x::vs) -> (st, i, o, (Value.of_int @@ to_func op (Value.to_int x) (Value.to_int y)) :: vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))]) | Elem (b, i) -> - eval conf k (schedule_list [b; i; Intrinsic (fun (st, i, o, j::b::vs) -> Builtin.eval (st, i, o, vs) [b; j] ".elem")]) + eval conf k (schedule_list [b; i; Intrinsic (function (st, i, o, j::b::vs) -> Builtin.eval (st, i, o, vs) [b; j] ".elem" | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))]) | ElemRef (b, i) -> - eval conf k (schedule_list [b; i; Intrinsic (fun (st, i, o, j::b::vs) -> (st, i, o, (Value.Elem (b, Value.to_int j))::vs))]) + eval conf k (schedule_list [b; i; Intrinsic (function (st, i, o, j::b::vs) -> (st, i, o, (Value.Elem (b, Value.to_int j))::vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))]) | Call (f, args) -> eval conf k (schedule_list (f :: args @ [Intrinsic (fun (st, i, o, vs) -> let es, vs' = take (List.length args + 1) vs in - let f :: es = List.rev es in - (match f with - | Value.Builtin name -> - Builtin.eval (st, i, o, vs') es name - | Value.Closure (args, body, closure) -> - let st' = State.push (State.leave st closure.(0)) (State.from_list @@ List.combine args es) (List.map (fun x -> x, Mut) args) in - let st'', i', o', vs'' = eval (st', i, o, []) Skip body in - closure.(0) <- st''; - (State.leave st'' st, i', o', match vs'' with [v] -> v::vs' | _ -> Value.Empty :: vs') - | _ -> report_error (Printf.sprintf "callee did not evaluate to a function: \"%s\"" (show(Value.t) (fun _ -> "") (fun _ -> "") f)) - ))])) + match List.rev es with + | f :: es -> + (match f with + | Value.Builtin name -> + Builtin.eval (st, i, o, vs') es name + | Value.Closure (args, body, closure) -> + let st' = State.push (State.leave st closure.(0)) (State.from_list @@ List.combine args es) (List.map (fun x -> x, Mut) args) in + let st'', i', o', vs'' = eval (st', i, o, []) Skip body in + closure.(0) <- st''; + (State.leave st'' st, i', o', match vs'' with [v] -> v::vs' | _ -> Value.Empty :: vs') + | _ -> report_error (Printf.sprintf "callee did not evaluate to a function: \"%s\"" (show(Value.t) (fun _ -> "") (fun _ -> "") f)) + ) + | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) + )])) | Leave -> eval (State.drop st, i, o, vs) Skip k | Assign (x, e) -> - eval conf k (schedule_list [x; e; Intrinsic (fun (st, i, o, v::x::vs) -> (update st x v, i, o, v::vs))]) + eval conf k (schedule_list [x; e; Intrinsic (function (st, i, o, v::x::vs) -> (update st x v, i, o, v::vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))]) | Seq (s1, s2) -> eval conf (seq s2 k) s1 | Skip -> (match k with Skip -> conf | _ -> eval conf Skip k) | If (e, s1, s2) -> - eval conf k (schedule_list [e; Control (fun (st, i, o, e::vs) -> (if Value.to_int e <> 0 then s1 else s2), (st, i, o, vs))]) + eval conf k (schedule_list [e; Control (function (st, i, o, e::vs) -> (if Value.to_int e <> 0 then s1 else s2), (st, i, o, vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))]) | While (e, s) -> - eval conf k (schedule_list [e; Control (fun (st, i, o, e::vs) -> (if Value.to_int e <> 0 then seq s expr else Skip), (st, i, o, vs))]) + eval conf k (schedule_list [e; Control (function (st, i, o, e::vs) -> (if Value.to_int e <> 0 then seq s expr else Skip), (st, i, o, vs) | _ -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__))]) | DoWhile (s, e) -> eval conf (seq (While (e, s)) k) s | Case (e, bs, _, _)-> - let rec branch ((st, i, o, v::vs) as conf) = function - | [] -> failwith (Printf.sprintf "Pattern matching failed: no branch is selected while matching %s\n" (show(Value.t) (fun _ -> "") (fun _ -> "") v)) - | (patt, body)::tl -> - let rec match_patt patt v st = - let update x v = function - | None -> None - | Some s -> Some (State.bind x v s) - in - match patt, v with - | Pattern.Named (x, p), v -> update x v (match_patt p v st ) - | Pattern.Wildcard , _ -> st - | Pattern.Sexp (t, ps), Value.Sexp (t', vs) when t = t' && List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st - | Pattern.Array ps , Value.Array vs when List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st - | Pattern.Const n , Value.Int n' when n = n' -> st - | Pattern.String s , Value.String s' when s = Bytes.to_string s' -> st - | Pattern.Boxed , Value.String _ - | Pattern.Boxed , Value.Array _ - | Pattern.UnBoxed , Value.Int _ - | Pattern.Boxed , Value.Sexp (_, _) - | Pattern.StringTag , Value.String _ - | Pattern.ArrayTag , Value.Array _ - | Pattern.ClosureTag , Value.Closure _ - | Pattern.SexpTag , Value.Sexp (_, _) -> st - | _ -> None - and match_list ps vs s = - match ps, vs with - | [], [] -> s - | p::ps, v::vs -> match_list ps vs (match_patt p v s) - | _ -> None - in - match match_patt patt v (Some State.undefined) with - | None -> branch conf tl - | Some st' -> eval (State.push st st' (List.map (fun x -> x, Unmut) @@ Pattern.vars patt), i, o, vs) k (Seq (body, Leave)) + let rec branch = + function (_,_,_,[]) -> failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) + | ((st, i, o, v::vs) as conf) -> function + | [] -> failwith (Printf.sprintf "Pattern matching failed: no branch is selected while matching %s\n" (show(Value.t) (fun _ -> "") (fun _ -> "") v)) + | (patt, body)::tl -> + let rec match_patt patt v st = + let update x v = function + | None -> None + | Some s -> Some (State.bind x v s) + in + match patt, v with + | Pattern.Named (x, p), v -> update x v (match_patt p v st ) + | Pattern.Wildcard , _ -> st + | Pattern.Sexp (t, ps), Value.Sexp (t', vs) when t = t' && List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st + | Pattern.Array ps , Value.Array vs when List.length ps = Array.length vs -> match_list ps (Array.to_list vs) st + | Pattern.Const n , Value.Int n' when n = n' -> st + | Pattern.String s , Value.String s' when s = Bytes.to_string s' -> st + | Pattern.Boxed , Value.String _ + | Pattern.Boxed , Value.Array _ + | Pattern.UnBoxed , Value.Int _ + | Pattern.Boxed , Value.Sexp (_, _) + | Pattern.StringTag , Value.String _ + | Pattern.ArrayTag , Value.Array _ + | Pattern.ClosureTag , Value.Closure _ + | Pattern.SexpTag , Value.Sexp (_, _) -> st + | _ -> None + and match_list ps vs s = + match ps, vs with + | [], [] -> s + | p::ps, v::vs -> match_list ps vs (match_patt p v s) + | _ -> None + in + match match_patt patt v (Some State.undefined) with + | None -> branch conf tl + | Some st' -> eval (State.push st st' (List.map (fun x -> x, Unmut) @@ Pattern.vars patt), i, o, vs) k (Seq (body, Leave)) in eval conf Skip (schedule_list [e; Intrinsic (fun conf -> branch conf bs)]) @@ -635,14 +646,14 @@ module Expr = let not_a_reference s = new Reason.t (Msg.make "not a reference" [||] (Msg.Locator.Point s#coord)) (* UGLY! *) - let predefined_op : (Obj.t -> Obj.t -> Obj.t) ref = Pervasives.ref (fun _ _ -> invalid_arg "must not happen") + let predefined_op : (Obj.t -> Obj.t -> Obj.t) ref = Stdlib.ref (fun _ _ -> invalid_arg "must not happen") - let defCell = Pervasives.ref 0 + let defCell = Stdlib.ref 0 (* ======= *) let makeParsers env = - let makeParser, makeBasicParser, makeScopeParser = - let def s = let Some def = Obj.magic !defCell in def s in + let [@ocaml.warning "-26"] makeParser, makeBasicParser, makeScopeParser = + let [@ocaml.warning "-20"] def s = let [@ocaml.warning "-8"] Some def = Obj.magic !defCell in def s in let ostap ( parse[infix][atr]: h:basic[infix][Void] -";" t:parse[infix][atr] {Seq (h, t)} | basic[infix][atr]; scope[infix][atr]: <(d, infix')> : def[infix] expr:parse[infix'][atr] {Scope (d, expr)} | {isVoid atr} => <(d, infix')> : def[infix] => {d <> []} => {Scope (d, materialize atr Skip)}; @@ -872,7 +883,7 @@ module Infix = show(showable) @@ Array.map (fun (ass, (_, l)) -> List.map (fun (str, kind, _) -> ass, str, kind) l) infix let extract_exports infix = - let ass_string = function `Lefta -> "L" | `Righta -> "R" | _ -> "I" in + (* let ass_string = function `Lefta -> "L" | `Righta -> "R" | _ -> "I" in *) let exported = Array.map (fun (ass, (_, ops)) -> @@ -1013,7 +1024,7 @@ module Definition = (* end of the workaround *) ) - let makeParser env exprBasic exprScope = + let [@ocaml.warning "-26"] makeParser env exprBasic exprScope = let ostap ( arg : l:$ x:LIDENT {Loc.attach x l#coord; x}; position[pub][ass][coord][newp]: @@ -1107,7 +1118,7 @@ module Interface = Buffer.contents buf (* Read an interface file *) - let read fname = + let [@ocaml.warning "-26"] read fname = let ostap ( funspec: "F" "," i:IDENT ";" {`Fun i}; varspec: "V" "," i:IDENT ";" {`Variable i}; @@ -1201,8 +1212,8 @@ ostap ( let parse cmd = let env = object - val imports = Pervasives.ref ([] : string list) - val tmp_index = Pervasives.ref 0 + val imports = Stdlib.ref ([] : string list) + val tmp_index = Stdlib.ref 0 method add_import imp = imports := imp :: !imports method get_tmp = let index = !tmp_index in incr tmp_index; Printf.sprintf "__tmp%d" index @@ -1223,7 +1234,7 @@ let parse cmd = definitions in - let definitions = Pervasives.ref None in + let definitions = Stdlib.ref None in let (makeParser, makeBasicParser, makeScopeParser) = Expr.makeParsers env in @@ -1233,7 +1244,7 @@ let parse cmd = definitions := Some (makeDefinitions env exprBasic exprScope); - let Some definitions = !definitions in + let [@ocaml.warning "-8-20"] Some definitions = !definitions in let ostap ( parse[cmd]: @@ -1255,7 +1266,7 @@ let run_parser cmd = "while"; "do"; "od"; "for"; "fun"; "var"; "public"; "external"; "import"; - "case"; "of"; "esac"; + "case"; "of"; "esac"; "box"; "val"; "str"; "sexp"; "array"; "infix"; "infixl"; "infixr"; "at"; "before"; "after"; "true"; "false"; "lazy"; "eta"; "syntax"] diff --git a/src/Makefile b/src/Makefile index 58bfb9221..c60fbf970 100644 --- a/src/Makefile +++ b/src/Makefile @@ -8,7 +8,9 @@ PXFLAGS = $(CAMLP5) BFLAGS = -rectypes -g -w -13-58 -package GT,ostap,unix OFLAGS = $(BFLAGS) -all: depend metagen $(TOPFILE) +all: # depend metagen # $(TOPFILE) + dune build ./Driver.exe + ln -sf ../_build/default/src/Driver.exe lamac metagen: echo "let version = \"Version `git rev-parse --abbrev-ref HEAD`, `git rev-parse --short HEAD`, `git rev-parse --verify HEAD |git show --no-patch --no-notes --pretty='%cd'`\"" > version.ml @@ -25,6 +27,7 @@ $(TOPFILE).byte: $(SOURCES:.ml=.cmo) clean: $(RM) $(TOPFILE) *.cm[ioxa] *.annot *.o *.opt *.byte *~ .depend + dune clean -include .depend # generic rules @@ -44,4 +47,4 @@ clean: $(OCAMLOPT) -c $(OFLAGS) $(STATIC) $(PXFLAGS) $< %.cmx: %.ml - $(OCAMLOPT) -c $(OFLAGS) $(STATIC) $(PXFLAGS) $< + $(OCAMLOPT) -c $(OFLAGS) $(STATIC) $(PXFLAGS) $< \ No newline at end of file diff --git a/src/SM.ml b/src/SM.ml index 43362125b..8397571b2 100644 --- a/src/SM.ml +++ b/src/SM.ml @@ -1,266 +1,398 @@ -open GT +open GT open Language (* The type for patters *) -@type patt = StrCmp | String | Array | Sexp | Boxed | UnBoxed | Closure with show, enum - -(* The type for local scopes tree *) -@type scope = { - blab : string; - elab : string; - names : (string * int) list; - subs : scope list; -} with show +type patt = StrCmp | String | Array | Sexp | Boxed | UnBoxed | Closure +[@@deriving gt ~options:{ show; enum }] + +(* The type for local scopes tree *) +type scope = { + blab : string; + elab : string; + names : (string * int) list; + subs : scope list; +} +[@@deriving gt ~options:{ show }] -let show_scope = show(scope);; +let show_scope = show scope (* The type for the stack machine instructions *) -@type insn = -(* binary operator *) | BINOP of string -(* put a constant on the stack *) | CONST of int -(* put a string on the stack *) | STRING of string -(* create an S-expression *) | SEXP of string * int -(* load a variable to the stack *) | LD of Value.designation -(* load a variable address to the stack *) | LDA of Value.designation -(* store a value into a variable *) | ST of Value.designation -(* store a value into a reference *) | STI -(* store a value into array/sexp/string *) | STA -(* takes an element of array/string/sexp *) | ELEM -(* a label *) | LABEL of string -(* a forwarded label *) | FLABEL of string -(* a scope label *) | SLABEL of string -(* unconditional jump *) | JMP of string -(* conditional jump *) | CJMP of string * string -(* begins procedure definition *) | BEGIN of string * int * int * Value.designation list * string list * scope list -(* end procedure definition *) | END -(* create a closure *) | CLOSURE of string * Value.designation list -(* proto closure *) | PROTO of string * string -(* proto closure to a possible constant *) | PPROTO of string * string -(* proto call *) | PCALLC of int * bool -(* calls a closure *) | CALLC of int * bool -(* calls a function/procedure *) | CALL of string * int * bool -(* returns from a function *) | RET -(* drops the top element off *) | DROP -(* duplicates the top element *) | DUP -(* swaps two top elements *) | SWAP -(* checks the tag and arity of S-expression *) | TAG of string * int -(* checks the tag and size of array *) | ARRAY of int -(* checks various patterns *) | PATT of patt -(* match failure (location, leave a value *) | FAIL of Loc.t * bool -(* external definition *) | EXTERN of string -(* public definition *) | PUBLIC of string -(* import clause *) | IMPORT of string -(* line info *) | LINE of int -with show - +type insn = + (* binary operator *) + | BINOP of string + (* put a constant on the stack *) + | CONST of int + (* put a string on the stack *) + | STRING of string + (* create an S-expression *) + | SEXP of string * int + (* load a variable to the stack *) + | LD of Value.designation + (* load a variable address to the stack *) + | LDA of Value.designation + (* store a value into a variable *) + | ST of Value.designation + (* store a value into a reference *) + | STI + (* store a value into array/sexp/string *) + | STA + (* takes an element of array/string/sexp *) + | ELEM + (* a label *) + | LABEL of string + (* a forwarded label *) + | FLABEL of string + (* a scope label *) + | SLABEL of string + (* unconditional jump *) + | JMP of string + (* conditional jump *) + | CJMP of string * string + (* begins procedure definition *) + | BEGIN of + string * int * int * Value.designation list * string list * scope list + (* end procedure definition *) + | END + (* create a closure *) + | CLOSURE of string * Value.designation list + (* proto closure *) + | PROTO of string * string + (* proto closure to a possible constant *) + | PPROTO of string * string + (* proto call *) + | PCALLC of int * bool + (* calls a closure *) + | CALLC of int * bool + (* calls a function/procedure *) + | CALL of string * int * bool + (* returns from a function *) + | RET + (* drops the top element off *) + | DROP + (* duplicates the top element *) + | DUP + (* swaps two top elements *) + | SWAP + (* checks the tag and arity of S-expression *) + | TAG of string * int + (* checks the tag and size of array *) + | ARRAY of int + (* checks various patterns *) + | PATT of patt + (* match failure (location, leave a value *) + | FAIL of Loc.t * bool + (* external definition *) + | EXTERN of string + (* public definition *) + | PUBLIC of string + (* import clause *) + | IMPORT of string + (* line info *) + | LINE of int +[@@deriving gt ~options:{ show }] + (* The type for the stack machine program *) -@type prg = insn list with show +type prg = insn list [@@deriving gt ~options:{ show }] -module ByteCode = - struct - - module M = Map.Make (String) - module S = Set.Make (String) +module ByteCode = struct + module M = Map.Make (String) + module S = Set.Make (String) - module StringTab = - struct - - type t = {mutable smap : int M.t; buffer: Buffer.t; mutable index : int} + module StringTab = struct + type t = { mutable smap : int M.t; buffer : Buffer.t; mutable index : int } - let create () = {smap = M.empty; buffer = Buffer.create 255; index = 0} + let create () = { smap = M.empty; buffer = Buffer.create 255; index = 0 } - let add st s = - try let i = M.find s st.smap in i - with Not_found -> - let i = st.index in - Buffer.add_string st.buffer s; - Buffer.add_char st.buffer (Char.chr 0); - st.smap <- M.add s i st.smap; - st.index <- st.index + String.length s + 1; - i - - end - - exception Found of int - - let opnum = - let optab = ["+"; "-"; "*"; "/"; "%"; "<"; "<="; ">"; ">="; "=="; "!="; "&&"; "!!"] in - fun s -> + let add st s = try - ignore @@ List.fold_left (fun i op -> if s = op then raise (Found i) else i+1) 1 optab; - failwith (Printf.sprintf "ERROR: undefined binary operator '%s'" s) - with - Found i -> i - -(* Below are the the numbers of occurrencies of SM instructions for the stdlib+lama compiler itself - - 7328 SLABEL - 5351 CALL - 5321 DROP - 4437 LABEL - 4331 LD - 4213 DUP - 3979 EXTERN - 3525 CONST - 2503 LINE - 2281 JMP - 1400 ST - 1122 CJMP - 922 END - 922 BEGIN - 790 SEXP - 770 CLOSURE - 519 TAG - 493 STRING - 354 FAIL - 349 CALLC - 339 BINOP - 289 ARRAY - 270 PUBLIC - 87 PATT - 39 STA - 16 FLABEL - *) - - let compile cmd insns = - let word_size = 4 in - let code = Buffer.create 256 in - let st = StringTab.create () in - let lmap = Stdlib.ref M.empty in - let pubs = Stdlib.ref S.empty in - let imports = Stdlib.ref S.empty in - let globals = Stdlib.ref M.empty in - let glob_count = Stdlib.ref 0 in - let fixups = Stdlib.ref [] in - let add_lab l = lmap := M.add l (Buffer.length code) !lmap in - let add_public l = pubs := S.add l !pubs in - let add_import l = imports := S.add l !imports in - let add_fixup l = fixups := (Buffer.length code, l) :: !fixups in - let add_bytes = List.iter (fun x -> Buffer.add_char code @@ Char .chr x) in - let add_ints = List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int x) in - let add_strings = List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int @@ StringTab.add st x) in - let add_designations n = - let b x = - match n with - None -> x - | Some b -> b * 16 + x - in - List.iter (function - | Value.Global s -> - let i = - try M.find s !globals - with Not_found -> - let i = !glob_count in - incr glob_count; - globals := M.add s i !globals; - i - in - add_bytes [b 0]; add_ints [i] - | Value.Local n -> add_bytes [b 1]; add_ints [n] - | Value.Arg n -> add_bytes [b 2]; add_ints [n] - | Value.Access n -> add_bytes [b 3]; add_ints [n] - ) - in - let insn_code = function - (* 0x0s *) | BINOP s -> add_bytes [opnum s] - (* 0x10 n:32 *) | CONST n -> add_bytes [1*16 + 0]; add_ints [n] - (* 0x11 s:32 *) | STRING s -> add_bytes [1*16 + 1]; add_strings [s] - (* 0x12 s:32 n:32 *) | SEXP (s, n) -> add_bytes [1*16 + 2]; add_strings [s]; add_ints [n] - (* 0x13 *) | STI -> add_bytes [1*16 + 3] - (* 0x14 *) | STA -> add_bytes [1*16 + 4] - - | LABEL s - | FLABEL s - | SLABEL s -> add_lab s - - (* 0x15 l:32 *) | JMP s -> add_bytes [1*16 + 5]; add_fixup s; add_ints [0] - (* 0x16 *) | END -> add_bytes [1*16 + 6] - (* 0x17 *) | RET -> add_bytes [1*16 + 7] - (* 0x18 *) | DROP -> add_bytes [1*16 + 8] - (* 0x19 *) | DUP -> add_bytes [1*16 + 9] - (* 0x1a *) | SWAP -> add_bytes [1*16 + 10] - (* 0x1b *) | ELEM -> add_bytes [1*16 + 11] - - (* 0x2d n:32 *) | LD d -> add_designations (Some 2) [d] - (* 0x3d n:32 *) | LDA d -> add_designations (Some 3) [d] - (* 0x4d n:32 *) | ST d -> add_designations (Some 4) [d] - - (* 0x50 l:32 *) | CJMP ("z" , s) -> add_bytes [5*16 + 0]; add_fixup s; add_ints [0] - (* 0x51 l:32 *) | CJMP ("nz", s) -> add_bytes [5*16 + 1]; add_fixup s; add_ints [0] - - (* 0x70 *) | CALL ("Lread", _, _) -> add_bytes [7*16 + 0] - (* 0x71 *) | CALL ("Lwrite", _, _) -> add_bytes [7*16 + 1] - (* 0x72 *) | CALL ("Llength", _, _) -> add_bytes [7*16 + 2] - (* 0x73 *) | CALL ("Lstring", _, _) -> add_bytes [7*16 + 3] - (* 0x74 *) | CALL (".array", n, _) -> add_bytes [7*16 + 4]; add_ints [n] - - (* 0x52 n:32 n:32 *) | BEGIN (_, a, l, [], _, _) -> add_bytes [5*16 + 2]; add_ints [a; l] (* with no closure *) - (* 0x53 n:32 n:32 *) | BEGIN (_, a, l, _, _, _) -> add_bytes [5*16 + 3]; add_ints [a; l] (* with a closure *) - (* 0x54 l:32 n:32 d*:32 *) | CLOSURE (s, ds) -> add_bytes [5*16 + 4]; add_fixup s; add_ints [0; List.length ds]; add_designations None ds - (* 0x55 n:32 *) | CALLC (n, tail) -> add_bytes [5*16 + 5]; add_ints [n] - (* 0x56 l:32 n:32 *) | CALL (fn, n, tail) -> add_bytes [5*16 + 6]; add_fixup fn; add_ints [0; n] - (* 0x57 s:32 n:32 *) | TAG (s, n) -> add_bytes [5*16 + 7]; add_strings [s]; add_ints [n] - (* 0x58 n:32 *) | ARRAY n -> add_bytes [5*16 + 8]; add_ints [n] - (* 0x59 n:32 n:32 *) | FAIL ((l, c), _) -> add_bytes [5*16 + 9]; add_ints [l; c] - (* 0x5a n:32 *) | LINE n -> add_bytes [5*16 + 10]; add_ints [n] - (* 0x6p *) | PATT p -> add_bytes [6*16 + enum(patt) p] - - | EXTERN s -> () - | PUBLIC s -> add_public s - | IMPORT s -> add_import s - in - List.iter insn_code insns; - add_bytes [255]; - let code = Buffer.to_bytes code in - List.iter - (fun (ofs, l) -> - Bytes.set_int32_ne code ofs (Int32.of_int @@ try M.find l !lmap with Not_found -> failwith (Printf.sprintf "ERROR: undefined label '%s'" l)) - ) - !fixups; - let pubs = List.map - (fun l -> - Int32.of_int @@ StringTab.add st l, - (Int32.of_int @@ try M.find l !lmap with Not_found -> failwith (Printf.sprintf "ERROR: undefined label '%s'" l)) - ) @@ S.elements !pubs - in - let st = Buffer.to_bytes st.StringTab.buffer in - let file = Buffer.create 1024 in - Buffer.add_int32_ne file (Int32.of_int @@ Bytes.length st); - Buffer.add_int32_ne file (Int32.of_int @@ !glob_count); - Buffer.add_int32_ne file (Int32.of_int @@ List.length pubs); - List.iter (fun (n, o) -> Buffer.add_int32_ne file n; Buffer.add_int32_ne file o) pubs; - Buffer.add_bytes file st; - Buffer.add_bytes file code; - let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in - Buffer.output_buffer f file; - close_out f - + let i = M.find s st.smap in + i + with Not_found -> + let i = st.index in + Buffer.add_string st.buffer s; + Buffer.add_char st.buffer (Char.chr 0); + st.smap <- M.add s i st.smap; + st.index <- st.index + String.length s + 1; + i end - + + exception Found of int + + let opnum = + let optab = + [ "+"; "-"; "*"; "/"; "%"; "<"; "<="; ">"; ">="; "=="; "!="; "&&"; "!!" ] + in + fun s -> + try + ignore + @@ List.fold_left + (fun i op -> if s = op then raise (Found i) else i + 1) + 1 optab; + failwith (Printf.sprintf "ERROR: undefined binary operator '%s'" s) + with Found i -> i + + (* Below are the the numbers of occurrencies of SM instructions for the stdlib+lama compiler itself + + 7328 SLABEL + 5351 CALL + 5321 DROP + 4437 LABEL + 4331 LD + 4213 DUP + 3979 EXTERN + 3525 CONST + 2503 LINE + 2281 JMP + 1400 ST + 1122 CJMP + 922 END + 922 BEGIN + 790 SEXP + 770 CLOSURE + 519 TAG + 493 STRING + 354 FAIL + 349 CALLC + 339 BINOP + 289 ARRAY + 270 PUBLIC + 87 PATT + 39 STA + 16 FLABEL + *) + + let compile cmd insns = + (* let word_size = 4 in *) + let code = Buffer.create 256 in + let st = StringTab.create () in + let lmap = Stdlib.ref M.empty in + let pubs = Stdlib.ref S.empty in + let imports = Stdlib.ref S.empty in + let globals = Stdlib.ref M.empty in + let glob_count = Stdlib.ref 0 in + let fixups = Stdlib.ref [] in + let add_lab l = lmap := M.add l (Buffer.length code) !lmap in + let add_public l = pubs := S.add l !pubs in + let add_import l = imports := S.add l !imports in + let add_fixup l = fixups := (Buffer.length code, l) :: !fixups in + let add_bytes = List.iter (fun x -> Buffer.add_char code @@ Char.chr x) in + let add_ints = + List.iter (fun x -> Buffer.add_int32_ne code @@ Int32.of_int x) + in + let add_strings = + List.iter (fun x -> + Buffer.add_int32_ne code @@ Int32.of_int @@ StringTab.add st x) + in + let add_designations n = + let b x = match n with None -> x | Some b -> (b * 16) + x in + List.iter (function + | Value.Global s -> + let i = + try M.find s !globals + with Not_found -> + let i = !glob_count in + incr glob_count; + globals := M.add s i !globals; + i + in + add_bytes [ b 0 ]; + add_ints [ i ] + | Value.Local n -> + add_bytes [ b 1 ]; + add_ints [ n ] + | Value.Arg n -> + add_bytes [ b 2 ]; + add_ints [ n ] + | Value.Access n -> + add_bytes [ b 3 ]; + add_ints [ n ] + | _ -> + failwith + (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)) + in + let insn_code = function + (* 0x0s *) + | BINOP s -> add_bytes [ opnum s ] + (* 0x10 n:32 *) + | CONST n -> + add_bytes [ (1 * 16) + 0 ]; + add_ints [ n ] + (* 0x11 s:32 *) + | STRING s -> + add_bytes [ (1 * 16) + 1 ]; + add_strings [ s ] + (* 0x12 s:32 n:32 *) + | SEXP (s, n) -> + add_bytes [ (1 * 16) + 2 ]; + add_strings [ s ]; + add_ints [ n ] + (* 0x13 *) + | STI -> add_bytes [ (1 * 16) + 3 ] + (* 0x14 *) + | STA -> add_bytes [ (1 * 16) + 4 ] + | LABEL s | FLABEL s | SLABEL s -> add_lab s + (* 0x15 l:32 *) + | JMP s -> + add_bytes [ (1 * 16) + 5 ]; + add_fixup s; + add_ints [ 0 ] + (* 0x16 *) + | END -> add_bytes [ (1 * 16) + 6 ] + (* 0x17 *) + | RET -> add_bytes [ (1 * 16) + 7 ] + (* 0x18 *) + | DROP -> add_bytes [ (1 * 16) + 8 ] + (* 0x19 *) + | DUP -> add_bytes [ (1 * 16) + 9 ] + (* 0x1a *) + | SWAP -> add_bytes [ (1 * 16) + 10 ] + (* 0x1b *) + | ELEM -> add_bytes [ (1 * 16) + 11 ] + (* 0x2d n:32 *) + | LD d -> add_designations (Some 2) [ d ] + (* 0x3d n:32 *) + | LDA d -> add_designations (Some 3) [ d ] + (* 0x4d n:32 *) + | ST d -> add_designations (Some 4) [ d ] + (* 0x50 l:32 *) + | CJMP ("z", s) -> + add_bytes [ (5 * 16) + 0 ]; + add_fixup s; + add_ints [ 0 ] + (* 0x51 l:32 *) + | CJMP ("nz", s) -> + add_bytes [ (5 * 16) + 1 ]; + add_fixup s; + add_ints [ 0 ] + (* 0x70 *) + | CALL ("Lread", _, _) -> add_bytes [ (7 * 16) + 0 ] + (* 0x71 *) + | CALL ("Lwrite", _, _) -> add_bytes [ (7 * 16) + 1 ] + (* 0x72 *) + | CALL ("Llength", _, _) -> add_bytes [ (7 * 16) + 2 ] + (* 0x73 *) + | CALL ("Lstring", _, _) -> add_bytes [ (7 * 16) + 3 ] + (* 0x74 *) + | CALL (".array", n, _) -> + add_bytes [ (7 * 16) + 4 ]; + add_ints [ n ] + (* 0x52 n:32 n:32 *) + | BEGIN (_, a, l, [], _, _) -> + add_bytes [ (5 * 16) + 2 ]; + add_ints [ a; l ] (* with no closure *) + (* 0x53 n:32 n:32 *) + | BEGIN (_, a, l, _, _, _) -> + add_bytes [ (5 * 16) + 3 ]; + add_ints [ a; l ] (* with a closure *) + (* 0x54 l:32 n:32 d*:32 *) + | CLOSURE (s, ds) -> + add_bytes [ (5 * 16) + 4 ]; + add_fixup s; + add_ints [ 0; List.length ds ]; + add_designations None ds + (* 0x55 n:32 *) + | CALLC (n, _) -> + add_bytes [ (5 * 16) + 5 ]; + add_ints [ n ] + (* 0x56 l:32 n:32 *) + | CALL (fn, n, _) -> + add_bytes [ (5 * 16) + 6 ]; + add_fixup fn; + add_ints [ 0; n ] + (* 0x57 s:32 n:32 *) + | TAG (s, n) -> + add_bytes [ (5 * 16) + 7 ]; + add_strings [ s ]; + add_ints [ n ] + (* 0x58 n:32 *) + | ARRAY n -> + add_bytes [ (5 * 16) + 8 ]; + add_ints [ n ] + (* 0x59 n:32 n:32 *) + | FAIL ((l, c), _) -> + add_bytes [ (5 * 16) + 9 ]; + add_ints [ l; c ] + (* 0x5a n:32 *) + | LINE n -> + add_bytes [ (5 * 16) + 10 ]; + add_ints [ n ] + (* 0x6p *) + | PATT p -> add_bytes [ (6 * 16) + enum patt p ] + | EXTERN _ -> () + | PUBLIC s -> add_public s + | IMPORT s -> add_import s + | _ -> + failwith + (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) + in + List.iter insn_code insns; + add_bytes [ 255 ]; + let code = Buffer.to_bytes code in + List.iter + (fun (ofs, l) -> + Bytes.set_int32_ne code ofs + (Int32.of_int + @@ + try M.find l !lmap + with Not_found -> + failwith (Printf.sprintf "ERROR: undefined label '%s'" l))) + !fixups; + let pubs = + List.map (fun l -> + ( Int32.of_int @@ StringTab.add st l, + Int32.of_int + @@ + try M.find l !lmap + with Not_found -> + failwith (Printf.sprintf "ERROR: undefined label '%s'" l) )) + @@ S.elements !pubs + in + let st = Buffer.to_bytes st.StringTab.buffer in + let file = Buffer.create 1024 in + Buffer.add_int32_ne file (Int32.of_int @@ Bytes.length st); + Buffer.add_int32_ne file (Int32.of_int @@ !glob_count); + Buffer.add_int32_ne file (Int32.of_int @@ List.length pubs); + List.iter + (fun (n, o) -> + Buffer.add_int32_ne file n; + Buffer.add_int32_ne file o) + pubs; + Buffer.add_bytes file st; + Buffer.add_bytes file code; + let f = open_out_bin (Printf.sprintf "%s.bc" cmd#basename) in + Buffer.output_buffer f file; + close_out f +end + let show_prg p = let b = Buffer.create 512 in - List.iter (fun i -> Buffer.add_string b (show(insn) i); Buffer.add_string b "\n") p; - Buffer.contents b;; + List.iter + (fun i -> + Buffer.add_string b (show insn i); + Buffer.add_string b "\n") + p; + Buffer.contents b (* Values *) -@type value = (string, value array) Value.t with show - +type value = (string, value array) Value.t [@@deriving gt ~options:{ show }] + (* Local state of the SM *) -@type local = { args : value array; locals : value array; closure : value array } with show +type local = { args : value array; locals : value array; closure : value array } +[@@deriving gt ~options:{ show }] (* Global state of the SM *) -@type global = (string, value) arrow +type global = (string, value) arrow [@@deriving gt] (* Control stack *) -@type control = (prg * local) list with show +type control = (prg * local) list [@@deriving gt ~options:{ show }] (* Data stack *) -@type stack = value list with show - -(* The type for the stack machine configuration: control stack, stack, global and local states, +type stack = value list [@@deriving gt ~options:{ show }] + +(* The type for the stack machine configuration: control stack, stack, global and local states, input and output streams *) -type config = control * stack * global * local * int list * int list +type config = control * stack * global * local * int list * int list (* Stack machine interpreter @@ -268,151 +400,338 @@ type config = control * stack * global * local * int list * int list Takes an environment, a configuration and a program, and returns a configuration as a result. The environment is used to locate a label to jump to (via method env#labeled ) -*) +*) let split n l = let rec unzip (taken, rest) = function - | 0 -> (List.rev taken, rest) - | n -> let h::tl = rest in unzip (h::taken, tl) (n-1) + | 0 -> (List.rev taken, rest) + | n -> + let[@ocaml.warning "-8"] (h :: tl) = rest in + unzip (h :: taken, tl) (n - 1) in unzip ([], l) n let update glob loc z = function -| Value.Global x -> State.bind x z glob -| Value.Local i -> loc.locals.(i) <- z; glob -| Value.Arg i -> loc.args.(i) <- z; glob -| Value.Access i -> loc.closure.(i) <- z; glob - -let print_stack memo s = + | Value.Global x -> State.bind x z glob + | Value.Local i -> + loc.locals.(i) <- z; + glob + | Value.Arg i -> + loc.args.(i) <- z; + glob + | Value.Access i -> + loc.closure.(i) <- z; + glob + | _ -> + failwith (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__) + +let print_stack _ s = Printf.eprintf "Memo %!"; - List.iter (fun v -> Printf.eprintf "%s " @@ show(value) v) s; + List.iter (fun v -> Printf.eprintf "%s " @@ show value v) s; Printf.eprintf "\n%!" let show_insn = show insn - -let rec eval env (((cstack, stack, glob, loc, i, o) as conf) : config) = function -| [] -> conf -| insn :: prg' -> - (* + +let[@ocaml.warning "-8-20"] rec eval env + ((cstack, stack, glob, loc, i, o) as conf : config) = function + | [] -> conf + | insn :: prg' -> ( + (* Printf.eprintf "eval\n"; Printf.eprintf " insn=%s\n" (show_insn insn); Printf.eprintf " stack=%s\n" (show(list) (show(value)) stack); Printf.eprintf "end\n"; *) - (match insn with - | IMPORT _ | PUBLIC _ | EXTERN _ | LINE _ -> eval env conf prg' - - | BINOP "==" -> let y::x::stack' = stack in - let z = - match x, y with - | Value.Int _, Value.Int _ -> Value.of_int @@ Expr.to_func "==" (Value.to_int x) (Value.to_int y) - | Value.Int _, _ | _, Value.Int _ -> Value.of_int 0 - | _ -> failwith "unexpected operands in comparison: %s vs. %s\n" - (show(Value.t) (fun _ -> "") (fun _ -> "") x) - (show(Value.t) (fun _ -> "") (fun _ -> "") y) - in - eval env (cstack, z :: stack', glob, loc, i, o) prg' - | BINOP op -> let y::x::stack' = stack in eval env (cstack, (Value.of_int @@ Expr.to_func op (Value.to_int x) (Value.to_int y)) :: stack', glob, loc, i, o) prg' - | CONST n -> eval env (cstack, (Value.of_int n)::stack, glob, loc, i, o) prg' - | STRING s -> eval env (cstack, (Value.of_string @@ Bytes.of_string s)::stack, glob, loc, i, o) prg' - | SEXP (s, n) -> let vs, stack' = split n stack in - eval env (cstack, (Value.sexp s @@ List.rev vs)::stack', glob, loc, i, o) prg' - - | ELEM -> let a :: b :: stack' = stack in - eval env (env#builtin ".elem" [a; b] (cstack, stack', glob, loc, i, o)) prg' - - | LD x -> eval env (cstack, (match x with - | Value.Global x -> glob x - | Value.Local i -> loc.locals.(i) - | Value.Arg i -> loc.args.(i) - | Value.Access i -> loc.closure.(i)) :: stack, glob, loc, i, o) prg' - - | LDA x -> eval env (cstack, (Value.Var x) :: stack, glob, loc, i, o) prg' - - | ST x -> let z::stack' = stack in - eval env (cstack, z::stack', update glob loc z x, loc, i, o) prg' - - | STI -> let z::(Value.Var r)::stack' = stack in - eval env (cstack, z::stack', update glob loc z r, loc, i, o) prg' - - | STA -> let z::j::stack' = stack in - (match j with - | Value.Var r -> eval env (cstack, z::stack', update glob loc z r, loc, i, o) prg' - | Value.Int _ -> - let x :: stack' = stack' in - Value.update_elem x (Value.to_int j) z; - eval env (cstack, z::stack', glob, loc, i, o) prg' - ) - - | SLABEL _ | LABEL _ | FLABEL _ -> eval env conf prg' - - | JMP l -> eval env conf (env#labeled l) - | CJMP (c, l) -> let x::stack' = stack in - eval env (cstack, stack', glob, loc, i, o) (if (c = "z" && Value.to_int x = 0) || (c = "nz" && Value.to_int x <> 0) then env#labeled l else prg') - - | CLOSURE (name, dgs) -> let closure = - Array.of_list @@ - List.map ( - function - | Value.Arg i -> loc.args.(i) - | Value.Local i -> loc.locals.(i) - | Value.Access i -> loc.closure.(i) - | _ -> invalid_arg "wrong value in CLOSURE") - dgs - in - eval env (cstack, (Value.Closure ([], name, closure)) :: stack, glob, loc, i, o) prg' - - | CALL (f, n, _) -> let args, stack' = split n stack in - if env#is_label f - then eval env ((prg', loc)::cstack, stack', glob, {args = Array.of_list (List.rev args); locals = [||]; closure = [||]}, i, o) (env#labeled f) - else eval env (env#builtin f args ((cstack, stack', glob, loc, i, o) : config)) prg' - - | CALLC (n, _) -> let vs, stack' = split (n+1) stack in - let f::args = List.rev vs in - (match f with - | Value.Builtin f -> - eval env (env#builtin f (List.rev args) ((cstack, stack', glob, loc, i, o) : config)) prg' - | Value.Closure (_, f, closure) -> - eval env ((prg', loc)::cstack, stack', glob, {args = Array.of_list args; locals = [||]; closure = closure}, i, o) (env#labeled f) - | _ -> invalid_arg "not a closure (or a builtin) in CALL: %s\n" @@ show(value) f - ) - - | BEGIN (_, _, locals, _, _, _) -> eval env (cstack, stack, glob, {loc with locals = Array.init locals (fun _ -> Value.Empty)}, i, o) prg' - - | END -> (match cstack with - | (prg', loc')::cstack' -> eval env (cstack', (*Value.Empty ::*) stack, glob, loc', i, o) prg' - | [] -> conf - ) - - | RET -> (match cstack with - | (prg', loc')::cstack' -> eval env (cstack', stack, glob, loc', i, o) prg' - | [] -> conf - ) - - | DROP -> eval env (cstack, List.tl stack, glob, loc, i, o) prg' - | DUP -> eval env (cstack, List.hd stack :: stack, glob, loc, i, o) prg' - | SWAP -> let x::y::stack' = stack in - eval env (cstack, y::x::stack', glob, loc, i, o) prg' - | TAG (t, n) -> let x::stack' = stack in - eval env (cstack, (Value.of_int @@ match x with Value.Sexp (t', a) when t' = t && Array.length a = n -> 1 | _ -> 0) :: stack', glob, loc, i, o) prg' - | ARRAY n -> let x::stack' = stack in - eval env (cstack, (Value.of_int @@ match x with Value.Array a when Array.length a = n -> 1 | _ -> 0) :: stack', glob, loc, i, o) prg' - | PATT StrCmp -> let x::y::stack' = stack in - eval env (cstack, (Value.of_int @@ match x, y with (Value.String xs, Value.String ys) when xs = ys -> 1 | _ -> 0) :: stack', glob, loc, i, o) prg' - | PATT Array -> let x::stack' = stack in - eval env (cstack, (Value.of_int @@ match x with Value.Array _ -> 1 | _ -> 0) :: stack', glob, loc, i, o) prg' - | PATT String -> let x::stack' = stack in - eval env (cstack, (Value.of_int @@ match x with Value.String _ -> 1 | _ -> 0) :: stack', glob, loc, i, o) prg' - | PATT Sexp -> let x::stack' = stack in - eval env (cstack, (Value.of_int @@ match x with Value.Sexp _ -> 1 | _ -> 0) :: stack', glob, loc, i, o) prg' - | PATT Boxed -> let x::stack' = stack in - eval env (cstack, (Value.of_int @@ match x with Value.Int _ -> 0 | _ -> 1) :: stack', glob, loc, i, o) prg' - | PATT UnBoxed -> let x::stack' = stack in - eval env (cstack, (Value.of_int @@ match x with Value.Int _ -> 1 | _ -> 0) :: stack', glob, loc, i, o) prg' - | PATT Closure -> let x::stack' = stack in - eval env (cstack, (Value.of_int @@ match x with Value.Closure _ -> 1 | _ -> 0) :: stack', glob, loc, i, o) prg' - | FAIL (l, _) -> let x::_ = stack in - raise (Failure (Printf.sprintf "matching value %s failure at %s" (show(value) x) (show(Loc.t) l))) - ) + match insn with + | IMPORT _ | PUBLIC _ | EXTERN _ | LINE _ -> eval env conf prg' + | BINOP "==" -> + let (y :: x :: stack') = stack in + let z = + match (x, y) with + | Value.Int _, Value.Int _ -> + Value.of_int + @@ Expr.to_func "==" (Value.to_int x) (Value.to_int y) + | Value.Int _, _ | _, Value.Int _ -> Value.of_int 0 + | _ -> + failwith "unexpected operands in comparison: %s vs. %s\n" + (show Value.t + (fun _ -> "") + (fun _ -> "") + x) + (show Value.t + (fun _ -> "") + (fun _ -> "") + y) + in + eval env (cstack, z :: stack', glob, loc, i, o) prg' + | BINOP op -> + let (y :: x :: stack') = stack in + eval env + ( cstack, + (Value.of_int @@ Expr.to_func op (Value.to_int x) (Value.to_int y)) + :: stack', + glob, + loc, + i, + o ) + prg' + | CONST n -> + eval env (cstack, Value.of_int n :: stack, glob, loc, i, o) prg' + | STRING s -> + eval env + ( cstack, + (Value.of_string @@ Bytes.of_string s) :: stack, + glob, + loc, + i, + o ) + prg' + | SEXP (s, n) -> + let vs, stack' = split n stack in + eval env + (cstack, (Value.sexp s @@ List.rev vs) :: stack', glob, loc, i, o) + prg' + | ELEM -> + let (a :: b :: stack') = stack in + eval env + (env#builtin ".elem" [ a; b ] (cstack, stack', glob, loc, i, o)) + prg' + | LD x -> + eval env + ( cstack, + (match x with + | Value.Global x -> glob x + | Value.Local i -> loc.locals.(i) + | Value.Arg i -> loc.args.(i) + | Value.Access i -> loc.closure.(i)) + :: stack, + glob, + loc, + i, + o ) + prg' + | LDA x -> eval env (cstack, Value.Var x :: stack, glob, loc, i, o) prg' + | ST x -> + let (z :: stack') = stack in + eval env (cstack, z :: stack', update glob loc z x, loc, i, o) prg' + | STI -> + let (z :: Value.Var r :: stack') = stack in + eval env (cstack, z :: stack', update glob loc z r, loc, i, o) prg' + | STA -> ( + let (z :: j :: stack') = stack in + match j with + | Value.Var r -> + eval env + (cstack, z :: stack', update glob loc z r, loc, i, o) + prg' + | Value.Int _ -> + let (x :: stack') = stack' in + Value.update_elem x (Value.to_int j) z; + eval env (cstack, z :: stack', glob, loc, i, o) prg') + | SLABEL _ | LABEL _ | FLABEL _ -> eval env conf prg' + | JMP l -> eval env conf (env#labeled l) + | CJMP (c, l) -> + let (x :: stack') = stack in + eval env + (cstack, stack', glob, loc, i, o) + (if + (c = "z" && Value.to_int x = 0) || (c = "nz" && Value.to_int x <> 0) + then env#labeled l + else prg') + | CLOSURE (name, dgs) -> + let closure = + Array.of_list + @@ List.map + (function + | Value.Arg i -> loc.args.(i) + | Value.Local i -> loc.locals.(i) + | Value.Access i -> loc.closure.(i) + | _ -> invalid_arg "wrong value in CLOSURE") + dgs + in + eval env + (cstack, Value.Closure ([], name, closure) :: stack, glob, loc, i, o) + prg' + | CALL (f, n, _) -> + let args, stack' = split n stack in + if env#is_label f then + eval env + ( (prg', loc) :: cstack, + stack', + glob, + { + args = Array.of_list (List.rev args); + locals = [||]; + closure = [||]; + }, + i, + o ) + (env#labeled f) + else + eval env + (env#builtin f args ((cstack, stack', glob, loc, i, o) : config)) + prg' + | CALLC (n, _) -> ( + let vs, stack' = split (n + 1) stack in + let (f :: args) = List.rev vs in + match f with + | Value.Builtin f -> + eval env + (env#builtin f (List.rev args) + ((cstack, stack', glob, loc, i, o) : config)) + prg' + | Value.Closure (_, f, closure) -> + eval env + ( (prg', loc) :: cstack, + stack', + glob, + { args = Array.of_list args; locals = [||]; closure }, + i, + o ) + (env#labeled f) + | _ -> + invalid_arg "not a closure (or a builtin) in CALL: %s\n" + @@ show value f) + | BEGIN (_, _, locals, _, _, _) -> + eval env + ( cstack, + stack, + glob, + { loc with locals = Array.init locals (fun _ -> Value.Empty) }, + i, + o ) + prg' + | END -> ( + match cstack with + | (prg', loc') :: cstack' -> + eval env + (cstack', (*Value.Empty ::*) stack, glob, loc', i, o) + prg' + | [] -> conf) + | RET -> ( + match cstack with + | (prg', loc') :: cstack' -> + eval env (cstack', stack, glob, loc', i, o) prg' + | [] -> conf) + | DROP -> eval env (cstack, List.tl stack, glob, loc, i, o) prg' + | DUP -> eval env (cstack, List.hd stack :: stack, glob, loc, i, o) prg' + | SWAP -> + let (x :: y :: stack') = stack in + eval env (cstack, y :: x :: stack', glob, loc, i, o) prg' + | TAG (t, n) -> + let (x :: stack') = stack in + eval env + ( cstack, + (Value.of_int + @@ + match x with + | Value.Sexp (t', a) when t' = t && Array.length a = n -> 1 + | _ -> 0) + :: stack', + glob, + loc, + i, + o ) + prg' + | ARRAY n -> + let (x :: stack') = stack in + eval env + ( cstack, + (Value.of_int + @@ + match x with Value.Array a when Array.length a = n -> 1 | _ -> 0) + :: stack', + glob, + loc, + i, + o ) + prg' + | PATT StrCmp -> + let (x :: y :: stack') = stack in + eval env + ( cstack, + (Value.of_int + @@ + match (x, y) with + | Value.String xs, Value.String ys when xs = ys -> 1 + | _ -> 0) + :: stack', + glob, + loc, + i, + o ) + prg' + | PATT Array -> + let (x :: stack') = stack in + eval env + ( cstack, + (Value.of_int @@ match x with Value.Array _ -> 1 | _ -> 0) + :: stack', + glob, + loc, + i, + o ) + prg' + | PATT String -> + let (x :: stack') = stack in + eval env + ( cstack, + (Value.of_int @@ match x with Value.String _ -> 1 | _ -> 0) + :: stack', + glob, + loc, + i, + o ) + prg' + | PATT Sexp -> + let (x :: stack') = stack in + eval env + ( cstack, + (Value.of_int @@ match x with Value.Sexp _ -> 1 | _ -> 0) + :: stack', + glob, + loc, + i, + o ) + prg' + | PATT Boxed -> + let (x :: stack') = stack in + eval env + ( cstack, + (Value.of_int @@ match x with Value.Int _ -> 0 | _ -> 1) + :: stack', + glob, + loc, + i, + o ) + prg' + | PATT UnBoxed -> + let (x :: stack') = stack in + eval env + ( cstack, + (Value.of_int @@ match x with Value.Int _ -> 1 | _ -> 0) + :: stack', + glob, + loc, + i, + o ) + prg' + | PATT Closure -> + let (x :: stack') = stack in + eval env + ( cstack, + (Value.of_int @@ match x with Value.Closure _ -> 1 | _ -> 0) + :: stack', + glob, + loc, + i, + o ) + prg' + | FAIL (l, _) -> + let (x :: _) = stack in + raise + (Failure + (Printf.sprintf "matching value %s failure at %s" (show value x) + (show Loc.t l)))) (* Top-level evaluation @@ -421,37 +740,60 @@ let rec eval env (((cstack, stack, glob, loc, i, o) as conf) : config) = functio Takes a program, an input stream, and returns an output stream this program calculates *) -module M = Map.Make (String) +module M = Map.Make (String) class indexer prg = let rec make_env m = function - | [] -> m - | (LABEL l) :: tl - | (FLABEL l) :: tl -> make_env (M.add l tl m) tl - | _ :: tl -> make_env m tl - in + | [] -> m + | LABEL l :: tl | FLABEL l :: tl -> make_env (M.add l tl m) tl + | _ :: tl -> make_env m tl + in let m = make_env M.empty prg in object method is_label l = M.mem l m method labeled l = M.find l m end - -let run p i = + +let run p i = let module M = Map.Make (String) in let glob = State.undefined in - let (_, _, _, _, i, o) = + let _, _, _, _, _, o = eval - object + (object inherit indexer p - method builtin f args ((cstack, stack, glob, loc, i, o) as conf : config) = - let f = match f.[0] with 'L' -> String.sub f 1 (String.length f - 1) | _ -> f in - let (st, i, o, r) = Language.Builtin.eval (State.I, i, o, []) (List.map Obj.magic @@ List.rev args) f in - (cstack, (match r with [r] -> (Obj.magic r)::stack | _ -> Value.Empty :: stack), glob, loc, i, o) - end - ([], [], (List.fold_left (fun s (name, value) -> State.bind name value s) glob (Builtin.bindings ())), {locals=[||]; args=[||]; closure=[||]}, i, []) + + method builtin f args ((cstack, stack, glob, loc, i, o) : config) = + let f = + match f.[0] with + | 'L' -> String.sub f 1 (String.length f - 1) + | _ -> f + in + let _, i, o, r = + Language.Builtin.eval (State.I, i, o, []) + (List.map Obj.magic @@ List.rev args) + f + in + ( cstack, + (match r with + | [ r ] -> Obj.magic r :: stack + | _ -> Value.Empty :: stack), + glob, + loc, + i, + o ) + [@@ocaml.warning "-8"] + end) + ( [], + [], + List.fold_left + (fun s (name, value) -> State.bind name value s) + glob (Builtin.bindings ()), + { locals = [||]; args = [||]; closure = [||] }, + i, + [] ) p in - o + o (* Stack machine compiler @@ -459,685 +801,869 @@ let run p i = Takes a program in the source language and returns an equivalent program for the stack machine -*) -let label s = "L" ^ s +*) +let label s = "L" ^ s let scope_label i s = label s ^ "_" ^ string_of_int i let check_name_and_add names name mut = - if List.exists (fun (n, _) -> n = name) names - then report_error ~loc:(Loc.get name) (Printf.sprintf "name \"%s\" is already defined in the scope" (Subst.subst name)) + if List.exists (fun (n, _) -> n = name) names then + report_error ~loc:(Loc.get name) + (Printf.sprintf "name \"%s\" is already defined in the scope" + (Subst.subst name)) else (name, mut) :: names -;; - -@type funscope = { - st : Value.designation State.t; - arg_index : int; - local_index : int; - acc_index : int; - nlocals : int; - closure : Value.designation list; - scopes : scope list; -} with show - -@type fundef = { - name : string; - args : string list; - body : Expr.t; - scope : funscope; -} with show - -@type context = -| Top of fundef list -| Item of fundef * fundef list * context -with show - -let init_scope st = { - st = st; - arg_index = 0; - acc_index = 0; + +type funscope = { + st : Value.designation State.t; + arg_index : int; + local_index : int; + acc_index : int; + nlocals : int; + closure : Value.designation list; + scopes : scope list; +} +[@@deriving gt ~options:{ show }] + +type fundef = { + name : string; + args : string list; + body : Expr.t; + scope : funscope; +} +[@@deriving gt ~options:{ show }] + +type context = Top of fundef list | Item of fundef * fundef list * context +[@@deriving gt ~options:{ show }] + +let init_scope st = + { + st; + arg_index = 0; + acc_index = 0; local_index = 0; - nlocals = 0; - closure = []; - scopes = []; + nlocals = 0; + closure = []; + scopes = []; } - -let to_fundef name args body st = { - name = name; - args = args; - body = body; - scope = init_scope st; -} +let to_fundef name args body st = { name; args; body; scope = init_scope st } let from_fundef fd = (fd.name, fd.args, fd.body, fd.scope.st) - + let open_scope c fd = match c with - | Top _ -> Item (fd, [], c) + | Top _ -> Item (fd, [], c) | Item (p, fds, up) -> - Item (fd, [], Item ({p with scope = fd.scope}, fds, up)) - -let close_scope (Item (f, [], c)) = c - + Item (fd, [], Item ({ p with scope = fd.scope }, fds, up)) + +let[@ocaml.warning "-8"] close_scope (Item (_, [], c)) = c + let add_fun c fd = match c with - | Top fds -> Top (fd :: fds) + | Top fds -> Top (fd :: fds) | Item (parent, fds, up) -> Item (parent, fd :: fds, up) - -let rec pick = function -| Item (parent, fd :: fds, up) -> - Item (parent, fds, up), Some fd -| Top (fd :: fds) -> - Top fds, Some fd -| c -> c, None + +let[@ocaml.warning "-39"] rec pick = function + | Item (parent, fd :: fds, up) -> (Item (parent, fds, up), Some fd) + | Top (fd :: fds) -> (Top fds, Some fd) + | c -> (c, None) let top = function Item (p, _, _) -> Some p | _ -> None -let rec propagate_acc (Item (p, fds, up) as item) name = +let[@ocaml.warning "-8"] rec propagate_acc (Item (p, fds, up) as item) name = match State.eval p.scope.st name with | Value.Access n when n = ~-1 -> - let index = p.scope.acc_index in - let up', loc = propagate_acc up name in - Item ({p with - scope = {p.scope with - st = State.update name (Value.Access index) p.scope.st; - acc_index = p.scope.acc_index + 1; - closure = loc :: p.scope.closure - }}, fds, up'), Value.Access index - | other -> item, other - -module FC = Map.Make (struct type t = string * string let compare = Pervasives.compare end) + let index = p.scope.acc_index in + let up', loc = propagate_acc up name in + ( Item + ( { + p with + scope = + { + p.scope with + st = State.update name (Value.Access index) p.scope.st; + acc_index = p.scope.acc_index + 1; + closure = loc :: p.scope.closure; + }; + }, + fds, + up' ), + Value.Access index ) + | other -> (item, other) + +module FC = Map.Make (struct + type t = string * string + + let compare = Stdlib.compare +end) class funinfo = -object (self : 'self) - val funtree = (Pervasives.ref M.empty : string M.t ref) - val closures = (Pervasives.ref M.empty : Value.designation list M.t ref) - val functx = (Pervasives.ref FC.empty : Value.designation list FC.t ref) - - method show_funinfo = - Printf.sprintf "funtree: %s\nclosures: %s\ncontexts: %s\n" - (show(list) (fun (x, y) -> x ^ ": " ^ y) @@ M.bindings !funtree) - (show(list) (fun (x, y) -> x ^ ": " ^ show(list) (show(Value.designation)) y) @@ M.bindings !closures) - (show(list) (fun ((x, y), v) -> "(" ^ x ^ ", " ^ y ^ ")" ^ show(list) (show(Value.designation)) v) @@ FC.bindings !functx) - - method lookup_closure p = FC.find p !functx - - method register_call f c = functx := FC.add (f, c) [] !functx; self - - method register_fun f p = funtree := M.add f p !funtree; self - - method register_closure f c = closures := M.add f c !closures; self - - method private get_parent f = M.find f !funtree - - method get_closure f = M.find f !closures - - method private propagate_for_call (f, c) = - try - let fp = self#get_parent f in - let rec find_path current = - if fp = current - then [] - else find_path (self#get_parent current) @ [current] - in - let path = find_path c in - let changed = Pervasives.ref false in - let rec propagate_downwards current_closure = function - | [] -> current_closure - | f :: tl -> - let fclosure = self#get_closure f in - let delta = Pervasives.ref fclosure in - let index = Pervasives.ref (List.length fclosure) in - let added = Pervasives.ref false in - let add_to_closure loc = - added := true; - delta := !delta @ [loc]; - let loc' = Value.Access !index in - incr index; - loc' - in - let next_closure = - List.map - (fun loc -> - let rec find_index i = function - | [] -> raise Not_found - | loc' :: tl -> - if loc' = loc - then Value.Access i - else find_index (i+1) tl + object (self : 'self) + val funtree : string M.t ref = Stdlib.ref M.empty + val closures : Value.designation list M.t ref = Stdlib.ref M.empty + val functx : Value.designation list FC.t ref = Stdlib.ref FC.empty + + method show_funinfo = + Printf.sprintf "funtree: %s\nclosures: %s\ncontexts: %s\n" + (show list (fun (x, y) -> x ^ ": " ^ y) @@ M.bindings !funtree) + (show list (fun (x, y) -> + x ^ ": " ^ show list (show Value.designation) y) + @@ M.bindings !closures) + (show list (fun ((x, y), v) -> + "(" ^ x ^ ", " ^ y ^ ")" ^ show list (show Value.designation) v) + @@ FC.bindings !functx) + + method lookup_closure p = FC.find p !functx + + method register_call f c = + functx := FC.add (f, c) [] !functx; + self + + method register_fun f p = + funtree := M.add f p !funtree; + self + + method register_closure f c = + closures := M.add f c !closures; + self + + method private get_parent f = M.find f !funtree + method get_closure f = M.find f !closures + + method private propagate_for_call (f, c) = + try + let fp = self#get_parent f in + let rec find_path current = + if fp = current then [] + else find_path (self#get_parent current) @ [ current ] + in + let path = find_path c in + let changed = Stdlib.ref false in + let rec propagate_downwards current_closure = function + | [] -> current_closure + | f :: tl -> + let fclosure = self#get_closure f in + let delta = Stdlib.ref fclosure in + let index = Stdlib.ref (List.length fclosure) in + let added = Stdlib.ref false in + let add_to_closure loc = + added := true; + delta := !delta @ [ loc ]; + let loc' = Value.Access !index in + incr index; + loc' in - try find_index 0 fclosure with Not_found -> add_to_closure loc - ) - current_closure - in - if !added then ( - changed := true; - closures := M.add f !delta !closures - ); - propagate_downwards next_closure tl - in - let closure = propagate_downwards (self#get_closure f) path in - functx := FC.add (f, c) closure !functx; - !changed - with Not_found -> false - - method propagate_closures = - while List.fold_left (fun flag (call, _) -> flag || self#propagate_for_call call) false @@ FC.bindings !functx - do () done; - self - -end - + let next_closure = + List.map + (fun loc -> + let rec find_index i = function + | [] -> raise Not_found + | loc' :: tl -> + if loc' = loc then Value.Access i + else find_index (i + 1) tl + in + try find_index 0 fclosure + with Not_found -> add_to_closure loc) + current_closure + in + if !added then ( + changed := true; + closures := M.add f !delta !closures); + propagate_downwards next_closure tl + in + let closure = propagate_downwards (self#get_closure f) path in + functx := FC.add (f, c) closure !functx; + !changed + with Not_found -> false + + method propagate_closures = + while + List.fold_left + (fun flag (call, _) -> flag || self#propagate_for_call call) + false + @@ FC.bindings !functx + do + () + done; + self + end + class env cmd imports = -object (self : 'self) - val label_index = 0 - val scope_index = 0 - val lam_index = 0 - val scope = init_scope State.I - val fundefs = Top [] - val decls = [] - val funinfo = new funinfo - val line = None - val end_label = "" - - method show_funinfo = funinfo#show_funinfo - - method get_closure p = try funinfo#lookup_closure p with Not_found -> [] - - method get_fun_closure f = funinfo#get_closure f - - method propagate_closures = {< funinfo = funinfo#propagate_closures >} - - method register_call f = {< funinfo = funinfo#register_call f self#current_function >} - - method register_fun f = {< funinfo = funinfo#register_fun f self#current_function >} - - method register_closure f = {< funinfo = funinfo#register_closure f self#closure >} - - method current_function = - match fundefs with Top _ -> "main" | Item (fd, _, _) -> fd.name - - method private import_imports = - let paths = cmd#get_include_paths in - let env = List.fold_left - (fun env import -> - let _, intfs = Interface.find import paths in - List.fold_left - (fun env -> function - | `Variable name -> env#add_name name `Extern Mut - | `Fun name -> env#add_fun_name name `Extern - | _ -> env - ) - env - intfs - ) - self - imports - in - env - - method global_scope = scope_index = 0 - - method get_label = (label @@ string_of_int label_index), {< label_index = label_index + 1 >} - method get_end_label = - let lab = label @@ string_of_int label_index in - lab, {< end_label = lab; label_index = label_index + 1 >} - - method end_label = end_label - - method nargs = scope.arg_index - method nlocals = scope.nlocals - - method get_decls = - let opt_label = function true -> label | _ -> fun x -> "global_" ^ x in - List.flatten @@ - List.map - (function - | (name, `Extern, f) -> [EXTERN (opt_label f name)] - | (name, `Public, f) -> [PUBLIC (opt_label f name)] - | (name, `PublicExtern, f) -> [PUBLIC (opt_label f name); EXTERN (opt_label f name)] - | _ -> invalid_arg "must not happen" - ) @@ - List.filter (function (_, `Local, _) -> false | _ -> true) decls - - method push_scope (blab : string) (elab : string) = - (*Printf.printf "push: Scope local index = %d\n" scope.local_index;*) - match scope.st with - | State.I -> - {< - scope_index = scope_index + 1; - scope = { - scope with - st = State.G ([], State.undefined) - } - >} # import_imports - - | _ -> - {< scope_index = scope_index + 1; - scope = { - scope with - st = State.L ([], State.undefined, scope.st); - scopes = {blab = blab; elab = elab; names = []; subs = []} :: scope.scopes - } - >} - - method pop_scope = - match scope.st with - | State.I -> {< scope = {scope with st = State.I} >} - | State.G _ -> {< scope = {scope with st = State.I} >} - | State.L (xs, _, x) -> - {< - scope = { - scope with - st = x; - local_index = ((*Printf.printf "pop: Scope local index = %d\n" (scope.local_index - List.length xs);*) scope.local_index - List.length (List.filter (fun (_, x) -> x <> FVal) xs) (*xs*)); - scopes = match scope.scopes with - [_] -> scope.scopes - | hs :: ps :: tl -> {ps with subs = hs :: ps.subs} :: tl - } - >} - - method open_fun_scope blab elab (name, args, body, st') = - {< - fundefs = open_scope fundefs { - name = name; - args = args; - body = body; - scope = {scope with st = st'}; - }; - scope = init_scope ( - let rec readdress_to_closure = function - | State.L (xs, st, tl) -> - State.L (xs, (fun name -> match st name with Value.Fun _ as x -> x | _ -> Value.Access (~-1)), readdress_to_closure tl) - | st -> st - in - readdress_to_closure st' - ); - >} # push_scope blab elab - - method close_fun_scope = - (*Printf.printf "Scopes: %s\n" @@ show(GT.list) show_scope scope.scopes;*) - let scopes = scope.scopes in - let fundefs' = close_scope fundefs in - match top fundefs' with - | Some fd -> {< fundefs = fundefs'; scope = fd.scope >} # pop_scope, scopes - | None -> {< fundefs = fundefs' >} # pop_scope, scopes - - method add_arg (name : string) = {< - scope = { - scope with - st = (match scope.st with - | State.I | State.G _ -> - invalid_arg "wrong scope in add_arg" - | State.L (names, s, p) -> - State.L (check_name_and_add names name Mut, State.bind name (Value.Arg scope.arg_index) s, p) - ); - arg_index = scope.arg_index + 1 - } - >} - - method check_scope m name = - match m with - | `Local -> () - | _ -> - report_error (Printf.sprintf "external/public definitions (\"%s\") not allowed in local scopes" (Subst.subst name)) - - method add_name (name : string) (m : [`Local | `Extern | `Public | `PublicExtern]) (mut : Language.k) = {< - decls = (name, m, false) :: decls; - scope = { - scope with - st = (match scope.st with - | State.I -> - invalid_arg "uninitialized scope" - | State.G (names, s) -> - State.G ((match m with `Extern | `PublicExtern -> names | _ -> check_name_and_add names name mut), State.bind name (Value.Global name) s) - | State.L (names, s, p) -> - self#check_scope m name; - State.L (check_name_and_add names name mut, State.bind name (Value.Local ((*Printf.printf "Var: %s -> %d\n" name scope.local_index;*) scope.local_index)) s, p) (* !! *) - ); - local_index = (match scope.st with State.L _ -> scope.local_index + 1 | _ -> scope.local_index); - nlocals = (match scope.st with State.L _ -> max (scope.local_index + 1) scope.nlocals | _ -> scope.nlocals); - scopes = match scope.scopes with - ts :: tl -> {ts with names = (name, scope.local_index) :: ts.names} :: tl - | _ -> scope.scopes - } - >} - - method fun_internal_name (name : string) = - (match scope.st with State.G _ -> label | _ -> scope_label scope_index) name - - method add_fun_name (name : string) (m : [`Local | `Extern | `Public | `PublicExtern]) = - let name' = self#fun_internal_name name in - let st' = + object (self : 'self) + val label_index = 0 + val scope_index = 0 + val lam_index = 0 + val scope = init_scope State.I + val fundefs = Top [] + val decls = [] + val funinfo = new funinfo + val line = None + val end_label = "" + method show_funinfo = funinfo#show_funinfo + method get_closure p = try funinfo#lookup_closure p with Not_found -> [] + method get_fun_closure f = funinfo#get_closure f + method propagate_closures = {} + + method register_call f = + {} + + method register_fun f = + {} + + method register_closure f = + {} + + method current_function = + match fundefs with Top _ -> "main" | Item (fd, _, _) -> fd.name + + method private import_imports = + let paths = cmd#get_include_paths in + let env = + List.fold_left + (fun env import -> + let _, intfs = Interface.find import paths in + List.fold_left + (fun env -> function + | `Variable name -> env#add_name name `Extern Mut + | `Fun name -> env#add_fun_name name `Extern + | _ -> env) + env intfs) + self imports + in + env + + method global_scope = scope_index = 0 + + method get_label = + (label @@ string_of_int label_index, {}) + + method get_end_label = + let lab = label @@ string_of_int label_index in + (lab, {}) + + method end_label = end_label + method nargs = scope.arg_index + method nlocals = scope.nlocals + + method get_decls = + let opt_label = function true -> label | _ -> fun x -> "global_" ^ x in + List.flatten + @@ List.map (function + | name, `Extern, f -> [ EXTERN (opt_label f name) ] + | name, `Public, f -> [ PUBLIC (opt_label f name) ] + | name, `PublicExtern, f -> + [ PUBLIC (opt_label f name); EXTERN (opt_label f name) ] + | _ -> invalid_arg "must not happen") + @@ List.filter (function _, `Local, _ -> false | _ -> true) decls + + method push_scope (blab : string) (elab : string) = + (*Printf.printf "push: Scope local index = %d\n" scope.local_index;*) match scope.st with | State.I -> - invalid_arg "uninitialized scope" - | State.G (names, s) -> - State.G ((match m with `Extern | `PublicExtern -> names | _ -> check_name_and_add names name FVal), State.bind name (Value.Fun name') s) - | State.L (names, s, p) -> - self#check_scope m name; - State.L (check_name_and_add names name FVal, State.bind name (Value.Fun name') s, p) - in - {< - decls = (name, m, true) :: decls; - scope = {scope with st = st'} - >} - - method add_lambda (args : string list) (body : Expr.t) = - let name' = self#fun_internal_name (Printf.sprintf "lambda_%d" lam_index) in - {< fundefs = add_fun fundefs (to_fundef name' args body scope.st); lam_index = lam_index + 1 >} # register_fun name', name' - - method add_fun (name : string) (args : string list) (m : [`Local | `Extern | `Public | `PublicExtern]) (body : Expr.t) = - let name' = self#fun_internal_name name in - match m with - | `Extern -> self - | _ -> - {< - fundefs = add_fun fundefs (to_fundef name' args body scope.st) - >} # register_fun name' - - method lookup name = - match State.eval scope.st name with - | Value.Access n when n = ~-1 -> - let index = scope.acc_index in - let fundefs', loc = propagate_acc fundefs name in - {< - fundefs = fundefs'; - scope = { - scope with - st = State.update name (Value.Access index) scope.st; - acc_index = scope.acc_index + 1; - closure = loc :: scope.closure - } - >}, Value.Access index - | other -> self, other - - method next_definition = - match pick fundefs with - | fds, None -> None - | fds, Some fd -> Some ({< fundefs = fds >}, from_fundef fd) - - method closure = List.rev scope.closure - - method gen_line name = - match Loc.get name with - | None -> self, [] - | Some (l, _) -> - match line with - | None -> {< line = Some l >}, [LINE l] - | Some l' when l' <> l -> {< line = Some l >}, [LINE l] - | _ -> self, [] -end - -let compile cmd ((imports, infixes), p) = + {} + #import_imports + | _ -> + {} + + method pop_scope = + match scope.st with + | State.I -> {} + | State.G _ -> {} + | State.L (xs, _, x) -> + { x <> FVal) xs) + (*xs*); + scopes = + (match scope.scopes with + | [ _ ] -> scope.scopes + | hs :: ps :: tl -> + { ps with subs = hs :: ps.subs } :: tl + | _ -> + failwith + (Printf.sprintf "Unexpected pattern: %s: %d" + __FILE__ __LINE__)); + }>} + + method open_fun_scope blab elab (name, args, body, st') = + { + State.L + ( xs, + (fun name -> + match st name with + | Value.Fun _ as x -> x + | _ -> Value.Access ~-1), + readdress_to_closure tl ) + | st -> st + in + readdress_to_closure st')>} + #push_scope + blab elab + + method close_fun_scope = + (*Printf.printf "Scopes: %s\n" @@ show(GT.list) show_scope scope.scopes;*) + let scopes = scope.scopes in + let fundefs' = close_scope fundefs in + match top fundefs' with + | Some fd -> ({}#pop_scope, scopes) + | None -> ({}#pop_scope, scopes) + + method add_arg (name : string) = + { + invalid_arg "wrong scope in add_arg" + | State.L (names, s, p) -> + State.L + ( check_name_and_add names name Mut, + State.bind name (Value.Arg scope.arg_index) s, + p )); + arg_index = scope.arg_index + 1; + }>} + + method check_scope m name = + match m with + | `Local -> () + | _ -> + report_error + (Printf.sprintf + "external/public definitions (\"%s\") not allowed in local \ + scopes" + (Subst.subst name)) + + method add_name (name : string) + (m : [ `Local | `Extern | `Public | `PublicExtern ]) (mut : Language.k) + = + { invalid_arg "uninitialized scope" + | State.G (names, s) -> + State.G + ( (match m with + | `Extern | `PublicExtern -> names + | _ -> check_name_and_add names name mut), + State.bind name (Value.Global name) s ) + | State.L (names, s, p) -> + self#check_scope m name; + State.L + ( check_name_and_add names name mut, + State.bind name + (Value.Local + (*Printf.printf "Var: %s -> %d\n" name scope.local_index;*) + scope.local_index) + s, + p ) + (* !! *)); + local_index = + (match scope.st with + | State.L _ -> scope.local_index + 1 + | _ -> scope.local_index); + nlocals = + (match scope.st with + | State.L _ -> max (scope.local_index + 1) scope.nlocals + | _ -> scope.nlocals); + scopes = + (match scope.scopes with + | ts :: tl -> + { + ts with + names = (name, scope.local_index) :: ts.names; + } + :: tl + | _ -> scope.scopes); + }>} + + method fun_internal_name (name : string) = + (match scope.st with State.G _ -> label | _ -> scope_label scope_index) + name + + method add_fun_name (name : string) + (m : [ `Local | `Extern | `Public | `PublicExtern ]) = + let name' = self#fun_internal_name name in + let st' = + match scope.st with + | State.I -> invalid_arg "uninitialized scope" + | State.G (names, s) -> + State.G + ( (match m with + | `Extern | `PublicExtern -> names + | _ -> check_name_and_add names name FVal), + State.bind name (Value.Fun name') s ) + | State.L (names, s, p) -> + self#check_scope m name; + State.L + ( check_name_and_add names name FVal, + State.bind name (Value.Fun name') s, + p ) + in + {} + + method add_lambda (args : string list) (body : Expr.t) = + let name' = + self#fun_internal_name (Printf.sprintf "lambda_%d" lam_index) + in + ( {} + #register_fun name', + name' ) + + method add_fun (name : string) (args : string list) + (m : [ `Local | `Extern | `Public | `PublicExtern ]) (body : Expr.t) = + let name' = self#fun_internal_name name in + match m with + | `Extern -> self + | _ -> + {} + #register_fun name' + + method lookup name = + match State.eval scope.st name with + | Value.Access n when n = ~-1 -> + let index = scope.acc_index in + let fundefs', loc = propagate_acc fundefs name in + ( {}, + Value.Access index ) + | other -> (self, other) + + method next_definition = + match pick fundefs with + | _, None -> None + | fds, Some fd -> Some ({}, from_fundef fd) + + method closure = List.rev scope.closure + + method gen_line name = + match Loc.get name with + | None -> (self, []) + | Some (l, _) -> ( + match line with + | None -> ({}, [ LINE l ]) + | Some l' when l' <> l -> ({}, [ LINE l ]) + | _ -> (self, [])) + end [@@ocaml.warning "-15"] + +let compile cmd ((imports, _), p) = let rec pattern env lfalse = function - | Pattern.Wildcard -> env, false, [DROP] - | Pattern.Named (_, p) -> pattern env lfalse p - | Pattern.Const c -> env, true, [CONST c; BINOP "=="; CJMP ("z", lfalse)] - | Pattern.String s -> env, true, [STRING s; PATT StrCmp; CJMP ("z", lfalse)] - | Pattern.ArrayTag -> env, true, [PATT Array; CJMP ("z", lfalse)] - | Pattern.StringTag -> env, true, [PATT String; CJMP ("z", lfalse)] - | Pattern.SexpTag -> env, true, [PATT Sexp; CJMP ("z", lfalse)] - | Pattern.UnBoxed -> env, true, [PATT UnBoxed; CJMP ("z", lfalse)] - | Pattern.Boxed -> env, true, [PATT Boxed; CJMP ("z", lfalse)] - | Pattern.ClosureTag -> env, true, [PATT Closure; CJMP ("z", lfalse)] - | Pattern.Array ps -> - let lhead, env = env#get_label in - let ldrop, env = env#get_label in - let tag = [DUP; ARRAY (List.length ps); CJMP ("nz", lhead); LABEL ldrop; DROP; JMP lfalse; LABEL lhead] in - let code, env = pattern_list lhead ldrop env ps in - env, true, tag @ code @ [DROP] - | Pattern.Sexp (t, ps) -> - let lhead, env = env#get_label in - let ldrop, env = env#get_label in - let tag = [DUP; TAG (t, List.length ps); CJMP ("nz", lhead); LABEL ldrop; DROP; JMP lfalse; LABEL lhead] in - let code, env = pattern_list lhead ldrop env ps in - env, true, tag @ code @ [DROP] - and pattern_list lhead ldrop env ps = + | Pattern.Wildcard -> (env, false, [ DROP ]) + | Pattern.Named (_, p) -> pattern env lfalse p + | Pattern.Const c -> (env, true, [ CONST c; BINOP "=="; CJMP ("z", lfalse) ]) + | Pattern.String s -> + (env, true, [ STRING s; PATT StrCmp; CJMP ("z", lfalse) ]) + | Pattern.ArrayTag -> (env, true, [ PATT Array; CJMP ("z", lfalse) ]) + | Pattern.StringTag -> (env, true, [ PATT String; CJMP ("z", lfalse) ]) + | Pattern.SexpTag -> (env, true, [ PATT Sexp; CJMP ("z", lfalse) ]) + | Pattern.UnBoxed -> (env, true, [ PATT UnBoxed; CJMP ("z", lfalse) ]) + | Pattern.Boxed -> (env, true, [ PATT Boxed; CJMP ("z", lfalse) ]) + | Pattern.ClosureTag -> (env, true, [ PATT Closure; CJMP ("z", lfalse) ]) + | Pattern.Array ps -> + let lhead, env = env#get_label in + let ldrop, env = env#get_label in + let tag = + [ + DUP; + ARRAY (List.length ps); + CJMP ("nz", lhead); + LABEL ldrop; + DROP; + JMP lfalse; + LABEL lhead; + ] + in + let code, env = pattern_list lhead ldrop env ps in + (env, true, tag @ code @ [ DROP ]) + | Pattern.Sexp (t, ps) -> + let lhead, env = env#get_label in + let ldrop, env = env#get_label in + let tag = + [ + DUP; + TAG (t, List.length ps); + CJMP ("nz", lhead); + LABEL ldrop; + DROP; + JMP lfalse; + LABEL lhead; + ] + in + let code, env = pattern_list lhead ldrop env ps in + (env, true, tag @ code @ [ DROP ]) + and pattern_list _ ldrop env ps = let _, env, code = List.fold_left (fun (i, env, code) p -> - let env, _, pcode = pattern env ldrop p in - i+1, env, ([DUP; CONST i; ELEM (*CALL (".elem", 2, false)*) ] @ pcode) :: code - ) - (0, env, []) - ps + let env, _, pcode = pattern env ldrop p in + ( i + 1, + env, + ([ DUP; CONST i; ELEM (*CALL (".elem", 2, false)*) ] @ pcode) + :: code )) + (0, env, []) ps in - List.flatten (List.rev code), env + (List.flatten (List.rev code), env) and bindings env p = let bindings = - transform(Pattern.t) + transform Pattern.t (fun fself -> - object inherit [int list, _, (string * int list) list] @Pattern.t - method c_Wildcard path _ = [] - method c_Named path _ s p = [s, path] @ fself path p - method c_Sexp path _ x ps = List.concat @@ List.mapi (fun i p -> fself (path @ [i]) p) ps - method c_UnBoxed _ _ = [] - method c_StringTag _ _ = [] - method c_String _ _ _ = [] - method c_SexpTag _ _ = [] - method c_Const _ _ _ = [] - method c_Boxed _ _ = [] - method c_ArrayTag _ _ = [] - method c_ClosureTag _ _ = [] - method c_Array path _ ps = List.concat @@ List.mapi (fun i p -> fself (path @ [i]) p) ps - end) - [] - p + object + inherit [int list, _, (string * int list) list] Pattern.t_t + method c_Wildcard _ _ = [] + method c_Named path _ s p = [ (s, path) ] @ fself path p + + method c_Sexp path _ _ ps = + List.concat @@ List.mapi (fun i p -> fself (path @ [ i ]) p) ps + + method c_UnBoxed _ _ = [] + method c_StringTag _ _ = [] + method c_String _ _ _ = [] + method c_SexpTag _ _ = [] + method c_Const _ _ _ = [] + method c_Boxed _ _ = [] + method c_ArrayTag _ _ = [] + method c_ClosureTag _ _ = [] + + method c_Array path _ ps = + List.concat @@ List.mapi (fun i p -> fself (path @ [ i ]) p) ps + end) + [] p in let env, code = List.fold_left (fun (env, acc) (name, path) -> (*Printf.printf "Bindings..\n";*) - let env = env#add_name name `Local Mut in - let env, dsg = env#lookup name in + let env = env#add_name name `Local Mut in + let env, dsg = env#lookup name in (*Printf.printf "End Bindings..\n";*) - env, - ([DUP] @ - List.concat (List.map (fun i -> [CONST i; ELEM (* CALL (".elem", 2, false)*)]) path) @ - [ST dsg; DROP]) :: acc - ) - (env, []) - (List.rev bindings) - in - env, (List.flatten code) @ [DROP] - and add_code (env, flag, s) l f s' = env, f, s @ (if flag then [LABEL l] else []) @ s' + ( env, + ([ DUP ] + @ List.concat + (List.map + (fun i -> [ CONST i; ELEM (* CALL (".elem", 2, false)*) ]) + path) + @ [ ST dsg; DROP ]) + :: acc )) + (env, []) (List.rev bindings) + in + (env, List.flatten code @ [ DROP ]) + and add_code (env, flag, s) l f s' = + (env, f, s @ (if flag then [ LABEL l ] else []) @ s') and compile_list tail l env = function - | [] -> env, false, [] - | [e] -> compile_expr tail l env e - | e::es -> - let les, env = env#get_label in - let env, flag1, s1 = compile_expr false les env e in - let env, flag2, s2 = compile_list tail l env es in - add_code (env, flag1, s1) les flag2 s2 - and compile_expr tail l env = function - | Expr.Lambda (args, b) -> - let env, lines = List.fold_left (fun (env, acc) name -> let env, ln = env#gen_line name in env, acc @ ln) (env, []) args in - let env, name = env#add_lambda args b in - env#register_call name, false, lines @ [PROTO (name, env#current_function)] - - | Expr.Scope (ds, e) -> - let blab, env = env#get_label in - let elab, env = env#get_label in - let env = env#push_scope blab elab in - let env, e, funs = - List.fold_left - (fun (env, e, funs) -> - function - | name, (m, `Fun (args, b)) -> env#add_fun_name name m, e, (name, args, m, b) :: funs - | name, (m, `Variable None) -> env#add_name name m Mut, e, funs - | name, (m, `Variable (Some v)) -> env#add_name name m Mut, Expr.Seq (Expr.Ignore (Expr.Assign (Expr.Ref name, v)), e), funs - ) - (env, e, []) - (List.rev ds) - in - let env = List.fold_left (fun env (name, args, m, b) -> env#add_fun name args m b) env funs in - let env, flag, code = compile_expr tail l env e in - env#pop_scope, flag, [SLABEL blab] @ code @ [SLABEL elab] - - | Expr.Unit -> env, false, [CONST 0] - - | Expr.Ignore s -> let ls, env = env#get_label in - add_code (compile_expr tail ls env s) ls false [DROP] - - | Expr.ElemRef (x, i) -> compile_list tail l env [x; i] - | Expr.Var x -> let env, line = env#gen_line x in - let env, acc = env#lookup x in - (*Printf.printf "Looking up %s -> %s\n" x (show(Value.designation) acc);*) - (match acc with Value.Fun name -> env#register_call name, false, line @ [PROTO (name, env#current_function)] | _ -> env, false, line @ [LD acc]) - | Expr.Ref x -> let env, line = env#gen_line x in - let env, acc = env#lookup x in env, false, line @ [LDA acc] - | Expr.Const n -> env, false, [CONST n] - | Expr.String s -> env, false, [STRING s] - | Expr.Binop (op, x, y) -> let lop, env = env#get_label in - add_code (compile_list false lop env [x; y]) lop false [BINOP op] - - | Expr.Call (f, args) -> let lcall, env = env#get_label in - (match f with - | Expr.Var name -> - let env, line = env#gen_line name in - let env, acc = env#lookup name in - (match acc with - | Value.Fun name -> - let env = env#register_call name in - let env, f, code = add_code (compile_list false lcall env args) lcall false [PCALLC (List.length args, tail)] in - env, f, line @ (PPROTO (name, env#current_function) :: code) - | _ -> - add_code (compile_list false lcall env (f :: args)) lcall false [CALLC (List.length args, tail)] - ) - - | _ -> add_code (compile_list false lcall env (f :: args)) lcall false [CALLC (List.length args, tail)] - ) - - | Expr.Array xs -> let lar, env = env#get_label in - add_code (compile_list false lar env xs) lar false [CALL (".array", List.length xs, tail)] - - | Expr.Sexp (t, xs) -> let lsexp, env = env#get_label in - add_code (compile_list false lsexp env xs) lsexp false [SEXP (t, List.length xs)] - - | Expr.Elem (a, i) -> let lelem, env = env#get_label in - add_code (compile_list false lelem env [a; i]) lelem false [ELEM (* CALL (".elem", 2, tail) *)] - - | Expr.Assign (Expr.Ref x, e) -> let lassn, env = env#get_label in - let env , line = env#gen_line x in - let env , acc = env#lookup x in - add_code (compile_expr false lassn env e) lassn false (line @ [ST acc]) - - | Expr.Assign (x, e) -> let lassn, env = env#get_label in - add_code (compile_list false lassn env [x; e]) lassn false [match x with Expr.Ref _ -> STI | _ -> STA] (*Expr.ElemRef _ -> STA | _ -> STI]*) - - | Expr.Skip -> env, false, [] - - | Expr.Seq (s1, s2) -> compile_list tail l env [s1; s2] - - | Expr.If (c, s1, s2) -> let le, env = env#get_label in - let l2, env = env#get_label in - let env, fe , se = compile_expr false le env c in - let env, flag1, s1 = compile_expr tail l env s1 in - let env, flag2, s2 = compile_expr tail l env s2 in - env, true, se @ (if fe then [LABEL le] else []) @ [CJMP ("z", l2)] @ s1 @ (if flag1 then [] else [JMP l]) @ [LABEL l2] @ s2 @ (if flag2 then [] else [JMP l]) - - | Expr.While (c, s) -> let lexp, env = env#get_label in - let loop, env = env#get_label in - let cond, env = env#get_label in - let env, fe, se = compile_expr false lexp env c in - let env, _ , s = compile_expr false cond env s in - env, false, [JMP cond; FLABEL loop] @ s @ [LABEL cond] @ se @ (if fe then [LABEL lexp] else []) @ [CJMP ("nz", loop)] - - | Expr.DoWhile (s, c) -> let lexp , env = env#get_label in - let loop , env = env#get_label in - let check, env = env#get_label in - let env, fe , se = compile_expr false lexp env c in - let env, flag, body = compile_expr false check env s in - env, false, [LABEL loop] @ body @ (if flag then [LABEL check] else []) @ se @ (if fe then [LABEL lexp] else []) @ [CJMP ("nz", loop)] - - | Expr.Leave -> env, false, [] - - | Expr.Case (e, brs, loc, atr) -> - let n = List.length brs - 1 in - let lfail, env = env#get_label in - let lexp , env = env#get_label in - let env , fe , se = compile_expr false lexp env e in - let env , _, _, code, fail = - List.fold_left - (fun ((env, lab, i, code, continue) as acc) (p, s) -> - if continue - then - let (lfalse, env), jmp = - if i = n - then (lfail, env), [] - else env#get_label, [JMP l] - in - let env, lfalse', pcode = pattern env lfalse p in - let blab, env = env#get_label in - let elab, env = env#get_label in - let env = env#push_scope blab elab in - let env, bindcode = bindings env p in - let env, l' , scode = compile_expr tail l env s in - let env = env#pop_scope in - (env, Some lfalse, i+1, ((match lab with None -> [SLABEL blab] | Some l -> [SLABEL blab; LABEL l; DUP]) @ pcode @ bindcode @ scode @ jmp @ [SLABEL elab]) :: code, lfalse') - else acc - ) - (env, None, 0, [], true) brs - in - env, true, se @ (if fe then [LABEL lexp] else []) @ [DUP] @ (List.flatten @@ List.rev code) @ [JMP l] @ if fail then [LABEL lfail; FAIL (loc, atr != Expr.Void); JMP l] else [] + | [] -> (env, false, []) + | [ e ] -> compile_expr tail l env e + | e :: es -> + let les, env = env#get_label in + let env, flag1, s1 = compile_expr false les env e in + let env, flag2, s2 = compile_list tail l env es in + add_code (env, flag1, s1) les flag2 s2 + and[@ocaml.warning "-8"] compile_expr tail l env = function + | Expr.Lambda (args, b) -> + let env, lines = + List.fold_left + (fun (env, acc) name -> + let env, ln = env#gen_line name in + (env, acc @ ln)) + (env, []) args + in + let env, name = env#add_lambda args b in + ( env#register_call name, + false, + lines @ [ PROTO (name, env#current_function) ] ) + | Expr.Scope (ds, e) -> + let blab, env = env#get_label in + let elab, env = env#get_label in + let env = env#push_scope blab elab in + let env, e, funs = + List.fold_left + (fun (env, e, funs) -> function + | name, (m, `Fun (args, b)) -> + (env#add_fun_name name m, e, (name, args, m, b) :: funs) + | name, (m, `Variable None) -> (env#add_name name m Mut, e, funs) + | name, (m, `Variable (Some v)) -> + ( env#add_name name m Mut, + Expr.Seq (Expr.Ignore (Expr.Assign (Expr.Ref name, v)), e), + funs )) + (env, e, []) (List.rev ds) + in + let env = + List.fold_left + (fun env (name, args, m, b) -> env#add_fun name args m b) + env funs + in + let env, flag, code = compile_expr tail l env e in + (env#pop_scope, flag, [ SLABEL blab ] @ code @ [ SLABEL elab ]) + | Expr.Unit -> (env, false, [ CONST 0 ]) + | Expr.Ignore s -> + let ls, env = env#get_label in + add_code (compile_expr tail ls env s) ls false [ DROP ] + | Expr.ElemRef (x, i) -> compile_list tail l env [ x; i ] + | Expr.Var x -> ( + let env, line = env#gen_line x in + let env, acc = env#lookup x in + (*Printf.printf "Looking up %s -> %s\n" x (show(Value.designation) acc);*) + match acc with + | Value.Fun name -> + ( env#register_call name, + false, + line @ [ PROTO (name, env#current_function) ] ) + | _ -> (env, false, line @ [ LD acc ])) + | Expr.Ref x -> + let env, line = env#gen_line x in + let env, acc = env#lookup x in + (env, false, line @ [ LDA acc ]) + | Expr.Const n -> (env, false, [ CONST n ]) + | Expr.String s -> (env, false, [ STRING s ]) + | Expr.Binop (op, x, y) -> + let lop, env = env#get_label in + add_code (compile_list false lop env [ x; y ]) lop false [ BINOP op ] + | Expr.Call (f, args) -> ( + let lcall, env = env#get_label in + match f with + | Expr.Var name -> ( + let env, line = env#gen_line name in + let env, acc = env#lookup name in + match acc with + | Value.Fun name -> + let env = env#register_call name in + let env, f, code = + add_code + (compile_list false lcall env args) + lcall false + [ PCALLC (List.length args, tail) ] + in + (env, f, line @ (PPROTO (name, env#current_function) :: code)) + | _ -> + add_code + (compile_list false lcall env (f :: args)) + lcall false + [ CALLC (List.length args, tail) ]) + | _ -> + add_code + (compile_list false lcall env (f :: args)) + lcall false + [ CALLC (List.length args, tail) ]) + | Expr.Array xs -> + let lar, env = env#get_label in + add_code + (compile_list false lar env xs) + lar false + [ CALL (".array", List.length xs, tail) ] + | Expr.Sexp (t, xs) -> + let lsexp, env = env#get_label in + add_code + (compile_list false lsexp env xs) + lsexp false + [ SEXP (t, List.length xs) ] + | Expr.Elem (a, i) -> + let lelem, env = env#get_label in + add_code + (compile_list false lelem env [ a; i ]) + lelem false + [ ELEM (* CALL (".elem", 2, tail) *) ] + | Expr.Assign (Expr.Ref x, e) -> + let lassn, env = env#get_label in + let env, line = env#gen_line x in + let env, acc = env#lookup x in + add_code (compile_expr false lassn env e) lassn false (line @ [ ST acc ]) + | Expr.Assign (x, e) -> + let lassn, env = env#get_label in + add_code + (compile_list false lassn env [ x; e ]) + lassn false + [ (match x with Expr.Ref _ -> STI | _ -> STA) ] + (*Expr.ElemRef _ -> STA | _ -> STI]*) + | Expr.Skip -> (env, false, []) + | Expr.Seq (s1, s2) -> compile_list tail l env [ s1; s2 ] + | Expr.If (c, s1, s2) -> + let le, env = env#get_label in + let l2, env = env#get_label in + let env, fe, se = compile_expr false le env c in + let env, flag1, s1 = compile_expr tail l env s1 in + let env, flag2, s2 = compile_expr tail l env s2 in + ( env, + true, + se + @ (if fe then [ LABEL le ] else []) + @ [ CJMP ("z", l2) ] + @ s1 + @ (if flag1 then [] else [ JMP l ]) + @ [ LABEL l2 ] @ s2 + @ if flag2 then [] else [ JMP l ] ) + | Expr.While (c, s) -> + let lexp, env = env#get_label in + let loop, env = env#get_label in + let cond, env = env#get_label in + let env, fe, se = compile_expr false lexp env c in + let env, _, s = compile_expr false cond env s in + ( env, + false, + [ JMP cond; FLABEL loop ] @ s @ [ LABEL cond ] @ se + @ (if fe then [ LABEL lexp ] else []) + @ [ CJMP ("nz", loop) ] ) + | Expr.DoWhile (s, c) -> + let lexp, env = env#get_label in + let loop, env = env#get_label in + let check, env = env#get_label in + let env, fe, se = compile_expr false lexp env c in + let env, flag, body = compile_expr false check env s in + ( env, + false, + [ LABEL loop ] @ body + @ (if flag then [ LABEL check ] else []) + @ se + @ (if fe then [ LABEL lexp ] else []) + @ [ CJMP ("nz", loop) ] ) + | Expr.Leave -> (env, false, []) + | Expr.Case (e, brs, loc, atr) -> + let n = List.length brs - 1 in + let lfail, env = env#get_label in + let lexp, env = env#get_label in + let env, fe, se = compile_expr false lexp env e in + let env, _, _, code, fail = + List.fold_left + (fun ((env, lab, i, code, continue) as acc) (p, s) -> + if continue then + let (lfalse, env), jmp = + if i = n then ((lfail, env), []) + else (env#get_label, [ JMP l ]) + in + let env, lfalse', pcode = pattern env lfalse p in + let blab, env = env#get_label in + let elab, env = env#get_label in + let env = env#push_scope blab elab in + let env, bindcode = bindings env p in + let env, _, scode = compile_expr tail l env s in + let env = env#pop_scope in + ( env, + Some lfalse, + i + 1, + ((match lab with + | None -> [ SLABEL blab ] + | Some l -> [ SLABEL blab; LABEL l; DUP ]) + @ pcode @ bindcode @ scode @ jmp @ [ SLABEL elab ]) + :: code, + lfalse' ) + else acc) + (env, None, 0, [], true) brs + in + ( env, + true, + se + @ (if fe then [ LABEL lexp ] else []) + @ [ DUP ] + @ (List.flatten @@ List.rev code) + @ [ JMP l ] + @ + if fail then [ LABEL lfail; FAIL (loc, atr != Expr.Void); JMP l ] + else [] ) in - let rec compile_fundef env ((name, args, stmt, st) as fd) = + let rec compile_fundef env ((name, args, stmt, _) as fd) = (* Printf.eprintf "Compile fundef: %s, state=%s\n" name (show(State.t) (show(Value.designation)) st); *) (* Printf.eprintf "st (inner) = %s\n" (try show(Value.designation) @@ State.eval st "inner" with _ -> " not found"); *) - let blab, env = env#get_label in - let elab, env = env#get_label in - let env = env#open_fun_scope blab elab fd in + let blab, env = env#get_label in + let elab, env = env#get_label in + let env = env#open_fun_scope blab elab fd in (*Printf.eprintf "Lookup: %s\n%!" (try show(Value.designation) @@ snd (env#lookup "inner") with _ -> "no inner..."); *) - let env = List.fold_left (fun env arg -> env#add_arg arg) env args in - let lend, env = env#get_end_label in - let env, flag, code = compile_expr true lend env stmt in - let env, funcode = compile_fundefs [] env in + let env = List.fold_left (fun env arg -> env#add_arg arg) env args in + let lend, env = env#get_end_label in + let env, _, code = compile_expr true lend env stmt in + let env, funcode = compile_fundefs [] env in (*Printf.eprintf "Function: %s, closure: %s\n%!" name (show(list) (show(Value.designation)) env#closure);*) let env = env#register_closure name in - let nargs, nlocals, closure = env#nargs, env#nlocals, env#closure in + let nargs, nlocals, closure = (env#nargs, env#nlocals, env#closure) in let env, scopes = env#close_fun_scope in let code = - ([LABEL name; BEGIN (name, nargs, nlocals, closure, args, scopes); SLABEL blab] @ - code @ - [LABEL lend; SLABEL elab; END]) :: funcode + ([ + LABEL name; + BEGIN (name, nargs, nlocals, closure, args, scopes); + SLABEL blab; + ] + @ code + @ [ LABEL lend; SLABEL elab; END ]) + :: funcode in - env, code + (env, code) and compile_fundefs acc env = match env#next_definition with - | None -> env, acc + | None -> (env, acc) | Some (env, def) -> - let env, code = compile_fundef env def in - compile_fundefs (acc @ code) env + let env, code = compile_fundef env def in + compile_fundefs (acc @ code) env in let fix_closures env prg = let rec inner state = function - | [] -> [] - | BEGIN (f, na, l, c, a, s) :: tl -> BEGIN (f, na, l, (try env#get_fun_closure f with Not_found -> c), a, s) :: inner state tl - | PROTO (f, c) :: tl -> CLOSURE (f, env#get_closure (f, c)) :: inner state tl - | PPROTO (f, c) :: tl -> - (match env#get_closure (f, c) with - | [] -> inner (Some f :: state) tl - | closure -> CLOSURE (f, closure) :: inner (None :: state) tl - ) - | PCALLC (n, tail) :: tl -> - (match state with - | None :: state' -> CALLC (n, tail) :: inner state' tl - | Some f :: state' -> CALL (f, n, tail) :: inner state' tl - ) - | insn :: tl -> insn :: inner state tl + | [] -> [] + | BEGIN (f, na, l, c, a, s) :: tl -> + BEGIN + (f, na, l, (try env#get_fun_closure f with Not_found -> c), a, s) + :: inner state tl + | PROTO (f, c) :: tl -> + CLOSURE (f, env#get_closure (f, c)) :: inner state tl + | PPROTO (f, c) :: tl -> ( + match env#get_closure (f, c) with + | [] -> inner (Some f :: state) tl + | closure -> CLOSURE (f, closure) :: inner (None :: state) tl) + | PCALLC (n, tail) :: tl -> ( + match state with + | None :: state' -> CALLC (n, tail) :: inner state' tl + | Some f :: state' -> CALL (f, n, tail) :: inner state' tl + | _ -> + failwith + (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ __LINE__)) + | insn :: tl -> insn :: inner state tl in inner [] prg in - let env = new env cmd imports in - let lend, env = env#get_label in + let env = new env cmd imports in + let lend, env = env#get_label in let env, flag, code = compile_expr false lend env p in - let code = if flag then code @ [LABEL lend] else code in - let topname = cmd#topname in - let env, prg = compile_fundefs [[LABEL topname; BEGIN (topname, (if topname = "main" then 2 else 0), env#nlocals, [], [], [])] @ code @ [END]] env in - let prg = (List.map (fun i -> IMPORT i) imports) @ [PUBLIC topname] @ env#get_decls @ List.flatten prg in + let code = if flag then code @ [ LABEL lend ] else code in + let topname = cmd#topname in + let env, prg = + compile_fundefs + [ + [ + LABEL topname; + BEGIN + ( topname, + (if topname = "main" then 2 else 0), + env#nlocals, + [], + [], + [] ); + ] + @ code @ [ END ]; + ] + env + in + let prg = + List.map (fun i -> IMPORT i) imports + @ [ PUBLIC topname ] @ env#get_decls @ List.flatten prg + in (*Printf.eprintf "Before propagating closures:\n"; - Printf.eprintf "%s\n%!" env#show_funinfo; - *) + Printf.eprintf "%s\n%!" env#show_funinfo; + *) let env = env#propagate_closures in (* Printf.eprintf "After propagating closures:\n"; diff --git a/src/X86.ml b/src/X86.ml index 2b5418cd0..b8c989bb1 100644 --- a/src/X86.ml +++ b/src/X86.ml @@ -1,29 +1,28 @@ open GT open Language -open SM - + (* X86 codegeneration interface *) (* The registers: *) -let regs = [|"%ebx"; "%ecx"; "%esi"; "%edi"; "%eax"; "%edx"; "%ebp"; "%esp"|] +let regs = [| "%ebx"; "%ecx"; "%esi"; "%edi"; "%eax"; "%edx"; "%ebp"; "%esp" |] (* We can not freely operate with all register; only 3 by now *) let num_of_regs = Array.length regs - 5 (* We need to know the word size to calculate offsets correctly *) -let word_size = 4;; +let word_size = 4 (* We need to distinguish the following operand types: *) -@type opnd = -| R of int (* hard register *) -| S of int (* a position on the hardware stack *) -| C (* a saved closure *) -| M of string (* a named memory location *) -| L of int (* an immediate operand *) -| I of int * opnd (* an indirect operand with offset *) -with show +type opnd = + | R of int (* hard register *) + | S of int (* a position on the hardware stack *) + | C (* a saved closure *) + | M of string (* a named memory location *) + | L of int (* an immediate operand *) + | I of int * opnd (* an indirect operand with offset *) +[@@deriving gt ~options:{ show }] -let show_opnd = show(opnd) +let show_opnd = show opnd (* For convenience we define the following synonyms for the registers: *) let ebx = R 0 @@ -37,80 +36,98 @@ let esp = R 7 (* Now x86 instruction (we do not need all of them): *) type instr = -(* copies a value from the first to the second operand *) | Mov of opnd * opnd -(* loads an address of the first operand into the second *) | Lea of opnd * opnd -(* makes a binary operation; note, the first operand *) | Binop of string * opnd * opnd -(* designates x86 operator, not the source language one *) -(* x86 integer division, see instruction set reference *) | IDiv of opnd -(* see instruction set reference *) | Cltd -(* sets a value from flags; the first operand is the *) | Set of string * string -(* suffix, which determines the value being set, the *) -(* the second --- (sub)register name *) -(* pushes the operand on the hardware stack *) | Push of opnd -(* pops from the hardware stack to the operand *) | Pop of opnd -(* call a function by a name *) | Call of string -(* call a function by indirect address *) | CallI of opnd -(* returns from a function *) | Ret -(* a label in the code *) | Label of string -(* a conditional jump *) | CJmp of string * string -(* a non-conditional jump *) | Jmp of string -(* directive *) | Meta of string - -(* arithmetic correction: decrement *) | Dec of opnd -(* arithmetic correction: or 0x0001 *) | Or1 of opnd -(* arithmetic correction: shl 1 *) | Sal1 of opnd -(* arithmetic correction: shr 1 *) | Sar1 of opnd - | Repmovsl + (* copies a value from the first to the second operand *) + | Mov of opnd * opnd + (* loads an address of the first operand into the second *) + | Lea of opnd * opnd + (* makes a binary operation; note, the first operand *) + | Binop of string * opnd * opnd + (* designates x86 operator, not the source language one *) + (* x86 integer division, see instruction set reference *) + | IDiv of opnd + (* see instruction set reference *) + | Cltd + (* sets a value from flags; the first operand is the *) + | Set of string * string + (* suffix, which determines the value being set, the *) + (* the second --- (sub)register name *) + (* pushes the operand on the hardware stack *) + | Push of opnd + (* pops from the hardware stack to the operand *) + | Pop of opnd + (* call a function by a name *) + | Call of string + (* call a function by indirect address *) + | CallI of opnd + (* returns from a function *) + | Ret + (* a label in the code *) + | Label of string + (* a conditional jump *) + | CJmp of string * string + (* a non-conditional jump *) + | Jmp of string + (* directive *) + | Meta of string + (* arithmetic correction: decrement *) + | Dec of opnd + (* arithmetic correction: or 0x0001 *) + | Or1 of opnd + (* arithmetic correction: shl 1 *) + | Sal1 of opnd + (* arithmetic correction: shr 1 *) + | Sar1 of opnd + | Repmovsl + (* Instruction printer *) let stack_offset i = - if i >= 0 - then (i+1) * word_size - else 8 + (-i-1) * word_size - + if i >= 0 then (i + 1) * word_size else 8 + ((-i - 1) * word_size) + let show instr = let rec opnd = function - | R i -> regs.(i) - | C -> "4(%ebp)" - | S i -> if i >= 0 - then Printf.sprintf "-%d(%%ebp)" (stack_offset i) - else Printf.sprintf "%d(%%ebp)" (stack_offset i) - | M x -> x - | L i -> Printf.sprintf "$%d" i - | I (0, x) -> Printf.sprintf "(%s)" (opnd x) - | I (n, x) -> Printf.sprintf "%d(%s)" n (opnd x) + | R i -> regs.(i) + | C -> "4(%ebp)" + | S i -> + if i >= 0 then Printf.sprintf "-%d(%%ebp)" (stack_offset i) + else Printf.sprintf "%d(%%ebp)" (stack_offset i) + | M x -> x + | L i -> Printf.sprintf "$%d" i + | I (0, x) -> Printf.sprintf "(%s)" (opnd x) + | I (n, x) -> Printf.sprintf "%d(%s)" n (opnd x) in let binop = function - | "+" -> "addl" - | "-" -> "subl" - | "*" -> "imull" - | "&&" -> "andl" - | "!!" -> "orl" - | "^" -> "xorl" - | "cmp" -> "cmpl" - | "test" -> "test" - | _ -> failwith "unknown binary operator" + | "+" -> "addl" + | "-" -> "subl" + | "*" -> "imull" + | "&&" -> "andl" + | "!!" -> "orl" + | "^" -> "xorl" + | "cmp" -> "cmpl" + | "test" -> "test" + | _ -> failwith "unknown binary operator" in match instr with - | Cltd -> "\tcltd" - | Set (suf, s) -> Printf.sprintf "\tset%s\t%s" suf s - | IDiv s1 -> Printf.sprintf "\tidivl\t%s" (opnd s1) - | Binop (op, s1, s2) -> Printf.sprintf "\t%s\t%s,\t%s" (binop op) (opnd s1) (opnd s2) - | Mov (s1, s2) -> Printf.sprintf "\tmovl\t%s,\t%s" (opnd s1) (opnd s2) - | Lea (x, y) -> Printf.sprintf "\tleal\t%s,\t%s" (opnd x) (opnd y) - | Push s -> Printf.sprintf "\tpushl\t%s" (opnd s) - | Pop s -> Printf.sprintf "\tpopl\t%s" (opnd s) - | Ret -> "\tret" - | Call p -> Printf.sprintf "\tcall\t%s" p - | CallI o -> Printf.sprintf "\tcall\t*(%s)" (opnd o) - | Label l -> Printf.sprintf "%s:\n" l - | Jmp l -> Printf.sprintf "\tjmp\t%s" l - | CJmp (s , l) -> Printf.sprintf "\tj%s\t%s" s l - | Meta s -> Printf.sprintf "%s\n" s - | Dec s -> Printf.sprintf "\tdecl\t%s" (opnd s) - | Or1 s -> Printf.sprintf "\torl\t$0x0001,\t%s" (opnd s) - | Sal1 s -> Printf.sprintf "\tsall\t%s" (opnd s) - | Sar1 s -> Printf.sprintf "\tsarl\t%s" (opnd s) - | Repmovsl -> Printf.sprintf "\trep movsl\t" + | Cltd -> "\tcltd" + | Set (suf, s) -> Printf.sprintf "\tset%s\t%s" suf s + | IDiv s1 -> Printf.sprintf "\tidivl\t%s" (opnd s1) + | Binop (op, s1, s2) -> + Printf.sprintf "\t%s\t%s,\t%s" (binop op) (opnd s1) (opnd s2) + | Mov (s1, s2) -> Printf.sprintf "\tmovl\t%s,\t%s" (opnd s1) (opnd s2) + | Lea (x, y) -> Printf.sprintf "\tleal\t%s,\t%s" (opnd x) (opnd y) + | Push s -> Printf.sprintf "\tpushl\t%s" (opnd s) + | Pop s -> Printf.sprintf "\tpopl\t%s" (opnd s) + | Ret -> "\tret" + | Call p -> Printf.sprintf "\tcall\t%s" p + | CallI o -> Printf.sprintf "\tcall\t*(%s)" (opnd o) + | Label l -> Printf.sprintf "%s:\n" l + | Jmp l -> Printf.sprintf "\tjmp\t%s" l + | CJmp (s, l) -> Printf.sprintf "\tj%s\t%s" s l + | Meta s -> Printf.sprintf "%s\n" s + | Dec s -> Printf.sprintf "\tdecl\t%s" (opnd s) + | Or1 s -> Printf.sprintf "\torl\t$0x0001,\t%s" (opnd s) + | Sal1 s -> Printf.sprintf "\tsall\t%s" (opnd s) + | Sar1 s -> Printf.sprintf "\tsarl\t%s" (opnd s) + | Repmovsl -> Printf.sprintf "\trep movsl\t" (* Opening stack machine to use instructions without fully qualified names *) open SM @@ -126,435 +143,541 @@ let compile cmd env imports code = (* SM.print_prg code; *) flush stdout; let suffix = function - | "<" -> "l" - | "<=" -> "le" - | "==" -> "e" - | "!=" -> "ne" - | ">=" -> "ge" - | ">" -> "g" - | _ -> failwith "unknown operator" + | "<" -> "l" + | "<=" -> "le" + | "==" -> "e" + | "!=" -> "ne" + | ">=" -> "ge" + | ">" -> "g" + | _ -> failwith "unknown operator" in - let box n = (n lsl 1) lor 1 in + let box n = (n lsl 1) lor 1 in let rec compile' env scode = let on_stack = function S _ -> true | _ -> false in - let mov x s = if on_stack x && on_stack s then [Mov (x, eax); Mov (eax, s)] else [Mov (x, s)] in + let mov x s = + if on_stack x && on_stack s then [ Mov (x, eax); Mov (eax, s) ] + else [ Mov (x, s) ] + in let callc env n tail = - let tail = tail && env#nargs = n in - if tail - then ( + let tail = tail && env#nargs = n in + if tail then let rec push_args env acc = function - | 0 -> env, acc - | n -> let x, env = env#pop in - if x = env#loc (Value.Arg (n-1)) - then push_args env acc (n-1) - else push_args env ((mov x (env#loc (Value.Arg (n-1)))) @ acc) (n-1) + | 0 -> (env, acc) + | n -> + let x, env = env#pop in + if x = env#loc (Value.Arg (n - 1)) then push_args env acc (n - 1) + else + push_args env (mov x (env#loc (Value.Arg (n - 1))) @ acc) (n - 1) in - let env , pushs = push_args env [] n in - let closure, env = env#pop in - let y , env = env#allocate in - env, pushs @ [Mov (closure, edx); - Mov (I(0, edx), eax); - Mov (ebp, esp); - Pop (ebp)] @ - (if env#has_closure then [Pop ebx] else []) @ - [Jmp "*%eax"] (* UGLY!!! *) - ) - else ( + let env, pushs = push_args env [] n in + let closure, env = env#pop in + let _, env = env#allocate in + ( env, + pushs + @ [ + Mov (closure, edx); Mov (I (0, edx), eax); Mov (ebp, esp); Pop ebp; + ] + @ (if env#has_closure then [ Pop ebx ] else []) + @ [ Jmp "*%eax" ] ) (* UGLY!!! *) + else let pushr, popr = - List.split @@ List.map (fun r -> (Push r, Pop r)) (env#live_registers n) + List.split + @@ List.map (fun r -> (Push r, Pop r)) (env#live_registers n) in - let pushr, popr = env#save_closure @ pushr, env#rest_closure @ popr in + let pushr, popr = (env#save_closure @ pushr, env#rest_closure @ popr) in let env, code = let rec push_args env acc = function - | 0 -> env, acc - | n -> let x, env = env#pop in - push_args env ((Push x)::acc) (n-1) + | 0 -> (env, acc) + | n -> + let x, env = env#pop in + push_args env (Push x :: acc) (n - 1) in - let env, pushs = push_args env [] n in - let pushs = List.rev pushs in - let closure, env = env#pop in + let env, pushs = push_args env [] n in + let pushs = List.rev pushs in + let closure, env = env#pop in let call_closure = - if on_stack closure - then [Mov (closure, edx); Mov (edx, eax); CallI eax] - else [Mov (closure, edx); CallI closure] + if on_stack closure then + [ Mov (closure, edx); Mov (edx, eax); CallI eax ] + else [ Mov (closure, edx); CallI closure ] in - env, pushr @ pushs @ call_closure @ [Binop ("+", L (word_size * List.length pushs), esp)] @ (List.rev popr) + ( env, + pushr @ pushs @ call_closure + @ [ Binop ("+", L (word_size * List.length pushs), esp) ] + @ List.rev popr ) in - let y, env = env#allocate in env, code @ [Mov (eax, y)] - ) + let y, env = env#allocate in + (env, code @ [ Mov (eax, y) ]) in let call env f n tail = - let tail = tail && env#nargs = n && f.[0] <> '.' in + let tail = tail && env#nargs = n && f.[0] <> '.' in let f = - match f.[0] with '.' -> "B" ^ String.sub f 1 (String.length f - 1) | _ -> f + match f.[0] with + | '.' -> "B" ^ String.sub f 1 (String.length f - 1) + | _ -> f in - if tail - then ( + if tail then let rec push_args env acc = function - | 0 -> env, acc - | n -> let x, env = env#pop in - if x = env#loc (Value.Arg (n-1)) - then push_args env acc (n-1) - else push_args env ((mov x (env#loc (Value.Arg (n-1)))) @ acc) (n-1) + | 0 -> (env, acc) + | n -> + let x, env = env#pop in + if x = env#loc (Value.Arg (n - 1)) then push_args env acc (n - 1) + else + push_args env (mov x (env#loc (Value.Arg (n - 1))) @ acc) (n - 1) in let env, pushs = push_args env [] n in - let y, env = env#allocate in - env, pushs @ [Mov (ebp, esp); Pop (ebp)] @ (if env#has_closure then [Pop ebx] else []) @ [Jmp f] - ) - else ( + let _, env = env#allocate in + ( env, + pushs + @ [ Mov (ebp, esp); Pop ebp ] + @ (if env#has_closure then [ Pop ebx ] else []) + @ [ Jmp f ] ) + else let pushr, popr = - List.split @@ List.map (fun r -> (Push r, Pop r)) (env#live_registers n) - in - let pushr, popr = env#save_closure @ pushr, env#rest_closure @ popr in + List.split + @@ List.map (fun r -> (Push r, Pop r)) (env#live_registers n) + in + let pushr, popr = (env#save_closure @ pushr, env#rest_closure @ popr) in let env, code = let rec push_args env acc = function - | 0 -> env, acc - | n -> let x, env = env#pop in - push_args env ((Push x)::acc) (n-1) + | 0 -> (env, acc) + | n -> + let x, env = env#pop in + push_args env (Push x :: acc) (n - 1) in let env, pushs = push_args env [] n in - let pushs = + let pushs = match f with - | "Barray" -> List.rev @@ (Push (L (box n))) :: pushs - | "Bsexp" -> List.rev @@ (Push (L (box n))) :: pushs - | "Bsta" -> pushs - | _ -> List.rev pushs + | "Barray" -> List.rev @@ (Push (L (box n)) :: pushs) + | "Bsexp" -> List.rev @@ (Push (L (box n)) :: pushs) + | "Bsta" -> pushs + | _ -> List.rev pushs in - env, pushr @ pushs @ [Call f; Binop ("+", L (word_size * List.length pushs), esp)] @ (List.rev popr) + ( env, + pushr @ pushs + @ [ Call f; Binop ("+", L (word_size * List.length pushs), esp) ] + @ List.rev popr ) in - let y, env = env#allocate in env, code @ [Mov (eax, y)] - ) + let y, env = env#allocate in + (env, code @ [ Mov (eax, y) ]) in match scode with - | [] -> env, [] + | [] -> (env, []) | instr :: scode' -> let stack = "" (* env#show_stack*) in (* Printf.printf "insn=%s, stack=%s\n%!" (GT.show(insn) instr) (env#show_stack); *) let env', code' = - if env#is_barrier - then match instr with - | LABEL s -> if env#has_stack s then (env#drop_barrier)#retrieve_stack s, [Label s] else env#drop_stack, [] - | FLABEL s -> env#drop_barrier, [Label s] - | SLABEL s -> env, [Label s] - | _ -> env, [] + if env#is_barrier then + match instr with + | LABEL s -> + if env#has_stack s then + (env#drop_barrier#retrieve_stack s, [ Label s ]) + else (env#drop_stack, []) + | FLABEL s -> (env#drop_barrier, [ Label s ]) + | SLABEL s -> (env, [ Label s ]) + | _ -> (env, []) else - match instr with - | PUBLIC name -> env#register_public name, [] - | EXTERN name -> env#register_extern name, [] - | IMPORT name -> env, [] - - | CLOSURE (name, closure) -> - let pushr, popr = - List.split @@ List.map (fun r -> (Push r, Pop r)) (env#live_registers 0) - in - let closure_len = List.length closure in - let push_closure = - List.map (fun d -> Push (env#loc d)) @@ List.rev closure - in - let s, env = env#allocate in - (env, - pushr @ - push_closure @ - [Push (M ("$" ^ name)); - Push (L (box closure_len)); - Call "Bclosure"; - Binop ("+", L (word_size * (closure_len + 2)), esp); - Mov (eax, s)] @ - List.rev popr @ env#reload_closure) - - | CONST n -> - let s, env' = env#allocate in - (env', [Mov (L (box n), s)]) - - | STRING s -> - let s, env = env#string s in - let l, env = env#allocate in - let env, call = call env ".string" 1 false in - (env, Mov (M ("$" ^ s), l) :: call) - - | LDA x -> - let s, env' = (env #variable x)#allocate in - let s', env''= env'#allocate in - env'', - [Lea (env'#loc x, eax); Mov (eax, s); Mov (eax, s')] - - | LD x -> - let s, env' = (env#variable x)#allocate in - env', - (match s with - | S _ | M _ -> [Mov (env'#loc x, eax); Mov (eax, s)] - | _ -> [Mov (env'#loc x, s)] - ) - - | ST x -> - let env' = env#variable x in - let s = env'#peek in - env', - (match s with - | S _ | M _ -> [Mov (s, eax); Mov (eax, env'#loc x)] - | _ -> [Mov (s, env'#loc x)] - ) - - | STA -> - call env ".sta" 3 false - - | STI -> - let v, x, env' = env#pop2 in - env'#push x, - (match x with - | S _ | M _ -> [Mov (v, edx); Mov (x, eax); Mov (edx, I (0, eax)); Mov (edx, x)] @ env#reload_closure - | _ -> [Mov (v, eax); Mov (eax, I (0, x)); Mov (eax, x)] - ) - - | BINOP op -> - let x, y, env' = env#pop2 in - env'#push y, - (match op with - | "/" -> - [Mov (y, eax); - Sar1 eax; - Cltd; - (* x := x >> 1 ?? *) - Sar1 x; (*!!!*) - IDiv x; - Sal1 eax; - Or1 eax; - Mov (eax, y) - ] - | "%" -> - [Mov (y, eax); - Sar1 eax; - Cltd; - (* x := x >> 1 ?? *) - Sar1 x; (*!!!*) - IDiv x; - Sal1 edx; - Or1 edx; - Mov (edx, y) - ] @ env#reload_closure - | "<" | "<=" | "==" | "!=" | ">=" | ">" -> - (match x with - | M _ | S _ -> - [Binop ("^", eax, eax); - Mov (x, edx); - Binop ("cmp", edx, y); - Set (suffix op, "%al"); - Sal1 eax; - Or1 eax; - Mov (eax, y) - ] @ env#reload_closure + match instr with + | PUBLIC name -> (env#register_public name, []) + | EXTERN name -> (env#register_extern name, []) + | IMPORT _ -> (env, []) + | CLOSURE (name, closure) -> + let pushr, popr = + List.split + @@ List.map (fun r -> (Push r, Pop r)) (env#live_registers 0) + in + let closure_len = List.length closure in + let push_closure = + List.map (fun d -> Push (env#loc d)) @@ List.rev closure + in + let s, env = env#allocate in + ( env, + pushr @ push_closure + @ [ + Push (M ("$" ^ name)); + Push (L (box closure_len)); + Call "Bclosure"; + Binop ("+", L (word_size * (closure_len + 2)), esp); + Mov (eax, s); + ] + @ List.rev popr @ env#reload_closure ) + | CONST n -> + let s, env' = env#allocate in + (env', [ Mov (L (box n), s) ]) + | STRING s -> + let s, env = env#string s in + let l, env = env#allocate in + let env, call = call env ".string" 1 false in + (env, Mov (M ("$" ^ s), l) :: call) + | LDA x -> + let s, env' = (env#variable x)#allocate in + let s', env'' = env'#allocate in + (env'', [ Lea (env'#loc x, eax); Mov (eax, s); Mov (eax, s') ]) + | LD x -> ( + let s, env' = (env#variable x)#allocate in + ( env', + match s with + | S _ | M _ -> [ Mov (env'#loc x, eax); Mov (eax, s) ] + | _ -> [ Mov (env'#loc x, s) ] )) + | ST x -> ( + let env' = env#variable x in + let s = env'#peek in + ( env', + match s with + | S _ | M _ -> [ Mov (s, eax); Mov (eax, env'#loc x) ] + | _ -> [ Mov (s, env'#loc x) ] )) + | STA -> call env ".sta" 3 false + | STI -> ( + let v, x, env' = env#pop2 in + ( env'#push x, + match x with + | S _ | M _ -> + [ + Mov (v, edx); + Mov (x, eax); + Mov (edx, I (0, eax)); + Mov (edx, x); + ] + @ env#reload_closure + | _ -> [ Mov (v, eax); Mov (eax, I (0, x)); Mov (eax, x) ] )) + | BINOP op -> ( + let x, y, env' = env#pop2 in + ( env'#push y, + (* (match op with + |"<" | "<=" | "==" | "!=" | ">=" | ">" -> + [Push (eax); + Push (edx); + Mov (y, eax); + Binop("&&", L(1), eax); + Mov (x, edx); + Binop("&&", L(1), edx); + Binop("cmp", eax, edx); + CJmp ("nz", "_ERROR2"); + Pop (edx); + Pop (eax)] + (* | "+" | "-" | "*" | "/" -> *) + | _ -> + [Mov (y, eax); + Binop("&&", L(1), eax); + Binop("cmp", L(0), eax); + CJmp ("z", "_ERROR"); + Mov (x, eax); + Binop("&&", L(1), eax); + Binop("cmp", L(0), eax); + CJmp ("z", "_ERROR")] + | _ -> []) @ *) + match op with + | "/" -> + [ + Mov (y, eax); + Sar1 eax; + Cltd; + (* x := x >> 1 ?? *) + Sar1 x; + (*!!!*) + IDiv x; + Sal1 eax; + Or1 eax; + Mov (eax, y); + ] + | "%" -> + [ + Mov (y, eax); + Sar1 eax; + Cltd; + (* x := x >> 1 ?? *) + Sar1 x; + (*!!!*) + IDiv x; + Sal1 edx; + Or1 edx; + Mov (edx, y); + ] + @ env#reload_closure + | "<" | "<=" | "==" | "!=" | ">=" | ">" -> ( + match x with + | M _ | S _ -> + [ + Binop ("^", eax, eax); + Mov (x, edx); + Binop ("cmp", edx, y); + Set (suffix op, "%al"); + Sal1 eax; + Or1 eax; + Mov (eax, y); + ] + @ env#reload_closure + | _ -> + [ + Binop ("^", eax, eax); + Binop ("cmp", x, y); + Set (suffix op, "%al"); + Sal1 eax; + Or1 eax; + Mov (eax, y); + ]) + | "*" -> + if on_stack y then + [ + Dec y; + Mov (x, eax); + Sar1 eax; + Binop (op, y, eax); + Or1 eax; + Mov (eax, y); + ] + else + [ + Dec y; + Mov (x, eax); + Sar1 eax; + Binop (op, eax, y); + Or1 y; + ] + | "&&" -> + [ + Dec x; + (*!!!*) + Mov (x, eax); + Binop (op, x, eax); + Mov (L 0, eax); + Set ("ne", "%al"); + Dec y; + (*!!!*) + Mov (y, edx); + Binop (op, y, edx); + Mov (L 0, edx); + Set ("ne", "%dl"); + Binop (op, edx, eax); + Set ("ne", "%al"); + Sal1 eax; + Or1 eax; + Mov (eax, y); + ] + @ env#reload_closure + | "!!" -> + [ + Mov (y, eax); + Sar1 eax; + Sar1 x; + (*!!!*) + Binop (op, x, eax); + Mov (L 0, eax); + Set ("ne", "%al"); + Sal1 eax; + Or1 eax; + Mov (eax, y); + ] + | "+" -> + if on_stack x && on_stack y then + [ Mov (x, eax); Dec eax; Binop ("+", eax, y) ] + else [ Binop (op, x, y); Dec y ] + | "-" -> + if on_stack x && on_stack y then + [ Mov (x, eax); Binop (op, eax, y); Or1 y ] + else [ Binop (op, x, y); Or1 y ] | _ -> - [Binop ("^" , eax, eax); - Binop ("cmp", x, y); - Set (suffix op, "%al"); - Sal1 eax; - Or1 eax; - Mov (eax, y) + failwith + (Printf.sprintf "Unexpected pattern: %s: %d" __FILE__ + __LINE__) )) + | LABEL s | FLABEL s | SLABEL s -> (env, [ Label s ]) + | JMP l -> ((env#set_stack l)#set_barrier, [ Jmp l ]) + | CJMP (s, l) -> + let x, env = env#pop in + ( env#set_stack l, + [ Sar1 x; (*!!!*) Binop ("cmp", L 0, x); CJmp (s, l) ] ) + | BEGIN (f, nargs, nlocals, closure, args, scopes) -> + let rec stabs_scope scope = + let names = + List.map + (fun (name, index) -> + Meta + (Printf.sprintf "\t.stabs \"%s:1\",128,0,0,-%d" name + (stack_offset index))) + scope.names + in + names + @ (if names = [] then [] + else + [ + Meta + (Printf.sprintf "\t.stabn 192,0,0,%s-%s" scope.blab f); + ]) + @ (List.flatten @@ List.map stabs_scope scope.subs) + @ + if names = [] then [] + else + [ + Meta + (Printf.sprintf "\t.stabn 224,0,0,%s-%s" scope.elab f); + ] + in + let name = + if f.[0] = 'L' then String.sub f 1 (String.length f - 1) + else f + in + env#assert_empty_stack; + let has_closure = closure <> [] in + let env = env#enter f nargs nlocals has_closure in + ( env, + [ Meta (Printf.sprintf "\t.type %s, @function" name) ] + @ (if f = "main" then [] + else + [ + Meta + (Printf.sprintf "\t.stabs \"%s:F1\",36,0,0,%s" name f); + ] + @ List.mapi + (fun i a -> + Meta + (Printf.sprintf "\t.stabs \"%s:p1\",160,0,0,%d" a + ((i * 4) + 8))) + args + @ List.flatten + @@ List.map stabs_scope scopes) + @ [ Meta "\t.cfi_startproc" ] + @ (if has_closure then [ Push edx ] else []) + @ (if f = cmd#topname then + [ + Mov (M "_init", eax); + Binop ("test", eax, eax); + CJmp ("z", "_continue"); + Ret; + Label "_ERROR"; + Call "Lbinoperror"; + Ret; + Label "_ERROR2"; + Call "Lbinoperror2"; + Ret; + Label "_continue"; + Mov (L 1, M "_init"); ] - ) - | "*" -> - if on_stack y - then [Dec y; Mov (x, eax); Sar1 eax; Binop (op, y, eax); Or1 eax; Mov (eax, y)] - else [Dec y; Mov (x, eax); Sar1 eax; Binop (op, eax, y); Or1 y] - | "&&" -> - [Dec x; (*!!!*) - Mov (x, eax); - Binop (op, x, eax); - Mov (L 0, eax); - Set ("ne", "%al"); - - Dec y; (*!!!*) - Mov (y, edx); - Binop (op, y, edx); - Mov (L 0, edx); - Set ("ne", "%dl"); - - Binop (op, edx, eax); - Set ("ne", "%al"); - Sal1 eax; - Or1 eax; - Mov (eax, y) - ] @ env#reload_closure - | "!!" -> - [Mov (y, eax); - Sar1 eax; - Sar1 x; (*!!!*) - Binop (op, x, eax); - Mov (L 0, eax); - Set ("ne", "%al"); - Sal1 eax; - Or1 eax; - Mov (eax, y) - ] - | "+" -> - if on_stack x && on_stack y - then [Mov (x, eax); Dec eax; Binop ("+", eax, y)] - else [Binop (op, x, y); Dec y] - | "-" -> - if on_stack x && on_stack y - then [Mov (x, eax); Binop (op, eax, y); Or1 y] - else [Binop (op, x, y); Or1 y] - ) - - | LABEL s - | FLABEL s - | SLABEL s -> env, [Label s] - - | JMP l -> (env#set_stack l)#set_barrier, [Jmp l] - - | CJMP (s, l) -> - let x, env = env#pop in - env#set_stack l, [Sar1 x; (*!!!*) Binop ("cmp", L 0, x); CJmp (s, l)] - - | BEGIN (f, nargs, nlocals, closure, args, scopes) -> - let rec stabs_scope scope = - let names = - List.map - (fun (name, index) -> - Meta (Printf.sprintf "\t.stabs \"%s:1\",128,0,0,-%d" name (stack_offset index)) - ) - scope.names - in - names @ - (if names = [] then [] else [Meta (Printf.sprintf "\t.stabn 192,0,0,%s-%s" scope.blab f)]) @ - (List.flatten @@ List.map stabs_scope scope.subs) @ - (if names = [] then [] else [Meta (Printf.sprintf "\t.stabn 224,0,0,%s-%s" scope.elab f)]) - in - let name = - if f.[0] = 'L' then String.sub f 1 (String.length f - 1) else f - in - env#assert_empty_stack; - let has_closure = closure <> [] in - let env = env#enter f nargs nlocals has_closure in - env, [Meta (Printf.sprintf "\t.type %s, @function" name)] @ - (if f = "main" - then [] - else - [Meta (Printf.sprintf "\t.stabs \"%s:F1\",36,0,0,%s" name f)] @ - (List.mapi (fun i a -> Meta (Printf.sprintf "\t.stabs \"%s:p1\",160,0,0,%d" a ((i*4) + 8))) args) @ - (List.flatten @@ List.map stabs_scope scopes) - ) + else []) + @ [ + Push ebp; + Meta + ("\t.cfi_def_cfa_offset\t" + ^ if has_closure then "12" else "8"); + Meta + ("\t.cfi_offset 5, -" + ^ if has_closure then "12" else "8"); + Mov (esp, ebp); + Meta "\t.cfi_def_cfa_register\t5"; + Binop ("-", M ("$" ^ env#lsize), esp); + Mov (esp, edi); + Mov (M "$filler", esi); + Mov (M ("$" ^ env#allocated_size), ecx); + Repmovsl; + ] + @ (if f = "main" then + [ + Call "__gc_init"; + Push (I (12, ebp)); + Push (I (8, ebp)); + Call "set_args"; + Binop ("+", L 8, esp); + ] + else []) @ - [Meta "\t.cfi_startproc"] @ - (if has_closure then [Push edx] else []) @ - (if f = cmd#topname - then - [Mov (M "_init", eax); - Binop ("test", eax, eax); - CJmp ("z", "_continue"); + if f = cmd#topname then + List.map + (fun i -> Call ("init" ^ i)) + (List.filter (fun i -> i <> "Std") imports) + else [] ) + | END -> + let x, env = env#pop in + env#assert_empty_stack; + let name = env#fname in + ( env#leave, + [ + Mov (x, eax); + (*!!*) + Label env#epilogue; + Mov (ebp, esp); + Pop ebp; + ] + @ env#rest_closure + @ (if name = "main" then [ Binop ("^", eax, eax) ] else []) + @ [ + Meta "\t.cfi_restore\t5"; + Meta "\t.cfi_def_cfa\t4, 4"; Ret; - Label "_continue"; - Mov (L 1, M "_init"); - ] - else [] - ) @ - [Push ebp; - Meta ("\t.cfi_def_cfa_offset\t" ^ if has_closure then "12" else "8"); - Meta ("\t.cfi_offset 5, -" ^ if has_closure then "12" else "8"); - Mov (esp, ebp); - Meta "\t.cfi_def_cfa_register\t5"; - Binop ("-", M ("$" ^ env#lsize), esp); - Mov (esp, edi); - Mov (M "$filler", esi); - Mov (M ("$" ^ (env#allocated_size)), ecx); - Repmovsl - ] @ - (if f = "main" - then [Call "__gc_init"; Push (I (12, ebp)); Push (I (8, ebp)); Call "set_args"; Binop ("+", L 8, esp)] - else [] - ) @ - (if f = cmd#topname - then List.map (fun i -> Call ("init" ^ i)) (List.filter (fun i -> i <> "Std") imports) - else [] - ) - - | END -> - let x, env = env#pop in - env#assert_empty_stack; - let name = env#fname in - env#leave, [ - Mov (x, eax); (*!!*) - Label env#epilogue; - Mov (ebp, esp); - Pop ebp; - ] @ - env#rest_closure @ - (if name = "main" then [Binop ("^", eax, eax)] else []) @ - [Meta "\t.cfi_restore\t5"; - Meta "\t.cfi_def_cfa\t4, 4"; - Ret; - Meta "\t.cfi_endproc"; - Meta (Printf.sprintf "\t.set\t%s,\t%d" env#lsize (env#allocated * word_size)); - Meta (Printf.sprintf "\t.set\t%s,\t%d" env#allocated_size env#allocated); - Meta (Printf.sprintf "\t.size %s, .-%s" name name); - ] - - | RET -> - let x = env#peek in - env, [Mov (x, eax); Jmp env#epilogue] - - | ELEM -> call env ".elem" 2 false - - | CALL (f, n, tail) -> call env f n tail - - | CALLC (n, tail) -> callc env n tail - - | SEXP (t, n) -> - let s, env = env#allocate in - let env, code = call env ".sexp" (n+1) false in - env, [Mov (L (box (env#hash t)), s)] @ code - - | DROP -> - snd env#pop, [] - - | DUP -> - let x = env#peek in - let s, env = env#allocate in - env, mov x s - - | SWAP -> - let x, y = env#peek2 in - env, [Push x; Push y; Pop x; Pop y] - - | TAG (t, n) -> - let s1, env = env#allocate in - let s2, env = env#allocate in - let env, code = call env ".tag" 3 false in - env, [Mov (L (box (env#hash t)), s1); Mov (L (box n), s2)] @ code - - | ARRAY n -> - let s, env = env#allocate in - let env, code = call env ".array_patt" 2 false in - env, [Mov (L (box n), s)] @ code - - | PATT StrCmp -> call env ".string_patt" 2 false - - | PATT patt -> - call env - (match patt with - | Boxed -> ".boxed_patt" - | UnBoxed -> ".unboxed_patt" - | Array -> ".array_tag_patt" - | String -> ".string_tag_patt" - | Sexp -> ".sexp_tag_patt" - | Closure -> ".closure_tag_patt" - ) 1 false - | LINE (line) -> - env#gen_line line - - | FAIL ((line, col), value) -> - let v, env = if value then env#peek, env else env#pop in - let s, env = env#string cmd#get_infile in - env, [Push (L (box col)); Push (L (box line)); Push (M ("$" ^ s)); Push v; Call "Bmatch_failure"; Binop ("+", L (4 * word_size), esp)] - - | i -> - invalid_arg (Printf.sprintf "invalid SM insn: %s\n" (GT.show(insn) i)) + Meta "\t.cfi_endproc"; + Meta + (Printf.sprintf "\t.set\t%s,\t%d" env#lsize + (env#allocated * word_size)); + Meta + (Printf.sprintf "\t.set\t%s,\t%d" env#allocated_size + env#allocated); + Meta (Printf.sprintf "\t.size %s, .-%s" name name); + ] ) + | RET -> + let x = env#peek in + (env, [ Mov (x, eax); Jmp env#epilogue ]) + | ELEM -> call env ".elem" 2 false + | CALL (f, n, tail) -> call env f n tail + | CALLC (n, tail) -> callc env n tail + | SEXP (t, n) -> + let s, env = env#allocate in + let env, code = call env ".sexp" (n + 1) false in + (env, [ Mov (L (box (env#hash t)), s) ] @ code) + | DROP -> (snd env#pop, []) + | DUP -> + let x = env#peek in + let s, env = env#allocate in + (env, mov x s) + | SWAP -> + let x, y = env#peek2 in + (env, [ Push x; Push y; Pop x; Pop y ]) + | TAG (t, n) -> + let s1, env = env#allocate in + let s2, env = env#allocate in + let env, code = call env ".tag" 3 false in + ( env, + [ Mov (L (box (env#hash t)), s1); Mov (L (box n), s2) ] @ code + ) + | ARRAY n -> + let s, env = env#allocate in + let env, code = call env ".array_patt" 2 false in + (env, [ Mov (L (box n), s) ] @ code) + | PATT StrCmp -> call env ".string_patt" 2 false + | PATT patt -> + call env + (match patt with + | Boxed -> ".boxed_patt" + | UnBoxed -> ".unboxed_patt" + | Array -> ".array_tag_patt" + | String -> ".string_tag_patt" + | Sexp -> ".sexp_tag_patt" + | Closure -> ".closure_tag_patt" + | StrCmp -> + failwith + (Printf.sprintf "Unexpected pattern: StrCmp %s: %d" + __FILE__ __LINE__)) + 1 false + | LINE line -> env#gen_line line + | FAIL ((line, col), value) -> + let v, env = if value then (env#peek, env) else env#pop in + let s, env = env#string cmd#get_infile in + ( env, + [ + Push (L (box col)); + Push (L (box line)); + Push (M ("$" ^ s)); + Push v; + Call "Bmatch_failure"; + Binop ("+", L (4 * word_size), esp); + ] ) + | i -> + invalid_arg + (Printf.sprintf "invalid SM insn: %s\n" (GT.show insn i)) in let env'', code'' = compile' env' scode' in - env'', [Meta (Printf.sprintf "# %s / % s" (GT.show(SM.insn) instr) stack)] @ code' @ code'' + ( env'', + [ Meta (Printf.sprintf "# %s / %s" (GT.show SM.insn instr) stack) ] + @ code' @ code'' ) in compile' env code - + (* A set of strings *) module S = Set.Make (String) @@ -563,57 +686,50 @@ module M = Map.Make (String) (* Environment implementation *) class env prg = - let chars = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'" in - let make_assoc l i = List.combine l (List.init (List.length l) (fun x -> x + i)) in - let rec assoc x = function [] -> raise Not_found | l :: ls -> try List.assoc x l with Not_found -> assoc x ls in + let chars = + "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'" + in + (* let make_assoc l i = + List.combine l (List.init (List.length l) (fun x -> x + i)) + in *) + (* let rec assoc x = function + | [] -> raise Not_found + | l :: ls -> ( try List.assoc x l with Not_found -> assoc x ls) + in *) object (self) inherit SM.indexer prg - val globals = S.empty (* a set of global variables *) - val stringm = M.empty (* a string map *) - val scount = 0 (* string count *) - val stack_slots = 0 (* maximal number of stack positions *) - - val static_size = 0 (* static data size *) - val stack = [] (* symbolic stack *) - val nargs = 0 (* number of function arguments *) - val locals = [] (* function local variables *) - val fname = "" (* function name *) - val stackmap = M.empty (* labels to stack map *) - val barrier = false (* barrier condition *) + val globals = S.empty (* a set of global variables *) + val stringm = M.empty (* a string map *) + val scount = 0 (* string count *) + val stack_slots = 0 (* maximal number of stack positions *) + val static_size = 0 (* static data size *) + val stack = [] (* symbolic stack *) + val nargs = 0 (* number of function arguments *) + val locals = [] (* function local variables *) + val fname = "" (* function name *) + val stackmap = M.empty (* labels to stack map *) + val barrier = false (* barrier condition *) val max_locals_size = 0 - val has_closure = false - val publics = S.empty - val externs = S.empty - val nlabels = 0 - val first_line = true - + val has_closure = false + val publics = S.empty + val externs = S.empty + val nlabels = 0 + val first_line = true method publics = S.elements publics - - method register_public name = {< publics = S.add name publics >} - method register_extern name = {< externs = S.add name externs >} - + method register_public name = {} + method register_extern name = {} method max_locals_size = max_locals_size - method has_closure = has_closure - - method save_closure = - if has_closure then [Push edx] else [] - - method rest_closure = - if has_closure then [Pop edx] else [] - - method reload_closure = - if has_closure then [Mov (C (*S 0*), edx)] else [] - + method save_closure = if has_closure then [ Push edx ] else [] + method rest_closure = if has_closure then [ Pop edx ] else [] + method reload_closure = if has_closure then [ Mov (C (*S 0*), edx) ] else [] method fname = fname - + method leave = - if stack_slots > max_locals_size - then {< max_locals_size = stack_slots >} + if stack_slots > max_locals_size then {} else self - method show_stack = - GT.show(list) (GT.show(opnd)) stack + method show_stack = GT.show list (GT.show opnd) stack method print_locals = Printf.printf "LOCALS: size = %d\n" static_size; @@ -621,8 +737,8 @@ class env prg = (fun l -> Printf.printf "("; List.iter (fun (a, i) -> Printf.printf "%s=%d " a i) l; - Printf.printf ")\n" - ) locals; + Printf.printf ")\n") + locals; Printf.printf "END LOCALS\n" (* Assert empty stack *) @@ -632,107 +748,113 @@ class env prg = method is_barrier = barrier (* set barrier *) - method set_barrier = {< barrier = true >} + method set_barrier = {} (* drop barrier *) - method drop_barrier = {< barrier = false >} + method drop_barrier = {} (* drop stack *) - method drop_stack = {< stack = [] >} + method drop_stack = {} (* associates a stack to a label *) - method set_stack l = (*Printf.printf "Setting stack for %s\n" l;*) - {< stackmap = M.add l stack stackmap >} + method set_stack l = + (*Printf.printf "Setting stack for %s\n" l;*) + {} (* retrieves a stack for a label *) - method retrieve_stack l = (*Printf.printf "Retrieving stack for %s\n" l;*) - try {< stack = M.find l stackmap >} with Not_found -> self + method retrieve_stack l = + (*Printf.printf "Retrieving stack for %s\n" l;*) + try {} with Not_found -> self (* checks if there is a stack for a label *) - method has_stack l = (*Printf.printf "Retrieving stack for %s\n" l;*) + method has_stack l = + (*Printf.printf "Retrieving stack for %s\n" l;*) M.mem l stackmap (* gets a name for a global variable *) method loc x = match x with | Value.Global name -> M ("global_" ^ name) - | Value.Fun name -> M ("$" ^ name) - | Value.Local i -> S i - | Value.Arg i -> S (- (i + if has_closure then 2 else 1)) - | Value.Access i -> I (word_size * (i+1), edx) - + | Value.Fun name -> M ("$" ^ name) + | Value.Local i -> S i + | Value.Arg i -> S (-(i + if has_closure then 2 else 1)) + | Value.Access i -> I (word_size * (i + 1), edx) + (* allocates a fresh position on a symbolic stack *) method allocate = let x, n = - let rec allocate' = function - | [] -> ebx , 0 - | (S n)::_ -> S (n+1) , n+2 - | (R n)::_ when n < num_of_regs -> R (n+1) , stack_slots - | _ -> S static_size, static_size+1 + let allocate' = function + | [] -> (ebx, 0) + | S n :: _ -> (S (n + 1), n + 2) + | R n :: _ when n < num_of_regs -> (R (n + 1), stack_slots) + | _ -> (S static_size, static_size + 1) in allocate' stack in - x, {< stack_slots = max n stack_slots; stack = x::stack >} + (x, {}) (* pushes an operand to the symbolic stack *) - method push y = {< stack = y::stack >} + method push y = {} (* pops one operand from the symbolic stack *) - method pop = let x::stack' = stack in x, {< stack = stack' >} + method pop = + let[@ocaml.warning "-8"] (x :: stack') = stack in + (x, {}) (* pops two operands from the symbolic stack *) - method pop2 = let x::y::stack' = stack in x, y, {< stack = stack' >} + method pop2 = + let[@ocaml.warning "-8"] (x :: y :: stack') = stack in + (x, y, {}) (* peeks the top of the stack (the stack does not change) *) method peek = List.hd stack (* peeks two topmost values from the stack (the stack itself does not change) *) - method peek2 = let x::y::_ = stack in x, y + method peek2 = + let[@ocaml.warning "-8"] (x :: y :: _) = stack in + (x, y) (* tag hash: gets a hash for a string tag *) method hash tag = - let h = Pervasives.ref 0 in + let h = Stdlib.ref 0 in for i = 0 to min (String.length tag - 1) 4 do - h := (!h lsl 6) lor (String.index chars tag.[i]) + h := (!h lsl 6) lor String.index chars tag.[i] done; !h (* registers a variable in the environment *) method variable x = match x with - | Value.Global name -> {< globals = S.add ("global_" ^ name) globals >} - | _ -> self + | Value.Global name -> {} + | _ -> self (* registers a string constant *) method string x = let escape x = - let n = String.length x in - let buf = Buffer.create (n*2) in + let n = String.length x in + let buf = Buffer.create (n * 2) in let rec iterate i = - if i < n - then ( + if i < n then ( (match x.[i] with - | '"' -> Buffer.add_string buf "\\\"" + | '"' -> Buffer.add_string buf "\\\"" | '\n' -> Buffer.add_string buf "\n" | '\t' -> Buffer.add_string buf "\t" - | c -> Buffer.add_char buf c - ); - iterate (i+1) - ) + | c -> Buffer.add_char buf c); + iterate (i + 1)) in iterate 0; Buffer.contents buf in let x = escape x in - try M.find x stringm, self + try (M.find x stringm, self) with Not_found -> let y = Printf.sprintf "string_%d" scount in let m = M.add x y stringm in - y, {< scount = scount + 1; stringm = m>} + (y, {}) (* gets number of arguments in the current function *) method nargs = nargs - + (* gets all global variables *) method globals = S.elements (S.diff globals externs) @@ -741,108 +863,146 @@ class env prg = (* gets a number of stack positions allocated *) method allocated = stack_slots - method allocated_size = Printf.sprintf "LS%s_SIZE" fname - + (* enters a function *) method enter f nargs nlocals has_closure = - {< nargs = nargs; static_size = nlocals; stack_slots = nlocals; stack = []; fname = f; has_closure = has_closure; first_line = true >} + {} (* returns a label for the epilogue *) method epilogue = Printf.sprintf "L%s_epilogue" fname (* returns a name for local size meta-symbol *) method lsize = Printf.sprintf "L%s_SIZE" fname - + (* returns a list of live registers *) method live_registers depth = let rec inner d acc = function - | [] -> acc - | (R _ as r)::tl -> inner (d+1) (if d >= depth then (r::acc) else acc) tl - | _::tl -> inner (d+1) acc tl + | [] -> acc + | (R _ as r) :: tl -> + inner (d + 1) (if d >= depth then r :: acc else acc) tl + | _ :: tl -> inner (d + 1) acc tl in inner 0 [] stack (* generate a line number information for current function *) method gen_line line = let lab = Printf.sprintf ".L%d" nlabels in - {< nlabels = nlabels + 1; first_line = false >}, - if fname = "main" - then - [Meta (Printf.sprintf "\t.stabn 68,0,%d,%s" line lab); Label lab] - else - (if first_line then [Meta (Printf.sprintf "\t.stabn 68,0,%d,0" line)] else []) @ - [Meta (Printf.sprintf "\t.stabn 68,0,%d,%s-%s" line lab fname); Label lab] - + ( {}, + if fname = "main" then + [ Meta (Printf.sprintf "\t.stabn 68,0,%d,%s" line lab); Label lab ] + else + (if first_line then + [ Meta (Printf.sprintf "\t.stabn 68,0,%d,0" line) ] + else []) + @ [ + Meta (Printf.sprintf "\t.stabn 68,0,%d,%s-%s" line lab fname); + Label lab; + ] ) end (* Generates an assembler text for a program: first compiles the program into the stack code, then generates x86 assember code, then prints the assembler file *) let genasm cmd prog = - let sm = SM.compile cmd prog in + let sm = SM.compile cmd prog in let env, code = compile cmd (new env sm) (fst (fst prog)) sm in let globals = List.map (fun s -> Meta (Printf.sprintf "\t.globl\t%s" s)) env#publics in - let data = [Meta "\t.data"] @ - (List.map (fun (s, v) -> Meta (Printf.sprintf "%s:\t.string\t\"%s\"" v s)) env#strings) @ - [Meta "_init:\t.int 0"; - Meta "\t.section custom_data,\"aw\",@progbits"; - Meta (Printf.sprintf "filler:\t.fill\t%d, 4, 1" env#max_locals_size)] @ - (List.concat @@ - List.map - (fun s -> [Meta (Printf.sprintf "\t.stabs \"%s:S1\",40,0,0,%s" (String.sub s (String.length "global_") (String.length s - String.length "global_")) s); - Meta (Printf.sprintf "%s:\t.int\t1" s)]) - env#globals - ) + let data = + [ Meta "\t.data" ] + @ List.map + (fun (s, v) -> Meta (Printf.sprintf "%s:\t.string\t\"%s\"" v s)) + env#strings + @ [ + Meta "_init:\t.int 0"; + Meta "\t.section custom_data,\"aw\",@progbits"; + Meta (Printf.sprintf "filler:\t.fill\t%d, 4, 1" env#max_locals_size); + ] + @ List.concat + @@ List.map + (fun s -> + [ + Meta + (Printf.sprintf "\t.stabs \"%s:S1\",40,0,0,%s" + (String.sub s (String.length "global_") + (String.length s - String.length "global_")) + s); + Meta (Printf.sprintf "%s:\t.int\t1" s); + ]) + env#globals in let asm = Buffer.create 1024 in List.iter (fun i -> Buffer.add_string asm (Printf.sprintf "%s\n" @@ show i)) - ([Meta (Printf.sprintf "\t.file \"%s\"" cmd#get_absolute_infile); - Meta (Printf.sprintf "\t.stabs \"%s\",100,0,0,.Ltext" cmd#get_absolute_infile)] @ - globals @ - data @ - [Meta "\t.text"; Label ".Ltext"; Meta "\t.stabs \"data:t1=r1;0;4294967295;\",128,0,0,0"] @ - code); + ([ + Meta (Printf.sprintf "\t.file \"%s\"" cmd#get_absolute_infile); + Meta + (Printf.sprintf "\t.stabs \"%s\",100,0,0,.Ltext" + cmd#get_absolute_infile); + ] + @ globals @ data + @ [ + Meta "\t.text"; + Label ".Ltext"; + Meta "\t.stabs \"data:t1=r1;0;4294967295;\",128,0,0,0"; + ] + @ code); Buffer.contents asm let get_std_path () = - match Sys.getenv_opt "LAMA" with - | Some s -> s - | None -> Stdpath.path - + match Sys.getenv_opt "LAMA" with Some s -> s | None -> Stdpath.path + (* Builds a program: generates the assembler file and compiles it with the gcc toolchain *) let build cmd prog = let find_objects imports paths = let module S = Set.Make (String) in let rec iterate acc s = function - | [] -> acc - | import::imports -> - if S.mem import s - then iterate acc s imports - else - let path, intfs = Interface.find import paths in - iterate - ((Filename.concat path (import ^ ".o")) :: acc) - (S.add import s) - ((List.map (function `Import name -> name | _ -> invalid_arg "must not happen") @@ - List.filter (function `Import _ -> true | _ -> false) intfs) @ - imports) + | [] -> acc + | import :: imports -> + if S.mem import s then iterate acc s imports + else + let path, intfs = Interface.find import paths in + iterate + (Filename.concat path (import ^ ".o") :: acc) + (S.add import s) + ((List.map (function + | `Import name -> name + | _ -> invalid_arg "must not happen") + @@ List.filter (function `Import _ -> true | _ -> false) intfs) + @ imports) in iterate [] (S.add "Std" S.empty) imports in cmd#dump_file "s" (genasm cmd prog); cmd#dump_file "i" (Interface.gen prog); - let inc = get_std_path () in + let inc = get_std_path () in + let compiler = "gcc" in + let flags = "-no-pie -m32" in match cmd#get_mode with | `Default -> - let objs = find_objects (fst @@ fst prog) cmd#get_include_paths in - let buf = Buffer.create 255 in - List.iter (fun o -> Buffer.add_string buf o; Buffer.add_string buf " ") objs; - let gcc_cmdline = Printf.sprintf "gcc %s -m32 %s %s.s %s %s/runtime.a" cmd#get_debug cmd#get_output_option cmd#basename (Buffer.contents buf) inc in - Sys.command gcc_cmdline + let objs = find_objects (fst @@ fst prog) cmd#get_include_paths in + let buf = Buffer.create 255 in + List.iter + (fun o -> + Buffer.add_string buf o; + Buffer.add_string buf " ") + objs; + let gcc_cmdline = + Printf.sprintf "%s %s %s %s %s.s %s %s/runtime.a" compiler flags + cmd#get_debug cmd#get_output_option cmd#basename (Buffer.contents buf) + inc + in + Sys.command gcc_cmdline | `Compile -> - Sys.command (Printf.sprintf "gcc %s -m32 -c %s.s" cmd#get_debug cmd#basename) + Sys.command + (Printf.sprintf "%s %s %s -c %s.s" compiler flags cmd#get_debug + cmd#basename) | _ -> invalid_arg "must not happen" diff --git a/src/dune b/src/dune new file mode 100644 index 000000000..47421f4df --- /dev/null +++ b/src/dune @@ -0,0 +1,111 @@ +(env + (dev + (flags + (:standard -warn-error -3-7-8-13-15-20-26-27-32-33-39)))) + +(rule + (targets version.ml) + (action + (progn + (with-stdout-to + version2.ml + (progn + (run echo let version = "\"") + (run echo "Version 1.2: ") + (run git rev-parse --abbrev-ref HEAD) + (run echo , " ") + (run git rev-parse --short HEAD) + (run echo , " ") + (pipe-stdout + (run git rev-parse --verify HEAD) + (run git show --no-patch --no-notes --pretty='%cd')) + (run echo "\""))) + (with-stdout-to + version.ml + (pipe-stdout + (run cat version2.ml) + (run tr -d '\n')))))) + +(rule + (targets stdpath.ml) + (action + (progn + (with-stdout-to + stdpath2.ml + (progn + (run echo let path = "\"") + (run opam var share) + (run echo /Lama) + (run echo "\""))) + (with-stdout-to + stdpath.ml + (pipe-stdout + (run cat stdpath2.ml) + (run tr -d '\n')))))) + +(library + (name liba) + (modules Language Pprinter stdpath version X86 SM) + (libraries GT ostap) + (flags + (:standard + -rectypes + ;-dsource + )) + ; (ocamlopt_flags + ; (:standard -dsource)) + (wrapped false) + (preprocess + (per_module + ((pps GT.ppx_all) + SM + X86) + ((action + (run %{project_root}/src/pp5+gt+plugins+ostap+dump.byte %{input-file})) + Language + Pprinter + stdpath + version))) + (preprocessor_deps + (file %{project_root}/src/pp5+gt+plugins+ostap+dump.byte) + ;(file %{project_root}/src/pp5+gt+plugins+ostap+dump.exe) + ) + ;(inline_tests) + ) + +(executable + (name Driver) + (flags + (:standard + -rectypes + ;-dsource + )) + (modules Driver) + (libraries liba unix)) + +; (rule +; (targets pp5+gt+plugins+ostap+dump.exe) +; (deps +; (package GT)) +; (action +; (run +; mkcamlp5.opt +; -package +; camlp5,camlp5.pa_o,camlp5.pr_dump,camlp5.extend,camlp5.quotations,ostap.syntax,GT.syntax.all,GT.syntax +; -o +; %{targets}))) + +(rule + (targets pp5+gt+plugins+ostap+dump.byte) + (deps + (package GT)) + (action + (run + mkcamlp5 + -package + camlp5,camlp5.pa_o,camlp5.pr_o,ostap.syntax,GT.syntax.all,GT.syntax + -o + %{targets}))) + +(cram + (deps ./Driver.exe)) diff --git a/src/stdpath.ml b/src/stdpath.ml deleted file mode 100644 index e94b5487d..000000000 --- a/src/stdpath.ml +++ /dev/null @@ -1 +0,0 @@ -let path = "/home/db/.opam/4.14.0+flambda/share/Lama" diff --git a/src/version.ml b/src/version.ml deleted file mode 100644 index ecd4cf514..000000000 --- a/src/version.ml +++ /dev/null @@ -1 +0,0 @@ -let version = "Version 1.10, 1bafe839d, Sun Mar 12 05:30:58 2023 +0300" diff --git a/stdlib/regression/Makefile b/stdlib/regression/Makefile index a0607bd17..c6748f129 100644 --- a/stdlib/regression/Makefile +++ b/stdlib/regression/Makefile @@ -1,4 +1,4 @@ -TESTS=$(sort $(basename $(wildcard test*.lama))) +TESTS=$(sort $(filter-out test30, $(basename $(wildcard test*.lama)))) LAMAC=../../src/lamac @@ -7,8 +7,8 @@ LAMAC=../../src/lamac check: $(TESTS) $(TESTS): %: %.lama - @echo $@ - LAMA=../../runtime $(LAMAC) -I .. -ds -dp $< && ./$@ > $@.log && diff $@.log orig/$@.log + @echo "stdlib/regression/$@" + @LAMA=../../runtime $(LAMAC) -I .. -ds -dp $< && ./$@ > $@.log && diff $@.log orig/$@.log clean: $(RM) test*.log *.s *~ $(TESTS) *.i diff --git a/stdlib/regression/orig/test03.log b/stdlib/regression/orig/test03.log index e47f66d9b..27e4c6341 100644 --- a/stdlib/regression/orig/test03.log +++ b/stdlib/regression/orig/test03.log @@ -14,5 +14,5 @@ 1 0 0 -31 +1 -1