-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpaper.tex
1656 lines (1452 loc) · 63.8 KB
/
paper.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass{acm_proc_article-sp}
\usepackage[utf8]{inputenc}
\usepackage{caption,subcaption}
\captionsetup{labelfont=bf}
\usepackage[]{microtype}
% for proper backquote
\usepackage{upquote}
% URLs
\usepackage{url}
\usepackage[hidelinks]{hyperref}
% color
\usepackage{color}
\usepackage{xcolor}
% listings
\usepackage{listings}
\lstset{
numbers=left,
numberstyle=\tiny,
basicstyle=\ttfamily\small,
belowcaptionskip=0em,
aboveskip=1.5em,
belowskip=0em,
mathescape=true,
captionpos=b,
rulecolor=\color{black},
prebreak = \raisebox{0ex}[0ex][0ex]{\ensuremath{\hookleftarrow}},
frame=b,
xleftmargin=2em,
framexleftmargin=2em,
xrightmargin=0pt,
resetmargins=true,
framerule=1pt
}
%% Side-by-side listings
\usepackage{subfig}
\usepackage{caption}
\captionsetup{format=hang}
\usepackage{subcaption}
\usepackage{floatrow}
\makeatletter
\newbox\sf@box
\newenvironment{sublisting}[2][]{%
\def\sf@one{#1}%
\def\sf@two{#2}%
\setbox\sf@box\hbox
\bgroup
}{%
\egroup
\ifx\@empty\sf@two\@empty\relax
\def\sf@two{\@empty}
\fi
\ifx\@empty\sf@one\@empty\relax
\subfloat[\sf@two]{\box\sf@box}%
\else
\subfloat[\sf@one][\sf@two]{\box\sf@box}%
\fi
}
\makeatother
\newfloat{mylisting}{htbp}{lom}
\restylefloat*{mylisting}
\floatstyle{plain}
\floatname{mylisting}{Listing}
\captionsetup[mylisting]{position=bottom}
\floatsetup[mylisting]{}
\newsubfloat[position=bottom]{mylisting}
\captionsetup[subfloat]{lomdepth=2}
% use listing counter
\makeatletter
\AtBeginDocument{\let\c@mylisting\c@lstlisting}
\makeatother
%% Quotation with signature
\usepackage{quoting}
\quotingsetup{vskip=0pt}
\def\signed #1{{\leavevmode\unskip\nobreak\hfil\penalty50\hskip2em
\hbox{}\nobreak\hfil(#1)%
\parfillskip=0pt \finalhyphendemerits=0 \endgraf}}
\newsavebox\mybox
\newenvironment{aquote}[1]
{\savebox\mybox{#1}\begin{quoting}}
{\signed{\usebox\mybox}\end{quoting}}
%% tikz
\usepackage{tikz}
\usetikzlibrary{positioning,shapes,arrows,calc}
\newcommand*{\h}{\hspace{5pt}}% for indentation
\newcommand*{\hh}{\h\h}% double indentation
\tikzstyle{object}=[rectangle, rectangle split, draw=black,
text=black, minimum width=2cm,
rectangle split part align={center,left,left},
rectangle split part fill={gray!20,white,white}]
\tikzstyle{every text node part}=[align=center]
\tikzstyle{every node}=[font=\small\sffamily]
\tikzstyle{line} = [draw, thick, -latex']
\newcommand{\renvoi}[3][pos=0.5]{
\path (#2) -- (#3)coordinate[#1](mm);
\draw[line] (#2) -| (mm) |- (#3);
}
%%%% Document
\begin{document}
\title{Ralph - A Dylan dialect that compiles to JavaScript}
\numberofauthors{1}
\author{
\alignauthor
Bastian Mueller\\
IT University of Copenhagen, Denmark\\
\email{\href{mailto:[email protected]}{[email protected]}}
}
\maketitle
\begin{abstract}
We present the object-centered dynamic functional programming language
Ralph, which is inspired by Lisp. Ralph supports an extended subset of
Dylan's features (the intermediate Dylan standard with a prefix
syntax) and compiles to JavaScript. The Ralph compiler produces more
efficient JavaScript code than similar compilers translating Lisp-like
languages due to some trade-offs and utilization of an intermediate
representation based on the Administrative normal form. We discuss the
Ralph language, its mechanics, implementation of its features in
JavaScript, and its multi-pass compilation approach.
\end{abstract}
\section{Introduction}
Ralph\footnote{Available at \url{https://github.com/turbolent/ralph}}
is a programming language that targets JavaScript, more specifically
its subset ECMAScript \cite{ecma262}. Ralph is a Lisp dialect mainly
inspired by Dylan \cite{shalit1996dylan} as specified in the first
edition of its manual \cite{shalit1992dylan} (from here on referred to
as ``early specification''). Dylan is based on Scheme, Common Lisp,
and Smalltalk. Like Scheme and unlike Common Lisp, Dylan has a single
namespace for functions and variables. The object system is derived
from the Common Lisp Object System (CLOS), featuring multiple
inheritance and generic functions performing multiple dispatch.
Dylan was invented by Apple and its original code-name was ``Ralph''.
The same name was chosen for the new language to imply the effort of a
partial reimplementation. Unlike the final and standardized version of
Dylan which features an ALGOL-like syntax, this early version still
used a syntax with prefix notation based on s-expressions
\cite{mccarthy1960}.
Ralph provides many of Dylan's features, special forms, macros, and
functions of the standard library. However, it has a simplified
abstraction for collections, condition system and numerical tower. It
provides a module system, procedural hygienic macros and easy
interoperability with JavaScript. The class-based object system
currently supports single inheritance and single dispatch.
Today it is possible to write rich applications for both desktop and
mobile devices using Web technologies. Modern browsers offer a
number of APIs for this purpose, but JavaScript is the only programming
language able to use them. Besides the usage on the client-side,
JavaScript has also become popular for server-side applications,
through environments such as Node.js. JavaScript execution
environments and their underlying engines have evolved from simple
interpreters to sophisticated virtual machines involving Just-In-Time
emission of native code.
JavaScript is a functional programming language influenced by Scheme
and Self: it supports first-class functions, closures, lexical
scoping, higher-order programming, dynamic typing and prototype based
object-oriented programming.
Because of these features, an increasing number of compilers are
targeting JavaScript, both for existing languages, new languages or
low-level byte-code. For example, the Google Web Toolkit allows
writing applications in Java, Parenscript is a compiler for a subset
of Common Lisp, and ClojureScript is a compiler for a subset of
Clojure. CoffeeScript provides an alternative syntax for JavaScript.
Emscripten is a compiler translating LLVM bytecode to JavaScript.
Ralph was designed for use in application development. It is dynamic
to allow rapid prototyping and development, and has a focus on
expressiveness and efficiency. Much like Dylan, it tries
to bring the advanced features of Lisp to non-Lisp programmers, having
a good trade-off between sophisticated features and
performance. Performance and efficiency are especially crucial on
mobile devices, which have only limited processing capabilities and
reducing power consumption is important. The compiler of Ralph generates
safe and efficient JavaScript close to hand-written code, due to
trade-offs, utilization of an adequate intermediate representation and
high-level optimizations.
We motivate the problem set by presenting the issues associated with
using JavaScript in Section~\ref{sec:problems}, explaining why it is
desirable to have a higher-level language targeting JavaScript.
Section~\ref{sec:related} is looking into related projects and
compilation approaches. Afterwards we describe our solutions for
several problems in Section~\ref{sec:solution}. We present Ralph's
limitations in Section~\ref{sec:limitations} and describe future work
and conclude in Section~\ref{sec:future}.
\section{Problems with JavaScript}\label{sec:problems}
JavaScript has several problematic features which programmers
constantly have to avoid \cite{crockford2008}. A lexical environment is
only created for functions, \texttt{with} statements and
\texttt{catch} clauses of \texttt{try} statements. Block scope or an
equivalent of a let-expression are not available. Another common
problem is hoisting. Variable and function declarations (but not their
initializers) are moved to the top of their enclosing scope and
default to \texttt{undefined}. An example is shown in
Listing~\ref{lst:hoisting}.
\begin{mylisting}[h]
\caption{Hoisting of declarations}
\label{lst:hoisting}
\vspace{-2em}
\begin{sublisting}{source}
\begin{minipage}[b]{.4\textwidth}
\begin{lstlisting}
function foo() {
bar();
var x = 1;
}
\end{lstlisting}
\end{minipage}
\end{sublisting}
\hfill
\begin{sublisting}{hoisted}
\begin{minipage}[b]{.50\textwidth}
\begin{lstlisting}
var foo;
foo = function () {
var x;
bar();
x = 1;
}
\end{lstlisting}
\end{minipage}
\end{sublisting}
\end{mylisting}
\vspace{-0.5em}
The problematic consequences of these features is illustrated in
the following two programs. In the example shown in
Listing~\ref{lst:hoisting-bug1}, it could be assumed the
conditional in \texttt{bar} fails, due to the initialization of
\texttt{foo} to 1. However, because of hoisting, the conditional
succeeds.
\begin{lstlisting}[caption=Bug caused by hoisting,label=lst:hoisting-bug1]
var foo = 1;
function bar() {
if (!foo) {
var foo = 10;
}
return foo;
}
bar(); // $\Rightarrow$ 10
\end{lstlisting}
In the second example shown in Listing~\ref{lst:hoisting-bug2}, it
could be assumed the assignment in line 3 would affect the global
variable, but the function declaration is hoisted and a local variable
\texttt{a} is introduced inside function \texttt{b}, even though the
declaration of function \texttt{a} is actually never reached.
\begin{lstlisting}[caption=Hoisting with unreachable code,label=lst:hoisting-bug2]
var a = 1;
function b() {
a = 10;
return;
function a() {}
}
b();
a // $\Rightarrow$ 1
\end{lstlisting}
\texttt{For} loops are not creating new bindings in each iteration.
In combination with closures this leads to another common problem that
is exemplified in Listing~\ref{lst:for-loop}.
\begin{lstlisting}[caption=For loop in connection with closure,label=lst:for-loop]
var fns = [];
for (var i = 0; i < 5; i++) {
fns.push(function () {
return i;
});
}
fns[3](); // $\Rightarrow$ 5
\end{lstlisting}
The initialization part of the \texttt{for} loop is hoisted and in the
step part, the variable is incremented. The closure added to the array
is containing a reference to the variable, not the value that was
bound when the closure was created.
Another common problem is JavaScript's uncommon semantics of truth and
comparison using the equality operator \texttt{==}, which involves
several coercion rules if its operands are of different types. Some
examples demonstrating the behavior are shown in
Listing~\ref{lst:js-equality}.
\begin{lstlisting}[
caption=Examples of equality and truth behavior,
label=lst:js-equality]
'' == '0' // $\Rightarrow$ false
'' == 0 // $\Rightarrow$ true
'0' == 0 // $\Rightarrow$ true
false == '0' // $\Rightarrow$ true
' \t\n ' == 0 // $\Rightarrow$ true
' \n ' ? true : false // $\Rightarrow$ true
\end{lstlisting}
The language is also missing a mechanism to organize code and create
reusable software components that safely interact with each other. For
example, Common Lisp provides packages for this purpose. Dylan
provides modules and libraries. In JavaScript, all scripts loaded into
an environment are evaluated in the global scope, thus leading to
possible interference between programs using identical names.
Even though JavaScript has problems and writing programs
directly is error-prone, using JavaScript as a compilation target is
viable.
\section{Related Work}\label{sec:related}
Besides Ralph, more compilers target JavaScript, both for full or
substantial subsets of existing languages and new
languages\footnote{For a comprehensive list, see
\url{http://altjs.org/}}. We describe a Lisp dialect, thus this
section will survey some other Lisp dialects targeting
JavaScript\footnote{A feature matrix of more Lisp-like languages
compiling to JavaScript is available at
\url{http://ceaude.twoticketsplease.de/js-lisps.html}}.
In general, compilers follow either one of two approaches.
The first and most common is compiling the high-level source language
to high-level JavaScript. For example, Parenscript, ClojureScript
and scheme2js are in this category. Another approach is using an
existing compiler for the source language and compiling the emitted
low-level language to JavaScript. For instance, Gambit-JS is a
compiler for Scheme relying on the Gambit-C compiler for this purpose.
Parenscript is a compiler for an extended subset of Common Lisp and is
available as a Common Lisp library. Its goals are efficiency and good
interoperability with JavaScript. Generated code is readable and
has no run-time dependencies, but the standard library is small.
Programs written in Parenscript can also be used as Common Lisp
without modification. Even though some optimizations are applied, it
is producing code naively in a direct-style manner.
ClojureScript is a compiler for a subset of Clojure and is written in
Clojure itself. It only employs few optimizations and also produces
JavaScript naively. It uses the run-time of the Google Web Toolkit,
and the compiler annotates the generated code with type hints in
comments. The resulting code is further compiled by the Google Closure
Compiler. It performs a JavaScript-to-JavaScript translation,
optimizing and minimizing the code as much as possible. For this
purpose it is using the provided type hints and certain code
conventions. However, it does not always generate efficient code,
since higher-level information about the intentions of the programmer
are lost during the translation. Another disadvantage is the large
run-time dependency. Both Parenscript and ClojureScript allow the
usage of macros and the latter is even hygienic, but they need to be
written in the host language.
Other implementations in this category have similar characteristics
and may be written in JavaScript instead. As ClojureScript and
Parenscript, they commonly provide easy means of interoperating with
native JavaScript code, by using built-in types and using the native
calling convention. They commonly also generate readable code
comparable to hand-written one, making it easier to debug. However,
the majority of them is only performing few optimizations and naively
generating JavaScript, resulting in modest performance. One particular
problem is the generation of too many unnecessary closures, which has
a large performance impact. To implement block-level scope, these
compilers produce an anonymous function wrapper that is immediately
invoked, as shown in Listing~\ref{lst:closures}.
\begin{mylisting}[h]
\caption{Generation of closures for nested bindings}
\label{lst:closures}
\vspace{-2em}
\begin{sublisting}{Parenscript}
\begin{minipage}[b]{.46\textwidth}
\begin{lstlisting}[escapechar=!]
(setf x
(let ((x 1))
x))
!{\color{black!40}\rule[0.2em]{\linewidth}{0.5pt}}!
x = (function () {
var x = 1;
return x;
})();
\end{lstlisting}
\end{minipage}
\end{sublisting}
\hfill
\begin{sublisting}{ClojureScript}
\begin{minipage}[b]{.46\textwidth}
\begin{lstlisting}[escapechar=!]
(let [x (let [x 1]
x)]
x)
!{\color{black!40}\rule[0.2em]{\linewidth}{0.5pt}}!
var x__2 =
(function () {
var x__1 = 1;
return x__1;
})();
\end{lstlisting}
\end{minipage}
\end{sublisting}
\end{mylisting}
\vspace{-0.5em}
These closures are also generated for forms in expressions for which
JavaScript statements need to be generated. An example for
ClojureScript is shown in Listing~\ref{lst:try/catch}. A binding for
the result of an exception handling form is introduced. As the
compiler needs to emit a JavaScript \texttt{try}/\texttt{catch}
statement, it needs to be wrapped in an immediately invoked anonymous
function to be used as an expression.
\begin{lstlisting}[escapechar=!,
caption=Generation of closures for forms
resulting in JavaScript statements,
label=lst:try/catch]
(let [x (try $\ldots$
(catch $\ldots$))]
$\ldots$
!{\color{black!40}\rule[0.2em]{\linewidth}{0.5pt}}!
var x__1 = (function () {
try { $\ldots$ }
catch ($\ldots$) { $\ldots$ }
})();
$\ldots$
\end{lstlisting}
A more sophisticated compiler for Scheme is scheme2js. It supports
many features of R$^5$RS \cite{R5RS}, but has a focus on efficiency
rather than conformance. It performs tail-call optimization by
translation to iterative constructs and using trampolines. It provides
a non-hygienic macro system and supports first-class continuations
(\texttt{call/cc}), implemented through exceptions. The special
Replay-C \cite{loitsch2009scm2js} algorithm was developed for this
optimization. Using exceptions in generated code has performance
implications though, which are discussed in
Section~\ref{sec:limitations}. Not relying on built-in types also
results in an overhead when interoperating with native JavaScript
code.
An example for the second approach, compiling low-level code to
JavaScript, is Gambit-JS \cite{thivierge2012}. First, Gambit-C is used
to compile Scheme code conforming to R$^5$RS to a final control flow
graph (CFG), called the Gambit Virtual Machine. This intermediate
representation (IR) is normally further compiled to C. Gambit-JS is
emulating the low-level semantics of the instructions by implementing
a custom stack in JavaScript. This approach has many advantages.
Continuations are serializable and can be migrated between
environments, proper tail-calls are implemented without resorting to
trampolines, and generated code is faster than that of scheme2js. Major
disadvantages are its complexity and difficulty of implementation, as
well as more difficult interoperability.
\section{Solution}\label{sec:solution}
In this Section we describe the implementation details of Ralph and
the design considerations for Ralph's implementation, taking into
account the previous sections. We focus on JavaScript interoperability,
and describe modules, the object system, macros and scoping. Finally
we present the separate passes of the compiler and implemented
optimizations.
\subsection{Interoperability}
For a language compiling to JavaScript interoperability is crucial,
because execution environments such as the browser provide a number
of APIs. Additionally third-party libraries can be called easily.
In Ralph we rely on JavaScript's native data structures and calling
convention for performance reasons. Neither automatic coercion, which
imposes a performance penalty, nor error-prone manual coercion is
needed when performing calls between Ralph and JavaScript.
The root of the class hierarchy \texttt{<object>} is represented by
the JavaScript type \texttt{Object}. It provides a mechanism to
associate strings (``properties'') with values of any type. For
symbols, a class with the properties \texttt{name} and \texttt{module}
is used.
Arrays are the fundamental data structure to represent collections in
JavaScript. They dynamically grow when adding or removing elements and
are efficiently implemented in the execution environments.
Ralph's lists are represented by JavaScript arrays, instead of
implementing a custom \texttt{Pair} type with \texttt{head} and
\texttt{tail} properties, which would result in higher memory
consumption and worse performance.
Numbers and boolean values are directly represented by their
JavaScript counterparts. There is no explicit distinction between
integer and floating point numbers, in contrast to Dylan's numerical
tower.
Strings are represented by its respective JavaScript type as
well, for the same reasons of speed and interoperability. The drawback
is immutability, compared to having mutable strings as standardized in
Dylan.
\subsection{Module system}
Dylan allows structuring code into reusable software component by
means of modules and libraries. A library consists of one or more
modules that contain the actual definitions. Each module acts as an
independent namespace for identifiers. In the early specification,
no module system was specified. We chose to adopt modules to group
definitions. Each file is a module with its own namespace for
identifiers. Definitions can be imported from other modules and
exported using the \texttt{define-module} form. An example is shown in
Listing~\ref{lst:define-module}.
\begin{lstlisting}[
caption=Definition of the reader module,
label=lst:define-module]
(define-module ralph/reader
import: (ralph/stream
(ralph/format
only: (format-to-string))
(ralph/regexp
rename: (match match-regexp)))
export: (read))
\end{lstlisting}
\subsection{Object System}
\subsubsection{Classes}
In comparison to Dylan, which provides a multiple inheritance
class-based object system, Ralph currently provides single
inheritance. The reason for this design decision is performance.
We previously implemented multiple inheritance without using
the prototypial features of the object system provided by
JavaScript. This turned out to be impractical in real-world
applications due to poor performance. In the future we want to extend
Ralph with multiple inheritance.
To understand how Ralph's object system is implemented, first the
underlying prototype-based system is explained briefly. In JavaScript,
every object might have a \lstinline{__proto__} property referencing
another object, called the prototype. When a property is accessed on
an object which does not contain the property, and the object has a
prototype, this is used instead for the lookup. For example,
\lstinline|({__proto__: {x: 1}}).x| performs the lookup of property
\texttt{x} on an object with prototype \lstinline|{x: 1}|.
\begin{center}
\begin{tikzpicture}[node distance=1.5cm]
\node (object1)
[object, rectangle split parts=2]
{\nodepart{text}\textbf{Object}
\nodepart{second}\_\_proto\_\_};
\node (object2)
[object, rectangle split parts=2, right=of object1]
{\nodepart{text}\textbf{Object}
\nodepart{second}x = 1};
\renvoi{object1.second east}{object2.text west};
\end{tikzpicture}
\end{center}
Functions are first-class objects and constructors are implemented as
functions. Using the \texttt{new} operator, a fresh object is
instantiated. The execution environment allocates a new object and
sets its prototype to the constructor's \texttt{prototype} property,
which is initially an empty object. If a property lookup on the
instance fails, the value of the constructor's \texttt{prototype}
property will be used instead. When a constructor is called
using \texttt{new}, the implicit variable \texttt{this} refers to the
instance is bound inside the body. This is also the case when a
function is called as a method, i.e., using property lookup in form of
\texttt{object.function()}, rather than
\texttt{function(object)}. This mechanism is illustrated in
Listing~\ref{lst:js-example}.
\begin{lstlisting}[label=lst:js-example,caption=Using prototypes in JavaScript]
function Person(name) {
this.name = name;
}
Person.prototype.hello = function () {
return "Hello, " + this.name + "!";
}
var john = new Person("John");
// john.__proto__ = Person.prototype
john.hello() // $\Rightarrow$ "Hello, John!"
\end{lstlisting}
This already resembles typical class-based object orientation when
assuming the constructor as a ``class''. Single inheritance can be
implemented using \\
\lstinline{class.prototype.__proto__ = superclass.prototype}
In Ralph, the macro \texttt{define-class} produces a constructor.
Additionally an object containing all properties (cf. slots in Dylan
and CLOS) of the class is generated. Finally, a call to the
runtime function \texttt{\%make-class} is generated and these values
and the superclass are passed as arguments. This is demonstrated in
Listing~\ref{lst:define-class}.
\begin{lstlisting}[label=lst:define-class,
caption=Code produced by macro \texttt{define-class},
escapechar=!
]
(define-class <person> (<object>)
name)
!{\color{black!40}\rule[0.2em]{\linewidth}{0.5pt}}!
(define <person>
(%make-class <object>
(%object "name" #f)
(%method <person> ())))
\end{lstlisting}
At runtime, \texttt{\%make-class} will setup the inheritance
relationship by setting the prototype property as shown above, and
keeping a reference to the superclass. Also, the prototype of the
object containing all properties is set to that of the superclass, so
that looking up properties of the class will also return inherited
ones, most specific first. The definitions are shown in
Listing~\ref{lst:make-class/inherit}.
\begin{lstlisting}[label=lst:make-class/inherit,
caption=Definitions of \texttt{\%make-class} and \texttt{\%inherit}
]
(define-function %inherit (class superclass)
(set! (get class "%superclass")
superclass)
(set! (get class "prototype" "__proto__")
(get superclass "prototype"))
(set! (get class "%properties" "__proto__")
(get superclass "%properties"))
class)
(define-function %make-class
(superclass properties constructor)
(set! (get constructor "%properties")
properties)
(when superclass
(%inherit constructor superclass))
constructor)
\end{lstlisting}
Similar to Dylan, the method \texttt{make} will allocate an instance of
the passed class, and \texttt{initialize} will perform the actual
initialization of all properties. If no values are passed to
\texttt{make}, the default forms, stored as functions, are used
instead. Listing~\ref{lst:make/initialize} contains the definitions of
both functions.
\begin{lstlisting}[label=lst:make/initialize,
caption=Definitions of \texttt{make} and \texttt{initialize}
]
(define-function make (type #rest arguments)
(bind ((object (%native-call "new" type)))
(apply initialize object arguments)
object))
(define-function %initialize-property
(object properties arguments property)
(unless (has? (get <object> "prototype")
property)
(bind ((value
(or (get arguments property)
(bind ((value
(get properties
property)))
(when value
(value))))))
(set! (get object property) value))))
(define-method initialize
((object <object>) #rest rest)
(bind ((arguments (as-object rest)))
(if-bind (properties
(get (type object)
"%properties"))
(do (curry %initialize-property
object properties
arguments)
(object-properties
properties inherited?: #t))))
object)
\end{lstlisting}
Recently, means of defining properties were added to JavaScript,
including default values and access restrictions, using
\sloppy\mbox{\texttt{Object.defineProperty}}. The approach described
above was chosen, as this feature is not yet available in all
execution environments and its performance is poor compared to normal
functions which set and get properties.
\subsubsection{Functions}
There are two types of functions: methods and generic functions. A
generic function can contain one or more methods. Both generic
functions and methods consist of a typed argument list and
code. Similar to CLOS, functions are first-class citizens, and not
defined inside of classes. When a method is called, its body is
directly executed. When a generic function is called, the first argument
is compared to the typed argument lists of all defined methods of the
generic function and the most specific one is invoked. This is the
means of object-oriented (single) dispatch.
Dylan performs multiple dispatch, i.e., all arguments and all elements
of the argument lists are considered when determining the applicable
method. Ralph is currently limited to single dispatch, i.e., only the
first argument and first type of each argument list is taken into
consideration.
Similar to the implementations of the class hierarchy, this design
choice is due to the performance problems associated with naively
adopting this concept, i.e., adding methods to a generic function,
which performs multiple dispatch on each call. Not using the prototype
system has drastic performance implications, as usage like shown in
Listing~\ref{lst:js-example} is heavily optimized by execution
environments, e.g., by replacing call-sites with direct jumps.
The runtime of Ralph uses prototypes. The macro \texttt{define-method}
produces a call to the runtime function \texttt{\%make-method}, which
receives the method to be defined, along the name and type for which
it is specialized. At runtime, the function \texttt{\%make-method}
adds the method to the \texttt{prototype} property of the type. The
method is thus actually added to the class internally. If no generic
function exists yet, it is implicitly created. When the method is
called, the implicitly generated generic function performs the actual
dispatch: it looks up the method via its name in the object and
invokes it if it exists. This part is usually inlined by the execution
environment. Both definitions are shown in
Listing~\ref{lst:make-method} and Listing~\ref{lst:make-generic}.
\begin{lstlisting}[label=lst:make-method,
caption=Definition of \texttt{\%make-method}
]
(define-function %make-method
(name function setter? type existing)
;; definition
(set! (get function "%name") name)
(set! (get function "%setter?") setter?)
(set! (get function "%type") type)
(set! (get type "prototype" name) function)
;; implicit definition of generic function?
(if (and existing
(get existing "%generic?"))
existing
(%make-generic name))))
\end{lstlisting}
\begin{lstlisting}[label=lst:make-generic,
caption=Definition of \texttt{\%make-generic}
]
(define-function %make-dispatcher (name)
(method (object)
(bind ((function
(get (%native
"(" object "== null ?"
" false : " object ")")
name)))
(%infix "&&"
function
((get function "apply")
object %all-arguments)))))
(define-function %make-generic (name)
(bind ((dispatcher (%make-dispatcher name)))
(set! (get dispatcher "%generic?") #t)
(set! (get dispatcher "%name") name)
dispatcher))
\end{lstlisting}
Inside the body of a method in Ralph, the implicit variable
\texttt{next-method} is a reference to the next most specific method.
It is a symbol macro (described in the next section) compiling to a
call to the runtime function \texttt{\%next-method}, passing the
current method. The implementation of \texttt{\%next-method} is shown
in Listing~\ref{lst:next-method}.
\begin{lstlisting}[
caption=Determining the next most specific method,
label=lst:next-method
]
(define-function %next-method (function)
(bind ((name (get function "%name"))
(proto (get function "%type"
"prototype"
"__proto__")))
(get proto name)))
\end{lstlisting}
\pagebreak
An exemplary program demonstrating the use of classes and methods in
Ralph is shown in Listing~\ref{lst:classes-methods}.
Figure~\ref{fig:person-student} shows the internal object structure
after both classes are defined.
\begin{lstlisting}[
caption=Using classes and methods in Ralph,
label=lst:classes-methods
]
(define-class <person> (<object>)
(name ""))
(define-method hello ((person <person>))
(format-to-string "Hello, %s!"
(get person "name")))
(define-class <student> (<person>) id)
(define-method hello ((student <student>))
(concatenate (next-method student)
(format-to-string
" Student #%d!"
(get student "id"))))
(bind ((john (make <student> name: "John" id: 23)))
(hello john))
;; $\Rightarrow$ "Hello, John! Student #23"
\end{lstlisting}
\vspace{2em}
\begin{figure}[H]
\centering
\caption{Structure after definition of classes <person> and
<student>}
\label{fig:person-student}
\begin{tikzpicture}[node distance=1cm]
\node (person)
[object, rectangle split parts=3]
{\nodepart{text}\textbf{Function} <person>
\nodepart{second}prototype
\nodepart{third}\%properties};
\node (personProto)
[object, rectangle split parts=2, right=of person, text width=3.5cm]
{\nodepart{text}\textbf{Object}
\nodepart{second}hello =
\lstinline[breaklines]|function () { return "Hello, " + this.name + "!" }|};
\node (personProps)
[object, rectangle split parts=2, right=of person,
below=0.5cm of personProto, text width=3.5cm]
{\nodepart{text}\textbf{Object}
\nodepart{second}name = \lstinline|function () { return "" }|};
\node (student)
[object, rectangle split parts=4, below=2.5cm of person]
{\nodepart{text}\textbf{Function} <student>
\nodepart{second}prototype
\nodepart{third}\%properties
\nodepart{fourth}\%superclass};
\node (studentProto)
[object, rectangle split parts=3, right=of student, text width=3.5cm]
{\nodepart{text}\textbf{Object}
\nodepart{second}hello = \lstinline|function () { $\ldots$ }|
\nodepart{third}\_\_proto\_\_};
\node (studentProps)
[object, rectangle split parts=3, right=of student,
below=0.5cm of studentProto, text width=3.5cm]
{\nodepart{text}\textbf{Object}
\nodepart{second}id = false
\nodepart{third}\_\_proto\_\_};
\renvoi{person.third east}{personProps.text west}
\renvoi{person.second east}{personProto.text west}
\renvoi{student.third east}{studentProps.text west}
\renvoi{student.second east}{studentProto.text west}
\renvoi[left=1em]{student.fourth west}{person.text west}
\renvoi[right=1em]{studentProto.third east}
{personProto.text east}
\renvoi[right=2em]{studentProps.third east}
{personProps.text east}
\end{tikzpicture}
\end{figure}
\vspace{1em}
\subsection{Macro system}
\subsubsection{Definition and usage of Macros}
%% motivation/introduction
Macros were not part of the early specification of Dylan and the
standardized Dylan only features a macro-system based on
pattern-matching of code fragments and rewrite rule-based
transformation. We decided to implement a more powerful system,
allowing macro transformers to perform general computations.
\pagebreak
Similar to Dylan, Ralph performs hygienic expansion. We chose to adopt
the macro system of Clojure, which is a hygienic variant of the Common
Lisp macro system based on quasi-quotation. Definition and execution
of macro transformers, as well as approach to hygiene, substantially
differs from Dylan.
%% definition
Macro transformers can be defined using the \texttt{define-macro}
form. They are implemented as normal functions, but also registered as
macros when evaluated. Listing~\ref{lst:inc} contains a macro
definition of the increment operation \texttt{inc!}.
\begin{lstlisting}[caption=Macro definition of \texttt{inc!},label=lst:inc]
(define-macro inc! (object value)
`(set! ,object
(+ ,object ,(or value 1))))
\end{lstlisting}
In addition to normal quoting through \texttt{quote} or its reader
macro (the ``quote'' character \lstinline{'}), a form can be
syntax-quoted (using ``backquote'' \lstinline{`}). The expression that
follows is quoted, and in case it is a list, every subexpression is
quoted recursively as well. Quotation can be prevented using unquote
(the ``comma'' \lstinline{,} character) and spliced into the outer
expression using unquote-splicing (the ``comma'' and ``at'' characters
combined \lstinline{,@}). The behavior is similar to that of Common
Lisp's equivalents.
The major difference is syntax-quote, as in addition to quoting, it
also ``resolves'' symbols in the current module. If the symbol is
locally bound, either through a global definition or a local binding,
it is qualified with the current module. If it is not bound and
imported from another module, it is qualified with this
module. Otherwise the current module will be used for
qualification. For all other forms, such as numbers, strings,
keywords, etc., syntax-quoting has the same effect as normal quoting,
i.e., \lstinline{`x} is equivalent to \lstinline{'x}.
\begin{aquote}{Rich Hickey, creator of Clojure}
``It is unique, and it is not renaming. I call the process resolving,
and the basic purpose is hygiene - macros yield qualified symbols
reflecting their meanings at macro definition time.''
\end{aquote}
Listing~\ref{lst:syntax-quote} shows an example demonstrating this
behavior and also how unqualified symbols can be introduced by
unquoting a symbol. Through the use of syntax-quote, macros are
therefore producing qualified symbols.
\begin{lstlisting}[
label=lst:syntax-quote,
caption={Example of using syntax-quote in module \texttt{bar},
importing \texttt{x} and \texttt{y} from \texttt{foo}}
]
(bind ((x 23))
`(y x ,x ,'x))
;; $\Rightarrow$ (foo::y bar::x 23 x)
\end{lstlisting}
The second difference of this approach to hygiene is in the expansion
pass. Both \texttt{define} and \texttt{bind} prevent the binding of
qualified symbols. In connection with macros producing qualified
symbols by default, this ultimately prevents unintentional capturing
of variables and qualified symbols cannot be bound to other values.
Still, hygiene can be intentionally broken by introducing an
unqualified symbol as shown before. Listing~\ref{lst:expansion}
illustrates this behavior.
\begin{mylisting}[h]
\caption{Behavior of macroexpansion}
\label{lst:expansion}
\vspace{-2em}
\begin{sublisting}{Macro definition producing an error at expansion
time, caused by binding qualified symbol \texttt{x}}
\begin{minipage}[b]{.7\textwidth}
\begin{lstlisting}
(define-macro foo ()
`(bind ((x $\ldots$))
$\ldots$))
\end{lstlisting}
\end{minipage}
\end{sublisting}
\hfill
\begin{sublisting}{Intentional breaking of hygiene by binding an unqualified symbol}
\begin{minipage}[b]{.7\textwidth}
\begin{lstlisting}
(define-macro foo ()
`(bind ((,'x $\ldots$))