-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwisp.py
executable file
·405 lines (355 loc) · 15.9 KB
/
wisp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
#!/usr/bin/env python3
# wisp.py --- Whitespace-to-Lisp preprocessor.
# Copyright (C) 2013 Arne Babenhauserheide <[email protected]>
# Author: Arne Babenhauserheide <[email protected]>
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
"""whitespace to lisp converter.
Essentially it just adds brackets for indentation to allow writing
lisp with indentation sensitive syntax.
Currently it is written in Python, because I like Python as language,
but crave the power of lisp.
"""
def replaceinwisp(code, string, replacement):
    """Replace the given string with the replacement, but only in
    indentation sensitive parts of the code.

    Essentially replace everywhere except in comments, brackets or
    strings.  After a hit the scan goes on at the next character, so a
    hit may start inside the text which was just inserted (this allows
    " \\: \\: " to be replaced twice, though both hits share a space).

    :param code: Arbitrary wisp code to process.
    :param string: A string to replace. An empty string never matches.
    :param replacement: The replacement string.

    :return: (code, count): The new code and a count of replacements.
    """
    strlen = len(string)
    # An empty search string cannot be a meaningful hit, and inserting
    # at every position would never terminate with a live loop bound.
    if not strlen:
        return code, 0
    count = 0
    instring = False
    incomment = False
    inbrackets = 0
    # The bound is checked against the *current* code on every pass:
    # replacing changes the length of the code, so a bound computed
    # once would either run past the end (shorter replacement) or stop
    # too early (longer replacement).  Using <= also examines a hit
    # which ends exactly at the end of the code.
    n = -1
    while n + 1 + strlen <= len(code):
        n += 1
        i = code[n]
        # comments start with a ; - but only in regular wisp code or in brackets.
        # (#\; is a character literal, not a comment)
        if not incomment and not instring and i == ";" and not code[n-2:n] == "#\\":
            incomment = True
        # a linebreak ends the comment
        if incomment:
            if i == "\n":
                incomment = False
            # all processing stops in comments
            continue
        # FIXME: This still breaks at "\\"
        if i == '"' and (not code[n-1:n] == "\\" or (code[n-2:n] == "\\\\" and not code[n-3:n] == "\\\\\\")):
            instring = not instring
        # all processing stops in strings
        if instring:
            continue
        # #\( and #\) are character literals and do not nest
        if i == "(" and not code[n-2:n] == "#\\":
            inbrackets += 1
        elif i == ")" and not code[n-2:n] == "#\\":
            inbrackets -= 1
        # all processing stops in brackets
        if inbrackets:
            continue
        # here we do the actual replacing
        if code[n:n+strlen] == string:
            count += 1
            code = code[:n] + replacement + code[n+strlen:]
    return code, count
class UndefinedIndentationLevel(IndentationError):
    """A line was unindented to a column which does not match any
    outer indentation level."""
class Line:
    """A single line of wisp code, taken apart into its pieces.

    The constructor sets these attributes:

    - indent: number of leading whitespace characters (after resolving
      visible indentation written with underscores).
    - prefix: quoting prefix to put in front of the opening bracket,
      for example ' or #` (empty if there is none).
    - continues: True if the line starts with ". " and therefore
      continues the parent line.
    - content: the code of the line without indentation, prefix and
      comment; inline colons are already turned into brackets.
    - comment: the text after the first ; outside of strings (without
      the ; itself), or "".
    - empty: True if the line is blank or holds only a comment.
    """

    #: quoting prefixes which may start a line: ' , ` and the scheme
    #: macro variants. Each one must be followed by a space.
    _PREFIXES = ("' ", ", ", "` ", "#, ", "#` ", "#' ", "#,@, ")

    def __init__(self, line):
        """Parse one line in which linebreaks within strings and
        brackets already got replaced by a temporary placeholder.

        :param line: The text of one line, without its linebreak.
        """
        # Visible indentation: If the line starts with any number of
        # _, followed by a space, treat those _ as spaces.
        if line.startswith("_"):
            # number of leading underscores = index of the first
            # non-underscore (or the length of an all-underscore line)
            i = len(line) - len(line.lstrip("_"))
            # only treat the underscores as indentation if a space
            # follows or the line ends after the last underscore
            if line[i:i+1] == " " or not line[i:]:
                line = i * " " + line[i:]
        # \_ escapes the underscore at the beginning of a line, so you
        # can use identifiers which only consist of underscores.
        elif line.startswith("\\_"):
            line = "_" + line[2:]

        stripped = line.lstrip()

        #: prefix to go around the outer bracket: '(, ,( or `(
        self.prefix = ""
        # check if this is a continuation of the parent line
        self.continues = stripped.startswith(". ")
        if self.continues:
            self.content = stripped[2:].lstrip()
        else:
            self.content = stripped

        # check if the line is prefixed with any combination of ' ` and ,
        if not self.continues:
            while self.content.startswith(self._PREFIXES):
                token = self.content.split(" ", 1)[0]
                self.prefix += token
                # drop the whole prefix and the space after it; the
                # prefixes differ in length, so a fixed cut of two
                # characters would leave parts of "#," or "#,@," behind.
                self.content = self.content[len(token) + 1:]

        self.indent = len(line) - len(stripped)

        # care for lines starting with ": " (a colon followed by a space and more chars)
        if self.content.startswith(": ") and self.content[2:].lstrip():
            # just add a space in front of the ": ". Then it will be
            # captured as inline " : " later. With this, the following
            # are almost equal:
            # ": a b" and
            # ":
            #    a b"
            # The only difference between both is that ": a b" cannot
            # have siblings in subsequent lines: The function call
            # ends on this line.
            self.content = " " + self.content

        # a lone colon or pure whitespace carries no content
        if self.content.strip() == ":" or self.content.strip() == "":
            self.content = ""

        # split a possible comment: the first ; which is neither inside
        # a string nor part of the character literal #\;
        self.comment = ""
        instring = False
        for n, i in enumerate(self.content):
            if i == '"' and not self.content[n-1:n] == "\\":
                instring = not instring
            if not instring and i == ";" and not self.content[n-2:n] == "#\\":
                self.comment = self.content[n+1:]
                self.content = self.content[:n]
                break

        # treat inline " : " as opening a bracket which gets closed at
        # the end of the line. A " :" at the end of the line is treated
        # the same way, to avoid being dependent on whitespace at the
        # end of the line.
        bracketstoclose = 0
        instring = False
        inbrackets = 0
        # go backwards through the content: replacing only touches text
        # from two characters before the colon onwards, so the indexes
        # of earlier colons stay valid.
        for n, i in reversed(list(enumerate(self.content))):
            if i == '"' and not self.content[n-1:n] == "\\":
                instring = not instring
            # going backwards, a closing bracket enters a bracketed
            # part and an opening bracket leaves it.
            if not instring and i == ")" and not self.content[n-2:n] == "#\\":
                inbrackets += 1
            elif not instring and i == "(" and not self.content[n-2:n] == "#\\":
                inbrackets -= 1
            if (not instring and
                    not inbrackets and
                    i == ":" and  # cheap check to avoid string slicing
                                  # when there can be no hit.
                    n):  # a colon at index 0 has no space before it
                if self.content[n-1:n+2] == " : " or self.content[n-1:] == " :":
                    bracketstoclose += 1
                    # treat ' : as '(
                    if self.content[n-3:n+1] == " ' :":
                        self.content = self.content[:n-2] + "'(" + self.content[n+2:]
                    else:
                        # replace the colon and the character after it
                        # by the bracket. The space before the colon
                        # stays, so a directly preceding colon
                        # (" : : ") still matches " : ".
                        self.content = self.content[:n] + "(" + self.content[n+2:]

        # after the full line processing, replace " \\: " "\n\\: " and
        # " \\:\n" (inside line, start of a line, end of a line) by "
        # : ", "\n: " and " :\n" respectively to allow escaping : as
        # expression.
        self.content, _ = replaceinwisp(self.content, " \\: ", " : ")
        if self.content.startswith("\\: "):
            self.content = ": " + self.content[3:]
        elif self.content.endswith(" \\:"):
            self.content = self.content[:-3] + " :"
        elif self.content == "\\:":  # empty function or variable call
            self.content = ":"

        # add closing brackets
        self.content += ")" * bracketstoclose

        #: Is the line effectively empty?
        self.empty = False
        parts = line.split(";")
        onlycomment = (parts[1:] and  # there is a comment sign
                       not parts[0].count('"') % 2 and  # but the first comment sign is not in a string
                       not parts[0].strip())  # there is no content before the comment sign
        if line.strip() == "" or onlycomment:
            self.empty = True
def nostringbreaks(code):
    """Mask every linebreak inside a string with the placeholder
    \\LINEBREAK (it is meant to be turned back into a linebreak at the
    end of the conversion).

    :param code: Arbitrary wisp code.
    :return: The code with linebreaks in strings replaced.
    """
    pieces = []
    inside = False
    for pos, letter in enumerate(code):
        # a quote toggles the string state unless a backslash precedes it
        if letter == '"' and code[pos-1:pos] != "\\":
            inside = not inside
        pieces.append("\\LINEBREAK" if inside and letter == "\n" else letter)
    return "".join(pieces)
def nobracketbreaks(code):
    """Mask every linebreak inside brackets with the placeholder
    \\LINEBREAK (it is meant to be turned back into a linebreak at the
    end of the conversion).

    Brackets inside strings and comments do not count, and neither do
    the character literals #\\( and #\\).

    :param code: Arbitrary wisp code.
    :return: The code with linebreaks in brackets replaced.
    """
    pieces = []
    depth = 0
    quoted = False
    commented = False
    for pos, letter in enumerate(code):
        # True for the character right after #\ (a character literal)
        literal = code[pos-2:pos] == "#\\"
        # a ; outside of strings opens a comment
        if letter == ";" and not (commented or quoted or literal):
            commented = True
        if commented:
            # the comment reaches up to and including the linebreak;
            # everything in it is copied untouched
            if letter == "\n":
                commented = False
            pieces.append(letter)
            continue
        if letter == '"' and code[pos-1:pos] != "\\":
            quoted = not quoted
        if not quoted and not literal:
            if letter == "(":
                depth += 1
            elif letter == ")":
                depth -= 1
        pieces.append("\\LINEBREAK" if depth and letter == "\n" else letter)
    return "".join(pieces)
def processlines(lines, prev, codestartindex, levels, lisplines, emptylines):
    """Process all lines after the first code line.

    Every line is emitted with a delay of one line, because the
    brackets to close after a line depend on the indentation of the
    line which follows it.

    :param lines: All parsed lines (objects with the attributes empty,
        comment, content, indent, continues and prefix).
    :param prev: The first code line, already with its opening bracket.
    :param codestartindex: Index of prev in lines.
    :param levels: The indentation levels which are currently open.
    :param lisplines: The lisp lines emitted so far; appended in place.
    :param emptylines: Empty and comment-only lines waiting to be emitted.

    :return: (prev, lisplines, emptylines, levels)
    :raise UndefinedIndentationLevel: if a line is unindented to a
        column which is not among the open levels.
    """
    # n is the 1-based number of the line, used for error messages.
    for n, line in enumerate(lines[codestartindex+1:], codestartindex + 2):
        # ignore empty lines and comment-only lines
        if line.empty:
            # simply keep empty lines and ignore their indentation
            # readd a possible comment
            if line.comment:
                line.content += ";" + line.comment
            # keep the line, do not track it in any way
            emptylines.append(line.indent * " " + line.content)
            continue

        # care for leading brackets
        # continuing lines do not get a leading bracket.
        if not line.continues:
            line.content = line.prefix + "(" + line.content

        # care for closing brackets
        if line.indent > prev.indent:
            # rising indent: the previous line stays open
            levels.append(line.indent)
            lisplines.append(prev.indent * " " + prev.content)
        elif line.indent == prev.indent:
            # same indent: neighbour function or variable: close the
            # bracket of the previous line (a continuation has none).
            if not prev.continues:
                lisplines.append(prev.indent * " " + prev.content + ")")
            else:
                lisplines.append(prev.indent * " " + prev.content)
        else:
            # lower indent: parent function or variable. Find the
            # number of brackets to close.
            bracketstoclose = len([level for level in levels if level >= line.indent])
            if line.indent not in levels[-bracketstoclose:]:
                raise UndefinedIndentationLevel("Unindent of line " + str(n) + " does not match any outer indentation level.\n" + line.indent*" " + "|\n" + line.indent*" " + "v\n" + line.indent*" " + line.content)
            # Drop all closed levels except for the one the new line
            # lives on. The end index is counted from the front:
            # counted from the back it would be -0 when only one level
            # is closed, and levels[:0] throws away every level.
            levels = levels[:len(levels) - bracketstoclose + 1]
            if prev.continues:
                bracketstoclose -= 1
            lisplines.append(prev.indent * " " + prev.content + ")" * bracketstoclose)

        # add a possible comment
        if prev.comment:
            lisplines[-1] += ";" + prev.comment

        prev = line
        # the empty lines seen since the previous code line go after it
        lisplines.extend(emptylines)
        emptylines = []

    # postprocessing the loop: emit the last code line and close
    # everything which is still open.
    if prev and prev.continues:
        # a continuation has no bracket of its own to close
        levels.pop()
    if prev:
        lisplines.append(prev.indent * " " + prev.content + ")" * (len(levels)))
        # keep the comment of the last code line, as for all other lines
        if prev.comment:
            lisplines[-1] += ";" + prev.comment
    lisplines.extend(emptylines)

    return prev, lisplines, emptylines, levels
def wisp2lisp(code):
    """Turn wisp code to lisp code.

    :param code: The wisp code as one string.
    :return: The matching lisp code as one string.
    """
    # TODO: extract the shebang before preprocessing the code.

    # nothing to convert: hand back the empty input as it is
    if not code:
        return code

    # mask the linebreaks in strings first, then those inside
    # brackets, so every remaining linebreak separates two wisp lines.
    masked = nobracketbreaks(nostringbreaks(code))

    # now read the indentation
    lines = [Line(text) for text in masked.splitlines()]

    # the lisp lines are written with a delay of 1 line
    lisplines = []
    # effectively empty lines to be appended
    emptylines = []
    levels = [0]
    #: The index of the first code line
    codestartindex = 0

    def current():
        """The line at codestartindex, or None behind the last line."""
        if codestartindex < len(lines):
            return lines[codestartindex]
        return None

    prev = lines[0]

    # Shebang lines must be used verbatim
    if not prev.indent and prev.content.startswith("#!"):
        if prev.comment:
            prev.content += ";" + prev.comment
        lisplines.append(prev.content)
        codestartindex += 1
        prev = current()

    # initial comment lines need special treatment to avoid starting
    # them with () (implementation detail)
    while prev and prev.empty:
        if prev.comment:
            prev.content += ";" + prev.comment
        lisplines.append(prev.indent * " " + prev.content)
        codestartindex += 1
        prev = current()

    if prev:
        # the first code line gets its opening bracket here
        if not prev.continues:
            prev.content = prev.prefix + "(" + prev.content
        # run the linereader loop. This does the main work - aside from
        # the preprocessing in the Line class.
        prev, lisplines, emptylines, levels = processlines(
            lines, prev, codestartindex, levels, lisplines, emptylines)

    # postprocessing the resulting lisplines: the loop is not perfect…
    # get rid of brackets around empty lines
    lisplines = ["" if entry.lstrip() == "()" else entry
                 for entry in lisplines]

    # finally bring back the linebreaks which were masked at the start
    return "\n".join(lisplines).replace("\\LINEBREAK", "\n")
if __name__ == "__main__":
    import optparse
    import sys

    parser = optparse.OptionParser("[-o outfile] [file | -]")
    parser.add_option("-o", "--output", default="")
    opts, args = parser.parse_args()

    # without a file argument, example.w is used
    sourcefile = args[0] if args else "example.w"

    if sourcefile != "-":
        with open(sourcefile) as f:
            wisp = f.read()
    else:
        # a single dash means: accept stdin as input
        wisp = sys.stdin.read()

    # write to the output file if one is given, otherwise to stdout
    if not opts.output:
        print(wisp2lisp(wisp))
    else:
        with open(opts.output, "w") as f:
            f.write(wisp2lisp(wisp) + "\n")