-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path25_stringformatting.py
260 lines (210 loc) · 6.9 KB
/
25_stringformatting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
# %% [markdown]
# # String formatting
# https://docs.python.org/3/library/stdtypes.html#old-string-formatting
"""
types of strings : r, u, b, f
why raw in regex
Old style formatting
format keyword
f strings
behind the scene
template strings
"""
# %%
"""u : unicode string for backward compatibility
r : raw string : ignore escape characters
b : byte string
f : formatted strings"""
# %% [markdown]
# ## raw string in regex
# %%
import re
# The u prefix only exists for backward compatibility: both literals are the same str.
print("unicode\t", u"unicode\t", "unicode\t" == u"unicode\t")
# In a raw literal, \t stays a backslash followed by "t" instead of a tab,
# so the comparison below is False.
print("unicode\t", r"unicode\t", "unicode\t" == r"unicode\t")
# An escaped backslash in a normal literal equals a single backslash in a raw one.
print("C\\Path", r"C\Path", "C\\Path" == r"C\Path")
# b prefix: bytes literal
print("\xcf", b"\xcf")
# encode() turns a str into bytes, decode() turns bytes back into a str
print('I am a string'.encode('ASCII'))
print(b'I am a string'.decode('ASCII'))
# Some prefixes can be combined (rf, br, ...); f is for formatted strings, covered later.
# In a raw string a backslash still escapes the quote, but the backslash itself
# is kept in the resulting string.
print(r'need escape \'quote\'')
# Why use raw strings for regular expressions?
# From the re documentation: "Regular expressions use the backslash character ('\')
# to indicate special forms or to allow special characters to be used without invoking
# their special meaning. This collides with Python's usage of the same character for
# the same purpose in string literals..."
# %%
# The subject contains a literal backslash followed by "dpizza". The backslash is
# doubled so that the literal is valid: "\d" is not a string escape sequence and
# recent Python versions emit a warning for it. The resulting value is unchanged.
test_str = "I love\\dpizza"
# Both the Python string-literal parser and the regex engine give "\" a meaning.
# The literal is processed first, when the source is compiled; the regex engine
# then interprets whatever characters are left in the string.

# Raw literal: the regex engine receives \d ("any digit") -> no match here.
pattern0 = r"(\dpizza)"
a0 = re.search(pattern0, test_str)
print(a0)
# Normal literal: "\\d" becomes \d before the regex engine sees it -> same pattern, no match.
pattern1 = "(\\dpizza)"
a1 = re.search(pattern1, test_str)
print(a1)
# Raw literal: the regex engine receives \\d, an escaped backslash then "d" -> match.
pattern2 = r"(\\dpizza)"
a2 = re.search(pattern2, test_str)
print(a2)
# Normal literal: four backslashes are needed to express the same pattern -> match.
pattern3 = "(\\\\dpizza)"
a3 = re.search(pattern3, test_str)
print(a3)
# %% [markdown]
# ## Old style formatting
# %%
# I can use a single value or a tuple
# The right-hand operand of % can be a single value...
general = "General Kenobi"
print("Hello there ! %s" % general)
# ...or a tuple, here with a single element.
tuple_vader = ("Anakin",)
# Bare expression: the result is not printed; only an interactive cell runner displays it.
"You were my brother %s !" % tuple_vader
# %%
# Multiple values : they must be passed as a tuple, in the order of the placeholders
verb = "underestimate"
reason = "power"
"You %s my %s !" % (verb, reason)
# %%
# or a dict with %(key)s placeholders (no need to put the keys in order)
"Let the %(verb)s flow into %(pronoun)s !" % {
"pronoun": "you", "verb": "anger"}
# %%
# %% [markdown]
# Anatomy of a conversion specifier, in order ("O" presumably marks the optional parts).
print("delimiter % / O mapping key (example) / O conversion flag / O minimum field width / O precision / O length modifier / Conversion type")
# %%
# examples
numberpos = 45.34
numberneg = -45.34
numberint = 450
# plain %f : six digits after the decimal point by default
print("My number is %f" % numberpos)
print("My number is %f" % numberneg)
# %%
# space flag : a blank is left before a positive number
print("My number is % f" % numberpos)
print("My number is % f" % numberneg)
# %%
# + flag : the sign is always shown
print("My number is %+f" % numberpos)
print("My number is %+f" % numberneg)
# %%
# 0 flag + minimum width : pads with zeros up to the width.
# The width counts the whole output, so the two floats (already longer than 8 characters) are not padded.
print("My number is %08f" % numberpos)
print("My number is %08f" % numberneg)
print("My number is %05d" % numberint)
# %%
# width.precision : the precision is the number of digits after the decimal point
print("My number is %06.1f" % numberpos)
print("My number is %04.3f" % numberneg)
# %%
# # flag (alternate form) : keeps the decimal point even when no digit follows it
print("My number is %.0f" % numberint)
print("My number is %#.0f" % numberint)
# %%
# length modifier h, l, L
wide_string = "Wide String"
# A length modifier may be present but Python does not need it, so it is ignored.
print("%ls" % wide_string)
print("%Ls" % wide_string)
print("%hs" % wide_string)
# %%
# small exercise
nb = 42.5789
# Exercise for the reader: change the format below so that this number is printed
# with only two digits after the decimal point and with a + sign.
print("%f" % nb)
# %%
# all the conversion types
nbint = 42
print("%i" % nbint) # signed integer decimal
print("%d" % nbint) # same as i; u is the same as d but is obsolete
print("%o" % nbint) # octal value
print("%#o" % nbint) # octal with the 0o prefix : 42 = 8*5 + 2
print("%x" % nbint) # hexadecimal
print("%X" % nbint) # hexadecimal upper
print("%#x" % nbint) # hexadecimal with the 0x prefix
print("%#X" % nbint) # hexadecimal upper with the 0X prefix
# %%
nbfloat = 420543643136456431464.335
print("%e" % nbfloat) # exponential (scientific) notation
print("%E" % nbfloat) # exponential notation, upper case E
print("%#.0e" % 0) # alternate form : contains a . even if no digit after
print("%.0E" % 0) # exponential upper, precision 0 : no decimal point
# %%
# g is a mix of f and e (G is a mix of F and E) : it uses the exponential form when the
# exponent is less than -4 or not less than the precision (6 by default), else the decimal form
print("%g" % nbfloat)
print("%g" % nbint)
# %%
# c is a single character : it accepts an integer or a one-character string
print("%c" % "5")
# %%
class Car:
    """Toy class whose str() and repr() differ, used to contrast %s with %r."""

    def __str__(self):
        """Return the text produced by str() and the %s conversion."""
        return "str car"

    def __repr__(self):
        """Return the text produced by repr() and the %r conversion."""
        return "repr car"
print("%r" % Car()) # uses repr()
print("%s" % Car()) # uses str()
print("%a" % "é") # uses ascii() : like repr() but non-ASCII characters are escaped
# %% [markdown]
# # New style : https://docs.python.org/3/library/stdtypes.html#str.format
# instead of % use format
# no more tuple or dict
# positional fields : {} placeholders are filled in order
print("Hello {}, General {}".format("There", "Kenobi"))
# %%
# named fields : keyword arguments can be given in any order
print("Hello {location}, General {name}".format(
name="Kenobi", location="There"))
# %%
# most of the flags work the same way but you have to add : before
nbnew = 45.95712
print("{:f}".format(nbnew))
print("{:+.2f}".format(nbnew))
print("{nb:+.2f}".format(nb=nbnew))
# Good news : almost everything we did before works the same way and you won't have to try to find % on your keyboard.
# %% [markdown]
# # Let's go further : literal string interpolation (python 3.6+)
location = "there"
# Without the f prefix the braces are plain text : this prints "Hello {location}" as is
print("Hello {location}")
# %%
# With the f prefix the expression between braces is evaluated and inserted
print(f"Hello {location}")
# %%
nblit = -85.2
# A format specification can follow the expression, after ":"
print(f"Number {nblit:05.0f}")
# no need to call format() anymore !
# plus you can combine f and r !
verb = "like"
print(fr"The linter \does not {verb:s} it though ")
# %%
# you can even do operations in f strings
a = 1
b = 2
print(f"{a}+{b}={a+b}")
# %%
# behind the scene
import dis
# Build the greeting with an f-string; the function only exists so that its
# bytecode can be inspected with dis below.
def greet(location, name):
    return f"Hello {location} ! General {name}"
# Print the disassembled bytecode of greet
dis.dis(greet)
"""
https://stackoverflow.com/questions/12673074/how-should-i-understand-the-output-of-dis-dis
The number on the far left is the line number in the source code where the execution starts.
The numbers in the left column are the offsets of the instructions within the bytecode.
The numbers on the right are the opargs (the instruction arguments) : further investigation needed.
"""
#%%
# Hand-written counterpart of greet(): the same greeting built with "+" concatenation.
# The f-string version is almost the same (the real implementation is faster because
# it uses BUILD_STRING).
def greet2(location, name):
    # Same text as greet(), including the space before "!", so that the two
    # disassemblies differ only in how the string is assembled.
    return "Hello " + location + " ! General " + name
# Print the disassembled bytecode of greet2 for comparison
dis.dis(greet2)
# %%
# So pretty cool, but format strings can cause security issues. Never let user input
# end up inside an f-string expression or be used as the format string of format()!
SECRET_ENV_VARIABLE = "my secret"
class BackendOperation():
    # __init__ does nothing, but it must stay a method defined in this module:
    # the format string below reaches the module globals through __init__.__globals__.
    def __init__(self):
        pass
backend_operation = BackendOperation()
# A format string as a malicious user could supply it: it walks from the object
# to its __init__ method, then to the module globals, then to the secret.
user_input = '{backend.__init__.__globals__[SECRET_ENV_VARIABLE]}'
# if for a reason or another we do this formatting (i did not manage to find a better example...)
print(user_input.format(backend=backend_operation))
# %%
# Ok we get the point, in certain circumstances, using format with user input can make the user access variables. Which means we need a special way to process user input.
from string import Template
# Template only substitutes $name / ${name} placeholders with the given values.
t = Template("Hello, $location!")
print(t.substitute(location="there"))
#%%
# Same attack as with str.format, written with the Template syntax. Attribute access
# is not a valid Template placeholder, so substitute() raises ValueError instead of
# leaking the secret. The error is caught and reported so that the script keeps
# running when executed outside a notebook.
user_input2 = '${backend.__init__.__globals__[SECRET_ENV_VARIABLE]}'
try:
    Template(user_input2).substitute(backend=backend_operation)
except ValueError as error:
    print(f"Template refused the placeholder: {error}")
# %%
# Conclusion : Use formatted string literals (f-strings) ! They make code really easy to read.
# But use template strings if you deal with user input. You cannot do all the cool stuff with them, like changing the format (you have to do it manually), but it is safer.