diff --git a/.gitignore b/.gitignore index 52043b7..a8ed9c7 100644 --- a/.gitignore +++ b/.gitignore @@ -75,4 +75,4 @@ docs/_build/ # cibuildtool wheelhouse/ -ke/tests/test_*.actual \ No newline at end of file +ke/tests/test_*.actual diff --git a/README.md b/README.md index 38fa899..67a7bc3 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ Now 100% less painful to migrate! (You heard that right: migration is not painfu # Try it - Try Kleenexp [online](https://kleenexp.herokuapp.com/alice/). -- Install the Kleenexp extension in [Visual Studio Code](https://marketplace.visualstudio.com/items?itemName=sonoflilit.kleenexp) (or [github.dev](https://github.dev/SonOfLilit/kleenexp/)) as a drop-in replacement for Search/Replace. (Worth it if just so you can keep "regex search" always enabled, *without* needing to backslash-escape all your `.` and `()`.) +- Install the Kleenexp extension in [Visual Studio Code](https://marketplace.visualstudio.com/items?itemName=sonoflilit.kleenexp) (or [github.dev](https://github.dev/SonOfLilit/kleenexp/)) as a drop-in replacement for Search/Replace. (Worth it if just so you can keep "regex search" always enabled, _without_ needing to backslash-escape all your `.` and `()`.) # Installation and usage @@ -74,27 +74,37 @@ Be sure to read the tutorial below! # A Taste of the Syntax Kleenexp: + ``` Hello. My name is Inigo Montoya. You killed my Father. Prepare to die. ``` + Regex: + ``` Hello\. My name is Inigo Montoya\. You killed my Father\. Prepare to die\. ``` Kleenexp: + ``` [1-3 'What is your ' ['name' | 'quest' | 'favourite colour'] '?' [0-1 #space]] ``` + Regex: + ``` (?:What is your (?:name|quest|favourite colour)\?)\s?){1,3} ``` + Kleenexp: + ``` Hello. My name is [capture:name #tmp ' ' #tmp #tmp=[#uppercase [1+ #lowercase]]]. You killed my ['Father' | 'Mother' | 'Hamster']. Prepare to die. ``` + Regex: + ``` Hello\. My name is (?<name>[A-Z][a-z]+ [A-Z][a-z]+)\. You killed my (?:Father|Mother|Hamster)\. 
Prepare to die\.` ``` @@ -150,6 +160,7 @@ However, with apologies to the late Dr. Kleene, "Kleene expressions" is pronounc # Real World Examples Removing parenthesis: + ```python import ke @@ -171,6 +182,7 @@ def remove_parentheses(line): return re.sub(r'\([^)]*\)', '', line) assert remove_parentheses('a(b)c(d)e') == 'ace' ``` + For replacement with `sub()`, the syntax for the replacement is the same as for regexes. ```python @@ -178,7 +190,7 @@ import ke assert ke.sub("[[capture '.' [6 #digit] ] [0+ #digit] ]", r"\1", "3.14159265359") == "3.141592" assert ke.sub("Hi [capture:name 1+ #letter]!", r"\g<name> \g<name>!", "Hi Bobby!") == "Bobby Bobby!" ``` - + Another example, rewriting paths in Django: ```python @@ -196,7 +208,7 @@ urlpatterns = [ ] ``` -The original with regex is taken from Django documentation and looks like this: +The original with regex is taken from Django documentation and looks like this: ``` from django.urls import path, re_path @@ -217,7 +229,8 @@ urlpatterns = [ This is still in Beta, we'd love to get your feedback on the syntax. -# Syntax +# Syntax + Anything outside of brackets is a literal: ``` @@ -347,14 +360,13 @@ Some macros you can use: | #backspace | | `/[\b]/` | | | #formfeed | | `/\f/` | | -\* Definitions `/wrapped in slashes/` are in old regex syntax. This is used when the macro isn't simply a short way to express something you could express otherwise in Kleenexp.) +\* Definitions `/wrapped in slashes/` are in old regex syntax. This is used when the macro isn't simply a short way to express something you could express otherwise in Kleenexp.) For example, `"[not ['a' | 'b']]"` compiles to `/[^ab]/` `"[#digit | [#a..f]]"` compiles to `/[0-9a-f]/` - Coming soon: @@ -388,7 +400,7 @@ Ease of migration trumps any other design consideration. Without a clear, painle - Should be easy to teach - Should be quick to type (e.g. 
"between 3 and 5 times" is not a very good syntax) - Should minimize comic book cursing like `^[^#]\*$` -- Should make it easy to write literals (for example `/Yo, dawg!/` matches "Yo, dawg!" and no other string) +- Should make it easy to write literals (for example `/Yo, dawg!/` matches "Yo, dawg!" and no other string) - Should only have 1-2 "special characters" that make an expression be more than a simple literal - Should not rely on characters that need to be escaped in many use cases, e.g. `"` and `\` in most languages' string literals, `` ` `` or `$` in bash (`'` is OK because every language that allows `'` strings also allows `"` strings. Except for SQL. Sorry, SQL.) - Different things should look different: Beware of Lisp-like parenthesis-forests. @@ -398,7 +410,7 @@ Ease of migration trumps any other design consideration. Without a clear, painle # Grammar See [Grammar](/grammar.md). - + # Contributing PRs welcome. If it's a major change, maybe open a "feature suggestion" issue first suggesting the feature, get a blessing, and agree on a design. 
diff --git a/grammar.md b/grammar.md index 9ea543d..ecd9dae 100644 --- a/grammar.md +++ b/grammar.md @@ -27,4 +27,4 @@ whitespace = ~r'[ \t\r\n]+' # '=' and ':' have syntactic meaning token = ~r'[A-Za-z0-9!$%&()*+,./;<>?@\\^_`{}~-]+' range_endpoint = ~r'[A-Za-z0-9]' -``` \ No newline at end of file +``` diff --git a/ke/asm.py b/ke/asm.py index c44c50c..ece338b 100644 --- a/ke/asm.py +++ b/ke/asm.py @@ -157,7 +157,7 @@ def invert(self): class NumberRange(namedtuple("NumberRange", ["start", "end"]), Asm): def to_regex(self, flavor, wrap=False): regex = numrange.number_range_to_regex(self.start, self.end) - return self.maybe_wrap('|' in regex or wrap, regex) + return self.maybe_wrap("|" in regex or wrap, regex) START_LINE = Boundary(r"^", None) diff --git a/ke/compiler.py b/ke/compiler.py index e5e4aef..ddae709 100644 --- a/ke/compiler.py +++ b/ke/compiler.py @@ -219,18 +219,23 @@ def compile_macro(macro, macros): def compile_multi_range(range, macros): - start,end = int(range.start), int(range.end) + start, end = int(range.start), int(range.end) if start > end: raise CompileError( "MultiRange start not before range end: '%s' > '%s'" % (start, end) ) if start < 0 and end < 0: - start,end = abs(start), abs(end) + start, end = abs(start), abs(end) return asm.Concat([asm.Literal("-"), asm.NumberRange(end, start)]) if start < 0: - return asm.Either([compile_multi_range(MultiRange(start, -1), macros), compile_multi_range(MultiRange(0, end), macros)]) + return asm.Either( + [ + compile_multi_range(MultiRange(start, -1), macros), + compile_multi_range(MultiRange(0, end), macros), + ] + ) return asm.NumberRange(start, end) diff --git a/ke/numrange.py b/ke/numrange.py index 5fdf903..277e987 100644 --- a/ke/numrange.py +++ b/ke/numrange.py @@ -199,12 +199,10 @@ def number_range_to_regex(a, b): if max_a == b: return single_range_to_regex(a, b) return "|".join( - [ - single_range_to_regex(a, max_a), - number_range_to_regex(max_a + 1, b), - ] + [single_range_to_regex(a, max_a), 
number_range_to_regex(max_a + 1, b)] ) + assert number_range_to_regex(3, 3) == "3" assert number_range_to_regex(3, 4) == "[3-4]" assert number_range_to_regex(0, 9) == r"\d" diff --git a/ke/parser.py b/ke/parser.py index 05d8326..8dc2d93 100644 --- a/ke/parser.py +++ b/ke/parser.py @@ -148,7 +148,7 @@ def visit_multi_range_macro(self, multi_range_macro, data): if len(start.text) == 1 and len(end.text) == 1: return Range(start.text, end.text) return MultiRange(start.text, end.text) - + def visit_def(self, _literal, data): (macro, _eq, braces) = data return Def(macro.name, braces) diff --git a/ke/tests/test_api.py b/ke/tests/test_api.py index fb9cc64..2278b5a 100644 --- a/ke/tests/test_api.py +++ b/ke/tests/test_api.py @@ -169,25 +169,25 @@ def test_finditer(): def test_sub(): assert ( - ke.sub( - "Hi [capture 1+ #letter], what's up?", - r"\1! \1!", - "Hi Bobby, what's up? Hi Martin, what's up?", - ) - == "Bobby! Bobby! Martin! Martin!" + ke.sub( + "Hi [capture 1+ #letter], what's up?", + r"\1! \1!", + "Hi Bobby, what's up? Hi Martin, what's up?", + ) + == "Bobby! Bobby! Martin! Martin!" ) assert ( - ke.sub( - "Hi [capture:name 1+ #letter], what's up?", - r"\g<name>! \g<name>!", - "Hi Bobby, what's up? Hi Martin, what's up?", - ) - == "Bobby! Bobby! Martin! Martin!" + ke.sub( + "Hi [capture:name 1+ #letter], what's up?", + r"\g<name>! \g<name>!", + "Hi Bobby, what's up? Hi Martin, what's up?", + ) + == "Bobby! Bobby! Martin! Martin!" 
) assert ke.sub("[1+ #d]", "###", "123-45-6789", count=2) == "###-###-6789" assert ( - ke.sub("[c 1+ #d]", lambda m: m.group(1)[::-1], "123-45-6789") == "321-54-9876" + ke.sub("[c 1+ #d]", lambda m: m.group(1)[::-1], "123-45-6789") == "321-54-9876" ) with pytest.raises(IndexError): ke.sub("[1+ #d]", lambda m: m.group(1)[::-1], "123-45-6789") @@ -507,17 +507,27 @@ def test_multi_range(): def test_ip(): assert ke.match("[#start_line][3 #0..255 '.'][#0..255][#end_line]", "127.0.0.1") assert ke.match("[#start_line][3 #0..255 '.'][#0..255][#end_line]", "0.0.0.0") - assert ke.match("[#start_line][3 #0..255 '.'][#0..255][#end_line]", "255.255.255.255") + assert ke.match( + "[#start_line][3 #0..255 '.'][#0..255][#end_line]", "255.255.255.255" + ) assert ke.match("[#start_line][3 #0..99 '.'][#0..199][#end_line]", "99.89.99.199") assert ke.match("[#start_line][3 #0..9 '.'][#0..1][#end_line]", "0.5.9.1") - assert not ke.match("[#start_line][3 #0..99 '.'][#0..199][#end_line]", "99.99.99.299") + assert not ke.match( + "[#start_line][3 #0..99 '.'][#0..199][#end_line]", "99.99.99.299" + ) assert not ke.match("[#start_line][3 #0..255 '.'][#0..255][#end_line]", "256.0.0.1") - assert not ke.match("[#start_line][3 #0..255 '.'][#0..255][#end_line]", "256.256.257.260") - assert not ke.match("[#start_line][3 #0..255 '.'][#0..255][#end_line]", "2555.2555.2555.1") + assert not ke.match( + "[#start_line][3 #0..255 '.'][#0..255][#end_line]", "256.256.257.260" + ) + assert not ke.match( + "[#start_line][3 #0..255 '.'][#0..255][#end_line]", "2555.2555.2555.1" + ) - assert (ke.re("[#start_line][3 #0..255 '.'][#0..255][#end_line]") == - r"^(?:(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])$") + assert ( + ke.re("[#start_line][3 #0..255 '.'][#0..255][#end_line]") + == r"^(?:(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])\.){3}(?:\d|[1-9]\d|1\d\d|2[0-4]\d|25[0-5])$" + ) def test_escapes(): @@ -535,8 +545,8 @@ def test_escapes(): def test_define_macros(): expected = "Yo 
dawg, I heard you like Yo dawg, I heard you like this, so I put some of this in your regex so you can recurse while you recurse, so I put some Yo dawg, I heard you like this, so I put some of this in your regex so you can recurse while you recurse in your Yo dawg, I heard you like this, so I put some of this in your regex so you can recurse while you recurse so you can recurse while you recurse" assert ( - ke.re( - """[#recursive_dawg][ + ke.re( + """[#recursive_dawg][ #yo=["Yo dawg, I heard you like "] #so_i_put=[", so I put some "] #in_your=[" in your "] @@ -545,8 +555,8 @@ def test_define_macros(): #dawg=[#yo "this" #so_i_put "of this" #in_your "regex" #so_you_can "recurse" #while_you "recurse"] #recursive_dawg=[#yo #dawg #so_i_put #dawg #in_your #dawg #so_you_can "recurse" #while_you "recurse"] ]""" - ) - == expected + ) + == expected ) with pytest.raises(re.error): ke.re("[#m=['hi' #m]") @@ -584,8 +594,8 @@ def test_newlines(): ) assert ( - ke.re("[#start_string][#newline][#end_string]") - == r"\A(?:[\n\r\u2028\u2029]|\r\n)\Z" + ke.re("[#start_string][#newline][#end_string]") + == r"\A(?:[\n\r\u2028\u2029]|\r\n)\Z" ) assert_pattern( ke.compile("[#start_string][#newline][#end_string]"),