From 7dd9a1ab32c062e3784c793d6fc39d3279be750e Mon Sep 17 00:00:00 2001
From: Alex Younger <thealexyounger@proton.me>
Date: Mon, 16 Dec 2024 20:44:51 -0600
Subject: [PATCH] feat: improve parsing performance from o(n^2) to o(n)

The function process_tag was previously concatenating strings
inside of a loop. Each + operation creates a new string,
resulting in repeated copying of already accumulated data. By
replacing this with an array we append to, and then joining this
array at the end, we can take this from an exponential function
to a linear function.
---
 .gitignore              |  1 +
 markdownify/__init__.py | 26 +++++++++++++++++++-------
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index 18d16bf..7eb029d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@
 build/
 .vscode/settings.json
 .tox/
+.python-version
\ No newline at end of file
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 3272ce5..5bd6f84 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -126,7 +126,7 @@ def convert_soup(self, soup):
         return self.process_tag(soup, convert_as_inline=False, children_only=True)
 
     def process_tag(self, node, convert_as_inline, children_only=False):
-        text = ''
+        text_parts = []
 
         # markdown headings or cells can't include
         # block elements (elements w/newlines)
@@ -160,22 +160,34 @@ def process_tag(self, node, convert_as_inline, children_only=False):
             if isinstance(el, Comment) or isinstance(el, Doctype):
                 continue
             elif isinstance(el, NavigableString):
-                text += self.process_text(el)
+                text_parts.append(self.process_text(el))
             else:
-                text_strip = text.rstrip('\n')
-                newlines_left = len(text) - len(text_strip)
+                # Handle the case when text_parts is not empty
+                if text_parts:
+                    text_strip = text_parts[-1].rstrip('\n')
+                    newlines_left = len(text_parts[-1]) - len(text_strip)
+                else:
+                    text_strip = ''
+                    newlines_left = 0
+
                 next_text = self.process_tag(el, convert_children_as_inline)
                 next_text_strip = next_text.lstrip('\n')
                 newlines_right = len(next_text) - len(next_text_strip)
                 newlines = '\n' * max(newlines_left, newlines_right)
-                text = text_strip + newlines + next_text_strip
+                # Modify how we add the new text
+                if text_parts:
+                    text_parts[-1] = text_strip
+                text_parts.append(newlines + next_text_strip)
 
         if not children_only:
             convert_fn = getattr(self, 'convert_%s' % node.name, None)
             if convert_fn and self.should_convert_tag(node.name):
-                text = convert_fn(node, text, convert_as_inline)
+                # Join the text parts before passing to convert_fn
+                text_parts_str = ''.join(text_parts)
+                text_parts = [convert_fn(node, text_parts_str, convert_as_inline)]
 
-        return text
+        # Return the joined text parts
+        return ''.join(text_parts)
 
     def process_text(self, el):
         text = six.text_type(el) or ''