Merge pull request #11 from JarrettR/refactor

Refactor
JarrettR · Dec 18, 2021 · 664857b · 664857b
2 parents 0556635 + eb9c4b8
commit 664857b
Show file tree

Hide file tree

Showing 36 changed files with 3,888 additions and 2,197 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,6 +6,8 @@ __pycache__/
 # C extensions
 *.so
 
+*.svg
+
 # Distribution / packaging
 .Python
 build/
@@ -109,6 +111,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
+*.code-workspace
 
 # Spyder project settings
 .spyderproject

diff --git a/__init__.py b/__init__.py
@@ -1,4 +1,4 @@
 from .stretch_plugin_action import StretchPluginAction # Note the relative import!
 
 StretchPluginAction('to_svg').register() # Instantiate and register to Pcbnew
-StretchPluginAction('to_pcb').register() # Instantiate and register to Pcbnew
+StretchPluginAction('to_pcb').register() # Instantiate and register to Pcbnew
diff --git a/bs/__init__.py b/bs/__init__.py
@@ -15,13 +15,14 @@
 """
 
 __author__ = "Leonard Richardson ([email protected])"
-__version__ = "4.9.0"
+__version__ = "4.9.3"
 __copyright__ = "Copyright (c) 2004-2020 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
 
 __all__ = ['BeautifulSoup']
 
+from collections import Counter
 import os
 import re
 import sys
@@ -39,15 +40,32 @@
     NavigableString,
     PageElement,
     ProcessingInstruction,
+    PYTHON_SPECIFIC_ENCODINGS,
     ResultSet,
+    Script,
+    Stylesheet,
     SoupStrainer,
     Tag,
+    TemplateString,
     )
 
 # The very first thing we do is give a useful error if someone is
 # running this code under Python 3 without converting it.
 'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'<>'You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
 
+# Define some custom warnings.
+class GuessedAtParserWarning(UserWarning):
+    """The warning issued when BeautifulSoup has to guess what parser to
+    use -- probably because no parser was specified in the constructor.
+    """
+
+class MarkupResemblesLocatorWarning(UserWarning):
+    """The warning issued when BeautifulSoup is given 'markup' that
+    actually looks like a resource locator -- a URL or a path to a file
+    on disk.
+    """
+
+
 class BeautifulSoup(Tag):
     """A data structure representing a parsed HTML or XML document.
 
@@ -93,7 +111,7 @@ class BeautifulSoup(Tag):
     ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
 
     NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
-
+    
     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  element_classes=None, **kwargs):
@@ -235,7 +253,9 @@ def deprecated_argument(old_name, new_name):
             if not original_builder and not (
                     original_features == builder.NAME or
                     original_features in builder.ALTERNATE_NAMES
-            ):
+            ) and markup:
+                # The user did not tell us which TreeBuilder to use,
+                # and we had to guess. Issue a warning.
                 if builder.is_xml:
                     markup_type = "XML"
                 else:
@@ -269,7 +289,10 @@ def deprecated_argument(old_name, new_name):
                         parser=builder.NAME,
                         markup_type=markup_type
                     )
-                    warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % values, stacklevel=2)
+                    warnings.warn(
+                        self.NO_PARSER_SPECIFIED_WARNING % values,
+                        GuessedAtParserWarning, stacklevel=2
+                    )
         else:
             if kwargs:
                 warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.")
@@ -309,7 +332,8 @@ def deprecated_argument(old_name, new_name):
                 warnings.warn(
                     '"%s" looks like a filename, not markup. You should'
                     ' probably open this file and pass the filehandle into'
-                    ' Beautiful Soup.' % self._decode_markup(markup)
+                    ' Beautiful Soup.' % self._decode_markup(markup),
+                    MarkupResemblesLocatorWarning
                 )
             self._check_markup_is_url(markup)
 
@@ -396,7 +420,8 @@ def _check_markup_is_url(cls, markup):
                     ' requests to get the document behind the URL, and feed'
                     ' that document to Beautiful Soup.' % cls._decode_markup(
                         markup
-                    )
+                    ),
+                    MarkupResemblesLocatorWarning
                 )
 
     def _feed(self):
@@ -422,13 +447,28 @@ def reset(self):
         self.current_data = []
         self.currentTag = None
         self.tagStack = []
+        self.open_tag_counter = Counter()
         self.preserve_whitespace_tag_stack = []
         self.string_container_stack = []
         self.pushTag(self)
 
     def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
                 sourceline=None, sourcepos=None, **kwattrs):
-        """Create a new Tag associated with this BeautifulSoup object."""
+        """Create a new Tag associated with this BeautifulSoup object.
+
+        :param name: The name of the new Tag.
+        :param namespace: The URI of the new Tag's XML namespace, if any.
+        :param prefix: The prefix for the new Tag's XML namespace, if any.
+        :param attrs: A dictionary of this Tag's attribute values; can
+            be used instead of `kwattrs` for attributes like 'class'
+            that are reserved words in Python.
+        :param sourceline: The line number where this tag was
+            (purportedly) found in its source document.
+        :param sourcepos: The character position within `sourceline` where this
+            tag was (purportedly) found.
+        :param kwattrs: Keyword arguments for the new Tag's attribute values.
+
+        """
         kwattrs.update(attrs)
         return self.element_classes.get(Tag, Tag)(
             None, self.builder, name, namespace, nsprefix, kwattrs,
@@ -458,13 +498,13 @@ def new_string(self, s, subclass=None):
         container = self.string_container(subclass)
         return container(s)
 
-    def insert_before(self, successor):
+    def insert_before(self, *args):
         """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
         it because there is nothing before or after it in the parse tree.
         """
         raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
 
-    def insert_after(self, successor):
+    def insert_after(self, *args):
         """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
         it because there is nothing before or after it in the parse tree.
         """
@@ -473,22 +513,26 @@ def insert_after(self, successor):
     def popTag(self):
         """Internal method called by _popToTag when a tag is closed."""
         tag = self.tagStack.pop()
+        if tag.name in self.open_tag_counter:
+            self.open_tag_counter[tag.name] -= 1
         if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
             self.preserve_whitespace_tag_stack.pop()
         if self.string_container_stack and tag == self.string_container_stack[-1]:
             self.string_container_stack.pop()
-        #print "Pop", tag.name
+        #print("Pop", tag.name)
         if self.tagStack:
             self.currentTag = self.tagStack[-1]
         return self.currentTag
 
     def pushTag(self, tag):
         """Internal method called by handle_starttag when a tag is opened."""
-        #print "Push", tag.name
+        #print("Push", tag.name)
         if self.currentTag is not None:
             self.currentTag.contents.append(tag)
         self.tagStack.append(tag)
         self.currentTag = self.tagStack[-1]
+        if tag.name != self.ROOT_TAG_NAME:
+            self.open_tag_counter[tag.name] += 1
         if tag.name in self.builder.preserve_whitespace_tags:
             self.preserve_whitespace_tag_stack.append(tag)
         if tag.name in self.builder.string_containers:
@@ -599,15 +643,19 @@ def _linkage_fixer(self, el):
 
     def _popToTag(self, name, nsprefix=None, inclusivePop=True):
         """Pops the tag stack up to and including the most recent
-        instance of the given tag. 
+        instance of the given tag.
+
+        If there are no open tags with the given name, nothing will be
+        popped.
 
         :param name: Pop up to the most recent tag with this name.
         :param nsprefix: The namespace prefix that goes with `name`.
         :param inclusivePop: It this is false, pops the tag stack up
           to but *not* including the most recent instqance of the
           given tag.
+
         """
-        #print "Popping to %s" % name
+        #print("Popping to %s" % name)
         if name == self.ROOT_TAG_NAME:
             # The BeautifulSoup object itself can never be popped.
             return
@@ -616,6 +664,8 @@ def _popToTag(self, name, nsprefix=None, inclusivePop=True):
 
         stack_size = len(self.tagStack)
         for i in range(stack_size - 1, 0, -1):
+            if not self.open_tag_counter.get(name):
+                break
             t = self.tagStack[i]
             if (name == t.name and nsprefix == t.prefix):
                 if inclusivePop:
@@ -642,7 +692,7 @@ def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
         in the document. For instance, if this was a self-closing tag,
         don't call handle_endtag.
         """
-        # print "Start tag %s: %s" % (name, attrs)
+        # print("Start tag %s: %s" % (name, attrs))
         self.endData()
 
         if (self.parse_only and len(self.tagStack) <= 1
@@ -669,14 +719,14 @@ def handle_endtag(self, name, nsprefix=None):
         :param name: Name of the tag.
         :param nsprefix: Namespace prefix for the tag.
         """
-        #print "End tag: " + name
+        #print("End tag: " + name)
         self.endData()
         self._popToTag(name, nsprefix)
 
     def handle_data(self, data):
         """Called by the tree builder when a chunk of textual data is encountered."""
         self.current_data.append(data)
-
+       
     def decode(self, pretty_print=False,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
                formatter="minimal"):
@@ -691,6 +741,11 @@ def decode(self, pretty_print=False,
         if self.is_xml:
             # Print the XML declaration
             encoding_part = ''
+            if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS:
+                # This is a special Python encoding; it can't actually
+                # go into an XML document because it means nothing
+                # outside of Python.
+                eventual_encoding = None
             if eventual_encoding != None:
                 encoding_part = ' encoding="%s"' % eventual_encoding
             prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
@@ -733,4 +788,4 @@ class FeatureNotFound(ValueError):
 if __name__ == '__main__':
     import sys
     soup = BeautifulSoup(sys.stdin)
-    print soup.prettify()
+    print(soup.prettify())
diff --git a/bs/builder/__init__.py b/bs/builder/__init__.py
@@ -334,11 +334,11 @@ def close(self):
 
     def startElement(self, name, attrs):
         attrs = dict((key[1], value) for key, value in list(attrs.items()))
-        #print "Start %s, %r" % (name, attrs)
+        #print("Start %s, %r" % (name, attrs))
         self.soup.handle_starttag(name, attrs)
 
     def endElement(self, name):
-        #print "End %s" % name
+        #print("End %s" % name)
         self.soup.handle_endtag(name)
 
     def startElementNS(self, nsTuple, nodeName, attrs):
@@ -476,8 +476,7 @@ def set_up_substitutions(self, tag):
 
 def register_treebuilders_from(module):
     """Copy TreeBuilders from the given module into this module."""
-    # I'm fairly sure this is not the best way to do this.
-    this_module = sys.modules['bs4.builder']
+    this_module = sys.modules[__name__]
     for name in module.__all__:
         obj = getattr(module, name)
 

diff --git a/bs/builder/_html5lib.py b/bs/builder/_html5lib.py
@@ -375,9 +375,9 @@ def removeChild(self, node):
 
     def reparentChildren(self, new_parent):
         """Move all of this tag's children into another tag."""
-        # print "MOVE", self.element.contents
-        # print "FROM", self.element
-        # print "TO", new_parent.element
+        # print("MOVE", self.element.contents)
+        # print("FROM", self.element)
+        # print("TO", new_parent.element)
 
         element = self.element
         new_parent_element = new_parent.element
@@ -435,9 +435,9 @@ def reparentChildren(self, new_parent):
         element.contents = []
         element.next_element = final_next_element
 
-        # print "DONE WITH MOVE"
-        # print "FROM", self.element
-        # print "TO", new_parent_element
+        # print("DONE WITH MOVE")
+        # print("FROM", self.element)
+        # print("TO", new_parent_element)
 
     def cloneNode(self):
         tag = self.soup.new_tag(self.element.name, self.namespace)