diff -urN bs4_4.4_official/AUTHORS.txt sigil_bs4/AUTHORS.txt
--- bs4_4.4_official/AUTHORS.txt	1969-12-31 19:00:00.000000000 -0500
+++ sigil_bs4/AUTHORS.txt	2015-10-06 00:16:22.000000000 -0400
@@ -0,0 +1,43 @@
+Behold, mortal, the origins of Beautiful Soup...
+================================================
+
+Leonard Richardson is the primary programmer.
+
+Aaron DeVore is awesome.
+
+Mark Pilgrim provided the encoding detection code that forms the base
+of UnicodeDammit.
+
+Thomas Kluyver and Ezio Melotti finished the work of getting Beautiful
+Soup 4 working under Python 3.
+
+Simon Willison wrote soupselect, which was used to make Beautiful Soup
+support CSS selectors.
+
+Sam Ruby helped with a lot of edge cases.
+
+Jonathan Ellis was awarded the prestigious Beau Potage D'Or for his
+work in solving the nestable tags conundrum.
+
+An incomplete list of people who have contributed patches to Beautiful
+Soup:
+
+ Istvan Albert, Andrew Lin, Anthony Baxter, Andrew Boyko, Tony Chang,
+ Zephyr Fang, Fuzzy, Roman Gaufman, Yoni Gilad, Richie Hindle, Peteris
+ Krumins, Kent Johnson, Ben Last, Robert Leftwich, Staffan Malmgren,
+ Ksenia Marasanova, JP Moins, Adam Monsen, John Nagle, "Jon", Ed
+ Oskiewicz, Greg Phillips, Giles Radford, Arthur Rudolph, Marko
+ Samastur, Jouni Seppänen, Alexander Schmolck, Andy Theyers, Glyn
+ Webster, Paul Wright, Danny Yoo
+
+An incomplete list of people who made suggestions or found bugs or
+found ways to break Beautiful Soup:
+
+ Hanno Böck, Matteo Bertini, Chris Curvey, Simon Cusack, Bruce Eckel,
+ Matt Ernst, Michael Foord, Tom Harris, Bill de hOra, Donald Howes,
+ Matt Patterson, Scott Roberts, Steve Strassmann, Mike Williams,
+ warchild at redho dot com, Sami Kuisma, Carlos Rocha, Bob Hutchison,
+ Joren Mc, Michal Migurski, John Kleven, Tim Heaney, Tripp Lilley, Ed
+ Summers, Dennis Sutch, Chris Smith, Aaron Sweep^W Swartz, Stuart
+ Turner, Greg Edwards, Kevin J Kalupson, Nikos Kouremenos, Artur de
+ Sousa Rocha, Yichun Wei, Per Vognsen
diff -urN bs4_4.4_official/COPYING.txt sigil_bs4/COPYING.txt
--- bs4_4.4_official/COPYING.txt	1969-12-31 19:00:00.000000000 -0500
+++ sigil_bs4/COPYING.txt	2015-10-06 00:16:22.000000000 -0400
@@ -0,0 +1,26 @@
+Beautiful Soup is made available under the MIT license:
+
+ Copyright (c) 2004-2012 Leonard Richardson
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE, DAMMIT.
+
+Beautiful Soup incorporates code from the html5lib library, which is
+also made available under the MIT license.
diff -urN bs4_4.4_official/__init__.py sigil_bs4/__init__.py
--- bs4_4.4_official/__init__.py	2015-09-05 19:23:38.000000000 -0400
+++ sigil_bs4/__init__.py	2015-12-06 10:47:08.000000000 -0500
@@ -16,6 +16,21 @@
 http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import sys
+PY3 = sys.version_info[0] == 3
+if PY3:
+    text_type = str
+    binary_type = bytes
+    basestring = str
+    unicode = str
+else:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+
+
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
 __version__ = "4.4.0"
 __copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
@@ -27,6 +42,9 @@
 import re
 import warnings
 
+def _remove_xml_header(data):  # drop only the first <?xml ...?> declaration; keep PIs such as <?xml-stylesheet ...?>
+    return re.sub(r'<\s*\?xml(?![\w:.-])[^\?>]*\?*>\s*', '', data, count=1, flags=re.I)
+
 from .builder import builder_registry, ParserRejectedMarkup
 from .dammit import UnicodeDammit
 from .element import (
@@ -144,7 +162,7 @@
 
         if builder is None:
             original_features = features
-            if isinstance(features, str):
+            if isinstance(features, basestring):
                 features = [features]
             if features is None or len(features) == 0:
                 features = self.DEFAULT_BUILDER_FEATURES
@@ -178,7 +196,7 @@
             # involving passing non-markup to Beautiful Soup.
             # Beautiful Soup will still parse the input as markup,
             # just in case that's what the user really wants.
-            if (isinstance(markup, str)
+            if (isinstance(markup, unicode)
                 and not os.path.supports_unicode_filenames):
                 possible_filename = markup.encode("utf8")
             else:
@@ -192,16 +210,16 @@
                 # system. Just let it go.
                 pass
             if is_file:
-                if isinstance(markup, str):
+                if isinstance(markup, unicode):
                     markup = markup.encode("utf8")
                 warnings.warn(
                     '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
             if markup[:5] == "http:" or markup[:6] == "https:":
                 # TODO: This is ugly but I couldn't get it to work in
                 # Python 3 otherwise.
-                if ((isinstance(markup, bytes) and not b' ' in markup)
-                    or (isinstance(markup, str) and not ' ' in markup)):
-                    if isinstance(markup, str):
+                if ((isinstance(markup, binary_type) and not b' ' in markup)
+                    or (isinstance(markup, unicode) and not ' ' in markup)):
+                    if isinstance(markup, unicode):
                         markup = markup.encode("utf8")
                     warnings.warn(
                         '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
@@ -420,7 +438,7 @@
 
     def decode(self, pretty_print=False,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", indent_chars=" "):
         """Returns a string or Unicode representation of this document.
         To get Unicode, pass None for encoding."""
 
@@ -437,7 +455,55 @@
         else:
             indent_level = 0
         return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding, formatter)
+            indent_level, eventual_encoding, formatter, indent_chars)
+
+    def decodexml(self, indent_level=0, eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+               formatter="minimal", indent_chars=" "):
+        """Return this document serialized by the parent class's decodexml(),
+        prefixed with a freshly generated XML declaration.
+
+        eventual_encoding is written into the declaration unless it is None;
+        the remaining arguments are passed through to the parent unchanged."""
+        # generate a correct xml header declaration
+        encoding_part = ''
+        if eventual_encoding is not None:
+            encoding_part = ' encoding="%s"' % eventual_encoding
+        newsource = super(BeautifulSoup, self).decodexml(
+            indent_level, eventual_encoding, formatter, indent_chars)
+        # drop any existing xml declaration: its encoding may now be incorrect
+        if newsource.startswith('<?xml '):
+            newsource = _remove_xml_header(newsource)
+        return '<?xml version="1.0"%s ?>\n' % encoding_part + newsource
+
+    def serialize_xhtml(self, eventual_encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"):
+        """Return the parent class's serialize_xhtml() output behind a fresh XML
+        declaration, with trailing whitespace stripped.
+
+        The declaration names eventual_encoding unless that argument is None."""
+        encoding_part = ''
+        if eventual_encoding is not None:
+            encoding_part = ' encoding="%s"' % eventual_encoding
+        newsource = super(BeautifulSoup, self).serialize_xhtml(eventual_encoding, formatter)
+        # drop any existing xml declaration: its encoding may now be incorrect
+        if newsource.startswith('<?xml '):
+            newsource = _remove_xml_header(newsource)
+        return ('<?xml version="1.0"%s ?>\n' % encoding_part + newsource).rstrip()
+
+    def prettyprint_xhtml(self, indent_level=0, eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+                          formatter="minimal", indent_chars=" "):
+        """Return the parent class's prettyprint_xhtml() output behind a fresh
+        XML declaration, with trailing whitespace stripped.
+
+        The declaration names eventual_encoding unless that argument is None."""
+        encoding_part = ''
+        if eventual_encoding is not None:
+            encoding_part = ' encoding="%s"' % eventual_encoding
+        newsource = super(BeautifulSoup, self).prettyprint_xhtml(indent_level, eventual_encoding, formatter, indent_chars)
+        # drop any existing xml declaration: its encoding may now be incorrect
+        if newsource.startswith('<?xml '):
+            newsource = _remove_xml_header(newsource)
+        return ('<?xml version="1.0"%s ?>\n' % encoding_part + newsource).rstrip()
+
 
 # Alias to make it easier to type import: 'from bs4 import _soup'
 _s = BeautifulSoup
diff -urN bs4_4.4_official/builder/__init__.py sigil_bs4/builder/__init__.py
--- bs4_4.4_official/builder/__init__.py	2015-09-05 19:23:39.000000000 -0400
+++ sigil_bs4/builder/__init__.py	2015-10-06 00:16:22.000000000 -0400
@@ -1,7 +1,21 @@
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import sys
+PY3 = sys.version_info[0] == 3
+if PY3:
+    text_type = str
+    binary_type = bytes
+    unicode = str
+    basestring = str
+else:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+    chr = unichr
+
 from collections import defaultdict
 import itertools
-import sys
-from bs4.element import (
+from sigil_bs4.element import (
     CharsetMetaAttributeValue,
     ContentMetaAttributeValue,
     whitespace_re
@@ -162,7 +176,7 @@
                     # value is a whitespace-separated list of
                     # values. Split it into a list.
                     value = attrs[attr]
-                    if isinstance(value, str):
+                    if isinstance(value, basestring):
                         values = whitespace_re.split(value)
                     else:
                         # html5lib sometimes calls setAttributes twice
@@ -291,7 +305,7 @@
 def register_treebuilders_from(module):
     """Copy TreeBuilders from the given module into this module."""
     # I'm fairly sure this is not the best way to do this.
-    this_module = sys.modules['bs4.builder']
+    this_module = sys.modules['sigil_bs4.builder']
     for name in module.__all__:
         obj = getattr(module, name)
 
diff -urN bs4_4.4_official/builder/_html5lib.py sigil_bs4/builder/_html5lib.py
--- bs4_4.4_official/builder/_html5lib.py	2015-09-05 19:23:39.000000000 -0400
+++ sigil_bs4/builder/_html5lib.py	2015-10-06 00:16:22.000000000 -0400
@@ -1,22 +1,37 @@
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import sys
+PY3 = sys.version_info[0] == 3
+if PY3:
+    text_type = str
+    binary_type = bytes
+    unicode = str
+    basestring = str
+else:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+    chr = unichr
+
 __all__ = [
     'HTML5TreeBuilder',
     ]
 
 from pdb import set_trace
 import warnings
-from bs4.builder import (
+from sigil_bs4.builder import (
     PERMISSIVE,
     HTML,
     HTML_5,
     HTMLTreeBuilder,
     )
-from bs4.element import (
+from sigil_bs4.element import (
     NamespacedAttribute,
     whitespace_re,
 )
 import html5lib
 from html5lib.constants import namespaces
-from bs4.element import (
+from sigil_bs4.element import (
     Comment,
     Doctype,
     NavigableString,
@@ -50,7 +65,7 @@
         doc = parser.parse(markup, encoding=self.user_specified_encoding)
 
         # Set the character encoding detected by the tokenizer.
-        if isinstance(markup, str):
+        if isinstance(markup, unicode):
             # We need to special-case this because html5lib sets
             # charEncoding to UTF-8 if it gets Unicode input.
             doc.original_encoding = None
@@ -143,7 +158,7 @@
 
     def appendChild(self, node):
         string_child = child = None
-        if isinstance(node, str):
+        if isinstance(node, basestring):
             # Some other piece of code decided to pass in a string
             # instead of creating a TextElement object to contain the
             # string.
@@ -158,7 +173,7 @@
         else:
             child = node.element
 
-        if not isinstance(child, str) and child.parent is not None:
+        if not isinstance(child, basestring) and child.parent is not None:
             node.element.extract()
 
         if (string_child and self.element.contents
@@ -171,7 +186,7 @@
             old_element.replace_with(new_element)
             self.soup._most_recent_element = new_element
         else:
-            if isinstance(node, str):
+            if isinstance(node, basestring):
                 # Create a brand new NavigableString from this string.
                 child = self.soup.new_string(node)
 
diff -urN bs4_4.4_official/builder/_htmlparser.py sigil_bs4/builder/_htmlparser.py
--- bs4_4.4_official/builder/_htmlparser.py	2015-09-05 19:23:39.000000000 -0400
+++ sigil_bs4/builder/_htmlparser.py	2015-10-06 00:16:22.000000000 -0400
@@ -1,20 +1,43 @@
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import sys
+PY3 = sys.version_info[0] == 3
+if PY3:
+    text_type = str
+    binary_type = bytes
+    unicode = str
+    basestring = str
+else:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+    chr = unichr
+
 """Use the HTMLParser library to parse HTML files that aren't too bad."""
 
 __all__ = [
     'HTMLParserTreeBuilder',
     ]
 
-from html.parser import HTMLParser
+if PY3:
+    from html.parser import HTMLParser
+    try:
+        from html.parser import HTMLParseError
+    except ImportError as e:
+        # HTMLParseError is removed in Python 3.5. Since it can never be
+        # thrown in 3.5, we can just define our own class as a placeholder.
+        class HTMLParseError(Exception):
+            pass
+else:
+    from HTMLParser import HTMLParser
+    try:
+        from HTMLParser import HTMLParseError
+    except ImportError as e:
+        # HTMLParseError is removed in Python 3.5. Since it can never be
+        # thrown in 3.5, we can just define our own class as a placeholder.
+        class HTMLParseError(Exception):
+            pass
 
-try:
-    from html.parser import HTMLParseError
-except ImportError as e:
-    # HTMLParseError is removed in Python 3.5. Since it can never be
-    # thrown in 3.5, we can just define our own class as a placeholder.
-    class HTMLParseError(Exception):
-        pass
-
-import sys
 import warnings
 
 # Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
@@ -30,16 +53,16 @@
 CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
 
 
-from bs4.element import (
+from sigil_bs4.element import (
     CData,
     Comment,
     Declaration,
     Doctype,
     ProcessingInstruction,
     )
-from bs4.dammit import EntitySubstitution, UnicodeDammit
+from sigil_bs4.dammit import EntitySubstitution, UnicodeDammit
 
-from bs4.builder import (
+from sigil_bs4.builder import (
     HTML,
     HTMLTreeBuilder,
     STRICT,
@@ -145,7 +168,7 @@
         declared within markup, whether any characters had to be
         replaced with REPLACEMENT CHARACTER).
         """
-        if isinstance(markup, str):
+        if isinstance(markup, unicode):
             yield (markup, None, None, False)
             return
 
diff -urN bs4_4.4_official/builder/_lxml.py sigil_bs4/builder/_lxml.py
--- bs4_4.4_official/builder/_lxml.py	2015-09-05 19:23:39.000000000 -0400
+++ sigil_bs4/builder/_lxml.py	2015-10-06 00:16:22.000000000 -0400
@@ -1,19 +1,36 @@
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import sys
+PY3 = sys.version_info[0] == 3
+if PY3:
+    text_type = str
+    binary_type = bytes
+    unicode = str
+else:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+
 __all__ = [
     'LXMLTreeBuilderForXML',
     'LXMLTreeBuilder',
     ]
 
 from io import BytesIO
-from io import StringIO
+if PY3:
+    from io import StringIO
+else:
+    from StringIO import StringIO
+
 import collections
 from lxml import etree
-from bs4.element import (
+from sigil_bs4.element import (
     Comment,
     Doctype,
     NamespacedAttribute,
     ProcessingInstruction,
 )
-from bs4.builder import (
+from sigil_bs4.builder import (
     FAST,
     HTML,
     HTMLTreeBuilder,
@@ -21,7 +38,7 @@
     ParserRejectedMarkup,
     TreeBuilder,
     XML)
-from bs4.dammit import EncodingDetector
+from sigil_bs4.dammit import EncodingDetector
 
 LXML = 'lxml'
 
@@ -87,12 +104,12 @@
 
         Each 4-tuple represents a strategy for parsing the document.
         """
-        if isinstance(markup, str):
+        if isinstance(markup, unicode):
             # We were given Unicode. Maybe lxml can parse Unicode on
             # this system?
             yield markup, None, document_declared_encoding, False
 
-        if isinstance(markup, str):
+        if isinstance(markup, unicode):
             # No, apparently not. Convert the Unicode to UTF-8 and
             # tell lxml to parse it as UTF-8.
             yield (markup.encode("utf8"), "utf8",
@@ -112,7 +129,7 @@
     def feed(self, markup):
         if isinstance(markup, bytes):
             markup = BytesIO(markup)
-        elif isinstance(markup, str):
+        elif isinstance(markup, unicode):
             markup = StringIO(markup)
 
         # Call feed() at least once, even if the markup is empty,
@@ -137,24 +154,29 @@
         # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
         attrs = dict(attrs)
         nsprefix = None
+
+        # Fix bug in bs4 _lxml.py that ignores attributes that specify namespaces on this tag
+
         # Invert each namespace map as it comes in.
-        if len(self.nsmaps) > 1:
-            # There are no new namespaces for this tag, but
-            # non-default namespaces are in play, so we need a
-            # separate tag stack to know when they end.
-            self.nsmaps.append(None)
-        elif len(nsmap) > 0:
+        if len(nsmap) > 0:
             # A new namespace mapping has come into play.
             inverted_nsmap = dict((value, key) for key, value in list(nsmap.items()))
             self.nsmaps.append(inverted_nsmap)
+        
             # Also treat the namespace mapping as a set of attributes on the
-            # tag, so we can recreate it later.
+            # tag, so we can properly recreate it later.
             attrs = attrs.copy()
             for prefix, namespace in list(nsmap.items()):
                 attribute = NamespacedAttribute(
                     "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
                 attrs[attribute] = namespace
 
+        elif len(self.nsmaps) > 1:
+            # There are no new namespaces for this tag, but
+            # non-default namespaces are in play, so we need a
+            # separate tag stack to know when they end.
+            self.nsmaps.append(None)
+
         # Namespaces are in play. Find any attributes that came in
         # from lxml with namespaces attached to their names, and
         # turn then into NamespacedAttribute objects.
@@ -164,16 +186,16 @@
             if namespace is None:
                 new_attrs[attr] = value
             else:
-                nsprefix = self._prefix_for_namespace(namespace)
+                nsprefix = self._prefix_for_attr_namespace(namespace)
                 attr = NamespacedAttribute(nsprefix, attr, namespace)
                 new_attrs[attr] = value
         attrs = new_attrs
 
         namespace, name = self._getNsTag(name)
-        nsprefix = self._prefix_for_namespace(namespace)
+        nsprefix = self._prefix_for_tag_namespace(namespace)
         self.soup.handle_starttag(name, namespace, nsprefix, attrs)
 
-    def _prefix_for_namespace(self, namespace):
+    def _prefix_for_attr_namespace(self, namespace):
         """Find the currently active prefix for the given namespace."""
         if namespace is None:
             return None
@@ -182,16 +204,28 @@
                 return inverted_nsmap[namespace]
         return None
 
+    # Keep tag prefixes as simple as possible: when several prefixes map to the
+    # namespace and one of them is None (the default namespace), prefer None.
+    # This happens when a namespace prefix added for an attribute duplicates an
+    # earlier namespace that was declared for tags with its prefix set to None.
+    def _prefix_for_tag_namespace(self, namespace):
+        """Return the prefix to use on a tag in the given namespace, or None."""
+        if namespace is None:
+            return None
+        candidates = [
+            mapping[namespace] for mapping in self.nsmaps
+            if mapping is not None and namespace in mapping
+        ]
+        if not candidates or None in candidates:
+            return None
+        # otherwise return the last (most recent) viable prefix
+        return candidates[-1]
+
     def end(self, name):
         self.soup.endData()
         completed_tag = self.soup.tagStack[-1]
         namespace, name = self._getNsTag(name)
-        nsprefix = None
-        if namespace is not None:
-            for inverted_nsmap in reversed(self.nsmaps):
-                if inverted_nsmap is not None and namespace in inverted_nsmap:
-                    nsprefix = inverted_nsmap[namespace]
-                    break
+        nsprefix = self._prefix_for_tag_namespace(namespace)
         self.soup.handle_endtag(name, nsprefix)
         if len(self.nsmaps) > 1:
             # This tag, or one of its parents, introduced a namespace
diff -urN bs4_4.4_official/dammit.py sigil_bs4/dammit.py
--- bs4_4.4_official/dammit.py	2015-09-05 19:23:38.000000000 -0400
+++ sigil_bs4/dammit.py	2015-12-06 10:47:08.000000000 -0500
@@ -7,9 +7,27 @@
 XML or HTML to reflect a new encoding; that's the tree builder's job.
 """
 
+from __future__ import unicode_literals, print_function
+
+import sys
+PY3 = sys.version_info[0] == 3
+if PY3:
+    text_type = str
+    binary_type = bytes
+    unicode = str
+    basestring = str
+else:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+    chr = unichr
+
 from pdb import set_trace
 import codecs
-from html.entities import codepoint2name
+if PY3:
+    from html.entities import codepoint2name
+else:
+    from htmlentitydefs import codepoint2name
 import re
 import logging
 import string
@@ -77,13 +95,16 @@
         "&": "amp",
         "<": "lt",
         ">": "gt",
+        "\u00a0" : "#160",
         }
 
-    BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
+    BARE_AMPERSAND_OR_BRACKET = re.compile(r"([<>\u00a0]|"
                                            "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
                                            ")")
 
-    AMPERSAND_OR_BRACKET = re.compile("([<>&])")
+    IS_ENTITY = re.compile("(&#\d+;|&#x[0-9a-fA-F]+;|&\w+;)")
+
+    AMPERSAND_OR_BRACKET = re.compile(r"([<>&\u00a0])")
 
     @classmethod
     def _substitute_html_entity(cls, matchobj):
@@ -116,22 +137,33 @@
 
           Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's bar&quot;
         """
+        """
+          Robustness fix for bs4
+
+          But many other downstream processors of both html and xml 
+          really don't deal well with single quotes instead of the more
+          standard double-quotes.  So simply replace them with their xml 
+          entity regardless
+        """
+
         quote_with = '"'
         if '"' in value:
-            if "'" in value:
-                # The string contains both single and double
-                # quotes.  Turn the double quotes into
-                # entities. We quote the double quotes rather than
-                # the single quotes because the entity name is
-                # "&quot;" whether this is HTML or XML.  If we
-                # quoted the single quotes, we'd have to decide
-                # between &apos; and &squot;.
-                replace_with = "&quot;"
-                value = value.replace('"', replace_with)
-            else:
-                # There are double quotes but no single quotes.
-                # We can use single quotes to quote the attribute.
-                quote_with = "'"
+            # if "'" in value:
+            #     # The string contains both single and double
+            #     # quotes.  Turn the double quotes into
+            #     # entities. We quote the double quotes rather than
+            #     # the single quotes because the entity name is
+            #     # "&quot;" whether this is HTML or XML.  If we
+            #     # quoted the single quotes, we'd have to decide
+            #     # between &apos; and &squot;.
+            #     replace_with = "&quot;"
+            #     value = value.replace('"', replace_with)
+            # else:
+            #     # There are double quotes but no single quotes.
+            #     # We can use single quotes to quote the attribute.
+            #     quote_with = "'"
+            replace_with = "&quot;"
+            value = value.replace('"', replace_with)
         return quote_with + value + quote_with
 
     @classmethod
@@ -189,8 +221,16 @@
         character with "&eacute;" will make it more readable to some
         people.
         """
-        return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
-            cls._substitute_html_entity, s)
+        # ignore already existing entities
+        pieces = cls.IS_ENTITY.split(s)
+        for i in range(0,len(pieces),2):
+            piece = pieces[i]
+            pieces[i] = cls.CHARACTER_TO_HTML_ENTITY_RE.sub(cls._substitute_html_entity, piece)
+        return "".join(pieces)
+
+        # return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
+        #     cls._substitute_html_entity, s)
+
 
 
 class EncodingDetector:
@@ -272,7 +312,7 @@
     def strip_byte_order_mark(cls, data):
         """If a byte-order mark is present, strip it and return the encoding it implies."""
         encoding = None
-        if isinstance(data, str):
+        if isinstance(data, text_type):
             # Unicode data cannot have a byte-order mark.
             return data, encoding
         if (len(data) >= 4) and (data[:2] == b'\xfe\xff') \
@@ -350,16 +390,15 @@
             markup, override_encodings, is_html, exclude_encodings)
 
         # Short-circuit if the data is in Unicode to begin with.
-        if isinstance(markup, str) or markup == '':
+        if isinstance(markup, text_type) or markup == b'':
             self.markup = markup
-            self.unicode_markup = str(markup)
+            self.unicode_markup = unicode(markup)
             self.original_encoding = None
             return
 
         # The encoding detector may have stripped a byte-order mark.
         # Use the stripped markup from this point on.
         self.markup = self.detector.markup
-
         u = None
         for encoding in self.detector.encodings:
             markup = self.detector.markup
@@ -420,22 +459,22 @@
             markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
 
         try:
-            #print "Trying to convert document to %s (errors=%s)" % (
-            #    proposed, errors)
+            # print("Trying to convert document to %s (errors=%s)" % (proposed, errors))
             u = self._to_unicode(markup, proposed, errors)
             self.markup = u
             self.original_encoding = proposed
         except Exception as e:
-            #print "That didn't work!"
-            #print e
+            # print("That didn't work!")
+            # print(e)
             return None
-        #print "Correct encoding: %s" % proposed
+        # print("Correct encoding: %s" % proposed)
         return self.markup
 
     def _to_unicode(self, data, encoding, errors="strict"):
         '''Given a string and its encoding, decodes the string into Unicode.
         %encoding is a string recognized by encodings.aliases'''
-        return str(data, encoding, errors)
+        return unicode(data, encoding, errors)
+            
 
     @property
     def declared_html_encoding(self):
diff -urN bs4_4.4_official/diagnose.py sigil_bs4/diagnose.py
--- bs4_4.4_official/diagnose.py	2015-09-05 19:23:38.000000000 -0400
+++ sigil_bs4/diagnose.py	2015-12-06 10:47:08.000000000 -0500
@@ -1,10 +1,28 @@
 """Diagnostic functions, mainly for use when doing tech support."""
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import sys
+PY3 = sys.version_info[0] == 3
+if PY3:
+    text_type = str
+    binary_type = bytes
+else:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+
 import cProfile
-from io import StringIO
-from html.parser import HTMLParser
-import bs4
-from bs4 import BeautifulSoup, __version__
-from bs4.builder import builder_registry
+
+if PY3:
+    from io import StringIO
+    from html.parser import HTMLParser
+else:
+    from StringIO import StringIO
+    from HTMLParser import HTMLParser
+
+import sigil_bs4
+from sigil_bs4 import BeautifulSoup, __version__
+from sigil_bs4.builder import builder_registry
 
 import os
 import pstats
diff -urN bs4_4.4_official/element.py sigil_bs4/element.py
--- bs4_4.4_official/element.py	2015-09-05 19:23:39.000000000 -0400
+++ sigil_bs4/element.py	2015-12-06 10:47:08.000000000 -0500
@@ -1,15 +1,58 @@
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import sys
+PY3 = sys.version_info[0] == 3
+if PY3:
+    text_type = str
+    binary_type = bytes
+    unicode = str
+    basestring = str
+else:
+    range = xrange
+    text_type = unicode
+    binary_type = str
+    chr = unichr
+
 from pdb import set_trace
 import collections
 import re
-import sys
 import warnings
-from bs4.dammit import EntitySubstitution
+from sigil_bs4.dammit import EntitySubstitution
 
 DEFAULT_OUTPUT_ENCODING = "utf-8"
 PY3K = (sys.version_info[0] > 2)
 
 whitespace_re = re.compile("\s+")
 
+
+NON_BREAKING_INLINE_TAGS = ("a","abbr","acronym","b","bdo","big","br",
+    "button","cite","code","del","dfn","em","font","i","image","img",
+    "input","ins","kbd","label","map","nobr","object","q","s","samp",
+    "select","small","span","strike","strong","sub","sup","textarea",
+    "tt","u","var","wbr","mbp:nu")
+
+PRESERVE_WHITESPACE_TAGS = ("pre","textarea","script","style")
+
+VOID_TAGS = ("area","base","basefont","bgsound","br","col","command",
+    "embed","event-source","frame","hr","image","img","input","keygen",
+    "link","menuitem","meta","param","source","spacer","track","wbr",
+    "mbp:pagebreak")
+
+NO_ENTITY_SUB_TAGS = ("script", "style")
+
+SPECIAL_HANDLING_TAGS = ("html", "body")
+
+STRUCTURAL_TAGS = ("article","aside","blockquote","body","canvas",
+    "colgroup","div","dl","figure","footer","head","header","hr","html",
+    "ol","section","table","tbody","tfoot","thead","td","th","tr","ul")
+
+OTHER_TEXTHOLDING_TAGS = ("address","caption","dd","div","dt","h1","h2",
+    "h3","h4","h5","h6","legend","li","option","p","td","th","title")
+
+EBOOK_XML_PARENT_TAGS = ("package","metadata","manifest","spine","guide","ncx",
+                         "head","doctitle","docauthor","navmap", "navpoint",
+                          "navlabel", "pagelist", "pagetarget") 
+
 def _alias(attr):
     """Alias one attribute name to another for backward compatibility"""
     @property
@@ -22,22 +65,23 @@
     return alias
 
 
-class NamespacedAttribute(str):
+class NamespacedAttribute(unicode):
 
     def __new__(cls, prefix, name, namespace=None):
         if name is None:
-            obj = str.__new__(cls, prefix)
+            obj = unicode.__new__(cls, prefix)
+
         elif prefix is None:
             # Not really namespaced.
-            obj = str.__new__(cls, name)
+            obj = unicode.__new__(cls, name)
         else:
-            obj = str.__new__(cls, prefix + ":" + name)
+            obj = unicode.__new__(cls, prefix + ":" + name)
         obj.prefix = prefix
         obj.name = name
         obj.namespace = namespace
         return obj
 
-class AttributeValueWithCharsetSubstitution(str):
+class AttributeValueWithCharsetSubstitution(unicode):
     """A stand-in object for a character encoding specified in HTML."""
 
 class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
@@ -48,7 +92,7 @@
     """
 
     def __new__(cls, original_value):
-        obj = str.__new__(cls, original_value)
+        obj = unicode.__new__(cls, original_value)
         obj.original_value = original_value
         return obj
 
@@ -71,9 +115,9 @@
         match = cls.CHARSET_RE.search(original_value)
         if match is None:
             # No substitution necessary.
-            return str.__new__(str, original_value)
+            return unicode.__new__(str, original_value)
 
-        obj = str.__new__(cls, original_value)
+        obj = unicode.__new__(cls, original_value)
         obj.original_value = original_value
         return obj
 
@@ -115,7 +159,7 @@
     @classmethod
     def substitute_xml(cls, ns):
         return cls._substitute_if_appropriate(
-            ns, EntitySubstitution.substitute_xml)
+            ns, EntitySubstitution.substitute_xml_containing_entities)
 
 class PageElement(object):
     """Contains the navigational information for some part of the page
@@ -296,7 +340,7 @@
     def insert(self, position, new_child):
         if new_child is self:
             raise ValueError("Cannot insert a tag into itself.")
-        if (isinstance(new_child, str)
+        if (isinstance(new_child, basestring)
             and not isinstance(new_child, NavigableString)):
             new_child = NavigableString(new_child)
 
@@ -517,7 +561,7 @@
                 result = (element for element in generator
                           if isinstance(element, Tag))
                 return ResultSet(strainer, result)
-            elif isinstance(name, str):
+            elif isinstance(name, basestring):
                 # Optimization to find all tags with a given name.
                 result = (element for element in generator
                           if isinstance(element, Tag)
@@ -668,7 +712,7 @@
         return self.parents
 
 
-class NavigableString(str, PageElement):
+class NavigableString(unicode, PageElement):
 
     PREFIX = ''
     SUFFIX = ''
@@ -681,10 +725,10 @@
         passed in to the superclass's __new__ or the superclass won't know
         how to handle non-ASCII characters.
         """
-        if isinstance(value, str):
-            u = str.__new__(cls, value)
+        if isinstance(value, unicode):
+            u = unicode.__new__(cls, value)
         else:
-            u = str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+            u = unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
         u.setup()
         return u
 
@@ -695,7 +739,7 @@
         return type(self)(self)
 
     def __getnewargs__(self):
-        return (str(self),)
+        return (unicode(self),)
 
     def __getattr__(self, attr):
         """text.string gives you text. This is for backwards
@@ -762,7 +806,7 @@
         if pub_id is not None:
             value += ' PUBLIC "%s"' % pub_id
             if system_id is not None:
-                value += ' "%s"' % system_id
+                value += '\n "%s"' % system_id
         elif system_id is not None:
             value += ' SYSTEM "%s"' % system_id
 
@@ -846,6 +890,12 @@
     isSelfClosing = is_empty_element  # BS3
 
     @property
+    def is_non_breaking_inline_tag(self):
+        # True for a non-XML tag named in NON_BREAKING_INLINE_TAGS. Used only when pretty
+        # printing HTML, so that a return after the tag cannot introduce an unwanted space.
+        return self.name in NON_BREAKING_INLINE_TAGS and not self._is_xml
+
+    @property
     def string(self):
         """Convenience property to get the single string within this tag.
 
@@ -968,6 +1018,10 @@
         "A tag is non-None even if it has no contents."
         return True
 
+    def __nonzero__(self):
+        "Python 2 truth test: a tag is always true, even if it has no contents."
+        return True
+
     def __setitem__(self, key, value):
         """Setting tag[key] sets the value of the 'key' attribute for the
         tag."""
@@ -1045,22 +1099,23 @@
 
     def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
                indent_level=None, formatter="minimal",
-               errors="xmlcharrefreplace"):
+               errors="xmlcharrefreplace", indent_chars=" "):
         # Turn the data structure into Unicode, then encode the
         # Unicode.
-        u = self.decode(indent_level, encoding, formatter)
+        u = self.decode(indent_level, encoding, formatter, indent_chars)
         return u.encode(encoding, errors)
 
     def _should_pretty_print(self, indent_level):
         """Should this tag be pretty-printed?"""
         return (
             indent_level is not None and
-            (self.name not in HTMLAwareEntitySubstitution.preformatted_tags
+            ((self.name not in HTMLAwareEntitySubstitution.preformatted_tags 
+              and self.name not in NON_BREAKING_INLINE_TAGS)
              or self._is_xml))
 
     def decode(self, indent_level=None,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", indent_chars=" "):
         """Returns a Unicode representation of this tag and its contents.
 
         :param eventual_encoding: The tag is destined to be
@@ -1084,8 +1139,8 @@
                 else:
                     if isinstance(val, list) or isinstance(val, tuple):
                         val = ' '.join(val)
-                    elif not isinstance(val, str):
-                        val = str(val)
+                    elif not isinstance(val, basestring):
+                        val = unicode(val)
                     elif (
                         isinstance(val, AttributeValueWithCharsetSubstitution)
                         and eventual_encoding is not None):
@@ -1093,7 +1148,7 @@
 
                     text = self.format_string(val, formatter)
                     decoded = (
-                        str(key) + '='
+                        unicode(key) + '='
                         + EntitySubstitution.quoted_attribute_value(text))
                 attrs.append(decoded)
         close = ''
@@ -1112,14 +1167,14 @@
         space = ''
         indent_space = ''
         if indent_level is not None:
-            indent_space = (' ' * (indent_level - 1))
+            indent_space = (indent_chars * (indent_level - 1))
         if pretty_print:
             space = indent_space
             indent_contents = indent_level + 1
         else:
             indent_contents = None
         contents = self.decode_contents(
-            indent_contents, eventual_encoding, formatter)
+            indent_contents, eventual_encoding, formatter, indent_chars)
 
         if self.hidden:
             # This is the 'document root' object.
@@ -1151,15 +1206,15 @@
             s = ''.join(s)
         return s
 
-    def prettify(self, encoding=None, formatter="minimal"):
+    def prettify(self, encoding=None, formatter="minimal", indent_chars=" "):
         if encoding is None:
-            return self.decode(True, formatter=formatter)
+            return self.decode(True, formatter=formatter, indent_chars=indent_chars)
         else:
-            return self.encode(encoding, True, formatter=formatter)
+            return self.encode(encoding, True, formatter=formatter, indent_chars=indent_chars)
 
     def decode_contents(self, indent_level=None,
                        eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-                       formatter="minimal"):
+                       formatter="minimal", indent_chars=" "):
         """Renders the contents of this tag as a Unicode string.
 
         :param indent_level: Each line of the rendering will be
@@ -1187,21 +1242,385 @@
             if isinstance(c, NavigableString):
                 text = c.output_ready(formatter)
             elif isinstance(c, Tag):
-                s.append(c.decode(indent_level, eventual_encoding,
-                                  formatter))
+                s.append(c.decode(indent_level, eventual_encoding, formatter, indent_chars))
             if text and indent_level and not self.name == 'pre':
                 text = text.strip()
             if text:
                 if pretty_print and not self.name == 'pre':
-                    s.append(" " * (indent_level - 1))
+                    s.append(indent_chars * (indent_level - 1))
                 s.append(text)
                 if pretty_print and not self.name == 'pre':
                     s.append("\n")
         return ''.join(s)
 
+    def decodexml(self, indent_level=0, eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+               formatter="minimal", indent_chars=" "):
+        """Render this tag and its children as indented XML text.
+
+        :param indent_level: Nesting depth; the tag is prefixed with
+         ``indent_chars * (indent_level - 1)``.
+        :param eventual_encoding: Passed to ``encode()`` on
+         AttributeValueWithCharsetSubstitution attribute values.
+        :param formatter: Formatter name or callable applied to text
+         and attribute values.
+        :param indent_chars: String repeated once per indent level.
+        """
+        # Resolve a formatter name once so children reuse the callable.
+        if not callable(formatter):
+            formatter = self._formatter_for_name(formatter)
+
+        is_xmlparent = self.name.lower() in EBOOK_XML_PARENT_TAGS
+        attrs = []
+        if self.attrs:
+            for key, val in sorted(self.attrs.items()):
+                if val is None:
+                    attrs.append(key)
+                    continue
+                if isinstance(val, (list, tuple)):
+                    val = ' '.join(val)
+                # unicode(), not str(): Python 2 str() fails on non-ASCII text.
+                elif not isinstance(val, basestring):
+                    val = unicode(val)
+                elif (isinstance(val, AttributeValueWithCharsetSubstitution)
+                      and eventual_encoding is not None):
+                    val = val.encode(eventual_encoding)
+                text = self.format_string(val, formatter)
+                attrs.append(unicode(key) + '='
+                             + EntitySubstitution.quoted_attribute_value(text))
+
+        prefix = (self.prefix + ":") if self.prefix else ''
+
+        # For pure XML, a self-closing tag whose only "contents" is
+        # whitespace is treated as empty. NOTE: this removes that
+        # whitespace from the tag itself, not just from the output.
+        if self.can_be_empty_element:
+            tagcontents = self.string
+            if tagcontents is not None and len(tagcontents.strip()) == 0:
+                self.contents = []
+
+        close = ''
+        closeTag = ''
+        if self.is_empty_element:
+            close = '/'
+        else:
+            closeTag = '</%s%s>' % (prefix, self.name)
+
+        indent_space = (indent_chars * (indent_level - 1))
+        indent_contents = indent_level
+        if is_xmlparent or self.hidden:
+            indent_contents = indent_level + 1
+
+        contents = self.decodexml_contents(indent_contents, eventual_encoding, formatter, indent_chars)
+        if self.hidden:
+            # This is the 'document root' object.
+            return contents
+
+        attribute_string = (' ' + ' '.join(attrs)) if attrs else ''
+        s = [indent_space,
+             '<%s%s%s%s>' % (prefix, self.name, attribute_string, close)]
+        if is_xmlparent:
+            s.append("\n")
+        s.append(contents)
+        if (contents and contents[-1] != "\n" and is_xmlparent) or self.is_empty_element:
+            s.append("\n")
+        if closeTag and is_xmlparent:
+            s.append(indent_space)
+        s.append(closeTag)
+        if closeTag and self.next_sibling:
+            s.append("\n")
+        return ''.join(s)
+
+    def decodexml_contents(self, indent_level=0, eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+                        formatter="minimal", indent_chars=" "):
+        """Renders the children of this tag as one XML string.
+
+        Child tags are rendered with decodexml(); text children are
+        stripped of surrounding whitespace and skipped when empty.
+        """
+        # callable() here: collections.Callable is gone as of Python 3.10.
+        if not callable(formatter):
+            formatter = self._formatter_for_name(formatter)
+
+        is_xmlparent = self.name.lower() in EBOOK_XML_PARENT_TAGS
+        s = []
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.output_ready(formatter)
+            elif isinstance(c, Tag):
+                s.append(c.decodexml(indent_level, eventual_encoding, formatter, indent_chars))
+            text = text.strip() if text else None
+            if text:
+                if is_xmlparent and not s:
+                    s.append(indent_chars * (indent_level - 1))
+                s.append(text)
+        return ''.join(s)
+
+    def serialize_xhtml(self, eventual_encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"):
+        """Serialize this tag and its children as XHTML text.
+
+        Only tags named in SPECIAL_HANDLING_TAGS get newlines added; their
+        contents are first stripped of leading and trailing whitespace.
+
+        :param eventual_encoding: Passed to ``encode()`` on
+         AttributeValueWithCharsetSubstitution attribute values.
+        :param formatter: Formatter name or callable applied to text
+         and attribute values.
+        """
+        # Resolve a formatter name once so children reuse the callable.
+        if not callable(formatter):
+            formatter = self._formatter_for_name(formatter)
+
+        attrs = []
+        if self.attrs:
+            for key, val in sorted(self.attrs.items()):
+                if val is None:
+                    attrs.append(key)
+                    continue
+                if isinstance(val, (list, tuple)):
+                    val = ' '.join(val)
+                # unicode(), not str(): Python 2 str() fails on non-ASCII text.
+                elif not isinstance(val, basestring):
+                    val = unicode(val)
+                elif (isinstance(val, AttributeValueWithCharsetSubstitution)
+                      and eventual_encoding is not None):
+                    val = val.encode(eventual_encoding)
+                text = self.format_string(val, formatter)
+                attrs.append(unicode(key) + '='
+                             + EntitySubstitution.quoted_attribute_value(text))
+
+        prefix = (self.prefix + ":") if self.prefix else ''
+
+        is_empty = self.is_empty_element
+        close = '/' if is_empty else ''
+        closeTag = '' if is_empty else '</%s%s>' % (prefix, self.name)
+
+        contents = self.serialize_xhtml_contents(eventual_encoding, formatter)
+
+        # strip extraneous whitespace before the primary closing tag
+        is_special = self.name in SPECIAL_HANDLING_TAGS
+        if is_special:
+            contents = contents.strip() + "\n"
+
+        if self.hidden:
+            # This is the 'document root' object.
+            return contents
+
+        attribute_string = (' ' + ' '.join(attrs)) if attrs else ''
+        s = ['<%s%s%s%s>' % (prefix, self.name, attribute_string, close)]
+        if is_special:
+            s.append("\n")
+        s.extend((contents, closeTag))
+        if is_special:
+            s.append("\n")
+        return ''.join(s)
+
+    def serialize_xhtml_contents(self, eventual_encoding=DEFAULT_OUTPUT_ENCODING, formatter="minimal"):
+        """Serialize the children of this tag as one XHTML string.
+
+        :param eventual_encoding: Forwarded to serialize_xhtml() on child tags.
+        :param formatter: Formatter name or callable applied to text.
+        """
+        # callable() here: collections.Callable is gone as of Python 3.10.
+        if not callable(formatter):
+            formatter = self._formatter_for_name(formatter)
+
+        s = []
+        for c in self:
+            if isinstance(c, Comment):
+                # A fresh copy of the comment is rendered, not c itself.
+                s.append(Comment(c).output_ready(formatter))
+            elif isinstance(c, CData):
+                s.append(CData(c).output_ready(formatter))
+            elif isinstance(c, NavigableString):
+                s.append(c.output_ready(formatter))
+            elif isinstance(c, Tag):
+                s.append(c.serialize_xhtml(eventual_encoding, formatter))
+        return ''.join(s)
+
+    def prettyprint_xhtml(self, indent_level=0, eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+               formatter="minimal", indent_chars=" "):
+        """Render this tag and its children as pretty-printed XHTML.
+
+        Tags in STRUCTURAL_TAGS put their children on indented lines of
+        their own; tags in NON_BREAKING_INLINE_TAGS are emitted in place
+        with no indentation; any other tag is indented and ends with a
+        newline. Tags in VOID_TAGS are written self-closed.
+
+        :param indent_level: Nesting depth; block-level output is prefixed
+         with ``indent_chars * (indent_level - 1)``.
+        :param eventual_encoding: Passed to ``encode()`` on
+         AttributeValueWithCharsetSubstitution attribute values.
+        :param formatter: Formatter name or callable applied to text
+         and attribute values.
+        :param indent_chars: String repeated once per indent level.
+        """
+        # Resolve a formatter name once so children reuse the callable.
+        if not callable(formatter):
+            formatter = self._formatter_for_name(formatter)
+
+        is_structural = self.name in STRUCTURAL_TAGS
+        is_inline = self.name in NON_BREAKING_INLINE_TAGS
+
+        # build attribute string
+        attribs = []
+        atts = ""
+        if self.attrs:
+            for key, val in sorted(self.attrs.items()):
+                if val is None:
+                    attribs.append(key)
+                    continue
+                if isinstance(val, (list, tuple)):
+                    val = ' '.join(val)
+                # unicode(), not str(): Python 2 str() fails on non-ASCII text.
+                elif not isinstance(val, basestring):
+                    val = unicode(val)
+                elif (isinstance(val, AttributeValueWithCharsetSubstitution)
+                      and eventual_encoding is not None):
+                    val = val.encode(eventual_encoding)
+                text = self.format_string(val, formatter)
+                attribs.append(unicode(key) + '='
+                               + EntitySubstitution.quoted_attribute_value(text))
+            atts = " " + " ".join(attribs)
+
+        prefix = (self.prefix + ":") if self.prefix else ''
+
+        is_void_tag = self.name in VOID_TAGS
+
+        # get tag content; children of structural tags go one level deeper
+        contents = ""
+        if not is_void_tag:
+            child_level = indent_level + 1 if is_structural else indent_level
+            contents = self.prettyprint_xhtml_contents(child_level, eventual_encoding, formatter, indent_chars)
+
+        if self.hidden:
+            # This is the 'document root' object.
+            return contents
+
+        is_keepwhitespace = self.name in PRESERVE_WHITESPACE_TAGS
+        if not is_keepwhitespace and not is_inline:
+            contents = contents.rstrip()
+
+        single = is_void_tag
+        # for xhtml serialization with self-closing non-void tags
+        # uncomment the following line
+        # single = single or (contents == "")
+
+        indent_space = (indent_chars * (indent_level - 1))
+
+        # handle self-closed tags with no content first
+        if single:
+            selfclosetag = '<%s%s%s/>' % (prefix, self.name, atts)
+            if is_inline:
+                # always add newline after br tags when they are children of
+                # structural tags; a detached br has no parent to inspect
+                if (self.name == "br" and self.parent is not None
+                        and self.parent.name in STRUCTURAL_TAGS):
+                    selfclosetag += "\n"
+                return selfclosetag
+            return indent_space + selfclosetag + "\n"
+
+        # handle the general case
+        starttag = '<%s%s%s>' % (prefix, self.name, atts)
+        closetag = '</%s%s>' % (prefix, self.name)
+        if is_structural:
+            if contents != "":
+                contents = "\n" + contents + "\n" + indent_space
+            return indent_space + starttag + contents + closetag + "\n"
+        if is_inline:
+            return starttag + contents + closetag
+        if not is_keepwhitespace:
+            contents = contents.lstrip()
+        return indent_space + starttag + contents + closetag + "\n"
+
+    def prettyprint_xhtml_contents(self, indent_level=0, eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+                        formatter="minimal", indent_chars=" "):
+        """Renders the children of this tag as pretty-printed XHTML.
+
+        Whitespace-only text is kept, collapsed to one space or dropped
+        depending on which of the module's tag lists name this tag.
+        """
+        # callable() here: collections.Callable is gone as of Python 3.10.
+        if not callable(formatter):
+            formatter = self._formatter_for_name(formatter)
+
+        is_structural = self.name in STRUCTURAL_TAGS
+        is_inline = self.name in NON_BREAKING_INLINE_TAGS
+        is_keepwhitespace = self.name in PRESERVE_WHITESPACE_TAGS
+        indent_space = (indent_chars * (indent_level - 1))
+        last_char = "x"  # last character emitted so far; "x" stands in for "not whitespace"
+        contains_block_tags = False
+
+        if is_structural or self.hidden:
+            last_char = "\n"
+
+        s = []
+
+        for c in self:
+            if isinstance(c, Comment):
+                text = Comment(c).output_ready(formatter)
+                s.append(text)
+            elif isinstance(c, CData):
+                text = CData(c).output_ready(formatter)
+                s.append(text)
+            elif isinstance(c, NavigableString):
+                text = c.output_ready(formatter)
+                is_whitespace = (text.strip() == "")
+
+                # handle pure whitespace differently
+                if is_whitespace:
+                    if is_keepwhitespace:
+                        s.append(text)
+                    elif is_inline or self.name in OTHER_TEXTHOLDING_TAGS:
+                        if last_char not in " \t\v\f\r\n":
+                            s.append(" ")
+                        else:
+                            s.append("")
+                    else:
+                        # ignore this whitespace
+                        s.append("")
+
+                # handle all other text
+                else:
+                    if is_structural and last_char == "\n":
+                        s.append(indent_space)
+                        text = text.lstrip()
+                    s.append(text)
+
+            # handle tags
+            elif isinstance(c, Tag):
+                val = c.prettyprint_xhtml(indent_level, eventual_encoding, formatter, indent_chars)
+                # track if contains block tags and append newline and prepend newline if needed
+                if c.name not in NON_BREAKING_INLINE_TAGS:
+                    contains_block_tags = True
+                    if last_char != "\n":
+                        s.append("\n")
+                        last_char = "\n"
+                # if child of a structural tag is inline and follows a newline, indent it properly
+                if is_structural and c.name in NON_BREAKING_INLINE_TAGS and last_char == '\n':
+                    s.append(indent_space)
+                    val = val.lstrip()
+                s.append(val)
+
+            else:
+                s.append("")
+
+            # update last_char
+            last_element = s[-1]
+            if last_element != "":
+                last_char = last_element[-1:]
+
+        # after processing all children, handle inline tags that contain block level tags
+        if is_inline and contains_block_tags:
+            if last_char != "\n":
+                s.append("\n")
+            s.append(indent_space)
+
+        return ''.join(s)
+
     def encode_contents(
         self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
-        formatter="minimal"):
+        formatter="minimal", indent_chars=" "):
         """Renders the contents of this tag as a bytestring.
 
         :param indent_level: Each line of the rendering will be
@@ -1213,7 +1632,7 @@
            entities to Unicode characters.
         """
 
-        contents = self.decode_contents(indent_level, encoding, formatter)
+        contents = self.decode_contents(indent_level, encoding, formatter, indent_chars)
         return contents.encode(encoding)
 
     # Old method for BS3 compatibility
@@ -1550,7 +1969,7 @@
     def _normalize_search_value(self, value):
         # Leave it alone if it's a Unicode string, a callable, a
         # regular expression, a boolean, or None.
-        if (isinstance(value, str) or isinstance(value, collections.Callable) or hasattr(value, 'match')
+        if (isinstance(value, text_type) or isinstance(value, collections.Callable) or hasattr(value, 'match')
             or isinstance(value, bool) or value is None):
             return value
 
@@ -1563,7 +1982,7 @@
             new_value = []
             for v in value:
                 if (hasattr(v, '__iter__') and not isinstance(v, bytes)
-                    and not isinstance(v, str)):
+                    and not isinstance(v, text_type)):
                     # This is almost certainly the user's mistake. In the
                     # interests of avoiding infinite loops, we'll let
                     # it through as-is rather than doing a recursive call.
@@ -1575,7 +1994,7 @@
         # Otherwise, convert it into a Unicode string.
         # The unicode(str()) thing is so this will do the same thing on Python 2
         # and Python 3.
-        return str(str(value))
+        return unicode(unicode(value))
 
     def __str__(self):
         if self.text:
@@ -1629,7 +2048,7 @@
         found = None
         # If given a list of items, scan it for a text element that
         # matches.
-        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, str)):
+        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
             for element in markup:
                 if isinstance(element, NavigableString) \
                        and self.search(element):
@@ -1642,7 +2061,7 @@
                 found = self.search_tag(markup)
         # If it's text, make sure the text matches.
         elif isinstance(markup, NavigableString) or \
-                 isinstance(markup, str):
+                 isinstance(markup, basestring):
             if not self.name and not self.attrs and self._matches(markup, self.text):
                 found = markup
         else:
@@ -1656,7 +2075,7 @@
         if isinstance(markup, list) or isinstance(markup, tuple):
             # This should only happen when searching a multi-valued attribute
             # like 'class'.
-            if (isinstance(match_against, str)
+            if (isinstance(match_against, unicode)
                 and ' ' in match_against):
                 # A bit of a special case. If they try to match "foo
                 # bar" on a multivalue attribute's value, only accept
@@ -1691,7 +2110,7 @@
             # None matches None, False, an empty string, an empty list, and so on.
             return not match_against
 
-        if isinstance(match_against, str):
+        if isinstance(match_against, unicode):
             # Exact string match
             return markup == match_against
 
