11 年之前 · 6ca6c36f82
--- a/django/utils/html.py
+++ b/django/utils/html.py
@@ -118,7 +118,10 @@ linebreaks = allow_lazy(linebreaks, six.text_type)
 
				 
			
 
				 class MLStripper(HTMLParser):
			
 
				     def __init__(self):
			
 
				-        HTMLParser.__init__(self)
			
 
				+        if six.PY2:
			
 
				+            HTMLParser.__init__(self)
			
 
				+        else:
			
 
				+            HTMLParser.__init__(self, strict=False)
			
 
				         self.reset()
			
 
				         self.fed = []
			
 
				 
			
@@ -135,16 +138,36 @@ class MLStripper(HTMLParser):
 
				         return ''.join(self.fed)
			
 
				 
			
 
				 
			
 
				-def strip_tags(value):
			
 
				-    """Returns the given HTML with all tags stripped."""
			
 
				+def _strip_once(value):
			
 
				+    """
			
 
				+    Internal tag stripping utility used by strip_tags.
			
 
				+    """
			
 
				     s = MLStripper()
			
 
				     try:
			
 
				         s.feed(value)
			
 
				-        s.close()
			
 
				     except HTMLParseError:
			
 
				         return value
			
 
				+    try:
			
 
				+        s.close()
			
 
				+    except (HTMLParseError, UnboundLocalError) as err:
			
 
				+        # UnboundLocalError because of http://bugs.python.org/issue17802
			
 
				+        # on Python 3.2, triggered by strict=False mode of HTMLParser
			
 
				+        return s.get_data() + s.rawdata
			
 
				     else:
			
 
				         return s.get_data()
			
 
				+
			
 
				+
			
 
				+def strip_tags(value):
			
 
				+    """Returns the given HTML with all tags stripped."""
			
 
				+    while True:
			
 
				+        if not ('<' in value or '>' in value):
			
 
				+            return value
			
 
				+        new_value = _strip_once(value)
			
 
				+        if new_value == value:
			
 
				+            # _strip_once was not able to detect more tags
			
 
				+            return value
			
 
				+        else:
			
 
				+            value = new_value
			
 
				 strip_tags = allow_lazy(strip_tags)
			
 
				 
			
 
				 
			
--- a/docs/ref/templates/builtins.txt
+++ b/docs/ref/templates/builtins.txt
@@ -1985,7 +1985,7 @@ If ``value`` is ``10``, the output will be ``1.000000E+01``.
 
				 striptags
			
 
				 ^^^^^^^^^
			
 
				 
			
 
				-Strips all [X]HTML tags.
			
 
				+Makes all possible efforts to strip all [X]HTML tags.
			
 
				 
			
 
				 For example::
			
 
				 
			
@@ -1994,6 +1994,16 @@ For example::
 
				 If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"``, the
			
 
				 output will be ``"Joel is a slug"``.
			
 
				 
			
 
				+.. admonition:: No safety guarantee
			
 
				+
			
 
				+    Note that ``striptags`` doesn't give any guarantee about its output being
			
 
				+    entirely HTML safe, particularly with invalid HTML input. So **NEVER**
			
 
				+    apply the ``safe`` filter to a ``striptags`` output.
			
 
				+    If you are looking for something more robust, you can use the ``bleach``
			
 
				+    Python library, notably its `clean`_ method.
			
 
				+
			
 
				+.. _clean: http://bleach.readthedocs.org/en/latest/clean.html
			
 
				+
			
 
				 .. templatefilter:: time
			
 
				 
			
 
				 time
			
--- a/docs/ref/utils.txt
+++ b/docs/ref/utils.txt
@@ -595,17 +595,23 @@ escaping HTML.
 
				 
			
 
				 .. function:: strip_tags(value)
			
 
				 
			
 
				-    Removes anything that looks like an html tag from the string, that is
			
 
				-    anything contained within ``<>``.
			
 
				+    Tries to remove anything that looks like an HTML tag from the string, that
			
 
				+    is anything contained within ``<>``.
			
 
				+    Absolutely NO guarantee is provided about the resulting string being entirely
			
 
				+    HTML safe. So NEVER mark safe the result of a ``strip_tags`` call without
			
 
				+    escaping it first, for example with :func:`~django.utils.html.escape`.
			
 
				 
			
 
				     For example::
			
 
				 
			
 
				         strip_tags(value)
			
 
				 
			
 
				     If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"``
			
 
				-    the return value will be ``"Joel is a slug"``. Note that ``strip_tags``
			
 
				-    result may still contain unsafe HTML content, so you might use
			
 
				-    :func:`~django.utils.html.escape` to make it a safe string.
			
 
				+    the return value will be ``"Joel is a slug"``.
			
 
				+
			
 
				+    If you are looking for a more robust solution, take a look at the `bleach`_
			
 
				+    Python library.
			
 
				+
			
 
				+    .. _bleach: https://pypi.python.org/pypi/bleach
			
 
				 
			
 
				     .. versionchanged:: 1.6
			
 
				 
			
--- a/tests/utils_tests/test_html.py
+++ b/tests/utils_tests/test_html.py
@@ -80,6 +80,8 @@ class TestUtilsHtml(TestCase):
 
				             ('a<p a >b</p>c', 'abc'),
			
 
				             ('d<a:b c:d>e</p>f', 'def'),
			
 
				             ('<strong>foo</strong><a href="http://example.com">bar</a>', 'foobar'),
			
 
				+            ('<sc<!-- -->ript>test<<!-- -->/script>', 'test'),
			
 
				+            ('<script>alert()</script>&h', 'alert()&h'),
			
 
				         )
			
 
				         for value, output in items:
			
 
				             self.check_output(f, value, output)