|
@@ -259,23 +259,14 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
return x
|
|
|
return '%s…' % x[:max(0, limit - 1)]
|
|
|
|
|
|
- def unescape(text, trail):
|
|
|
+ def unescape(text):
|
|
|
"""
|
|
|
If input URL is HTML-escaped, unescape it so that it can be safely fed
|
|
|
to smart_urlquote. For example:
|
|
|
http://example.com?x=1&y=<2> => http://example.com?x=1&y=<2>
|
|
|
"""
|
|
|
- unescaped = (text + trail).replace(
|
|
|
- '&', '&').replace('<', '<').replace(
|
|
|
+ return text.replace('&', '&').replace('<', '<').replace(
|
|
|
'>', '>').replace('"', '"').replace(''', "'")
|
|
|
- if trail and unescaped.endswith(trail):
|
|
|
- # Remove trail for unescaped if it was not consumed by unescape
|
|
|
- unescaped = unescaped[:-len(trail)]
|
|
|
- elif trail == ';':
|
|
|
- # Trail was consumed by unescape (as end-of-entity marker), move it to text
|
|
|
- text += trail
|
|
|
- trail = ''
|
|
|
- return text, unescaped, trail
|
|
|
|
|
|
def trim_punctuation(lead, middle, trail):
|
|
|
"""
|
|
@@ -286,14 +277,6 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
trimmed_something = True
|
|
|
while trimmed_something:
|
|
|
trimmed_something = False
|
|
|
-
|
|
|
- # Trim trailing punctuation.
|
|
|
- stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS)
|
|
|
- if middle != stripped:
|
|
|
- trail = middle[len(stripped):] + trail
|
|
|
- middle = stripped
|
|
|
- trimmed_something = True
|
|
|
-
|
|
|
# Trim wrapping punctuation.
|
|
|
for opening, closing in WRAPPING_PUNCTUATION:
|
|
|
if middle.startswith(opening):
|
|
@@ -306,6 +289,15 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
middle = middle[:-len(closing)]
|
|
|
trail = closing + trail
|
|
|
trimmed_something = True
|
|
|
+ # Trim trailing punctuation (after trimming wrapping punctuation,
|
|
|
+ # as encoded entities contain ';'). Unescape entites to avoid
|
|
|
+ # breaking them by removing ';'.
|
|
|
+ middle_unescaped = unescape(middle)
|
|
|
+ stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
|
|
|
+ if middle_unescaped != stripped:
|
|
|
+ trail = middle[len(stripped):] + trail
|
|
|
+ middle = middle[:len(stripped) - len(middle_unescaped)]
|
|
|
+ trimmed_something = True
|
|
|
return lead, middle, trail
|
|
|
|
|
|
def is_email_simple(value):
|
|
@@ -337,11 +329,9 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
url = None
|
|
|
nofollow_attr = ' rel="nofollow"' if nofollow else ''
|
|
|
if simple_url_re.match(middle):
|
|
|
- middle, middle_unescaped, trail = unescape(middle, trail)
|
|
|
- url = smart_urlquote(middle_unescaped)
|
|
|
+ url = smart_urlquote(unescape(middle))
|
|
|
elif simple_url_2_re.match(middle):
|
|
|
- middle, middle_unescaped, trail = unescape(middle, trail)
|
|
|
- url = smart_urlquote('http://%s' % middle_unescaped)
|
|
|
+ url = smart_urlquote('http://%s' % unescape(middle))
|
|
|
elif ':' not in middle and is_email_simple(middle):
|
|
|
local, domain = middle.rsplit('@', 1)
|
|
|
try:
|