|
@@ -15,17 +15,6 @@ from django.utils.regex_helper import _lazy_re_compile
|
|
|
from django.utils.safestring import SafeData, SafeString, mark_safe
|
|
|
from django.utils.text import normalize_newlines
|
|
|
|
|
|
-# Configuration for urlize() function.
|
|
|
-TRAILING_PUNCTUATION_CHARS = '.,:;!'
|
|
|
-WRAPPING_PUNCTUATION = [('(', ')'), ('[', ']')]
|
|
|
-
|
|
|
-word_split_re = _lazy_re_compile(r'''([\s<>"']+)''')
|
|
|
-simple_url_re = _lazy_re_compile(r'^https?://\[?\w', re.IGNORECASE)
|
|
|
-simple_url_2_re = _lazy_re_compile(
|
|
|
- r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$',
|
|
|
- re.IGNORECASE
|
|
|
-)
|
|
|
-
|
|
|
|
|
|
@keep_lazy(str, SafeString)
|
|
|
def escape(text):
|
|
@@ -229,48 +218,118 @@ def smart_urlquote(url):
|
|
|
return urlunsplit((scheme, netloc, path, query, fragment))
|
|
|
|
|
|
|
|
|
-@keep_lazy_text
|
|
|
-def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
+class Urlizer:
|
|
|
"""
|
|
|
Convert any URLs in text into clickable links.
|
|
|
|
|
|
- Works on http://, https://, www. links, and also on links ending in one of
|
|
|
+ Work on http://, https://, www. links, and also on links ending in one of
|
|
|
the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
|
|
|
Links can have trailing punctuation (periods, commas, close-parens) and
|
|
|
leading punctuation (opening parens) and it'll still do the right thing.
|
|
|
+ """
|
|
|
+ trailing_punctuation_chars = '.,:;!'
|
|
|
+ wrapping_punctuation = [('(', ')'), ('[', ']')]
|
|
|
+
|
|
|
+ simple_url_re = _lazy_re_compile(r'^https?://\[?\w', re.IGNORECASE)
|
|
|
+ simple_url_2_re = _lazy_re_compile(
|
|
|
+ r'^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$',
|
|
|
+ re.IGNORECASE
|
|
|
+ )
|
|
|
+ word_split_re = _lazy_re_compile(r'''([\s<>"']+)''')
|
|
|
|
|
|
- If trim_url_limit is not None, truncate the URLs in the link text longer
|
|
|
- than this limit to trim_url_limit - 1 characters and append an ellipsis.
|
|
|
+ mailto_template = 'mailto:{local}@{domain}'
|
|
|
+ url_template = '<a href="{href}"{attrs}>{url}</a>'
|
|
|
|
|
|
- If nofollow is True, give the links a rel="nofollow" attribute.
|
|
|
+ def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
+ """
|
|
|
+ If trim_url_limit is not None, truncate the URLs in the link text
|
|
|
+ longer than this limit to trim_url_limit - 1 characters and append an
|
|
|
+ ellipsis.
|
|
|
|
|
|
- If autoescape is True, autoescape the link text and URLs.
|
|
|
- """
|
|
|
- safe_input = isinstance(text, SafeData)
|
|
|
+ If nofollow is True, give the links a rel="nofollow" attribute.
|
|
|
+
|
|
|
+ If autoescape is True, autoescape the link text and URLs.
|
|
|
+ """
|
|
|
+ self.trim_url_limit = trim_url_limit
|
|
|
+ self.nofollow = nofollow
|
|
|
+ self.autoescape = autoescape
|
|
|
+ self.safe_input = isinstance(text, SafeData)
|
|
|
+
|
|
|
+ words = self.word_split_re.split(str(text))
|
|
|
+ return ''.join([
|
|
|
+ self.handle_word(word) for word in words
|
|
|
+ ])
|
|
|
|
|
|
- def trim_url(x, limit=trim_url_limit):
|
|
|
- if limit is None or len(x) <= limit:
|
|
|
+ def handle_word(self, word):
|
|
|
+ if '.' in word or '@' in word or ':' in word:
|
|
|
+ # lead: Punctuation trimmed from the beginning of the word.
|
|
|
+ # middle: State of the word.
|
|
|
+ # trail: Punctuation trimmed from the end of the word.
|
|
|
+ lead, middle, trail = self.trim_punctuation(word)
|
|
|
+ # Make URL we want to point to.
|
|
|
+ url = None
|
|
|
+ nofollow_attr = ' rel="nofollow"' if self.nofollow else ''
|
|
|
+ if self.simple_url_re.match(middle):
|
|
|
+ url = smart_urlquote(html.unescape(middle))
|
|
|
+ elif self.simple_url_2_re.match(middle):
|
|
|
+ url = smart_urlquote('http://%s' % html.unescape(middle))
|
|
|
+ elif ':' not in middle and self.is_email_simple(middle):
|
|
|
+ local, domain = middle.rsplit('@', 1)
|
|
|
+ try:
|
|
|
+ domain = punycode(domain)
|
|
|
+ except UnicodeError:
|
|
|
+ return word
|
|
|
+ url = self.mailto_template.format(local=local, domain=domain)
|
|
|
+ nofollow_attr = ''
|
|
|
+ # Make link.
|
|
|
+ if url:
|
|
|
+ trimmed = self.trim_url(middle)
|
|
|
+ if self.autoescape and not self.safe_input:
|
|
|
+ lead, trail = escape(lead), escape(trail)
|
|
|
+ trimmed = escape(trimmed)
|
|
|
+ middle = self.url_template.format(
|
|
|
+ href=escape(url),
|
|
|
+ attrs=nofollow_attr,
|
|
|
+ url=trimmed,
|
|
|
+ )
|
|
|
+ return mark_safe(f'{lead}{middle}{trail}')
|
|
|
+ else:
|
|
|
+ if self.safe_input:
|
|
|
+ return mark_safe(word)
|
|
|
+ elif self.autoescape:
|
|
|
+ return escape(word)
|
|
|
+ elif self.safe_input:
|
|
|
+ return mark_safe(word)
|
|
|
+ elif self.autoescape:
|
|
|
+ return escape(word)
|
|
|
+ return word
|
|
|
+
|
|
|
+ def trim_url(self, x):
|
|
|
+ if self.trim_url_limit is None or len(x) <= self.trim_url_limit:
|
|
|
return x
|
|
|
- return '%s…' % x[:max(0, limit - 1)]
|
|
|
+ return '%s…' % x[:max(0, self.trim_url_limit - 1)]
|
|
|
|
|
|
- def trim_punctuation(lead, middle, trail):
|
|
|
+ def trim_punctuation(self, word):
|
|
|
"""
|
|
|
- Trim trailing and wrapping punctuation from `middle`. Return the items
|
|
|
- of the new state.
|
|
|
+ Trim trailing and wrapping punctuation from `word`. Return the items of
|
|
|
+ the new state.
|
|
|
"""
|
|
|
+ lead, middle, trail = '', word, ''
|
|
|
# Continue trimming until middle remains unchanged.
|
|
|
trimmed_something = True
|
|
|
while trimmed_something:
|
|
|
trimmed_something = False
|
|
|
# Trim wrapping punctuation.
|
|
|
- for opening, closing in WRAPPING_PUNCTUATION:
|
|
|
+ for opening, closing in self.wrapping_punctuation:
|
|
|
if middle.startswith(opening):
|
|
|
middle = middle[len(opening):]
|
|
|
lead += opening
|
|
|
trimmed_something = True
|
|
|
# Keep parentheses at the end only if they're balanced.
|
|
|
- if (middle.endswith(closing) and
|
|
|
- middle.count(closing) == middle.count(opening) + 1):
|
|
|
+ if (
|
|
|
+ middle.endswith(closing) and
|
|
|
+ middle.count(closing) == middle.count(opening) + 1
|
|
|
+ ):
|
|
|
middle = middle[:-len(closing)]
|
|
|
trail = closing + trail
|
|
|
trimmed_something = True
|
|
@@ -278,7 +337,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
# as encoded entities contain ';'). Unescape entities to avoid
|
|
|
# breaking them by removing ';'.
|
|
|
middle_unescaped = html.unescape(middle)
|
|
|
- stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS)
|
|
|
+ stripped = middle_unescaped.rstrip(self.trailing_punctuation_chars)
|
|
|
if middle_unescaped != stripped:
|
|
|
punctuation_count = len(middle_unescaped) - len(stripped)
|
|
|
trail = middle[-punctuation_count:] + trail
|
|
@@ -286,6 +345,7 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
trimmed_something = True
|
|
|
return lead, middle, trail
|
|
|
|
|
|
+ @staticmethod
|
|
|
def is_email_simple(value):
|
|
|
"""Return True if value looks like an email address."""
|
|
|
# An @ must be in the middle of the value.
|
|
@@ -301,50 +361,13 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
return False
|
|
|
return True
|
|
|
|
|
|
- words = word_split_re.split(str(text))
|
|
|
- for i, word in enumerate(words):
|
|
|
- if '.' in word or '@' in word or ':' in word:
|
|
|
- # lead: Current punctuation trimmed from the beginning of the word.
|
|
|
- # middle: Current state of the word.
|
|
|
- # trail: Current punctuation trimmed from the end of the word.
|
|
|
- lead, middle, trail = '', word, ''
|
|
|
- # Deal with punctuation.
|
|
|
- lead, middle, trail = trim_punctuation(lead, middle, trail)
|
|
|
|
|
|
- # Make URL we want to point to.
|
|
|
- url = None
|
|
|
- nofollow_attr = ' rel="nofollow"' if nofollow else ''
|
|
|
- if simple_url_re.match(middle):
|
|
|
- url = smart_urlquote(html.unescape(middle))
|
|
|
- elif simple_url_2_re.match(middle):
|
|
|
- url = smart_urlquote('http://%s' % html.unescape(middle))
|
|
|
- elif ':' not in middle and is_email_simple(middle):
|
|
|
- local, domain = middle.rsplit('@', 1)
|
|
|
- try:
|
|
|
- domain = punycode(domain)
|
|
|
- except UnicodeError:
|
|
|
- continue
|
|
|
- url = 'mailto:%s@%s' % (local, domain)
|
|
|
- nofollow_attr = ''
|
|
|
+urlizer = Urlizer()
|
|
|
|
|
|
- # Make link.
|
|
|
- if url:
|
|
|
- trimmed = trim_url(middle)
|
|
|
- if autoescape and not safe_input:
|
|
|
- lead, trail = escape(lead), escape(trail)
|
|
|
- trimmed = escape(trimmed)
|
|
|
- middle = '<a href="%s"%s>%s</a>' % (escape(url), nofollow_attr, trimmed)
|
|
|
- words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
|
|
|
- else:
|
|
|
- if safe_input:
|
|
|
- words[i] = mark_safe(word)
|
|
|
- elif autoescape:
|
|
|
- words[i] = escape(word)
|
|
|
- elif safe_input:
|
|
|
- words[i] = mark_safe(word)
|
|
|
- elif autoescape:
|
|
|
- words[i] = escape(word)
|
|
|
- return ''.join(words)
|
|
|
+
|
|
|
+@keep_lazy_text
|
|
|
+def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
|
|
|
+ return urlizer(text, trim_url_limit=trim_url_limit, nofollow=nofollow, autoescape=autoescape)
|
|
|
|
|
|
|
|
|
def avoid_wrapping(value):
|