|
@@ -24,6 +24,7 @@ capfirst = allow_lazy(capfirst, six.text_type)
|
|
|
|
|
|
# Set up regular expressions
|
|
|
re_words = re.compile(r'<.*?>|((?:\w[-\w]*|&.*?;)+)', re.U | re.S)
|
|
|
+re_chars = re.compile(r'<.*?>|(.)', re.U | re.S)  # Used in _truncate_html when counting chars
|
|
|
re_tag = re.compile(r'<(/)?([^ ]+?)(?:(\s*/)| .*?)?>', re.S)
|
|
|
re_newlines = re.compile(r'\r\n|\r') # Used in normalize_newlines
|
|
|
re_camel_case = re.compile(r'(((?<=[a-z])[A-Z])|([A-Z](?![A-Z]|$)))')
|
|
@@ -82,7 +83,7 @@ class Truncator(SimpleLazyObject):
|
|
|
return text
|
|
|
return '%s%s' % (text, truncate)
|
|
|
|
|
|
- def chars(self, num, truncate=None):
|
|
|
+ def chars(self, num, truncate=None, html=False):
|
|
|
"""
|
|
|
Returns the text truncated to be no longer than the specified number
|
|
|
of characters.
|
|
@@ -101,7 +102,15 @@ class Truncator(SimpleLazyObject):
|
|
|
truncate_len -= 1
|
|
|
if truncate_len == 0:
|
|
|
break
|
|
|
+ if html:
|
|
|
+ return self._truncate_html(length, truncate, text, truncate_len, False)
|
|
|
+ return self._text_chars(length, truncate, text, truncate_len)
|
|
|
+ chars = allow_lazy(chars)
|
|
|
|
|
|
+ def _text_chars(self, length, truncate, text, truncate_len):
|
|
|
+ """
|
|
|
+ Truncates a string after a certain number of chars.
|
|
|
+ """
|
|
|
s_len = 0
|
|
|
end_index = None
|
|
|
for i, char in enumerate(text):
|
|
@@ -119,7 +128,6 @@ class Truncator(SimpleLazyObject):
|
|
|
|
|
|
# Return the original string since no truncation was necessary
|
|
|
return text
|
|
|
- chars = allow_lazy(chars)
|
|
|
|
|
|
def words(self, num, truncate=None, html=False):
|
|
|
"""
|
|
@@ -129,7 +137,7 @@ class Truncator(SimpleLazyObject):
|
|
|
"""
|
|
|
length = int(num)
|
|
|
if html:
|
|
|
- return self._html_words(length, truncate)
|
|
|
+ return self._truncate_html(length, truncate, self._wrapped, length, True)
|
|
|
return self._text_words(length, truncate)
|
|
|
words = allow_lazy(words)
|
|
|
|
|
@@ -145,40 +153,45 @@ class Truncator(SimpleLazyObject):
|
|
|
return self.add_truncation_text(' '.join(words), truncate)
|
|
|
return ' '.join(words)
|
|
|
|
|
|
- def _html_words(self, length, truncate):
|
|
|
+ def _truncate_html(self, length, truncate, text, truncate_len, words):
|
|
|
"""
|
|
|
- Truncates HTML to a certain number of words (not counting tags and
|
|
|
- comments). Closes opened tags if they were correctly closed in the
|
|
|
- given HTML.
|
|
|
+ Truncates HTML to a certain number of chars (not counting tags and
|
|
|
+ comments), or, if words is True, then to a certain number of words.
|
|
|
+ Closes opened tags if they were correctly closed in the given HTML.
|
|
|
|
|
|
Newlines in the HTML are preserved.
|
|
|
"""
|
|
|
- if length <= 0:
|
|
|
+ if words and length <= 0:
|
|
|
return ''
|
|
|
+
|
|
|
html4_singlets = (
|
|
|
'br', 'col', 'link', 'base', 'img',
|
|
|
'param', 'area', 'hr', 'input'
|
|
|
)
|
|
|
- # Count non-HTML words and keep note of open tags
|
|
|
+
|
|
|
+ # Count non-HTML chars/words and keep note of open tags
|
|
|
pos = 0
|
|
|
end_text_pos = 0
|
|
|
- words = 0
|
|
|
+ current_len = 0
|
|
|
open_tags = []
|
|
|
- while words <= length:
|
|
|
- m = re_words.search(self._wrapped, pos)
|
|
|
+
|
|
|
+ regex = re_words if words else re_chars
|
|
|
+
|
|
|
+ while current_len <= length:
|
|
|
+ m = regex.search(text, pos)
|
|
|
if not m:
|
|
|
# Checked through whole string
|
|
|
break
|
|
|
pos = m.end(0)
|
|
|
if m.group(1):
|
|
|
- # It's an actual non-HTML word
|
|
|
- words += 1
|
|
|
- if words == length:
|
|
|
+ # It's an actual non-HTML word or char
|
|
|
+ current_len += 1
|
|
|
+ if current_len == truncate_len:
|
|
|
end_text_pos = pos
|
|
|
continue
|
|
|
# Check for tag
|
|
|
tag = re_tag.match(m.group(0))
|
|
|
- if not tag or end_text_pos:
|
|
|
+ if not tag or current_len >= truncate_len:
|
|
|
# Don't worry about non tags or tags after our truncate point
|
|
|
continue
|
|
|
closing_tag, tagname, self_closing = tag.groups()
|
|
@@ -199,10 +212,10 @@ class Truncator(SimpleLazyObject):
|
|
|
else:
|
|
|
# Add it to the start of the open tags list
|
|
|
open_tags.insert(0, tagname)
|
|
|
- if words <= length:
|
|
|
- # Don't try to close tags if we don't need to truncate
|
|
|
- return self._wrapped
|
|
|
- out = self._wrapped[:end_text_pos]
|
|
|
+
|
|
|
+ if current_len <= length:
|
|
|
+ return text
|
|
|
+ out = text[:end_text_pos]
|
|
|
truncate_text = self.add_truncation_text('', truncate)
|
|
|
if truncate_text:
|
|
|
out += truncate_text
|