2
0

test_text.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. # -*- coding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. import json
  4. from django.test import SimpleTestCase
  5. from django.utils import six, text
  6. from django.utils.encoding import force_text
  7. from django.utils.functional import lazy
  8. from django.utils.translation import override
  # Lazy text factory: lazystr(...) wraps force_text() in a lazy proxy whose
  # result type is six.text_type.
  lazystr = lazy(force_text, six.text_type)

  # True when a single non-BMP code point is reported as one character.
  IS_WIDE_BUILD = len('\U0001F4A9') == 1
  11. class TestUtilsText(SimpleTestCase):
  def test_get_text_list(self):
      """get_text_list() joins items with commas plus a final conjunction."""
      # (positional arguments, expected result) under the default language.
      cases = [
          ((['a', 'b', 'c', 'd'],), 'a, b, c or d'),
          ((['a', 'b', 'c'], 'and'), 'a, b and c'),
          ((['a', 'b'], 'and'), 'a and b'),
          ((['a'],), 'a'),
          (([],), ''),
      ]
      for args, expected in cases:
          self.assertEqual(text.get_text_list(*args), expected)

      # With Arabic active, the expected output uses the Arabic comma and
      # conjunction instead of the English ones.
      with override('ar'):
          joined = text.get_text_list(['a', 'b', 'c'])
          self.assertEqual(joined, "a، b أو c")
  def test_smart_split(self):
      """smart_split() splits on whitespace while keeping quoted runs whole."""
      # Each entry pairs an input with the exact tokens it should yield.
      cases = [
          ('This is "a person" test.', ['This', 'is', '"a person"', 'test.']),
          ('This is "a person\'s" test.',
           ['This', 'is', '"a person\'s"', 'test.']),
          ('This is "a person\\"s" test.',
           ['This', 'is', '"a person\\"s"', 'test.']),
          ('"a \'one', ['"a', "'one"]),
          ('all friends\' tests', ['all', 'friends\'', 'tests']),
          ('url search_page words="something else"',
           ['url', 'search_page', 'words="something else"']),
          ("url search_page words='something else'",
           ['url', 'search_page', "words='something else'"]),
          ('url search_page words "something else"',
           ['url', 'search_page', 'words', '"something else"']),
          ('url search_page words-"something else"',
           ['url', 'search_page', 'words-"something else"']),
          ('url search_page words=hello',
           ['url', 'search_page', 'words=hello']),
          # An unterminated quote falls back to plain whitespace splitting.
          ('url search_page words="something else',
           ['url', 'search_page', 'words="something', 'else']),
          ("cut:','|cut:' '", ["cut:','|cut:' '"]),
          (lazystr("a b c d"),  # Test for #20231
           ['a', 'b', 'c', 'd']),
      ]
      for source, tokens in cases:
          pieces = list(text.smart_split(source))
          self.assertEqual(pieces, tokens)
  def test_truncate_chars(self):
      """Truncator.chars() caps text at a character count.

      The expected values show that the truncation text counts towards the
      limit and that combining characters are handled as part of the
      character they modify.
      """
      truncator = text.Truncator(
          'The quick brown fox jumped over the lazy dog.'
      )
      # These three assertions used to end with a stray trailing comma,
      # which turned each statement into a discarded one-element tuple.
      self.assertEqual('The quick brown fox jumped over the lazy dog.',
                       truncator.chars(100))
      self.assertEqual('The quick brown fox ...',
                       truncator.chars(23))
      self.assertEqual('The quick brown fo.....',
                       truncator.chars(23, '.....'))

      # Ensure that we normalize our unicode data first
      nfc = text.Truncator('o\xfco\xfco\xfco\xfc')
      nfd = text.Truncator('ou\u0308ou\u0308ou\u0308ou\u0308')
      self.assertEqual('oüoüoüoü', nfc.chars(8))
      self.assertEqual('oüoüoüoü', nfd.chars(8))
      self.assertEqual('oü...', nfc.chars(5))
      self.assertEqual('oü...', nfd.chars(5))

      # Ensure the final length is calculated correctly when there are
      # combining characters with no precomposed form, and that combining
      # characters are not split up.
      truncator = text.Truncator('-B\u030AB\u030A----8')
      self.assertEqual('-B\u030A...', truncator.chars(5))
      self.assertEqual('-B\u030AB\u030A-...', truncator.chars(7))
      self.assertEqual('-B\u030AB\u030A----8', truncator.chars(8))

      # Ensure the length of the end text is correctly calculated when it
      # contains combining characters with no precomposed form.
      truncator = text.Truncator('-----')
      self.assertEqual('---B\u030A', truncator.chars(4, 'B\u030A'))
      self.assertEqual('-----', truncator.chars(5, 'B\u030A'))

      # Make a best effort to shorten to the desired length, but requesting
      # a length shorter than the ellipsis shouldn't break
      self.assertEqual('...', text.Truncator('asdf').chars(1))
  def test_truncate_words(self):
      """Truncator.words() keeps the first N words and appends a suffix."""
      truncator = text.Truncator(
          'The quick brown fox jumped over the lazy dog.'
      )
      # (arguments to words(), expected result)
      expectations = [
          # Asking for more words than the text has leaves it unchanged.
          ((10,), 'The quick brown fox jumped over the lazy dog.'),
          ((4,), 'The quick brown fox...'),
          ((4, '[snip]'), 'The quick brown fox[snip]'),
      ]
      for args, expected in expectations:
          self.assertEqual(expected, truncator.words(*args))
  def test_truncate_html_words(self):
      """Truncator.words(html=True) truncates by word count and, per the
      expected values, closes any tags left open at the cut."""
      paragraph = (
          '<p id="par"><strong><em>The quick brown fox jumped over the lazy '
          'dog.</em></strong></p>'
      )
      truncator = text.Truncator(paragraph)
      # More words requested than present: the markup comes back unchanged.
      self.assertEqual(paragraph, truncator.words(10, html=True))

      # Four words with the default, a custom and an empty truncation text.
      four_word_cases = [
          ((4,),
           '<p id="par"><strong><em>The quick brown fox...</em>'
           '</strong></p>'),
          ((4, '....'),
           '<p id="par"><strong><em>The quick brown fox....</em>'
           '</strong></p>'),
          ((4, ''),
           '<p id="par"><strong><em>The quick brown fox</em>'
           '</strong></p>'),
      ]
      for args, expected in four_word_cases:
          self.assertEqual(expected, truncator.words(*args, html=True))

      # (markup, expected) pairs, all truncated to three words with '...'.
      three_word_cases = [
          # Test with new line inside tag
          (
              '<p>The quick <a href="xyz.html"\n'
              'id="mylink">brown fox</a> jumped over the lazy dog.</p>',
              '<p>The quick <a href="xyz.html"\n'
              'id="mylink">brown...</a></p>',
          ),
          # Test self-closing tags
          (
              '<br/>The <hr />quick brown fox jumped over the lazy dog.',
              '<br/>The <hr />quick brown...',
          ),
          (
              '<br>The <hr/>quick <em>brown fox</em> '
              'jumped over the lazy dog.',
              '<br>The <hr/>quick <em>brown...</em>',
          ),
          # Test html entities
          (
              '<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo est&aacute;?</i>',
              '<i>Buenos d&iacute;as! &#x00bf;C&oacute;mo...</i>',
          ),
          (
              '<p>I &lt;3 python, what about you?</p>',
              '<p>I &lt;3 python...</p>',
          ),
      ]
      for markup, expected in three_word_cases:
          shortened = text.Truncator(markup).words(3, '...', html=True)
          self.assertEqual(expected, shortened)
  def test_wrap(self):
      """wrap() folds text at the given width without splitting words."""
      digits = '1234 67 9'
      width_cases = [
          (100, '1234 67 9'),
          (9, '1234 67 9'),
          (8, '1234 67\n9'),
      ]
      for width, wrapped in width_cases:
          self.assertEqual(text.wrap(digits, width), wrapped)

      # An existing newline is kept and the following line wraps on its own.
      self.assertEqual(
          text.wrap('short\na long line', 7),
          'short\na long\nline',
      )
      # A token longer than the width is left in one piece.
      self.assertEqual(
          text.wrap('do-not-break-long-words please? ok', 8),
          'do-not-break-long-words\nplease?\nok',
      )

      unbreakable = 'l%sng' % ('o' * 20)
      self.assertEqual(text.wrap(unbreakable, 20), unbreakable)
      sentence = 'a %s word' % unbreakable
      self.assertEqual(text.wrap(sentence, 10), 'a\n%s\nword' % unbreakable)
  def test_normalize_newlines(self):
      """normalize_newlines() maps \\r\\n and bare \\r to \\n."""
      # (raw input, expected normalized output)
      cases = (
          ("abc\ndef\rghi\r\n", "abc\ndef\nghi\n"),
          ("\n\r\r\n\r", "\n\n\n\n"),
          ("abcdefghi", "abcdefghi"),
          ("", ""),
      )
      for raw, normalized in cases:
          self.assertEqual(text.normalize_newlines(raw), normalized)
  def test_normalize_newlines_bytes(self):
      """normalize_newlines() accepts bytes and returns text."""
      raw = b"abc\ndef\rghi\r\n"
      result = text.normalize_newlines(raw)
      self.assertEqual(result, "abc\ndef\nghi\n")
      # The result must be a text string even though the input was bytes.
      self.assertIsInstance(result, six.text_type)
  def test_slugify(self):
      """slugify() lowercases and hyphenates, with optional unicode output."""
      # (input, expected slug, value passed as allow_unicode)
      cases = (
          ('Hello, World!', 'hello-world', False),
          ('spam & eggs', 'spam-eggs', False),
          ('spam & ıçüş', 'spam-ıçüş', True),
          ('foo ıç bar', 'foo-ıç-bar', True),
          (' foo ıç bar', 'foo-ıç-bar', True),
          ('你好', '你好', True),
      )
      for raw, slug, allow_unicode in cases:
          result = text.slugify(raw, allow_unicode=allow_unicode)
          self.assertEqual(result, slug)
  def test_unescape_entities(self):
      """unescape_entities() decodes named and numeric character references."""
      # (escaped input, expected output); a bare '&' passes through unchanged.
      cases = (
          ('', ''),
          ('foo', 'foo'),
          ('&amp;', '&'),
          ('&#x26;', '&'),
          ('&#38;', '&'),
          ('foo &amp; bar', 'foo & bar'),
          ('foo & bar', 'foo & bar'),
      )
      for escaped, plain in cases:
          self.assertEqual(text.unescape_entities(escaped), plain)
  def test_get_valid_filename(self):
      """get_valid_filename() drops the punctuation in this sample name."""
      raw_name = "^&'@{}[],$=!-#()%+~_123.txt"
      cleaned = text.get_valid_filename(raw_name)
      # Only the dash, underscore, digits and the extension remain.
      self.assertEqual(cleaned, "-_123.txt")
  def test_compress_sequence(self):
      """compress_sequence() output is shorter than the uncompressed input."""
      data = [{'key': i} for i in range(10)]
      # Encode each JSON fragment to bytes; the chunks are joined as bytes
      # below to measure their length.
      seq = [s.encode('utf-8') for s in json.JSONEncoder().iterencode(data)]
      actual_length = len(b''.join(seq))
      out = text.compress_sequence(seq)
      compressed_length = len(b''.join(out))
      # assertLess reports both lengths on failure, unlike assertTrue(a < b).
      self.assertLess(compressed_length, actual_length)