test_encoding.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. import datetime
  2. import sys
  3. import unittest
  4. from pathlib import Path
  5. from unittest import mock
  6. from urllib.parse import quote_plus
  7. from django.test import SimpleTestCase
  8. from django.utils.encoding import (
  9. DjangoUnicodeDecodeError,
  10. escape_uri_path,
  11. filepath_to_uri,
  12. force_bytes,
  13. force_str,
  14. get_system_encoding,
  15. iri_to_uri,
  16. repercent_broken_unicode,
  17. smart_bytes,
  18. smart_str,
  19. uri_to_iri,
  20. )
  21. from django.utils.functional import SimpleLazyObject
  22. from django.utils.translation import gettext_lazy
  class TestEncodingUtils(SimpleTestCase):
      """
      Exercise force_str/force_bytes, smart_str/smart_bytes,
      get_system_encoding and repercent_broken_unicode.
      """

      def test_force_str_exception(self):
          """
          A __str__ that hands back bytes makes force_str() raise.
          """

          class BytesReturningStr:
              def __str__(self):
                  return b"\xc3\xb6\xc3\xa4\xc3\xbc"

          broken = BytesReturningStr()
          # str() refuses a non-text __str__ result with a TypeError.
          with self.assertRaises(TypeError):
              force_str(broken)

      def test_force_str_lazy(self):
          # A lazy wrapper around "x" must come back as a plain str.
          lazy_value = SimpleLazyObject(lambda: "x")
          forced = force_str(lazy_value)
          self.assertIs(type(forced), str)

      def test_force_str_DjangoUnicodeDecodeError(self):
          # The raised error must carry both the codec message and a
          # description of the offending input.
          expected_message = (
              "'utf-8' codec can't decode byte 0xff in position 0: invalid "
              "start byte. You passed in b'\\xff' (<class 'bytes'>)"
          )
          with self.assertRaisesMessage(DjangoUnicodeDecodeError, expected_message):
              force_str(b"\xff")

      def test_force_bytes_exception(self):
          """
          An exception whose args hold non-ASCII text can be converted
          to bytes by force_bytes().
          """
          text = "This is an exception, voilà"
          error = ValueError(text)
          self.assertEqual(force_bytes(error), text.encode())
          # With the ascii codec and errors="ignore" the accented letter is
          # dropped.
          ascii_only = force_bytes(error, encoding="ascii", errors="ignore")
          self.assertEqual(ascii_only, b"This is an exception, voil")

      def test_force_bytes_strings_only(self):
          # strings_only=True hands a date back unchanged.
          current_date = datetime.date.today()
          converted = force_bytes(current_date, strings_only=True)
          self.assertEqual(converted, current_date)

      def test_force_bytes_encoding(self):
          # Bytes input is re-encoded to the requested codec.
          encoded_message = "This is an exception, voilà".encode()
          ascii_only = force_bytes(encoded_message, encoding="ascii", errors="ignore")
          self.assertEqual(ascii_only, b"This is an exception, voil")

      def test_force_bytes_memory_view(self):
          raw = b"abc"
          converted = force_bytes(memoryview(raw))
          # memoryview(bytes) == bytes, so equality alone cannot prove that a
          # real bytes object came back; check the type first.
          self.assertIs(type(converted), bytes)
          self.assertEqual(converted, raw)

      def test_smart_bytes(self):
          class NonAsciiStr:
              def __str__(self):
                  return "ŠĐĆŽćžšđ"

          # A lazy translation object is returned as the very same object.
          lazy_text = gettext_lazy("x")
          self.assertIs(smart_bytes(lazy_text), lazy_text)
          # Other objects are rendered via str() and encoded.
          encoded = smart_bytes(NonAsciiStr())
          self.assertEqual(
              encoded,
              b"\xc5\xa0\xc4\x90\xc4\x86\xc5\xbd\xc4\x87\xc5\xbe\xc5\xa1\xc4\x91",
          )
          self.assertEqual(smart_bytes(1), b"1")
          self.assertEqual(smart_bytes("foo"), b"foo")

      def test_smart_str(self):
          class NonAsciiStr:
              def __str__(self):
                  return "ŠĐĆŽćžšđ"

          # A lazy translation object is returned as the very same object.
          lazy_text = gettext_lazy("x")
          self.assertIs(smart_str(lazy_text), lazy_text)
          # Other objects are rendered via str().
          rendered = smart_str(NonAsciiStr())
          self.assertEqual(rendered, "\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111")
          self.assertEqual(smart_str(1), "1")
          self.assertEqual(smart_str("foo"), "foo")

      def test_get_default_encoding(self):
          # When locale.getlocale() raises, "ascii" is the expected answer.
          with mock.patch("locale.getlocale", side_effect=Exception):
              detected = get_system_encoding()
          self.assertEqual(detected, "ascii")

      def test_repercent_broken_unicode_recursion_error(self):
          # One invalid byte per permitted stack frame: an implementation that
          # recurses once per byte would exhaust the recursion limit.
          limit = sys.getrecursionlimit()
          payload = b"\xfc" * limit
          try:
              self.assertEqual(repercent_broken_unicode(payload), b"%FC" * limit)
          except RecursionError:
              self.fail("Unexpected RecursionError raised.")
  class TestRFC3987IEncodingUtils(unittest.TestCase):
      """
      Exercise filepath_to_uri, iri_to_uri, uri_to_iri and escape_uri_path.
      """

      def test_filepath_to_uri(self):
          # None is passed straight through.
          self.assertIsNone(filepath_to_uri(None))
          # Backslashes turn into forward slashes and non-ASCII text is
          # percent-encoded.
          cyrillic_uri = filepath_to_uri("upload\\чубака.mp4")
          self.assertEqual(
              cyrillic_uri,
              "upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4",
          )
          # Path objects are accepted too, with either separator.
          slash_uri = filepath_to_uri(Path("upload/test.png"))
          self.assertEqual(slash_uri, "upload/test.png")
          backslash_uri = filepath_to_uri(Path("upload\\test.png"))
          self.assertEqual(backslash_uri, "upload/test.png")

      def test_iri_to_uri(self):
          cases = [
              # Valid UTF-8 sequences get percent-encoded.
              ("red%09rosé#red", "red%09ros%C3%A9#red"),
              ("/blog/for/Jürgen Münster/", "/blog/for/J%C3%BCrgen%20M%C3%BCnster/"),
              (
                  "locations/%s" % quote_plus("Paris & Orléans"),
                  "locations/Paris+%26+Orl%C3%A9ans",
              ),
              # Reserved characters are left unescaped.
              ("%&", "%&"),
              ("red&♥ros%#red", "red&%E2%99%A5ros%#red"),
              (gettext_lazy("red&♥ros%#red"), "red&%E2%99%A5ros%#red"),
          ]
          for source, expected in cases:
              with self.subTest(source):
                  converted = iri_to_uri(source)
                  self.assertEqual(converted, expected)
                  # Converting a second time must not change the result.
                  self.assertEqual(iri_to_uri(iri_to_uri(source)), expected)

      def test_uri_to_iri(self):
          cases = [
              (None, None),
              # Valid UTF-8 sequences get decoded.
              ("/%e2%89%Ab%E2%99%a5%E2%89%aB/", "/≫♥≫/"),
              ("/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93", "/♥♥/?utf8=✓"),
              ("/%41%5a%6B/", "/AZk/"),
              # Reserved and non-URL-valid ASCII characters stay encoded.
              ("/%25%20%02%41%7b/", "/%25%20%02A%7b/"),
              # Broken UTF-8 sequences stay escaped.
              ("/%AAd%AAj%AAa%AAn%AAg%AAo%AA/", "/%AAd%AAj%AAa%AAn%AAg%AAo%AA/"),
              ("/%E2%99%A5%E2%E2%99%A5/", "/♥%E2♥/"),
              ("/%E2%99%A5%E2%99%E2%99%A5/", "/♥%E2%99♥/"),
              ("/%E2%E2%99%A5%E2%99%A5%99/", "/%E2♥♥%99/"),
              (
                  "/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93",
                  "/♥♥/?utf8=%9C%93✓%9C%93",
              ),
          ]
          for source, expected in cases:
              with self.subTest(source):
                  converted = uri_to_iri(source)
                  self.assertEqual(converted, expected)
                  # Converting a second time must not change the result.
                  self.assertEqual(uri_to_iri(uri_to_iri(source)), expected)

      def test_complementarity(self):
          # Each pair is (URI form, IRI form); a round trip through the
          # opposite conversion must give back the starting value.
          cases = [
              (
                  "/blog/for/J%C3%BCrgen%20M%C3%BCnster/",
                  "/blog/for/J\xfcrgen%20M\xfcnster/",
              ),
              ("%&", "%&"),
              ("red&%E2%99%A5ros%#red", "red&♥ros%#red"),
              ("/%E2%99%A5%E2%99%A5/", "/♥♥/"),
              ("/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93", "/♥♥/?utf8=✓"),
              ("/%25%20%02%7b/", "/%25%20%02%7b/"),
              ("/%AAd%AAj%AAa%AAn%AAg%AAo%AA/", "/%AAd%AAj%AAa%AAn%AAg%AAo%AA/"),
              ("/%E2%99%A5%E2%E2%99%A5/", "/♥%E2♥/"),
              ("/%E2%99%A5%E2%99%E2%99%A5/", "/♥%E2%99♥/"),
              ("/%E2%E2%99%A5%E2%99%A5%99/", "/%E2♥♥%99/"),
              (
                  "/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93",
                  "/♥♥/?utf8=%9C%93✓%9C%93",
              ),
          ]
          for uri_form, iri_form in cases:
              with self.subTest(uri_form):
                  uri_round_trip = iri_to_uri(uri_to_iri(uri_form))
                  self.assertEqual(uri_round_trip, uri_form)
                  iri_round_trip = uri_to_iri(iri_to_uri(iri_form))
                  self.assertEqual(iri_round_trip, iri_form)

      def test_escape_uri_path(self):
          # In the expected values ";", "=", "?" and "#" are percent-escaped
          # while ":", "@", "&" and "+" are left as they are.
          cases = [
              (
                  "/;some/=awful/?path/:with/@lots/&of/+awful/chars",
                  "/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars",
              ),
              ("/foo#bar", "/foo%23bar"),
              ("/foo?bar", "/foo%3Fbar"),
          ]
          for raw_path, escaped_path in cases:
              with self.subTest(raw_path):
                  self.assertEqual(escape_uri_path(raw_path), escaped_path)