# test_encoding.py (6.6 KB)

  1. import datetime
  2. import unittest
  3. from unittest import mock
  4. from urllib.parse import quote_plus
  5. from django.test import SimpleTestCase
  6. from django.utils.encoding import (
  7. DjangoUnicodeDecodeError, escape_uri_path, filepath_to_uri, force_bytes,
  8. force_str, get_system_encoding, iri_to_uri, smart_bytes, smart_str,
  9. uri_to_iri,
  10. )
  11. from django.utils.functional import SimpleLazyObject
  12. from django.utils.translation import gettext_lazy
  13. class TestEncodingUtils(SimpleTestCase):
  14. def test_force_str_exception(self):
  15. """
  16. Broken __str__ actually raises an error.
  17. """
  18. class MyString:
  19. def __str__(self):
  20. return b'\xc3\xb6\xc3\xa4\xc3\xbc'
  21. # str(s) raises a TypeError if the result is not a text type.
  22. with self.assertRaises(TypeError):
  23. force_str(MyString())
  24. def test_force_str_lazy(self):
  25. s = SimpleLazyObject(lambda: 'x')
  26. self.assertIs(type(force_str(s)), str)
  27. def test_force_str_DjangoUnicodeDecodeError(self):
  28. msg = (
  29. "'utf-8' codec can't decode byte 0xff in position 0: invalid "
  30. "start byte. You passed in b'\\xff' (<class 'bytes'>)"
  31. )
  32. with self.assertRaisesMessage(DjangoUnicodeDecodeError, msg):
  33. force_str(b'\xff')
  34. def test_force_bytes_exception(self):
  35. """
  36. force_bytes knows how to convert to bytes an exception
  37. containing non-ASCII characters in its args.
  38. """
  39. error_msg = "This is an exception, voilà"
  40. exc = ValueError(error_msg)
  41. self.assertEqual(force_bytes(exc), error_msg.encode())
  42. self.assertEqual(force_bytes(exc, encoding='ascii', errors='ignore'), b'This is an exception, voil')
  43. def test_force_bytes_strings_only(self):
  44. today = datetime.date.today()
  45. self.assertEqual(force_bytes(today, strings_only=True), today)
  46. def test_force_bytes_encoding(self):
  47. error_msg = 'This is an exception, voilà'.encode()
  48. result = force_bytes(error_msg, encoding='ascii', errors='ignore')
  49. self.assertEqual(result, b'This is an exception, voil')
  50. def test_force_bytes_memory_view(self):
  51. data = b'abc'
  52. result = force_bytes(memoryview(data))
  53. # Type check is needed because memoryview(bytes) == bytes.
  54. self.assertIs(type(result), bytes)
  55. self.assertEqual(result, data)
  56. def test_smart_bytes(self):
  57. class Test:
  58. def __str__(self):
  59. return 'ŠĐĆŽćžšđ'
  60. lazy_func = gettext_lazy('x')
  61. self.assertIs(smart_bytes(lazy_func), lazy_func)
  62. self.assertEqual(smart_bytes(Test()), b'\xc5\xa0\xc4\x90\xc4\x86\xc5\xbd\xc4\x87\xc5\xbe\xc5\xa1\xc4\x91')
  63. self.assertEqual(smart_bytes(1), b'1')
  64. self.assertEqual(smart_bytes('foo'), b'foo')
  65. def test_smart_str(self):
  66. class Test:
  67. def __str__(self):
  68. return 'ŠĐĆŽćžšđ'
  69. lazy_func = gettext_lazy('x')
  70. self.assertIs(smart_str(lazy_func), lazy_func)
  71. self.assertEqual(smart_str(Test()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')
  72. self.assertEqual(smart_str(1), '1')
  73. self.assertEqual(smart_str('foo'), 'foo')
  74. def test_get_default_encoding(self):
  75. with mock.patch('locale.getdefaultlocale', side_effect=Exception):
  76. self.assertEqual(get_system_encoding(), 'ascii')
  77. class TestRFC3987IEncodingUtils(unittest.TestCase):
  78. def test_filepath_to_uri(self):
  79. self.assertEqual(filepath_to_uri(None), None)
  80. self.assertEqual(filepath_to_uri('upload\\чубака.mp4'), 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
  81. def test_iri_to_uri(self):
  82. cases = [
  83. # Valid UTF-8 sequences are encoded.
  84. ('red%09rosé#red', 'red%09ros%C3%A9#red'),
  85. ('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'),
  86. ('locations/%s' % quote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'),
  87. # Reserved chars remain unescaped.
  88. ('%&', '%&'),
  89. ('red&♥ros%#red', 'red&%E2%99%A5ros%#red'),
  90. (gettext_lazy('red&♥ros%#red'), 'red&%E2%99%A5ros%#red'),
  91. ]
  92. for iri, uri in cases:
  93. self.assertEqual(iri_to_uri(iri), uri)
  94. # Test idempotency.
  95. self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri)
  96. def test_uri_to_iri(self):
  97. cases = [
  98. (None, None),
  99. # Valid UTF-8 sequences are decoded.
  100. ('/%e2%89%Ab%E2%99%a5%E2%89%aB/', '/≫♥≫/'),
  101. ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
  102. ('/%41%5a%6B/', '/AZk/'),
  103. # Reserved and non-URL valid ASCII chars are not decoded.
  104. ('/%25%20%02%41%7b/', '/%25%20%02A%7b/'),
  105. # Broken UTF-8 sequences remain escaped.
  106. ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
  107. ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
  108. ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
  109. ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
  110. ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
  111. ]
  112. for uri, iri in cases:
  113. self.assertEqual(uri_to_iri(uri), iri)
  114. # Test idempotency.
  115. self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri)
  116. def test_complementarity(self):
  117. cases = [
  118. ('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen%20M\xfcnster/'),
  119. ('%&', '%&'),
  120. ('red&%E2%99%A5ros%#red', 'red&♥ros%#red'),
  121. ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
  122. ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
  123. ('/%25%20%02%7b/', '/%25%20%02%7b/'),
  124. ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
  125. ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
  126. ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
  127. ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
  128. ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
  129. ]
  130. for uri, iri in cases:
  131. self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri)
  132. self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri)
  133. def test_escape_uri_path(self):
  134. self.assertEqual(
  135. escape_uri_path('/;some/=awful/?path/:with/@lots/&of/+awful/chars'),
  136. '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars'
  137. )
  138. self.assertEqual(escape_uri_path('/foo#bar'), '/foo%23bar')
  139. self.assertEqual(escape_uri_path('/foo?bar'), '/foo%3Fbar')