test_encoding.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. import datetime
  2. import unittest
  3. from urllib.parse import quote_plus
  4. from django.test import SimpleTestCase
  5. from django.utils.encoding import (
  6. DjangoUnicodeDecodeError, escape_uri_path, filepath_to_uri, force_bytes,
  7. force_text, iri_to_uri, smart_text, uri_to_iri,
  8. )
  9. from django.utils.functional import SimpleLazyObject
  class TestEncodingUtils(SimpleTestCase):
      """Tests for force_text(), force_bytes() and smart_text()."""

      def test_force_text_exception(self):
          """
          Broken __str__ actually raises an error.
          """
          class MyString:
              def __str__(self):
                  return b'\xc3\xb6\xc3\xa4\xc3\xbc'

          # str(s) raises a TypeError if the result is not a text type.
          with self.assertRaises(TypeError):
              force_text(MyString())

      def test_force_text_lazy(self):
          """force_text() applied to a SimpleLazyObject returns an exact str."""
          s = SimpleLazyObject(lambda: 'x')
          # assertIs rather than assertTrue: assertTrue(expr, msg) would take
          # ``str`` as the failure message and pass for any truthy type.
          self.assertIs(type(force_text(s)), str)

      def test_force_text_DjangoUnicodeDecodeError(self):
          """Undecodable bytes raise DjangoUnicodeDecodeError with this message."""
          msg = (
              "'utf-8' codec can't decode byte 0xff in position 0: invalid "
              "start byte. You passed in b'\\xff' (<class 'bytes'>)"
          )
          with self.assertRaisesMessage(DjangoUnicodeDecodeError, msg):
              force_text(b'\xff')

      def test_force_bytes_exception(self):
          """
          force_bytes knows how to convert to bytes an exception
          containing non-ASCII characters in its args.
          """
          error_msg = "This is an exception, voilà"
          exc = ValueError(error_msg)
          self.assertEqual(force_bytes(exc), error_msg.encode())
          # With errors='ignore' the non-ASCII character is expected to be dropped.
          self.assertEqual(
              force_bytes(exc, encoding='ascii', errors='ignore'),
              b'This is an exception, voil',
          )

      def test_force_bytes_strings_only(self):
          """With strings_only=True, a date is returned as-is."""
          today = datetime.date.today()
          self.assertEqual(force_bytes(today, strings_only=True), today)

      def test_smart_text(self):
          """smart_text() uses __str__ (even when __bytes__ exists) and handles int/str."""
          class Test:
              def __str__(self):
                  return 'ŠĐĆŽćžšđ'

          class TestU:
              def __str__(self):
                  return 'ŠĐĆŽćžšđ'

              def __bytes__(self):
                  return b'Foo'

          self.assertEqual(smart_text(Test()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')
          self.assertEqual(smart_text(TestU()), '\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111')
          self.assertEqual(smart_text(1), '1')
          self.assertEqual(smart_text('foo'), 'foo')
  class TestRFC3987IEncodingUtils(unittest.TestCase):
      """Tests for filepath_to_uri, iri_to_uri, uri_to_iri and escape_uri_path."""

      def test_filepath_to_uri(self):
          """Backslashes become '/' and non-ASCII characters are percent-encoded."""
          self.assertEqual(filepath_to_uri('upload\\чубака.mp4'), 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')

      def test_iri_to_uri(self):
          """iri_to_uri() produces the expected URI for each case, idempotently."""
          cases = [
              # Valid UTF-8 sequences are encoded.
              ('red%09rosé#red', 'red%09ros%C3%A9#red'),
              ('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'),
              ('locations/%s' % quote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'),

              # Reserved chars remain unescaped.
              ('%&', '%&'),
              ('red&♥ros%#red', 'red&%E2%99%A5ros%#red'),
          ]

          for iri, uri in cases:
              # subTest reports which input failed and lets the remaining
              # cases run instead of stopping at the first failure.
              with self.subTest(iri=iri):
                  self.assertEqual(iri_to_uri(iri), uri)

                  # Test idempotency.
                  self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri)

      def test_uri_to_iri(self):
          """uri_to_iri() produces the expected IRI for each case, idempotently."""
          cases = [
              # Valid UTF-8 sequences are decoded.
              ('/%e2%89%Ab%E2%99%a5%E2%89%aB/', '/≫♥≫/'),
              ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
              ('/%41%5a%6B/', '/AZk/'),
              # Reserved and non-URL valid ASCII chars are not decoded.
              ('/%25%20%02%41%7b/', '/%25%20%02A%7b/'),
              # Broken UTF-8 sequences remain escaped.
              ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
              ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
              ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
              ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
              ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
          ]

          for uri, iri in cases:
              # subTest reports which input failed and lets the remaining
              # cases run instead of stopping at the first failure.
              with self.subTest(uri=uri):
                  self.assertEqual(uri_to_iri(uri), iri)

                  # Test idempotency.
                  self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri)

      def test_complementarity(self):
          """iri_to_uri() and uri_to_iri() round-trip each (uri, iri) pair."""
          cases = [
              ('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen%20M\xfcnster/'),
              ('%&', '%&'),
              ('red&%E2%99%A5ros%#red', 'red&♥ros%#red'),
              ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
              ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
              ('/%25%20%02%7b/', '/%25%20%02%7b/'),
              ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
              ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
              ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
              ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
              ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
          ]

          for uri, iri in cases:
              # subTest reports which pair failed and lets the remaining
              # pairs run instead of stopping at the first failure.
              with self.subTest(uri=uri, iri=iri):
                  self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri)
                  self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri)

      def test_escape_uri_path(self):
          """escape_uri_path() escapes ';', '=', '?' and '#' but not ':', '@', '&', '+'."""
          self.assertEqual(
              escape_uri_path('/;some/=awful/?path/:with/@lots/&of/+awful/chars'),
              '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars'
          )
          self.assertEqual(escape_uri_path('/foo#bar'), '/foo%23bar')
          self.assertEqual(escape_uri_path('/foo?bar'), '/foo%3Fbar')
  115. self.assertEqual(escape_uri_path('/foo?bar'), '/foo%3Fbar')