test_encoding.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. # -*- encoding: utf-8 -*-
  2. from __future__ import unicode_literals
  3. import unittest
  4. import datetime
  5. from django.utils import six
  6. from django.utils.encoding import (
  7. filepath_to_uri, force_bytes, force_text, escape_uri_path,
  8. iri_to_uri, uri_to_iri,
  9. )
  10. from django.utils.http import urlquote_plus
  11. class TestEncodingUtils(unittest.TestCase):
  12. def test_force_text_exception(self):
  13. """
  14. Check that broken __unicode__/__str__ actually raises an error.
  15. """
  16. class MyString(object):
  17. def __str__(self):
  18. return b'\xc3\xb6\xc3\xa4\xc3\xbc'
  19. __unicode__ = __str__
  20. # str(s) raises a TypeError on python 3 if the result is not a text type.
  21. # python 2 fails when it tries converting from str to unicode (via ASCII).
  22. exception = TypeError if six.PY3 else UnicodeError
  23. self.assertRaises(exception, force_text, MyString())
  24. def test_force_bytes_exception(self):
  25. """
  26. Test that force_bytes knows how to convert to bytes an exception
  27. containing non-ASCII characters in its args.
  28. """
  29. error_msg = "This is an exception, voilà"
  30. exc = ValueError(error_msg)
  31. result = force_bytes(exc)
  32. self.assertEqual(result, error_msg.encode('utf-8'))
  33. def test_force_bytes_strings_only(self):
  34. today = datetime.date.today()
  35. self.assertEqual(force_bytes(today, strings_only=True), today)
  36. def test_escape_uri_path(self):
  37. self.assertEqual(
  38. escape_uri_path('/;some/=awful/?path/:with/@lots/&of/+awful/chars'),
  39. '/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars'
  40. )
  41. self.assertEqual(escape_uri_path('/foo#bar'), '/foo%23bar')
  42. self.assertEqual(escape_uri_path('/foo?bar'), '/foo%3Fbar')
  43. class TestRFC3987IEncodingUtils(unittest.TestCase):
  44. def test_filepath_to_uri(self):
  45. self.assertEqual(filepath_to_uri('upload\\чубака.mp4'),
  46. 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
  47. self.assertEqual(filepath_to_uri('upload\\чубака.mp4'.encode('utf-8')),
  48. 'upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4')
  49. def test_iri_to_uri(self):
  50. cases = [
  51. # Valid UTF-8 sequences are encoded.
  52. ('red%09rosé#red', 'red%09ros%C3%A9#red'),
  53. ('/blog/for/Jürgen Münster/', '/blog/for/J%C3%BCrgen%20M%C3%BCnster/'),
  54. ('locations/%s' % urlquote_plus('Paris & Orléans'), 'locations/Paris+%26+Orl%C3%A9ans'),
  55. # Reserved chars remain unescaped.
  56. ('%&', '%&'),
  57. ('red&♥ros%#red', 'red&%E2%99%A5ros%#red'),
  58. ]
  59. for iri, uri in cases:
  60. self.assertEqual(iri_to_uri(iri), uri)
  61. # Test idempotency.
  62. self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri)
  63. def test_uri_to_iri(self):
  64. cases = [
  65. # Valid UTF-8 sequences are decoded.
  66. ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
  67. ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
  68. # Broken UTF-8 sequences remain escaped.
  69. ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
  70. ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
  71. ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
  72. ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
  73. ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
  74. ]
  75. for uri, iri in cases:
  76. self.assertEqual(uri_to_iri(uri), iri)
  77. # Test idempotency.
  78. self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri)
  79. def test_complementarity(self):
  80. cases = [
  81. ('/blog/for/J%C3%BCrgen%20M%C3%BCnster/', '/blog/for/J\xfcrgen M\xfcnster/'),
  82. ('%&', '%&'),
  83. ('red&%E2%99%A5ros%#red', 'red&♥ros%#red'),
  84. ('/%E2%99%A5%E2%99%A5/', '/♥♥/'),
  85. ('/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93', '/♥♥/?utf8=✓'),
  86. ('/%AAd%AAj%AAa%AAn%AAg%AAo%AA/', '/%AAd%AAj%AAa%AAn%AAg%AAo%AA/'),
  87. ('/%E2%99%A5%E2%E2%99%A5/', '/♥%E2♥/'),
  88. ('/%E2%99%A5%E2%99%E2%99%A5/', '/♥%E2%99♥/'),
  89. ('/%E2%E2%99%A5%E2%99%A5%99/', '/%E2♥♥%99/'),
  90. ('/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93', '/♥♥/?utf8=%9C%93✓%9C%93'),
  91. ]
  92. for uri, iri in cases:
  93. self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri)
  94. self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri)