"""Tests for jslex."""
# encoding: utf-8
# originally from https://bitbucket.org/ned/jslex
from django.test import TestCase
from django.utils.jslex import JsLexer, prepare_js_for_gettext
  6. class JsTokensTest(TestCase):
  7. LEX_CASES = [
  8. # ids
  9. ("a ABC $ _ a123", ["id a", "id ABC", "id $", "id _", "id a123"]),
  10. (r"\u1234 abc\u0020 \u0065_\u0067", [r"id \u1234", r"id abc\u0020", r"id \u0065_\u0067"]),
  11. # numbers
  12. ("123 1.234 0.123e-3 0 1E+40 1e1 .123", ["dnum 123", "dnum 1.234", "dnum 0.123e-3", "dnum 0", "dnum 1E+40", "dnum 1e1", "dnum .123"]),
  13. ("0x1 0xabCD 0XABcd", ["hnum 0x1", "hnum 0xabCD", "hnum 0XABcd"]),
  14. ("010 0377 090", ["onum 010", "onum 0377", "dnum 0", "dnum 90"]),
  15. ("0xa123ghi", ["hnum 0xa123", "id ghi"]),
  16. # keywords
  17. ("function Function FUNCTION", ["keyword function", "id Function", "id FUNCTION"]),
  18. ("const constructor in inherits", ["keyword const", "id constructor", "keyword in", "id inherits"]),
  19. ("true true_enough", ["reserved true", "id true_enough"]),
  20. # strings
  21. (''' 'hello' "hello" ''', ["string 'hello'", 'string "hello"']),
  22. (r""" 'don\'t' "don\"t" '"' "'" '\'' "\"" """,
  23. [r"""string 'don\'t'""", r'''string "don\"t"''', r"""string '"'""", r'''string "'"''', r"""string '\''""", r'''string "\""''']),
  24. (r'"ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\""', [r'string "ƃuıxǝ⅂ ʇdıɹɔsɐʌɐſ\""']),
  25. # comments
  26. ("a//b", ["id a", "linecomment //b"]),
  27. ("/****/a/=2//hello", ["comment /****/", "id a", "punct /=", "dnum 2", "linecomment //hello"]),
  28. ("/*\n * Header\n */\na=1;", ["comment /*\n * Header\n */", "id a", "punct =", "dnum 1", "punct ;"]),
  29. # punctuation
  30. ("a+++b", ["id a", "punct ++", "punct +", "id b"]),
  31. # regex
  32. (r"a=/a*/,1", ["id a", "punct =", "regex /a*/", "punct ,", "dnum 1"]),
  33. (r"a=/a*[^/]+/,1", ["id a", "punct =", "regex /a*[^/]+/", "punct ,", "dnum 1"]),
  34. (r"a=/a*\[^/,1", ["id a", "punct =", r"regex /a*\[^/", "punct ,", "dnum 1"]),
  35. (r"a=/\//,1", ["id a", "punct =", r"regex /\//", "punct ,", "dnum 1"]),
  36. # next two are from http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
  37. ("""for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}""",
  38. ["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
  39. "id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
  40. "punct :", "regex /x:3;x<5;y</g", "punct /", "id i", "punct )", "punct {",
  41. "id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),
  42. ("""for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}""",
  43. ["keyword for", "punct (", "keyword var", "id x", "punct =", "id a", "keyword in",
  44. "id foo", "punct &&", 'string "</x>"', "punct ||", "id mot", "punct ?", "id z",
  45. "punct /", "id x", "punct :", "dnum 3", "punct ;", "id x", "punct <", "dnum 5",
  46. "punct ;", "id y", "punct <", "regex /g/i", "punct )", "punct {",
  47. "id xyz", "punct (", "id x", "punct ++", "punct )", "punct ;", "punct }"]),
  48. # Various "illegal" regexes that are valid according to the std.
  49. (r"""/????/, /++++/, /[----]/ """, ["regex /????/", "punct ,", "regex /++++/", "punct ,", "regex /[----]/"]),
  50. # Stress cases from http://stackoverflow.com/questions/5533925/what-javascript-constructs-does-jslex-incorrectly-lex/5573409#5573409
  51. (r"""/\[/""", [r"""regex /\[/"""]),
  52. (r"""/[i]/""", [r"""regex /[i]/"""]),
  53. (r"""/[\]]/""", [r"""regex /[\]]/"""]),
  54. (r"""/a[\]]/""", [r"""regex /a[\]]/"""]),
  55. (r"""/a[\]]b/""", [r"""regex /a[\]]b/"""]),
  56. (r"""/[\]/]/gi""", [r"""regex /[\]/]/gi"""]),
  57. (r"""/\[[^\]]+\]/gi""", [r"""regex /\[[^\]]+\]/gi"""]),
  58. ("""
  59. rexl.re = {
  60. NAME: /^(?![0-9])(?:\w)+|^"(?:[^"]|"")+"/,
  61. UNQUOTED_LITERAL: /^@(?:(?![0-9])(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
  62. QUOTED_LITERAL: /^'(?:[^']|'')*'/,
  63. NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
  64. SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
  65. };
  66. """,
  67. ["id rexl", "punct .", "id re", "punct =", "punct {",
  68. "id NAME", "punct :", r"""regex /^(?![0-9])(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
  69. "id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?![0-9])(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
  70. "id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
  71. "id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
  72. "id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
  73. "punct }", "punct ;"
  74. ]),
  75. ("""
  76. rexl.re = {
  77. NAME: /^(?![0-9])(?:\w)+|^"(?:[^"]|"")+"/,
  78. UNQUOTED_LITERAL: /^@(?:(?![0-9])(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/,
  79. QUOTED_LITERAL: /^'(?:[^']|'')*'/,
  80. NUMERIC_LITERAL: /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/,
  81. SYMBOL: /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/
  82. };
  83. str = '"';
  84. """,
  85. ["id rexl", "punct .", "id re", "punct =", "punct {",
  86. "id NAME", "punct :", r"""regex /^(?![0-9])(?:\w)+|^"(?:[^"]|"")+"/""", "punct ,",
  87. "id UNQUOTED_LITERAL", "punct :", r"""regex /^@(?:(?![0-9])(?:\w|\:)+|^"(?:[^"]|"")+")\[[^\]]+\]/""", "punct ,",
  88. "id QUOTED_LITERAL", "punct :", r"""regex /^'(?:[^']|'')*'/""", "punct ,",
  89. "id NUMERIC_LITERAL", "punct :", r"""regex /^[0-9]+(?:\.[0-9]*(?:[eE][-+][0-9]+)?)?/""", "punct ,",
  90. "id SYMBOL", "punct :", r"""regex /^(?:==|=|<>|<=|<|>=|>|!~~|!~|~~|~|!==|!=|!~=|!~|!|&|\||\.|\:|,|\(|\)|\[|\]|\{|\}|\?|\:|;|@|\^|\/\+|\/|\*|\+|-)/""",
  91. "punct }", "punct ;",
  92. "id str", "punct =", """string '"'""", "punct ;",
  93. ]),
  94. (r""" this._js = "e.str(\"" + this.value.replace(/\\/g, "\\\\").replace(/"/g, "\\\"") + "\")"; """,
  95. ["keyword this", "punct .", "id _js", "punct =", r'''string "e.str(\""''', "punct +", "keyword this", "punct .",
  96. "id value", "punct .", "id replace", "punct (", r"regex /\\/g", "punct ,", r'string "\\\\"', "punct )",
  97. "punct .", "id replace", "punct (", r'regex /"/g', "punct ,", r'string "\\\""', "punct )", "punct +",
  98. r'string "\")"', "punct ;"]),
  99. ]
  100. def make_function(input, toks):
  101. def test_func(self):
  102. lexer = JsLexer()
  103. result = ["%s %s" % (name, tok) for name, tok in lexer.lex(input) if name != 'ws']
  104. self.assertListEqual(result, toks)
  105. return test_func
  106. for i, (input, toks) in enumerate(JsTokensTest.LEX_CASES):
  107. setattr(JsTokensTest, "test_case_%d" % i, make_function(input, toks))
  108. GETTEXT_CASES = (
  109. (
  110. r"""
  111. a = 1; /* /[0-9]+/ */
  112. b = 0x2a0b / 1; // /[0-9]+/
  113. c = 3;
  114. """,
  115. r"""
  116. a = 1; /* /[0-9]+/ */
  117. b = 0x2a0b / 1; // /[0-9]+/
  118. c = 3;
  119. """
  120. ), (
  121. r"""
  122. a = 1.234e-5;
  123. /*
  124. * /[0-9+/
  125. */
  126. b = .0123;
  127. """,
  128. r"""
  129. a = 1.234e-5;
  130. /*
  131. * /[0-9+/
  132. */
  133. b = .0123;
  134. """
  135. ), (
  136. r"""
  137. x = y / z;
  138. alert(gettext("hello"));
  139. x /= 3;
  140. """,
  141. r"""
  142. x = y / z;
  143. alert(gettext("hello"));
  144. x /= 3;
  145. """
  146. ), (
  147. r"""
  148. s = "Hello \"th/foo/ere\"";
  149. s = 'He\x23llo \'th/foo/ere\'';
  150. s = 'slash quote \", just quote "';
  151. """,
  152. r"""
  153. s = "Hello \"th/foo/ere\"";
  154. s = "He\x23llo \'th/foo/ere\'";
  155. s = "slash quote \", just quote \"";
  156. """
  157. ), (
  158. r"""
  159. s = "Line continuation\
  160. continued /hello/ still the string";/hello/;
  161. """,
  162. r"""
  163. s = "Line continuation\
  164. continued /hello/ still the string";"REGEX";
  165. """
  166. ), (
  167. r"""
  168. var regex = /pattern/;
  169. var regex2 = /matter/gm;
  170. var regex3 = /[*/]+/gm.foo("hey");
  171. """,
  172. r"""
  173. var regex = "REGEX";
  174. var regex2 = "REGEX";
  175. var regex3 = "REGEX".foo("hey");
  176. """
  177. ), (
  178. r"""
  179. for (var x = a in foo && "</x>" || mot ? z:/x:3;x<5;y</g/i) {xyz(x++);}
  180. for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y</g/i) {xyz(x++);}
  181. """,
  182. r"""
  183. for (var x = a in foo && "</x>" || mot ? z:"REGEX"/i) {xyz(x++);}
  184. for (var x = a in foo && "</x>" || mot ? z/x:3;x<5;y<"REGEX") {xyz(x++);}
  185. """
  186. ), (
  187. r"""
  188. \u1234xyz = gettext('Hello there');
  189. """, r"""
  190. Uu1234xyz = gettext("Hello there");
  191. """
  192. )
  193. )
  194. class JsToCForGettextTest(TestCase):
  195. pass
  196. def make_function(js, c):
  197. def test_func(self):
  198. self.assertMultiLineEqual(prepare_js_for_gettext(js), c)
  199. return test_func
  200. for i, pair in enumerate(GETTEXT_CASES):
  201. setattr(JsToCForGettextTest, "test_case_%d" % i, make_function(*pair))