Ver Fonte

Upgraded included simplejson to 2.0.7.

Also changed importing logic to prefer a system-installed version of
simplejson (unless it's an earlier version that does not contain the C
speedups), then the json module from Python 2.6, then the version
shipped with Django.

Fixed #9266.

git-svn-id: http://code.djangoproject.com/svn/django/trunk@9707 bcc190cf-cafb-0310-a4f2-bffc1f526a37
Malcolm Tredinnick há 16 anos atrás
pai
commit
a9c2f033cd

+ 290 - 323
django/utils/simplejson/__init__.py

@@ -1,376 +1,343 @@
-r"""
-A simple, fast, extensible JSON encoder and decoder
-
-JSON (JavaScript Object Notation) <http://json.org> is a subset of
+r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of
 JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data
 interchange format.
 
-simplejson exposes an API familiar to uses of the standard library
-marshal and pickle modules.
+:mod:`simplejson` exposes an API familiar to users of the standard library
+:mod:`marshal` and :mod:`pickle` modules. It is the externally maintained
+version of the :mod:`json` library contained in Python 2.6, but maintains
+compatibility with Python 2.4 and Python 2.5 and (currently) has
+significant performance advantages, even without using the optional C
+extension for speedups.
 
 Encoding basic Python object hierarchies::
-    
-    >>> import simplejson
-    >>> simplejson.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
+
+    >>> import simplejson as json
+    >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])
     '["foo", {"bar": ["baz", null, 1.0, 2]}]'
-    >>> print simplejson.dumps("\"foo\bar")
+    >>> print json.dumps("\"foo\bar")
     "\"foo\bar"
-    >>> print simplejson.dumps(u'\u1234')
+    >>> print json.dumps(u'\u1234')
     "\u1234"
-    >>> print simplejson.dumps('\\')
+    >>> print json.dumps('\\')
     "\\"
-    >>> print simplejson.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
+    >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)
     {"a": 0, "b": 0, "c": 0}
     >>> from StringIO import StringIO
     >>> io = StringIO()
-    >>> simplejson.dump(['streaming API'], io)
+    >>> json.dump(['streaming API'], io)
     >>> io.getvalue()
     '["streaming API"]'
 
 Compact encoding::
 
-    >>> import simplejson
-    >>> simplejson.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
+    >>> import simplejson as json
+    >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))
     '[1,2,3,{"4":5,"6":7}]'
 
 Pretty printing::
 
-    >>> import simplejson
-    >>> print simplejson.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
+    >>> import simplejson as json
+    >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4)
+    >>> print '\n'.join([l.rstrip() for l in  s.splitlines()])
     {
-        "4": 5, 
+        "4": 5,
         "6": 7
     }
 
 Decoding JSON::
-    
-    >>> import simplejson
-    >>> simplejson.loads('["foo", {"bar":["baz", null, 1.0, 2]}]')
-    [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
-    >>> simplejson.loads('"\\"foo\\bar"')
-    u'"foo\x08ar'
+
+    >>> import simplejson as json
+    >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}]
+    >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj
+    True
+    >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar'
+    True
     >>> from StringIO import StringIO
     >>> io = StringIO('["streaming API"]')
-    >>> simplejson.load(io)
-    [u'streaming API']
+    >>> json.load(io)[0] == 'streaming API'
+    True
 
 Specializing JSON object decoding::
 
-    >>> import simplejson
+    >>> import simplejson as json
     >>> def as_complex(dct):
     ...     if '__complex__' in dct:
     ...         return complex(dct['real'], dct['imag'])
     ...     return dct
-    ... 
-    >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}',
+    ...
+    >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}',
     ...     object_hook=as_complex)
     (1+2j)
     >>> import decimal
-    >>> simplejson.loads('1.1', parse_float=decimal.Decimal)
-    Decimal("1.1")
-
-Extending JSONEncoder::
-    
-    >>> import simplejson
-    >>> class ComplexEncoder(simplejson.JSONEncoder):
-    ...     def default(self, obj):
-    ...         if isinstance(obj, complex):
-    ...             return [obj.real, obj.imag]
-    ...         return simplejson.JSONEncoder.default(self, obj)
-    ... 
-    >>> dumps(2 + 1j, cls=ComplexEncoder)
+    >>> json.loads('1.1', parse_float=decimal.Decimal) == decimal.Decimal('1.1')
+    True
+
+Specializing JSON object encoding::
+
+    >>> import simplejson as json
+    >>> def encode_complex(obj):
+    ...     if isinstance(obj, complex):
+    ...         return [obj.real, obj.imag]
+    ...     raise TypeError("%r is not JSON serializable" % (obj,))
+    ...
+    >>> json.dumps(2 + 1j, default=encode_complex)
     '[2.0, 1.0]'
-    >>> ComplexEncoder().encode(2 + 1j)
+    >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j)
     '[2.0, 1.0]'
-    >>> list(ComplexEncoder().iterencode(2 + 1j))
-    ['[', '2.0', ', ', '1.0', ']']
-    
+    >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j))
+    '[2.0, 1.0]'
+
+
+Using simplejson.tool from the shell to validate and pretty-print::
 
-Using simplejson from the shell to validate and
-pretty-print::
-    
     $ echo '{"json":"obj"}' | python -msimplejson.tool
     {
         "json": "obj"
     }
     $ echo '{ 1.2:3.4}' | python -msimplejson.tool
     Expecting property name: line 1 column 2 (char 2)
-
-Note that the JSON produced by this module's default settings
-is a subset of YAML, so it may be used as a serializer for that as well.
 """
-__version__ = '1.9.2'
-__all__ = [
-    'dump', 'dumps', 'load', 'loads',
-    'JSONDecoder', 'JSONEncoder',
-]
-
-if __name__ == '__main__':
-    import warnings
-    warnings.warn('python -msimplejson is deprecated, use python -msiplejson.tool', DeprecationWarning)
+
+# Django modification: try to use the system version first, provided it is
+# either not an earlier version or has the C speedups in place. Otherwise,
+# fall back to our local copy.
+
+__version__ = '2.0.7'
+
+use_system_version = False
+try:
+    # The system-installed version has priority providing it is either not an
+    # earlier version or it contains the C speedups.
+    import simplejson
+    if (simplejson.__version__.split('.') >= __version__.split('.') or
+            hasattr(simplejson, '_speedups')):
+        from simplejson import *
+        use_system_version = True
+except ImportError:
+    pass
+
+if not use_system_version:
+    try:
+        from json import *      # Python 2.6 preferred over local copy.
+        use_system_version = True
+    except ImportError:
+        pass
+
+# If all else fails, we have a bundled version that can be used.
+if not use_system_version:
+    __all__ = [
+        'dump', 'dumps', 'load', 'loads',
+        'JSONDecoder', 'JSONEncoder',
+    ]
+
     from django.utils.simplejson.decoder import JSONDecoder
     from django.utils.simplejson.encoder import JSONEncoder
-else:
-    from decoder import JSONDecoder
-    from encoder import JSONEncoder
-
-_default_encoder = JSONEncoder(
-    skipkeys=False,
-    ensure_ascii=True,
-    check_circular=True,
-    allow_nan=True,
-    indent=None,
-    separators=None,
-    encoding='utf-8',
-    default=None,
-)
-
-def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
-        allow_nan=True, cls=None, indent=None, separators=None,
-        encoding='utf-8', default=None, **kw):
-    """
-    Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
-    ``.write()``-supporting file-like object).
-
-    If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
-    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) 
-    will be skipped instead of raising a ``TypeError``.
-
-    If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp``
-    may be ``unicode`` instances, subject to normal Python ``str`` to
-    ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
-    understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
-    to cause an error.
-
-    If ``check_circular`` is ``False``, then the circular reference check
-    for container types will be skipped and a circular reference will
-    result in an ``OverflowError`` (or worse).
-
-    If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
-    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
-    in strict compliance of the JSON specification, instead of using the
-    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
-
-    If ``indent`` is a non-negative integer, then JSON array elements and object
-    members will be pretty-printed with that indent level. An indent level
-    of 0 will only insert newlines. ``None`` is the most compact representation.
-
-    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
-    then it will be used instead of the default ``(', ', ': ')`` separators.
-    ``(',', ':')`` is the most compact JSON representation.
-
-    ``encoding`` is the character encoding for str instances, default is UTF-8.
-
-    ``default(obj)`` is a function that should return a serializable version
-    of obj or raise TypeError. The default simply raises TypeError.
-
-    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
-    ``.default()`` method to serialize additional types), specify it with
-    the ``cls`` kwarg.
-    """
-    # cached encoder
-    if (skipkeys is False and ensure_ascii is True and
-        check_circular is True and allow_nan is True and
-        cls is None and indent is None and separators is None and
-        encoding == 'utf-8' and default is None and not kw):
-        iterable = _default_encoder.iterencode(obj)
-    else:
+
+    _default_encoder = JSONEncoder(
+        skipkeys=False,
+        ensure_ascii=True,
+        check_circular=True,
+        allow_nan=True,
+        indent=None,
+        separators=None,
+        encoding='utf-8',
+        default=None,
+    )
+
+    def dump(obj, fp, skipkeys=False, ensure_ascii=True, check_circular=True,
+            allow_nan=True, cls=None, indent=None, separators=None,
+            encoding='utf-8', default=None, **kw):
+        """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a
+        ``.write()``-supporting file-like object).
+
+        If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
+        (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+        will be skipped instead of raising a ``TypeError``.
+
+        If ``ensure_ascii`` is ``False``, then some chunks written to ``fp``
+        may be ``unicode`` instances, subject to normal Python ``str`` to
+        ``unicode`` coercion rules. Unless ``fp.write()`` explicitly
+        understands ``unicode`` (as in ``codecs.getwriter()``) this is likely
+        to cause an error.
+
+        If ``check_circular`` is ``False``, then the circular reference check
+        for container types will be skipped and a circular reference will
+        result in an ``OverflowError`` (or worse).
+
+        If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
+        serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``)
+        in strict compliance of the JSON specification, instead of using the
+        JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+        If ``indent`` is a non-negative integer, then JSON array elements and object
+        members will be pretty-printed with that indent level. An indent level
+        of 0 will only insert newlines. ``None`` is the most compact representation.
+
+        If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+        then it will be used instead of the default ``(', ', ': ')`` separators.
+        ``(',', ':')`` is the most compact JSON representation.
+
+        ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+        ``default(obj)`` is a function that should return a serializable version
+        of obj or raise TypeError. The default simply raises TypeError.
+
+        To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+        ``.default()`` method to serialize additional types), specify it with
+        the ``cls`` kwarg.
+
+        """
+        # cached encoder
+        if (skipkeys is False and ensure_ascii is True and
+            check_circular is True and allow_nan is True and
+            cls is None and indent is None and separators is None and
+            encoding == 'utf-8' and default is None and not kw):
+            iterable = _default_encoder.iterencode(obj)
+        else:
+            if cls is None:
+                cls = JSONEncoder
+            iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+                check_circular=check_circular, allow_nan=allow_nan, indent=indent,
+                separators=separators, encoding=encoding,
+                default=default, **kw).iterencode(obj)
+        # could accelerate with writelines in some versions of Python, at
+        # a debuggability cost
+        for chunk in iterable:
+            fp.write(chunk)
+
+
+    def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
+            allow_nan=True, cls=None, indent=None, separators=None,
+            encoding='utf-8', default=None, **kw):
+        """Serialize ``obj`` to a JSON formatted ``str``.
+
+        If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
+        (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``)
+        will be skipped instead of raising a ``TypeError``.
+
+        If ``ensure_ascii`` is ``False``, then the return value will be a
+        ``unicode`` instance subject to normal Python ``str`` to ``unicode``
+        coercion rules instead of being escaped to an ASCII ``str``.
+
+        If ``check_circular`` is ``False``, then the circular reference check
+        for container types will be skipped and a circular reference will
+        result in an ``OverflowError`` (or worse).
+
+        If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
+        serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
+        strict compliance of the JSON specification, instead of using the
+        JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
+
+        If ``indent`` is a non-negative integer, then JSON array elements and
+        object members will be pretty-printed with that indent level. An indent
+        level of 0 will only insert newlines. ``None`` is the most compact
+        representation.
+
+        If ``separators`` is an ``(item_separator, dict_separator)`` tuple
+        then it will be used instead of the default ``(', ', ': ')`` separators.
+        ``(',', ':')`` is the most compact JSON representation.
+
+        ``encoding`` is the character encoding for str instances, default is UTF-8.
+
+        ``default(obj)`` is a function that should return a serializable version
+        of obj or raise TypeError. The default simply raises TypeError.
+
+        To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
+        ``.default()`` method to serialize additional types), specify it with
+        the ``cls`` kwarg.
+
+        """
+        # cached encoder
+        if (skipkeys is False and ensure_ascii is True and
+            check_circular is True and allow_nan is True and
+            cls is None and indent is None and separators is None and
+            encoding == 'utf-8' and default is None and not kw):
+            return _default_encoder.encode(obj)
         if cls is None:
             cls = JSONEncoder
-        iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii,
+        return cls(
+            skipkeys=skipkeys, ensure_ascii=ensure_ascii,
             check_circular=check_circular, allow_nan=allow_nan, indent=indent,
-            separators=separators, encoding=encoding,
-            default=default, **kw).iterencode(obj)
-    # could accelerate with writelines in some versions of Python, at
-    # a debuggability cost
-    for chunk in iterable:
-        fp.write(chunk)
-
-
-def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True,
-        allow_nan=True, cls=None, indent=None, separators=None,
-        encoding='utf-8', default=None, **kw):
-    """
-    Serialize ``obj`` to a JSON formatted ``str``.
-
-    If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types
-    (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) 
-    will be skipped instead of raising a ``TypeError``.
-
-    If ``ensure_ascii`` is ``False``, then the return value will be a
-    ``unicode`` instance subject to normal Python ``str`` to ``unicode``
-    coercion rules instead of being escaped to an ASCII ``str``.
-
-    If ``check_circular`` is ``False``, then the circular reference check
-    for container types will be skipped and a circular reference will
-    result in an ``OverflowError`` (or worse).
-
-    If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to
-    serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in
-    strict compliance of the JSON specification, instead of using the
-    JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``).
-
-    If ``indent`` is a non-negative integer, then JSON array elements and
-    object members will be pretty-printed with that indent level. An indent
-    level of 0 will only insert newlines. ``None`` is the most compact
-    representation.
-
-    If ``separators`` is an ``(item_separator, dict_separator)`` tuple
-    then it will be used instead of the default ``(', ', ': ')`` separators.
-    ``(',', ':')`` is the most compact JSON representation.
-
-    ``encoding`` is the character encoding for str instances, default is UTF-8.
-
-    ``default(obj)`` is a function that should return a serializable version
-    of obj or raise TypeError. The default simply raises TypeError.
-
-    To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the
-    ``.default()`` method to serialize additional types), specify it with
-    the ``cls`` kwarg.
-    """
-    # cached encoder
-    if (skipkeys is False and ensure_ascii is True and
-        check_circular is True and allow_nan is True and
-        cls is None and indent is None and separators is None and
-        encoding == 'utf-8' and default is None and not kw):
-        return _default_encoder.encode(obj)
-    if cls is None:
-        cls = JSONEncoder
-    return cls(
-        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
-        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
-        separators=separators, encoding=encoding, default=default,
-        **kw).encode(obj)
-
-
-_default_decoder = JSONDecoder(encoding=None, object_hook=None)
-
-
-def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
-        parse_int=None, parse_constant=None, **kw):
-    """
-    Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
-    a JSON document) to a Python object.
-
-    If the contents of ``fp`` is encoded with an ASCII based encoding other
-    than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
-    be specified. Encodings that are not ASCII based (such as UCS-2) are
-    not allowed, and should be wrapped with
-    ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode``
-    object and passed to ``loads()``
-
-    ``object_hook`` is an optional function that will be called with the
-    result of any object literal decode (a ``dict``). The return value of
-    ``object_hook`` will be used instead of the ``dict``. This feature
-    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
-    
-    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
-    kwarg.
-    """
-    return loads(fp.read(),
-        encoding=encoding, cls=cls, object_hook=object_hook,
-        parse_float=parse_float, parse_int=parse_int,
-        parse_constant=parse_constant, **kw)
-
-
-def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
-        parse_int=None, parse_constant=None, **kw):
-    """
-    Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
-    document) to a Python object.
-
-    If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
-    other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
-    must be specified. Encodings that are not ASCII based (such as UCS-2)
-    are not allowed and should be decoded to ``unicode`` first.
-
-    ``object_hook`` is an optional function that will be called with the
-    result of any object literal decode (a ``dict``). The return value of
-    ``object_hook`` will be used instead of the ``dict``. This feature
-    can be used to implement custom decoders (e.g. JSON-RPC class hinting).
-
-    ``parse_float``, if specified, will be called with the string
-    of every JSON float to be decoded. By default this is equivalent to
-    float(num_str). This can be used to use another datatype or parser
-    for JSON floats (e.g. decimal.Decimal).
-
-    ``parse_int``, if specified, will be called with the string
-    of every JSON int to be decoded. By default this is equivalent to
-    int(num_str). This can be used to use another datatype or parser
-    for JSON integers (e.g. float).
-
-    ``parse_constant``, if specified, will be called with one of the
-    following strings: -Infinity, Infinity, NaN, null, true, false.
-    This can be used to raise an exception if invalid JSON numbers
-    are encountered.
-
-    To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
-    kwarg.
-    """
-    if (cls is None and encoding is None and object_hook is None and
-            parse_int is None and parse_float is None and
-            parse_constant is None and not kw):
-        return _default_decoder.decode(s)
-    if cls is None:
-        cls = JSONDecoder
-    if object_hook is not None:
-        kw['object_hook'] = object_hook
-    if parse_float is not None:
-        kw['parse_float'] = parse_float
-    if parse_int is not None:
-        kw['parse_int'] = parse_int
-    if parse_constant is not None:
-        kw['parse_constant'] = parse_constant
-    return cls(encoding=encoding, **kw).decode(s)
-
-
-#
-# Compatibility cruft from other libraries
-#
-
-
-def decode(s):
-    """
-    demjson, python-cjson API compatibility hook. Use loads(s) instead.
-    """
-    import warnings
-    warnings.warn("simplejson.loads(s) should be used instead of decode(s)",
-        DeprecationWarning)
-    return loads(s)
-
-
-def encode(obj):
-    """
-    demjson, python-cjson compatibility hook. Use dumps(s) instead.
-    """
-    import warnings
-    warnings.warn("simplejson.dumps(s) should be used instead of encode(s)",
-        DeprecationWarning)
-    return dumps(obj)
-
-
-def read(s):
-    """
-    jsonlib, JsonUtils, python-json, json-py API compatibility hook.
-    Use loads(s) instead.
-    """
-    import warnings
-    warnings.warn("simplejson.loads(s) should be used instead of read(s)",
-        DeprecationWarning)
-    return loads(s)
-
-
-def write(obj):
-    """
-    jsonlib, JsonUtils, python-json, json-py API compatibility hook.
-    Use dumps(s) instead.
-    """
-    import warnings
-    warnings.warn("simplejson.dumps(s) should be used instead of write(s)",
-        DeprecationWarning)
-    return dumps(obj)
-
-
-if __name__ == '__main__':
-    import simplejson.tool
-    simplejson.tool.main()
+            separators=separators, encoding=encoding, default=default,
+            **kw).encode(obj)
+
+
+    _default_decoder = JSONDecoder(encoding=None, object_hook=None)
+
+
+    def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None,
+            parse_int=None, parse_constant=None, **kw):
+        """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
+        a JSON document) to a Python object.
+
+        If the contents of ``fp`` is encoded with an ASCII based encoding other
+        than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must
+        be specified. Encodings that are not ASCII based (such as UCS-2) are
+        not allowed, and should be wrapped with
+        ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode``
+        object and passed to ``loads()``
+
+        ``object_hook`` is an optional function that will be called with the
+        result of any object literal decode (a ``dict``). The return value of
+        ``object_hook`` will be used instead of the ``dict``. This feature
+        can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+        To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+        kwarg.
+
+        """
+        return loads(fp.read(),
+            encoding=encoding, cls=cls, object_hook=object_hook,
+            parse_float=parse_float, parse_int=parse_int,
+            parse_constant=parse_constant, **kw)
+
+
+    def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None,
+            parse_int=None, parse_constant=None, **kw):
+        """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON
+        document) to a Python object.
+
+        If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding
+        other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name
+        must be specified. Encodings that are not ASCII based (such as UCS-2)
+        are not allowed and should be decoded to ``unicode`` first.
+
+        ``object_hook`` is an optional function that will be called with the
+        result of any object literal decode (a ``dict``). The return value of
+        ``object_hook`` will be used instead of the ``dict``. This feature
+        can be used to implement custom decoders (e.g. JSON-RPC class hinting).
+
+        ``parse_float``, if specified, will be called with the string
+        of every JSON float to be decoded. By default this is equivalent to
+        float(num_str). This can be used to use another datatype or parser
+        for JSON floats (e.g. decimal.Decimal).
+
+        ``parse_int``, if specified, will be called with the string
+        of every JSON int to be decoded. By default this is equivalent to
+        int(num_str). This can be used to use another datatype or parser
+        for JSON integers (e.g. float).
+
+        ``parse_constant``, if specified, will be called with one of the
+        following strings: -Infinity, Infinity, NaN, null, true, false.
+        This can be used to raise an exception if invalid JSON numbers
+        are encountered.
+
+        To use a custom ``JSONDecoder`` subclass, specify it with the ``cls``
+        kwarg.
+
+        """
+        if (cls is None and encoding is None and object_hook is None and
+                parse_int is None and parse_float is None and
+                parse_constant is None and not kw):
+            return _default_decoder.decode(s)
+        if cls is None:
+            cls = JSONDecoder
+        if object_hook is not None:
+            kw['object_hook'] = object_hook
+        if parse_float is not None:
+            kw['parse_float'] = parse_float
+        if parse_int is not None:
+            kw['parse_int'] = parse_int
+        if parse_constant is not None:
+            kw['parse_constant'] = parse_constant
+        return cls(encoding=encoding, **kw).decode(s)

+ 137 - 135
django/utils/simplejson/decoder.py

@@ -1,20 +1,17 @@
-"""
-Implementation of JSONDecoder
+"""Implementation of JSONDecoder
 """
 import re
 import sys
+import struct
+
+from django.utils.simplejson.scanner import make_scanner
+c_scanstring = None
 
-from django.utils.simplejson.scanner import Scanner, pattern
-try:
-    from django.utils.simplejson._speedups import scanstring as c_scanstring
-except ImportError:
-    pass
+__all__ = ['JSONDecoder']
 
 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
 
 def _floatconstants():
-    import struct
-    import sys
     _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
     if sys.byteorder != 'big':
         _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
@@ -34,6 +31,7 @@ def linecol(doc, pos):
 
 
 def errmsg(msg, doc, pos, end=None):
+    # Note that this function is called from _speedups
     lineno, colno = linecol(doc, pos)
     if end is None:
         return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
@@ -46,35 +44,8 @@ _CONSTANTS = {
     '-Infinity': NegInf,
     'Infinity': PosInf,
     'NaN': NaN,
-    'true': True,
-    'false': False,
-    'null': None,
 }
 
-def JSONConstant(match, context, c=_CONSTANTS):
-    s = match.group(0)
-    fn = getattr(context, 'parse_constant', None)
-    if fn is None:
-        rval = c[s]
-    else:
-        rval = fn(s)
-    return rval, None
-pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
-
-
-def JSONNumber(match, context):
-    match = JSONNumber.regex.match(match.string, *match.span())
-    integer, frac, exp = match.groups()
-    if frac or exp:
-        fn = getattr(context, 'parse_float', None) or float
-        res = fn(integer + (frac or '') + (exp or ''))
-    else:
-        fn = getattr(context, 'parse_int', None) or int
-        res = fn(integer)
-    return res, None
-pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
-
-
 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
 BACKSLASH = {
     '"': u'"', '\\': u'\\', '/': u'/',
@@ -84,6 +55,14 @@ BACKSLASH = {
 DEFAULT_ENCODING = "utf-8"
 
 def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
+    """Scan the string s for a JSON string. End is the index of the
+    character in s after the quote that started the JSON string.
+    Unescapes all valid JSON string escape sequences and raises ValueError
+    on attempt to decode an invalid string. If strict is False then literal
+    control characters are allowed in the string.
+    
+    Returns a tuple of the decoded string and the index of the character in s
+    after the end quote."""
     if encoding is None:
         encoding = DEFAULT_ENCODING
     chunks = []
@@ -96,15 +75,19 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU
                 errmsg("Unterminated string starting at", s, begin))
         end = chunk.end()
         content, terminator = chunk.groups()
+        # Content contains zero or more unescaped string characters
         if content:
             if not isinstance(content, unicode):
                 content = unicode(content, encoding)
             _append(content)
+        # Terminator is the end of string, a literal control character,
+        # or a backslash denoting that an escape sequence follows
         if terminator == '"':
             break
         elif terminator != '\\':
             if strict:
-                raise ValueError(errmsg("Invalid control character %r at", s, end))
+                msg = "Invalid control character %r at" % (terminator,)
+                raise ValueError(msg, s, end)
             else:
                 _append(terminator)
                 continue
@@ -113,142 +96,162 @@ def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHU
         except IndexError:
             raise ValueError(
                 errmsg("Unterminated string starting at", s, begin))
+        # If not a unicode escape sequence, must be in the lookup table
         if esc != 'u':
             try:
-                m = _b[esc]
+                char = _b[esc]
             except KeyError:
                 raise ValueError(
                     errmsg("Invalid \\escape: %r" % (esc,), s, end))
             end += 1
         else:
+            # Unicode escape sequence
             esc = s[end + 1:end + 5]
             next_end = end + 5
-            msg = "Invalid \\uXXXX escape"
-            try:
-                if len(esc) != 4:
-                    raise ValueError
-                uni = int(esc, 16)
-                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
-                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
-                    if not s[end + 5:end + 7] == '\\u':
-                        raise ValueError
-                    esc2 = s[end + 7:end + 11]
-                    if len(esc2) != 4:
-                        raise ValueError
-                    uni2 = int(esc2, 16)
-                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
-                    next_end += 6
-                m = unichr(uni)
-            except ValueError:
+            if len(esc) != 4:
+                msg = "Invalid \\uXXXX escape"
                 raise ValueError(errmsg(msg, s, end))
+            uni = int(esc, 16)
+            # Check for surrogate pair on UCS-4 systems
+            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
+                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
+                if not s[end + 5:end + 7] == '\\u':
+                    raise ValueError(errmsg(msg, s, end))
+                esc2 = s[end + 7:end + 11]
+                if len(esc2) != 4:
+                    raise ValueError(errmsg(msg, s, end))
+                uni2 = int(esc2, 16)
+                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
+                next_end += 6
+            char = unichr(uni)
             end = next_end
-        _append(m)
+        # Append the unescaped character
+        _append(char)
     return u''.join(chunks), end
 
 
-# Use speedup
-try:
-    scanstring = c_scanstring
-except NameError:
-    scanstring = py_scanstring
-
-def JSONString(match, context):
-    encoding = getattr(context, 'encoding', None)
-    strict = getattr(context, 'strict', True)
-    return scanstring(match.string, match.end(), encoding, strict)
-pattern(r'"')(JSONString)
+# Use speedup if available
+scanstring = c_scanstring or py_scanstring
 
+WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
+WHITESPACE_STR = ' \t\n\r'
 
-WHITESPACE = re.compile(r'\s*', FLAGS)
-
-def JSONObject(match, context, _w=WHITESPACE.match):
+def JSONObject((s, end), encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
     pairs = {}
-    s = match.string
-    end = _w(s, match.end()).end()
+    # Use a slice to prevent IndexError from being raised, the following
+    # check will raise a more specific ValueError if the string is empty
     nextchar = s[end:end + 1]
-    # Trivial empty object
-    if nextchar == '}':
-        return pairs, end + 1
+    # Normally we expect nextchar == '"'
     if nextchar != '"':
-        raise ValueError(errmsg("Expecting property name", s, end))
+        if nextchar in _ws:
+            end = _w(s, end).end()
+            nextchar = s[end:end + 1]
+        # Trivial empty object
+        if nextchar == '}':
+            return pairs, end + 1
+        elif nextchar != '"':
+            raise ValueError(errmsg("Expecting property name", s, end))
     end += 1
-    encoding = getattr(context, 'encoding', None)
-    strict = getattr(context, 'strict', True)
-    iterscan = JSONScanner.iterscan
     while True:
         key, end = scanstring(s, end, encoding, strict)
-        end = _w(s, end).end()
+
+        # To skip some function call overhead we optimize the fast paths where
+        # the JSON key separator is ": " or just ":".
         if s[end:end + 1] != ':':
-            raise ValueError(errmsg("Expecting : delimiter", s, end))
-        end = _w(s, end + 1).end()
+            end = _w(s, end).end()
+            if s[end:end + 1] != ':':
+                raise ValueError(errmsg("Expecting : delimiter", s, end))
+
+        end += 1
+
         try:
-            value, end = iterscan(s, idx=end, context=context).next()
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
+
+        try:
+            value, end = scan_once(s, end)
         except StopIteration:
             raise ValueError(errmsg("Expecting object", s, end))
         pairs[key] = value
-        end = _w(s, end).end()
-        nextchar = s[end:end + 1]
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end = _w(s, end + 1).end()
+                nextchar = s[end]
+        except IndexError:
+            nextchar = ''
         end += 1
+
         if nextchar == '}':
             break
-        if nextchar != ',':
+        elif nextchar != ',':
             raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
-        end = _w(s, end).end()
-        nextchar = s[end:end + 1]
+
+        try:
+            nextchar = s[end]
+            if nextchar in _ws:
+                end += 1
+                nextchar = s[end]
+                if nextchar in _ws:
+                    end = _w(s, end + 1).end()
+                    nextchar = s[end]
+        except IndexError:
+            nextchar = ''
+
         end += 1
         if nextchar != '"':
             raise ValueError(errmsg("Expecting property name", s, end - 1))
-    object_hook = getattr(context, 'object_hook', None)
+
     if object_hook is not None:
         pairs = object_hook(pairs)
     return pairs, end
-pattern(r'{')(JSONObject)
 
-
-def JSONArray(match, context, _w=WHITESPACE.match):
+def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
     values = []
-    s = match.string
-    end = _w(s, match.end()).end()
-    # Look-ahead for trivial empty array
     nextchar = s[end:end + 1]
+    if nextchar in _ws:
+        end = _w(s, end + 1).end()
+        nextchar = s[end:end + 1]
+    # Look-ahead for trivial empty array
     if nextchar == ']':
         return values, end + 1
-    iterscan = JSONScanner.iterscan
+    _append = values.append
     while True:
         try:
-            value, end = iterscan(s, idx=end, context=context).next()
+            value, end = scan_once(s, end)
         except StopIteration:
             raise ValueError(errmsg("Expecting object", s, end))
-        values.append(value)
-        end = _w(s, end).end()
+        _append(value)
         nextchar = s[end:end + 1]
+        if nextchar in _ws:
+            end = _w(s, end + 1).end()
+            nextchar = s[end:end + 1]
         end += 1
         if nextchar == ']':
             break
-        if nextchar != ',':
+        elif nextchar != ',':
             raise ValueError(errmsg("Expecting , delimiter", s, end))
-        end = _w(s, end).end()
-    return values, end
-pattern(r'\[')(JSONArray)
 
+        try:
+            if s[end] in _ws:
+                end += 1
+                if s[end] in _ws:
+                    end = _w(s, end + 1).end()
+        except IndexError:
+            pass
 
-ANYTHING = [
-    JSONObject,
-    JSONArray,
-    JSONString,
-    JSONConstant,
-    JSONNumber,
-]
-
-JSONScanner = Scanner(ANYTHING)
-
+    return values, end
 
 class JSONDecoder(object):
-    """
-    Simple JSON <http://json.org> decoder
+    """Simple JSON <http://json.org> decoder
 
     Performs the following translations in decoding by default:
-    
+
     +---------------+-------------------+
     | JSON          | Python            |
     +===============+===================+
@@ -271,18 +274,15 @@ class JSONDecoder(object):
 
     It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
     their corresponding ``float`` values, which is outside the JSON spec.
-    """
 
-    _scanner = Scanner(ANYTHING)
-    __all__ = ['__init__', 'decode', 'raw_decode']
+    """
 
     def __init__(self, encoding=None, object_hook=None, parse_float=None,
             parse_int=None, parse_constant=None, strict=True):
-        """
-        ``encoding`` determines the encoding used to interpret any ``str``
+        """``encoding`` determines the encoding used to interpret any ``str``
         objects decoded by this instance (utf-8 by default).  It has no
         effect when decoding ``unicode`` objects.
-        
+
         Note that currently only encodings that are a superset of ASCII work,
         strings of other encodings should be passed in as ``unicode``.
 
@@ -302,21 +302,26 @@ class JSONDecoder(object):
         for JSON integers (e.g. float).
 
         ``parse_constant``, if specified, will be called with one of the
-        following strings: -Infinity, Infinity, NaN, null, true, false.
+        following strings: -Infinity, Infinity, NaN.
         This can be used to raise an exception if invalid JSON numbers
         are encountered.
+
         """
         self.encoding = encoding
         self.object_hook = object_hook
-        self.parse_float = parse_float
-        self.parse_int = parse_int
-        self.parse_constant = parse_constant
+        self.parse_float = parse_float or float
+        self.parse_int = parse_int or int
+        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
         self.strict = strict
+        self.parse_object = JSONObject
+        self.parse_array = JSONArray
+        self.parse_string = scanstring
+        self.scan_once = make_scanner(self)
 
     def decode(self, s, _w=WHITESPACE.match):
-        """
-        Return the Python representation of ``s`` (a ``str`` or ``unicode``
+        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
         instance containing a JSON document)
+
         """
         obj, end = self.raw_decode(s, idx=_w(s, 0).end())
         end = _w(s, end).end()
@@ -324,20 +329,17 @@ class JSONDecoder(object):
             raise ValueError(errmsg("Extra data", s, end, len(s)))
         return obj
 
-    def raw_decode(self, s, **kw):
-        """
-        Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
+    def raw_decode(self, s, idx=0):
+        """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning
         with a JSON document) and return a 2-tuple of the Python
         representation and the index in ``s`` where the document ended.
 
         This can be used to decode a JSON document from a string that may
         have extraneous data at the end.
+
         """
-        kw.setdefault('context', self)
         try:
-            obj, end = self._scanner.iterscan(s, **kw).next()
+            obj, end = self.scan_once(s, idx)
         except StopIteration:
             raise ValueError("No JSON object could be decoded")
         return obj, end
-
-__all__ = ['JSONDecoder']

+ 210 - 165
django/utils/simplejson/encoder.py

@@ -1,12 +1,9 @@
-"""
-Implementation of JSONEncoder
+"""Implementation of JSONEncoder
 """
 import re
 
-try:
-    from django.utils.simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
-except ImportError:
-    pass
+c_encode_basestring_ascii = None
+c_make_encoder = None
 
 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
@@ -27,29 +24,9 @@ for i in range(0x20):
 INFINITY = float('1e66666')
 FLOAT_REPR = repr
 
-def floatstr(o, allow_nan=True):
-    # Check for specials.  Note that this type of test is processor- and/or
-    # platform-specific, so do tests which don't depend on the internals.
-
-    if o != o:
-        text = 'NaN'
-    elif o == INFINITY:
-        text = 'Infinity'
-    elif o == -INFINITY:
-        text = '-Infinity'
-    else:
-        return FLOAT_REPR(o)
-
-    if not allow_nan:
-        raise ValueError("Out of range float values are not JSON compliant: %r"
-            % (o,))
-
-    return text
-
-
 def encode_basestring(s):
-    """
-    Return a JSON representation of a Python string
+    """Return a JSON representation of a Python string
+
     """
     def replace(match):
         return ESCAPE_DCT[match.group(0)]
@@ -57,6 +34,9 @@ def encode_basestring(s):
 
 
 def py_encode_basestring_ascii(s):
+    """Return an ASCII-only JSON representation of a Python string
+
+    """
     if isinstance(s, str) and HAS_UTF8.search(s) is not None:
         s = s.decode('utf-8')
     def replace(match):
@@ -76,18 +56,13 @@ def py_encode_basestring_ascii(s):
     return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
 
 
-try:
-    encode_basestring_ascii = c_encode_basestring_ascii
-except NameError:
-    encode_basestring_ascii = py_encode_basestring_ascii
-
+encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
 
 class JSONEncoder(object):
-    """
-    Extensible JSON <http://json.org> encoder for Python data structures.
+    """Extensible JSON <http://json.org> encoder for Python data structures.
 
     Supports the following objects and types by default:
-    
+
     +-------------------+---------------+
     | Python            | JSON          |
     +===================+===============+
@@ -110,15 +85,14 @@ class JSONEncoder(object):
     ``.default()`` method with another method that returns a serializable
     object for ``o`` if possible, otherwise it should call the superclass
     implementation (to raise ``TypeError``).
+
     """
-    __all__ = ['__init__', 'default', 'encode', 'iterencode']
     item_separator = ', '
     key_separator = ': '
     def __init__(self, skipkeys=False, ensure_ascii=True,
             check_circular=True, allow_nan=True, sort_keys=False,
             indent=None, separators=None, encoding='utf-8', default=None):
-        """
-        Constructor for JSONEncoder, with sensible defaults.
+        """Constructor for JSONEncoder, with sensible defaults.
 
         If skipkeys is False, then it is a TypeError to attempt
         encoding of keys that are not str, int, long, float or None.  If
@@ -158,6 +132,7 @@ class JSONEncoder(object):
         If encoding is not None, then all input strings will be
         transformed into unicode using that encoding prior to JSON-encoding.
         The default is UTF-8.
+
         """
 
         self.skipkeys = skipkeys
@@ -166,17 +141,132 @@ class JSONEncoder(object):
         self.allow_nan = allow_nan
         self.sort_keys = sort_keys
         self.indent = indent
-        self.current_indent_level = 0
         if separators is not None:
             self.item_separator, self.key_separator = separators
         if default is not None:
             self.default = default
         self.encoding = encoding
 
-    def _newline_indent(self):
-        return '\n' + (' ' * (self.indent * self.current_indent_level))
+    def default(self, o):
+        """Implement this method in a subclass such that it returns
+        a serializable object for ``o``, or calls the base implementation
+        (to raise a ``TypeError``).
+
+        For example, to support arbitrary iterators, you could
+        implement default like this::
+
+            def default(self, o):
+                try:
+                    iterable = iter(o)
+                except TypeError:
+                    pass
+                else:
+                    return list(iterable)
+                return JSONEncoder.default(self, o)
+
+        """
+        raise TypeError("%r is not JSON serializable" % (o,))
+
+    def encode(self, o):
+        """Return a JSON string representation of a Python data structure.
+
+        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
+        '{"foo": ["bar", "baz"]}'
+
+        """
+        # This is for extremely simple cases and benchmarks.
+        if isinstance(o, basestring):
+            if isinstance(o, str):
+                _encoding = self.encoding
+                if (_encoding is not None
+                        and not (_encoding == 'utf-8')):
+                    o = o.decode(_encoding)
+            if self.ensure_ascii:
+                return encode_basestring_ascii(o)
+            else:
+                return encode_basestring(o)
+        # This doesn't pass the iterator directly to ''.join() because the
+        # exceptions aren't as detailed.  The list call should be roughly
+        # equivalent to the PySequence_Fast that ''.join() would do.
+        chunks = self.iterencode(o, _one_shot=True)
+        if not isinstance(chunks, (list, tuple)):
+            chunks = list(chunks)
+        return ''.join(chunks)
+
+    def iterencode(self, o, _one_shot=False):
+        """Encode the given object and yield each string
+        representation as available.
+
+        For example::
+
+            for chunk in JSONEncoder().iterencode(bigobject):
+                mysocket.write(chunk)
+
+        """
+        if self.check_circular:
+            markers = {}
+        else:
+            markers = None
+        if self.ensure_ascii:
+            _encoder = encode_basestring_ascii
+        else:
+            _encoder = encode_basestring
+        if self.encoding != 'utf-8':
+            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
+                if isinstance(o, str):
+                    o = o.decode(_encoding)
+                return _orig_encoder(o)
+
+        def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
+            # Check for specials.  Note that this type of test is processor- and/or
+            # platform-specific, so do tests which don't depend on the internals.
+
+            if o != o:
+                text = 'NaN'
+            elif o == _inf:
+                text = 'Infinity'
+            elif o == _neginf:
+                text = '-Infinity'
+            else:
+                return _repr(o)
+
+            if not allow_nan:
+                raise ValueError("Out of range float values are not JSON compliant: %r"
+                    % (o,))
+
+            return text
 
-    def _iterencode_list(self, lst, markers=None):
+
+        if _one_shot and c_make_encoder is not None and not self.indent and not self.sort_keys:
+            _iterencode = c_make_encoder(
+                markers, self.default, _encoder, self.indent,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, self.allow_nan)
+        else:
+            _iterencode = _make_iterencode(
+                markers, self.default, _encoder, self.indent, floatstr,
+                self.key_separator, self.item_separator, self.sort_keys,
+                self.skipkeys, _one_shot)
+        return _iterencode(o, 0)
+
+def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
+        ## HACK: hand-optimized bytecode; turn globals into locals
+        False=False,
+        True=True,
+        ValueError=ValueError,
+        basestring=basestring,
+        dict=dict,
+        float=float,
+        id=id,
+        int=int,
+        isinstance=isinstance,
+        list=list,
+        long=long,
+        str=str,
+        tuple=tuple,
+    ):
+
+    def _iterencode_list(lst, _current_indent_level):
         if not lst:
             yield '[]'
             return
@@ -185,31 +275,51 @@ class JSONEncoder(object):
             if markerid in markers:
                 raise ValueError("Circular reference detected")
             markers[markerid] = lst
-        yield '['
-        if self.indent is not None:
-            self.current_indent_level += 1
-            newline_indent = self._newline_indent()
-            separator = self.item_separator + newline_indent
-            yield newline_indent
+        buf = '['
+        if _indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
+            separator = _item_separator + newline_indent
+            buf += newline_indent
         else:
             newline_indent = None
-            separator = self.item_separator
+            separator = _item_separator
         first = True
         for value in lst:
             if first:
                 first = False
             else:
-                yield separator
-            for chunk in self._iterencode(value, markers):
-                yield chunk
+                buf = separator
+            if isinstance(value, basestring):
+                yield buf + _encoder(value)
+            elif value is None:
+                yield buf + 'null'
+            elif value is True:
+                yield buf + 'true'
+            elif value is False:
+                yield buf + 'false'
+            elif isinstance(value, (int, long)):
+                yield buf + str(value)
+            elif isinstance(value, float):
+                yield buf + _floatstr(value)
+            else:
+                yield buf
+                if isinstance(value, (list, tuple)):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                elif isinstance(value, dict):
+                    chunks = _iterencode_dict(value, _current_indent_level)
+                else:
+                    chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
         if newline_indent is not None:
-            self.current_indent_level -= 1
-            yield self._newline_indent()
+            _current_indent_level -= 1
+            yield '\n' + (' ' * (_indent * _current_indent_level))
         yield ']'
         if markers is not None:
             del markers[markerid]
 
-    def _iterencode_dict(self, dct, markers=None):
+    def _iterencode_dict(dct, _current_indent_level):
         if not dct:
             yield '{}'
             return
@@ -219,40 +329,27 @@ class JSONEncoder(object):
                 raise ValueError("Circular reference detected")
             markers[markerid] = dct
         yield '{'
-        key_separator = self.key_separator
-        if self.indent is not None:
-            self.current_indent_level += 1
-            newline_indent = self._newline_indent()
-            item_separator = self.item_separator + newline_indent
+        if _indent is not None:
+            _current_indent_level += 1
+            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
+            item_separator = _item_separator + newline_indent
             yield newline_indent
         else:
             newline_indent = None
-            item_separator = self.item_separator
+            item_separator = _item_separator
         first = True
-        if self.ensure_ascii:
-            encoder = encode_basestring_ascii
-        else:
-            encoder = encode_basestring
-        allow_nan = self.allow_nan
-        if self.sort_keys:
-            keys = dct.keys()
-            keys.sort()
-            items = [(k, dct[k]) for k in keys]
+        if _sort_keys:
+            items = dct.items()
+            items.sort(key=lambda kv: kv[0])
         else:
             items = dct.iteritems()
-        _encoding = self.encoding
-        _do_decode = (_encoding is not None
-            and not (_encoding == 'utf-8'))
         for key, value in items:
-            if isinstance(key, str):
-                if _do_decode:
-                    key = key.decode(_encoding)
-            elif isinstance(key, basestring):
+            if isinstance(key, basestring):
                 pass
             # JavaScript is weakly typed for these, so it makes sense to
             # also allow them.  Many encoders seem to do something like this.
             elif isinstance(key, float):
-                key = floatstr(key, allow_nan)
+                key = _floatstr(key)
             elif isinstance(key, (int, long)):
                 key = str(key)
             elif key is True:
@@ -261,7 +358,7 @@ class JSONEncoder(object):
                 key = 'false'
             elif key is None:
                 key = 'null'
-            elif self.skipkeys:
+            elif _skipkeys:
                 continue
             else:
                 raise TypeError("key %r is not a string" % (key,))
@@ -269,28 +366,39 @@ class JSONEncoder(object):
                 first = False
             else:
                 yield item_separator
-            yield encoder(key)
-            yield key_separator
-            for chunk in self._iterencode(value, markers):
-                yield chunk
+            yield _encoder(key)
+            yield _key_separator
+            if isinstance(value, basestring):
+                yield _encoder(value)
+            elif value is None:
+                yield 'null'
+            elif value is True:
+                yield 'true'
+            elif value is False:
+                yield 'false'
+            elif isinstance(value, (int, long)):
+                yield str(value)
+            elif isinstance(value, float):
+                yield _floatstr(value)
+            else:
+                if isinstance(value, (list, tuple)):
+                    chunks = _iterencode_list(value, _current_indent_level)
+                elif isinstance(value, dict):
+                    chunks = _iterencode_dict(value, _current_indent_level)
+                else:
+                    chunks = _iterencode(value, _current_indent_level)
+                for chunk in chunks:
+                    yield chunk
         if newline_indent is not None:
-            self.current_indent_level -= 1
-            yield self._newline_indent()
+            _current_indent_level -= 1
+            yield '\n' + (' ' * (_indent * _current_indent_level))
         yield '}'
         if markers is not None:
             del markers[markerid]
 
-    def _iterencode(self, o, markers=None):
+    def _iterencode(o, _current_indent_level):
         if isinstance(o, basestring):
-            if self.ensure_ascii:
-                encoder = encode_basestring_ascii
-            else:
-                encoder = encode_basestring
-            _encoding = self.encoding
-            if (_encoding is not None and isinstance(o, str)
-                    and not (_encoding == 'utf-8')):
-                o = o.decode(_encoding)
-            yield encoder(o)
+            yield _encoder(o)
         elif o is None:
             yield 'null'
         elif o is True:
@@ -300,12 +408,12 @@ class JSONEncoder(object):
         elif isinstance(o, (int, long)):
             yield str(o)
         elif isinstance(o, float):
-            yield floatstr(o, self.allow_nan)
+            yield _floatstr(o)
         elif isinstance(o, (list, tuple)):
-            for chunk in self._iterencode_list(o, markers):
+            for chunk in _iterencode_list(o, _current_indent_level):
                 yield chunk
         elif isinstance(o, dict):
-            for chunk in self._iterencode_dict(o, markers):
+            for chunk in _iterencode_dict(o, _current_indent_level):
                 yield chunk
         else:
             if markers is not None:
@@ -313,73 +421,10 @@ class JSONEncoder(object):
                 if markerid in markers:
                     raise ValueError("Circular reference detected")
                 markers[markerid] = o
-            for chunk in self._iterencode_default(o, markers):
+            o = _default(o)
+            for chunk in _iterencode(o, _current_indent_level):
                 yield chunk
             if markers is not None:
                 del markers[markerid]
 
-    def _iterencode_default(self, o, markers=None):
-        newobj = self.default(o)
-        return self._iterencode(newobj, markers)
-
-    def default(self, o):
-        """
-        Implement this method in a subclass such that it returns
-        a serializable object for ``o``, or calls the base implementation
-        (to raise a ``TypeError``).
-
-        For example, to support arbitrary iterators, you could
-        implement default like this::
-            
-            def default(self, o):
-                try:
-                    iterable = iter(o)
-                except TypeError:
-                    pass
-                else:
-                    return list(iterable)
-                return JSONEncoder.default(self, o)
-        """
-        raise TypeError("%r is not JSON serializable" % (o,))
-
-    def encode(self, o):
-        """
-        Return a JSON string representation of a Python data structure.
-
-        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
-        '{"foo": ["bar", "baz"]}'
-        """
-        # This is for extremely simple cases and benchmarks.
-        if isinstance(o, basestring):
-            if isinstance(o, str):
-                _encoding = self.encoding
-                if (_encoding is not None 
-                        and not (_encoding == 'utf-8')):
-                    o = o.decode(_encoding)
-            if self.ensure_ascii:
-                return encode_basestring_ascii(o)
-            else:
-                return encode_basestring(o)
-        # This doesn't pass the iterator directly to ''.join() because the
-        # exceptions aren't as detailed.  The list call should be roughly
-        # equivalent to the PySequence_Fast that ''.join() would do.
-        chunks = list(self.iterencode(o))
-        return ''.join(chunks)
-
-    def iterencode(self, o):
-        """
-        Encode the given object and yield each string
-        representation as available.
-        
-        For example::
-            
-            for chunk in JSONEncoder().iterencode(bigobject):
-                mysocket.write(chunk)
-        """
-        if self.check_circular:
-            markers = {}
-        else:
-            markers = None
-        return self._iterencode(o, markers)
-
-__all__ = ['JSONEncoder']
+    return _iterencode

+ 56 - 58
django/utils/simplejson/scanner.py

@@ -1,67 +1,65 @@
-"""
-Iterator based sre token scanner
+"""JSON token scanner
 """
 import re
-from re import VERBOSE, MULTILINE, DOTALL
-import sre_parse
-import sre_compile
-import sre_constants
-from sre_constants import BRANCH, SUBPATTERN
+try:
+    from simplejson._speedups import make_scanner as c_make_scanner
+except ImportError:
+    c_make_scanner = None
+
+__all__ = ['make_scanner']
 
-__all__ = ['Scanner', 'pattern']
+NUMBER_RE = re.compile(
+    r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?',
+    (re.VERBOSE | re.MULTILINE | re.DOTALL))
 
-FLAGS = (VERBOSE | MULTILINE | DOTALL)
+def py_make_scanner(context):
+    parse_object = context.parse_object
+    parse_array = context.parse_array
+    parse_string = context.parse_string
+    match_number = NUMBER_RE.match
+    encoding = context.encoding
+    strict = context.strict
+    parse_float = context.parse_float
+    parse_int = context.parse_int
+    parse_constant = context.parse_constant
+    object_hook = context.object_hook
 
-class Scanner(object):
-    def __init__(self, lexicon, flags=FLAGS):
-        self.actions = [None]
-        # Combine phrases into a compound pattern
-        s = sre_parse.Pattern()
-        s.flags = flags
-        p = []
-        for idx, token in enumerate(lexicon):
-            phrase = token.pattern
-            try:
-                subpattern = sre_parse.SubPattern(s,
-                    [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))])
-            except sre_constants.error:
-                raise
-            p.append(subpattern)
-            self.actions.append(token)
+    def _scan_once(string, idx):
+        try:
+            nextchar = string[idx]
+        except IndexError:
+            raise StopIteration
 
-        s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work
-        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
-        self.scanner = sre_compile.compile(p)
+        if nextchar == '"':
+            return parse_string(string, idx + 1, encoding, strict)
+        elif nextchar == '{':
+            return parse_object((string, idx + 1), encoding, strict, _scan_once, object_hook)
+        elif nextchar == '[':
+            return parse_array((string, idx + 1), _scan_once)
+        elif nextchar == 'n' and string[idx:idx + 4] == 'null':
+            return None, idx + 4
+        elif nextchar == 't' and string[idx:idx + 4] == 'true':
+            return True, idx + 4
+        elif nextchar == 'f' and string[idx:idx + 5] == 'false':
+            return False, idx + 5
 
-    def iterscan(self, string, idx=0, context=None):
-        """
-        Yield match, end_idx for each match
-        """
-        match = self.scanner.scanner(string, idx).match
-        actions = self.actions
-        lastend = idx
-        end = len(string)
-        while True:
-            m = match()
-            if m is None:
-                break
-            matchbegin, matchend = m.span()
-            if lastend == matchend:
-                break
-            action = actions[m.lastindex]
-            if action is not None:
-                rval, next_pos = action(m, context)
-                if next_pos is not None and next_pos != matchend:
-                    # "fast forward" the scanner
-                    matchend = next_pos
-                    match = self.scanner.scanner(string, matchend).match
-                yield rval, matchend
-            lastend = matchend
+        m = match_number(string, idx)
+        if m is not None:
+            integer, frac, exp = m.groups()
+            if frac or exp:
+                res = parse_float(integer + (frac or '') + (exp or ''))
+            else:
+                res = parse_int(integer)
+            return res, m.end()
+        elif nextchar == 'N' and string[idx:idx + 3] == 'NaN':
+            return parse_constant('NaN'), idx + 3
+        elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity':
+            return parse_constant('Infinity'), idx + 8
+        elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
+            return parse_constant('-Infinity'), idx + 9
+        else:
+            raise StopIteration
 
+    return _scan_once
 
-def pattern(pattern, flags=FLAGS):
-    def decorator(fn):
-        fn.pattern = pattern
-        fn.regex = re.compile(pattern, flags)
-        return fn
-    return decorator
+make_scanner = c_make_scanner or py_make_scanner

+ 5 - 14
django/utils/simplejson/tool.py

@@ -1,23 +1,14 @@
-r"""
-Using simplejson from the shell to validate and
+r"""Using simplejson from the shell to validate and
 pretty-print::
-    
-    $ echo '{"json":"obj"}' | python -msimplejson
+
+    $ echo '{"json":"obj"}' | python -msimplejson.tool
     {
         "json": "obj"
     }
-    $ echo '{ 1.2:3.4}' | python -msimplejson
+    $ echo '{ 1.2:3.4}' | python -msimplejson.tool
     Expecting property name: line 1 column 2 (char 2)
-
-Note that the JSON produced by this module's default settings
-is a subset of YAML, so it may be used as a serializer for that as well.
 """
-import django.utils.simplejson
-
-#
-# Pretty printer:
-#     curl http://mochikit.com/examples/ajax_tables/domains.json | python -msimplejson.tool
-#
+from django.utils import simplejson
 
 def main():
     import sys

+ 12 - 5
docs/topics/serialization.txt

@@ -162,11 +162,17 @@ For example::
     json_serializer = serializers.get_serializer("json")()
     json_serializer.serialize(queryset, ensure_ascii=False, stream=response)
 
-The Django source code includes the simplejson_ module. Be aware that if you're
-serializing using that module directly, not all Django output can be passed
-unmodified to simplejson. In particular, :ref:`lazy translation objects
-<lazy-translations>` need a `special encoder`_ written for them. Something like
-this will work::
+The Django source code includes the simplejson_ module. However, if you're
+using Python 2.6 (which includes a builtin version of the module), Django will
+use the builtin ``json`` module automatically. If you have a system-installed
+version that includes the C-based speedup extension, or your system version is
+more recent than the version shipped with Django (currently, 2.0.7), the
+system version will be used instead of the version included with Django.
+
+Be aware that if you're serializing using simplejson directly, not all Django
+output can be passed to it unmodified. In particular, :ref:`lazy
+translation objects <lazy-translations>` need a `special encoder`_ written for
+them. Something like this will work::
 
     from django.utils.functional import Promise
     from django.utils.encoding import force_unicode
@@ -178,3 +184,4 @@ this will work::
             return obj
 
 .. _special encoder: http://svn.red-bean.com/bob/simplejson/tags/simplejson-1.7/docs/index.html
+